Search in sources :

Example 76 with ApplicationId

use of co.cask.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class AppLifecycleHttpHandlerTest method testDelete.

/**
   * Tests deleting applications with versioned and non-versioned API.
   *
   * The scenario checks, in order: deleting a non-existing app (404), deleting a single app and
   * all apps while a flow is running (409 with an explanatory message), deleting from the wrong
   * namespace (404), a successful delete (200) followed by a repeated delete (404), and finally
   * that the artifact is still listed after the app has been deleted.
   */
@Test
public void testDelete() throws Exception {
    // Delete a non-existing app
    HttpResponse response = doDelete(getVersionedAPIPath("apps/XYZ", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // Deploy the app and start a flow for it
    deploy(WordCountApp.class, Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1);
    Id.Program program = Id.Program.from(TEST_NAMESPACE1, "WordCountApp", ProgramType.FLOW, "WordCountFlow");
    startProgram(program);
    waitState(program, "RUNNING");
    // Try to delete an App while its flow is running
    response = doDelete(getVersionedAPIPath("apps/WordCountApp", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(409, response.getStatusLine().getStatusCode());
    Assert.assertEquals("'" + program.getApplication() + "' could not be deleted. Reason: The following programs are still running: " + program.getId(), readResponse(response));
    // Stop the flow and start it again so that it is running for the delete-all attempt below
    stopProgram(program);
    waitState(program, "STOPPED");
    startProgram(program);
    waitState(program, "RUNNING");
    // Try to delete all Apps while flow is running
    response = doDelete(getVersionedAPIPath("apps", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(409, response.getStatusLine().getStatusCode());
    Assert.assertEquals("'" + program.getNamespace() + "' could not be deleted. Reason: The following programs are still running: " + program.getApplicationId() + ": " + program.getId(), readResponse(response));
    // Stop the flow before the remaining delete attempts
    stopProgram(program);
    waitState(program, "STOPPED");
    // Delete the app in the wrong namespace
    response = doDelete(getVersionedAPIPath("apps/WordCountApp", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE2));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // Delete a non-existing app with version
    response = doDelete(getVersionedAPIPath("apps/XYZ/versions/" + VERSION1, Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // Deploy an app with version; both the artifact and the app are created in the default namespace
    Id.Artifact wordCountArtifactId = Id.Artifact.from(Id.Namespace.DEFAULT, "wordcountapp", VERSION1);
    addAppArtifact(wordCountArtifactId, WordCountApp.class);
    AppRequest<? extends Config> wordCountRequest = new AppRequest<>(new ArtifactSummary(wordCountArtifactId.getName(), wordCountArtifactId.getVersion().getVersion()));
    ApplicationId wordCountApp1 = NamespaceId.DEFAULT.app("WordCountApp", VERSION1);
    Assert.assertEquals(200, deploy(wordCountApp1, wordCountRequest).getStatusLine().getStatusCode());
    // Start a flow for the App
    ProgramId program1 = wordCountApp1.program(ProgramType.FLOW, "WordCountFlow");
    startProgram(program1, 200);
    waitState(program1, "RUNNING");
    // Try to delete an App while its flow is running
    response = doDelete(getVersionedAPIPath(String.format("apps/%s/versions/%s", wordCountApp1.getApplication(), wordCountApp1.getVersion()), Constants.Gateway.API_VERSION_3_TOKEN, wordCountApp1.getNamespace()));
    Assert.assertEquals(409, response.getStatusLine().getStatusCode());
    Assert.assertEquals("'" + program1.getParent() + "' could not be deleted. Reason: The following programs" + " are still running: " + program1.getProgram(), readResponse(response));
    stopProgram(program1, null, 200, null);
    waitState(program1, "STOPPED");
    // Delete the app with version in the wrong namespace
    response = doDelete(getVersionedAPIPath(String.format("apps/%s/versions/%s", wordCountApp1.getApplication(), wordCountApp1.getVersion()), Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE2));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // Delete the app with version after stopping the flow
    response = doDelete(getVersionedAPIPath(String.format("apps/%s/versions/%s", wordCountApp1.getApplication(), wordCountApp1.getVersion()), Constants.Gateway.API_VERSION_3_TOKEN, wordCountApp1.getNamespace()));
    Assert.assertEquals(200, response.getStatusLine().getStatusCode());
    // Deleting the same version a second time is expected to return 404
    response = doDelete(getVersionedAPIPath(String.format("apps/%s/versions/%s", wordCountApp1.getApplication(), wordCountApp1.getVersion()), Constants.Gateway.API_VERSION_3_TOKEN, wordCountApp1.getNamespace()));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // Delete the App after stopping the flow
    response = doDelete(getVersionedAPIPath("apps/WordCountApp/", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(200, response.getStatusLine().getStatusCode());
    // Deleting the same app a second time is expected to return 404
    response = doDelete(getVersionedAPIPath("apps/WordCountApp/", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(404, response.getStatusLine().getStatusCode());
    // deleting the app should not delete the artifact
    response = doGet(getVersionedAPIPath("artifacts/WordCountApp", Constants.Gateway.API_VERSION_3_TOKEN, TEST_NAMESPACE1));
    Assert.assertEquals(200, response.getStatusLine().getStatusCode());
    List<ArtifactSummary> summaries = readResponse(response, new TypeToken<List<ArtifactSummary>>() {
    }.getType());
    Assert.assertFalse(summaries.isEmpty());
    // cleanup: remove the default namespace, where the versioned app and its artifact were created
    deleteNamespace(NamespaceId.DEFAULT.getNamespace());
}
Also used : ArtifactSummary(co.cask.cdap.api.artifact.ArtifactSummary) TypeToken(com.google.gson.reflect.TypeToken) HttpResponse(org.apache.http.HttpResponse) NamespaceId(co.cask.cdap.proto.id.NamespaceId) Id(co.cask.cdap.proto.Id) ProgramId(co.cask.cdap.proto.id.ProgramId) ArtifactId(co.cask.cdap.proto.id.ArtifactId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ApplicationId(co.cask.cdap.proto.id.ApplicationId) ProgramId(co.cask.cdap.proto.id.ProgramId) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 77 with ApplicationId

use of co.cask.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testPipelineWithAllActions.

/**
 * Runs a pipeline that consists only of action stages (action1 followed by action2 and action3)
 * and checks that every action wrote its value to the shared table and to the workflow token.
 */
@Test
public void testPipelineWithAllActions() throws Exception {
    // all three actions write one cell each into the same table
    final String tableName = "actionTable";
    final String row1 = "action1.row";
    final String col1 = "action1.column";
    final String val1 = "action1.value";
    final String row2 = "action2.row";
    final String col2 = "action2.column";
    final String val2 = "action2.value";
    final String row3 = "action3.row";
    final String col3 = "action3.column";
    final String val3 = "action3.value";
    // the workflow token is looked up under the row key concatenated with the column key
    final String tokenKey1 = row1 + col1;
    final String tokenKey2 = row2 + col2;
    final String tokenKey3 = row3 + col3;

    ETLStage firstAction = new ETLStage("action1", MockAction.getPlugin(tableName, row1, col1, val1));
    ETLStage secondAction = new ETLStage("action2", MockAction.getPlugin(tableName, row2, col2, val2));
    ETLStage thirdAction = new ETLStage("action3", MockAction.getPlugin(tableName, row3, col3, val3));
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(firstAction)
        .addStage(secondAction)
        .addStage(thirdAction)
        .addConnection("action1", "action2")
        .addConnection("action1", "action3")
        .setEngine(Engine.MAPREDUCE)
        .build();

    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("MyActionOnlyApp");
    ApplicationManager application = deployApplication(appId.toId(), deployRequest);

    // run the workflow once and wait up to five minutes for it to complete
    WorkflowManager workflow = application.getWorkflowManager(SmartWorkflow.NAME).start();
    workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // each action must have written its value into the table
    DataSetManager<Table> actionOutput = getDataset(tableName);
    Assert.assertEquals(val1, MockAction.readOutput(actionOutput, row1, col1));
    Assert.assertEquals(val2, MockAction.readOutput(actionOutput, row2, col2));
    Assert.assertEquals(val3, MockAction.readOutput(actionOutput, row3, col3));

    // exactly one completed run is expected; its tokens must carry the same values
    List<RunRecord> completedRuns = workflow.getHistory(ProgramRunStatus.COMPLETED);
    Assert.assertEquals(1, completedRuns.size());
    RunRecord onlyRun = completedRuns.get(0);
    String runId = onlyRun.getPid();

    WorkflowTokenDetail token = workflow.getToken(runId, WorkflowToken.Scope.USER, tokenKey1);
    validateToken(token, tokenKey1, val1);
    token = workflow.getToken(runId, WorkflowToken.Scope.USER, tokenKey2);
    validateToken(token, tokenKey2, val2);
    token = workflow.getToken(runId, WorkflowToken.Scope.USER, tokenKey3);
    validateToken(token, tokenKey3, val3);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) WorkflowManager(co.cask.cdap.test.WorkflowManager) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) RunRecord(co.cask.cdap.proto.RunRecord) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId) WorkflowTokenDetail(co.cask.cdap.proto.WorkflowTokenDetail) Test(org.junit.Test)

Example 78 with ApplicationId

use of co.cask.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testTableLookup.

/**
 * Runs a source -> lookup transform -> sink pipeline in which the transform reads from a
 * pre-populated "personTable" dataset, then checks the sink output and the record metrics.
 */
@Test
public void testTableLookup() throws Exception {
    addDatasetInstance(Table.class.getName(), "personTable");
    DataSetManager<Table> personTableManager = getDataset("personTable");
    Table personTable = personTableManager.get();
    // each entry is {person, age, gender}; the person name is the row key in the lookup table
    String[][] people = {
        {"samuel", "12", "m"},
        {"bob", "36", "m"},
        {"jane", "25", "f"}
    };
    for (String[] person : people) {
        personTable.put(person[0].getBytes(Charsets.UTF_8), "age".getBytes(Charsets.UTF_8), person[1].getBytes(Charsets.UTF_8));
        personTable.put(person[0].getBytes(Charsets.UTF_8), "gender".getBytes(Charsets.UTF_8), person[2].getBytes(Charsets.UTF_8));
    }
    personTableManager.flush();

    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MockSource.getPlugin("inputTable")))
        .addStage(new ETLStage("transform", LookupTransform.getPlugin("person", "age", "personTable")))
        .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
        .addConnection("source", "transform")
        .addConnection("transform", "sink")
        .build();
    ApplicationId appId = NamespaceId.DEFAULT.app("testTableLookup");
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager application = deployApplication(appId, deployRequest);

    // set up input data: one record per person, carrying only the name
    Schema personOnlySchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)));
    StructuredRecord samuelInput = StructuredRecord.builder(personOnlySchema).set("person", "samuel").build();
    StructuredRecord bobInput = StructuredRecord.builder(personOnlySchema).set("person", "bob").build();
    StructuredRecord janeInput = StructuredRecord.builder(personOnlySchema).set("person", "jane").build();
    DataSetManager<Table> sourceTable = getDataset("inputTable");
    MockSource.writeInput(sourceTable, ImmutableList.of(samuelInput, bobInput, janeInput));

    // run the workflow once and wait up to five minutes for it to complete
    WorkflowManager workflow = application.getWorkflowManager(SmartWorkflow.NAME);
    workflow.start();
    workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // the sink is expected to hold each person together with the looked-up age and gender
    Schema enrichedSchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)), Schema.Field.of("age", Schema.of(Schema.Type.STRING)), Schema.Field.of("gender", Schema.of(Schema.Type.STRING)));
    Set<StructuredRecord> expectedRecords = new HashSet<>();
    for (String[] person : people) {
        expectedRecords.add(StructuredRecord.builder(enrichedSchema).set("person", person[0]).set("age", person[1]).set("gender", person[2]).build());
    }
    DataSetManager<Table> sinkTable = getDataset("outputTable");
    Set<StructuredRecord> actualRecords = new HashSet<>(MockSink.readOutput(sinkTable));
    Assert.assertEquals(expectedRecords, actualRecords);

    validateMetric(3, appId, "source.records.out");
    validateMetric(3, appId, "sink.records.in");

    // remove the input and output datasets created for this test
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset("inputTable"));
    deleteDatasetInstance(NamespaceId.DEFAULT.dataset("outputTable"));
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 79 with ApplicationId

use of co.cask.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testErrorTransform.

/**
 * Deploys and runs a batch pipeline in which several stages emit error records to two error
 * transforms, then verifies the contents of both sinks and the per-stage record metrics.
 *
 * @param engine the engine the pipeline is configured with; it is also appended to the dataset
 *               and application names used by this test
 * @throws Exception if deploying, running or validating the pipeline fails
 */
private void testErrorTransform(Engine engine) throws Exception {
    // Dataset names are suffixed with the engine
    String source1TableName = "errTestIn1-" + engine;
    String source2TableName = "errTestIn2-" + engine;
    String sink1TableName = "errTestOut1-" + engine;
    String sink2TableName = "errTestOut2-" + engine;
    Schema inputSchema = Schema.recordOf("user", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("id", Schema.of(Schema.Type.INT)));
    /*
     *
     * source1 --> filter1 --> filter2 --> agg1 --> agg2
     *                |           |         |        |
     *                |-----------|---------|--------|--------|--> errorflatten --> sink1
     *                |                                       |
     *                |                                       |--> errorfilter --> sink2
     *                |
     * source2 --> dropnull
     *
     * arrows coming out the right represent output records
     * arrows coming out the bottom represent error records
     * this will test multiple stages from multiple phases emitting errors to the same stage
     * as well as errors from one stage going to multiple stages
     * and transforms that have an error schema different from their output schema
     */
    ETLBatchConfig config = ETLBatchConfig.builder("* * * * *").setEngine(engine).addStage(new ETLStage("source1", MockSource.getPlugin(source1TableName, inputSchema))).addStage(new ETLStage("source2", MockSource.getPlugin(source2TableName, inputSchema))).addStage(new ETLStage("filter1", StringValueFilterTransform.getPlugin("name", "Leo"))).addStage(new ETLStage("filter2", StringValueFilterTransform.getPlugin("name", "Ralph"))).addStage(new ETLStage("agg1", GroupFilterAggregator.getPlugin("name", "Don"))).addStage(new ETLStage("agg2", GroupFilterAggregator.getPlugin("name", "Mike"))).addStage(new ETLStage("errorflatten", FlattenErrorTransform.getPlugin())).addStage(new ETLStage("errorfilter", FilterErrorTransform.getPlugin(3))).addStage(new ETLStage("dropnull", DropNullTransform.getPlugin("name"))).addStage(new ETLStage("sink1", MockSink.getPlugin(sink1TableName))).addStage(new ETLStage("sink2", MockSink.getPlugin(sink2TableName))).addConnection("source1", "filter1").addConnection("source2", "dropnull").addConnection("filter1", "filter2").addConnection("filter2", "agg1").addConnection("agg1", "agg2").addConnection("filter1", "errorflatten").addConnection("filter1", "errorfilter").addConnection("filter2", "errorflatten").addConnection("filter2", "errorfilter").addConnection("agg1", "errorflatten").addConnection("agg1", "errorfilter").addConnection("agg2", "errorflatten").addConnection("agg2", "errorfilter").addConnection("dropnull", "errorflatten").addConnection("dropnull", "errorfilter").addConnection("errorflatten", "sink1").addConnection("errorfilter", "sink2").build();
    // Deploy the pipeline as an application in the default namespace
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app("ErrTransformTest-" + engine);
    ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
    // source1 receives four records: Leo, Ralph, Don and Mike
    List<StructuredRecord> input = ImmutableList.of(StructuredRecord.builder(inputSchema).set("name", "Leo").set("id", 1).build(), StructuredRecord.builder(inputSchema).set("name", "Ralph").set("id", 2).build(), StructuredRecord.builder(inputSchema).set("name", "Don").set("id", 3).build(), StructuredRecord.builder(inputSchema).set("name", "Mike").set("id", 4).build());
    DataSetManager<Table> source1Table = getDataset(source1TableName);
    MockSource.writeInput(source1Table, input);
    // source2 receives a single record: April
    input = ImmutableList.of(StructuredRecord.builder(inputSchema).set("name", "April").set("id", 5).build());
    DataSetManager<Table> source2Table = getDataset(source2TableName);
    MockSource.writeInput(source2Table, input);
    // Run the workflow once and wait up to five minutes for it to complete
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // Records expected in sink1 carry the input fields plus error message, error code and error stage
    Schema flattenSchema = Schema.recordOf("erroruser", Schema.Field.of("name", Schema.of(Schema.Type.STRING)), Schema.Field.of("id", Schema.of(Schema.Type.INT)), Schema.Field.of("errMsg", Schema.nullableOf(Schema.of(Schema.Type.STRING))), Schema.Field.of("errCode", Schema.nullableOf(Schema.of(Schema.Type.INT))), Schema.Field.of("errStage", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    // sink1 is expected to hold one flattened error record for each of the five input records
    Set<StructuredRecord> expected = ImmutableSet.of(StructuredRecord.builder(flattenSchema).set("name", "Leo").set("id", 1).set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter1").build(), StructuredRecord.builder(flattenSchema).set("name", "Ralph").set("id", 2).set("errMsg", "bad string value").set("errCode", 1).set("errStage", "filter2").build(), StructuredRecord.builder(flattenSchema).set("name", "Don").set("id", 3).set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg1").build(), StructuredRecord.builder(flattenSchema).set("name", "Mike").set("id", 4).set("errMsg", "bad val").set("errCode", 3).set("errStage", "agg2").build(), StructuredRecord.builder(flattenSchema).set("name", "April").set("id", 5).set("errMsg", "Field name was not null").set("errCode", 5).set("errStage", "dropnull").build());
    DataSetManager<Table> sink1Table = getDataset(sink1TableName);
    Assert.assertEquals(expected, ImmutableSet.copyOf(MockSink.readOutput(sink1Table)));
    // sink2 is expected to hold only Leo, Ralph and April, in the original input schema.
    // NOTE(review): presumably errorfilter drops errors with code 3 (Don, Mike) - confirm against FilterErrorTransform
    expected = ImmutableSet.of(StructuredRecord.builder(inputSchema).set("name", "Leo").set("id", 1).build(), StructuredRecord.builder(inputSchema).set("name", "Ralph").set("id", 2).build(), StructuredRecord.builder(inputSchema).set("name", "April").set("id", 5).build());
    DataSetManager<Table> sink2Table = getDataset(sink2TableName);
    Assert.assertEquals(expected, ImmutableSet.copyOf(MockSink.readOutput(sink2Table)));
    /*
     *
     * source1 (4) --> filter1 (3) --> filter2 (2) --> agg1 (1) --> agg2
     *                   |                |              |            |
     *                  (1)              (1)            (1)          (1)
     *                   |----------------|--------------|------------|--------|--> errorflatten (5) --> sink1
     *                   |                                                     |
     *                  (1)                                                    |--> errorfilter (3) --> sink2
     *                   |
     * source2 --> dropnull
     */
    // The numbers in the diagram above are the record counts asserted below
    validateMetric(4, appId, "source1.records.out");
    validateMetric(1, appId, "source2.records.out");
    validateMetric(1, appId, "dropnull.records.error");
    validateMetric(3, appId, "filter1.records.out");
    validateMetric(1, appId, "filter1.records.error");
    validateMetric(2, appId, "filter2.records.out");
    validateMetric(1, appId, "filter2.records.error");
    validateMetric(1, appId, "agg1.records.out");
    validateMetric(1, appId, "agg1.records.error");
    validateMetric(1, appId, "agg2.records.error");
    validateMetric(5, appId, "errorflatten.records.out");
    validateMetric(3, appId, "errorfilter.records.out");
    validateMetric(5, appId, "sink1.records.in");
    validateMetric(3, appId, "sink2.records.in");
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId)

Example 80 with ApplicationId

use of co.cask.cdap.proto.id.ApplicationId in project cdap by caskdata.

the class DataPipelineTest method testMultiSource.

/**
 * Deploys and runs a pipeline with three sources feeding two sinks, then verifies the records
 * that reach each sink and the per-stage record metrics.
 *
 * @param engine the engine the pipeline is configured with; it is also appended to the dataset
 *               and application names used by this test
 * @throws Exception if deploying, running or validating the pipeline fails
 */
private void testMultiSource(Engine engine) throws Exception {
    /*
     * source1 --|                 |--> sink1
     *           |--> transform1 --|
     * source2 --|                 |
     *                             |--> transform2 --> sink2
     *                                     ^
     *                                     |
     * source3 ----------------------------|
     */
    Schema recordSchema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    // dataset names are suffixed with the engine
    String firstInput = "msInput1-" + engine;
    String secondInput = "msInput2-" + engine;
    String thirdInput = "msInput3-" + engine;
    String firstOutput = "msOutput1-" + engine;
    String secondOutput = "msOutput2-" + engine;

    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source1", MockSource.getPlugin(firstInput, recordSchema)))
        .addStage(new ETLStage("source2", MockSource.getPlugin(secondInput, recordSchema)))
        .addStage(new ETLStage("source3", MockSource.getPlugin(thirdInput, recordSchema)))
        .addStage(new ETLStage("transform1", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("transform2", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("sink1", MockSink.getPlugin(firstOutput)))
        .addStage(new ETLStage("sink2", MockSink.getPlugin(secondOutput)))
        .addConnection("source1", "transform1")
        .addConnection("source2", "transform1")
        .addConnection("transform1", "sink1")
        .addConnection("transform1", "transform2")
        .addConnection("transform2", "sink2")
        .addConnection("source3", "transform2")
        .setEngine(engine)
        .build();

    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("MultiSourceApp-" + engine);
    ApplicationManager application = deployApplication(appId.toId(), deployRequest);
    // there should be only two programs - one workflow and one mapreduce/spark
    Assert.assertEquals(2, application.getInfo().getPrograms().size());

    StructuredRecord samuel = StructuredRecord.builder(recordSchema).set("name", "samuel").build();
    StructuredRecord bob = StructuredRecord.builder(recordSchema).set("name", "bob").build();
    StructuredRecord jane = StructuredRecord.builder(recordSchema).set("name", "jane").build();

    // write one record to each source
    DataSetManager<Table> firstSource = getDataset(NamespaceId.DEFAULT.dataset(firstInput));
    MockSource.writeInput(firstSource, ImmutableList.of(samuel));
    DataSetManager<Table> secondSource = getDataset(NamespaceId.DEFAULT.dataset(secondInput));
    MockSource.writeInput(secondSource, ImmutableList.of(bob));
    DataSetManager<Table> thirdSource = getDataset(NamespaceId.DEFAULT.dataset(thirdInput));
    MockSource.writeInput(thirdSource, ImmutableList.of(jane));

    // run the workflow once and wait up to five minutes for it to complete
    WorkflowManager workflow = application.getWorkflowManager(SmartWorkflow.NAME);
    workflow.start();
    workflow.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

    // sink1 should get records from source1 and source2
    DataSetManager<Table> firstSink = getDataset(firstOutput);
    Set<StructuredRecord> wantedInSink1 = ImmutableSet.of(samuel, bob);
    Set<StructuredRecord> foundInSink1 = Sets.newHashSet(MockSink.readOutput(firstSink));
    Assert.assertEquals(wantedInSink1, foundInSink1);

    // sink2 should get all records
    DataSetManager<Table> secondSink = getDataset(secondOutput);
    Set<StructuredRecord> wantedInSink2 = ImmutableSet.of(samuel, bob, jane);
    Set<StructuredRecord> foundInSink2 = Sets.newHashSet(MockSink.readOutput(secondSink));
    Assert.assertEquals(wantedInSink2, foundInSink2);

    // expected record count per metric, checked in the listed order
    String[] metricNames = {
        "source1.records.out",
        "source2.records.out",
        "source3.records.out",
        "transform1.records.in",
        "transform1.records.out",
        "transform2.records.in",
        "transform2.records.out",
        "sink1.records.in",
        "sink2.records.in"
    };
    int[] expectedCounts = {1, 1, 1, 2, 2, 3, 3, 2, 3};
    for (int i = 0; i < metricNames.length; i++) {
        validateMetric(expectedCounts[i], appId, metricNames[i]);
    }
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) Table(co.cask.cdap.api.dataset.table.Table) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId)

Aggregations

ApplicationId (co.cask.cdap.proto.id.ApplicationId)234 Test (org.junit.Test)123 ProgramId (co.cask.cdap.proto.id.ProgramId)73 AppRequest (co.cask.cdap.proto.artifact.AppRequest)64 ApplicationManager (co.cask.cdap.test.ApplicationManager)49 ApplicationSpecification (co.cask.cdap.api.app.ApplicationSpecification)45 ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)44 NamespaceId (co.cask.cdap.proto.id.NamespaceId)43 Table (co.cask.cdap.api.dataset.table.Table)37 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)36 Schema (co.cask.cdap.api.data.schema.Schema)34 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)32 WorkflowManager (co.cask.cdap.test.WorkflowManager)31 Path (javax.ws.rs.Path)31 ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary)29 StreamId (co.cask.cdap.proto.id.StreamId)28 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)26 ArrayList (java.util.ArrayList)25 HashSet (java.util.HashSet)24 NotFoundException (co.cask.cdap.common.NotFoundException)23