Example 21 with WorkflowManager

Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

From the class DataPipelineTest, method testSimpleMultiSource:

private void testSimpleMultiSource(Engine engine) throws Exception {
    /*
     * source1 --|
     *           |--> sleep --> sink
     * source2 --|
     */
    String source1Name = String.format("simpleMSInput1-%s", engine);
    String source2Name = String.format("simpleMSInput2-%s", engine);
    String sinkName = String.format("simpleMSOutput-%s", engine);
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source1", MockSource.getPlugin(source1Name)))
        .addStage(new ETLStage("source2", MockSource.getPlugin(source2Name)))
        .addStage(new ETLStage("sleep", SleepTransform.getPlugin(2L)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
        .addConnection("source1", "sleep")
        .addConnection("source2", "sleep")
        .addConnection("sleep", "sink")
        .setEngine(engine)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("SimpleMultiSourceApp-" + engine);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // there should be only two programs - one workflow and one mapreduce/spark
    Assert.assertEquals(2, appManager.getInfo().getPrograms().size());
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    StructuredRecord recordVincent = StructuredRecord.builder(schema).set("name", "vincent").build();
    // write records to the sources: two to source1, one to source2
    DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source1Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordVincent));
    inputManager = getDataset(NamespaceId.DEFAULT.dataset(source2Name));
    MockSource.writeInput(inputManager, ImmutableList.of(recordBob));
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    // check sink
    DataSetManager<Table> sinkManager = getDataset(sinkName);
    Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob, recordVincent);
    Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
    Assert.assertEquals(expected, actual);
    validateMetric(2, appId, "source1.records.out");
    validateMetric(1, appId, "source2.records.out");
    validateMetric(3, appId, "sleep.records.in");
    validateMetric(3, appId, "sleep.records.out");
    validateMetric(3, appId, "sink.records.in");
    Assert.assertTrue(getMetric(appId, "sleep." + io.cdap.cdap.etl.common.Constants.Metrics.TOTAL_TIME) > 0L);
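    // SleepTransform is configured with a sleep value of 2 and, judging by the assertions
    // below, publishes that value to the 'sleepTopic' TMS topic, so one message is expected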
    try (CloseableIterator<Message> messages = getMessagingContext().getMessageFetcher().fetch(appId.getNamespace(), "sleepTopic", 10, null)) {
        Assert.assertTrue(messages.hasNext());
        Assert.assertEquals("2", messages.next().getPayloadAsString());
        Assert.assertFalse(messages.hasNext());
    }
    getMessagingAdmin(appId.getNamespace()).deleteTopic("sleepTopic");
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), Table (io.cdap.cdap.api.dataset.table.Table), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Message (io.cdap.cdap.api.messaging.Message), SpamMessage (io.cdap.cdap.datapipeline.mock.SpamMessage), Schema (io.cdap.cdap.api.data.schema.Schema), WorkflowManager (io.cdap.cdap.test.WorkflowManager), StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord), AppRequest (io.cdap.cdap.proto.artifact.AppRequest), ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig), ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage), ApplicationId (io.cdap.cdap.proto.id.ApplicationId)
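
Stripped of the pipeline-specific assertions, every example on this page follows the same run lifecycle. A minimal sketch of that skeleton (assuming an ApplicationManager already deployed through the CDAP test framework, as above):

// Look up the pipeline's driver workflow by its well-known name.
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
// start() returns immediately; waitForRun polls the run records until a run
// reaches the requested status or the timeout elapses.
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);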

Example 22 with WorkflowManager

Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

From the class DataPipelineTest, method testConditionsOnBranches:

@Test
public void testConditionsOnBranches() throws Exception {
    /*
     *                            |-- true --> sink1
     *          |--> condition1 --|
     * source --|                 |-- false --> sink2
     *          |
     *          |                              |-- true --> sink3
     *          |-- transform --> condition2 --|
     *                                         |-- false --> sink4
     */
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    String sourceName = "branchConditionsSource";
    String sink1Name = "branchConditionsSink1";
    String sink2Name = "branchConditionsSink2";
    String sink3Name = "branchConditionsSink3";
    String sink4Name = "branchConditionsSink4";
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceName, schema)))
        .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
        .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
        .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
        .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Name)))
        .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Name)))
        .addStage(new ETLStage("sink3", MockSink.getPlugin(sink3Name)))
        .addStage(new ETLStage("sink4", MockSink.getPlugin(sink4Name)))
        .addConnection("source", "condition1")
        .addConnection("source", "transform")
        .addConnection("condition1", "sink1", true)
        .addConnection("condition1", "sink2", false)
        .addConnection("transform", "condition2")
        .addConnection("condition2", "sink3", true)
        .addConnection("condition2", "sink4", false)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("branchConditions");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    List<StructuredRecord> records = Collections.singletonList(StructuredRecord.builder(schema).set("name", "samuel").build());
    DataSetManager<Table> inputManager = getDataset(sourceName);
    MockSource.writeInput(inputManager, records);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start(ImmutableMap.of("condition1.branch.to.execute", "true", "condition2.branch.to.execute", "false"));
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> sink1Manager = getDataset(sink1Name);
    DataSetManager<Table> sink2Manager = getDataset(sink2Name);
    DataSetManager<Table> sink3Manager = getDataset(sink3Name);
    DataSetManager<Table> sink4Manager = getDataset(sink4Name);
    Assert.assertEquals(records, MockSink.readOutput(sink1Manager));
    Assert.assertTrue(MockSink.readOutput(sink2Manager).isEmpty());
    Assert.assertTrue(MockSink.readOutput(sink3Manager).isEmpty());
    Assert.assertEquals(records, MockSink.readOutput(sink4Manager));
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), Table (io.cdap.cdap.api.dataset.table.Table), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Schema (io.cdap.cdap.api.data.schema.Schema), WorkflowManager (io.cdap.cdap.test.WorkflowManager), StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord), AppRequest (io.cdap.cdap.proto.artifact.AppRequest), ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig), ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage), ApplicationId (io.cdap.cdap.proto.id.ApplicationId), Test (org.junit.Test)
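
The branch each condition takes is chosen per run rather than at deploy time: MockCondition reads a runtime argument named "<stage>.branch.to.execute", which is what the start(...) call above supplies. A hypothetical second run of the same deployed app could cover the opposite branches:

// Flip both conditions; records would then land in sink2 and sink3 instead.
workflowManager.start(ImmutableMap.of(
    "condition1.branch.to.execute", "false",
    "condition2.branch.to.execute", "true"));
// Completed runs accumulate per app, so the second wait must ask for two in total.
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);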

Example 23 with WorkflowManager

Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

From the class DataPipelineTest, method testSimpleConditionWithMultipleOutputActions:

@Test
public void testSimpleConditionWithMultipleOutputActions() throws Exception {
    Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
    /*
     *             |-- true --> action1 --> trueSource --> trueSink
     * condition --|
     *             |-- false --> action2 --> falseSource --> falseSink
     */
    String appName = "SimpleConditionWithMultipleOutputActions";
    String trueSource = "true" + appName + "Source";
    String falseSource = "false" + appName + "Source";
    String trueSink = "true" + appName + "Sink";
    String falseSink = "false" + appName + "Sink";
    String actionTable = "actionTable" + appName;
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema)))
        .addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema)))
        .addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink)))
        .addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink)))
        .addStage(new ETLStage("condition", MockCondition.getPlugin("condition")))
        .addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1")))
        .addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2")))
        .addConnection("condition", "action1", true)
        .addConnection("action1", "trueSource")
        .addConnection("trueSource", "trueSink")
        .addConnection("condition", "action2", false)
        .addConnection("action2", "falseSource")
        .addConnection("falseSource", "falseSink")
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app(appName);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
    StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
    for (String branch : Arrays.asList("true", "false")) {
        String source = branch.equals("true") ? trueSource : falseSource;
        String sink = branch.equals("true") ? trueSink : falseSink;
        // write records to source
        DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
        MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
        WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
        workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
        if (branch.equals("true")) {
            workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
        } else {
            workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
        }
        // check sink
        DataSetManager<Table> sinkManager = getDataset(sink);
        Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
        Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
        Assert.assertEquals(expected, actual);
        validateMetric(2, appId, branch + "Source.records.out");
        validateMetric(2, appId, branch + "Sink.records.in");
        // check that the correct action (action1 or action2) was executed for this branch
        DataSetManager<Table> actionTableDS = getDataset(actionTable);
        if (branch.equals("true")) {
            Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
        } else {
            Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
        }
    }
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), Table (io.cdap.cdap.api.dataset.table.Table), KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable), Schema (io.cdap.cdap.api.data.schema.Schema), WorkflowManager (io.cdap.cdap.test.WorkflowManager), StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord), AppRequest (io.cdap.cdap.proto.artifact.AppRequest), ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig), ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage), ApplicationId (io.cdap.cdap.proto.id.ApplicationId), Test (org.junit.Test)
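
Note the two different waits inside the loop: waitForRun returns once any run reaches the given status, while waitForRuns(status, count, timeout, unit) waits until the total number of runs with that status reaches count. The first ("true") iteration leaves one COMPLETED run in the history, so the second iteration has to wait for two.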

Example 24 with WorkflowManager

Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

From the class DataPipelineTest, method testScheduledPipelines:

@Test
public void testScheduledPipelines() throws Exception {
    // Deploy middle pipeline scheduled to be triggered by the completion of head pipeline
    String expectedValue1 = "headArgValue";
    String expectedValue2 = "headPluginValue";
    WorkflowManager middleWorkflowManagerMR = deployPipelineWithSchedule(
        "middle", Engine.SPARK, "head",
        new ArgumentMapping("head-arg", "middle-arg"), expectedValue1,
        new PluginPropertyMapping("action1", "value", "middle-plugin"), expectedValue2);
    // Deploy tail pipeline scheduled to be triggered by the completion of middle pipeline
    WorkflowManager tailWorkflowManagerMR = deployPipelineWithSchedule(
        "tail", Engine.MAPREDUCE, "middle",
        new ArgumentMapping("middle-arg", "tail-arg"), expectedValue1,
        new PluginPropertyMapping("action2", "value", "tail-plugin"), expectedValue2);
    // Run the head pipeline and wait for its completion
    runHeadTriggeringPipeline(Engine.MAPREDUCE, expectedValue1, expectedValue2);
    // After the completion of the head pipeline, verify the results of middle pipeline
    assertTriggeredPipelinesResult(middleWorkflowManagerMR, "middle", Engine.SPARK, expectedValue1, expectedValue2);
    // After the completion of the middle pipeline, verify the results of tail pipeline
    assertTriggeredPipelinesResult(tailWorkflowManagerMR, "tail", Engine.MAPREDUCE, expectedValue1, expectedValue2);
}
Also used: ArgumentMapping (io.cdap.cdap.etl.proto.v2.ArgumentMapping), WorkflowManager (io.cdap.cdap.test.WorkflowManager), PluginPropertyMapping (io.cdap.cdap.etl.proto.v2.PluginPropertyMapping), Test (org.junit.Test)
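
The chain relies on program-status schedules: the middle pipeline is triggered by the completion of the head pipeline, and the tail by the middle. Judging by the constructor arguments, ArgumentMapping("head-arg", "middle-arg") forwards a runtime argument from the triggering run into the triggered one, and PluginPropertyMapping does the same for a plugin property; the two assert helpers then verify that expectedValue1 and expectedValue2 propagated down the chain.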

Example 25 with WorkflowManager

Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.

From the class AutoJoinerTest, method testTripleAutoJoin:

private void testTripleAutoJoin(List<String> required, List<String> broadcast, Set<StructuredRecord> expected, Engine engine, List<String> tablesInOrderToJoin) throws Exception {
    /*
         users ------|
                     |
         purchases --|--> join --> sink
                     |
         interests --|

         joinOn: users.region = purchases.region = interests.region and
                 users.user_id = purchases.user_id = interests.user_id
     */
    String userInput = UUID.randomUUID().toString();
    String purchaseInput = UUID.randomUUID().toString();
    String interestInput = UUID.randomUUID().toString();
    String output = UUID.randomUUID().toString();
    ETLBatchConfig config = ETLBatchConfig.builder()
        .addStage(new ETLStage("users", MockSource.getPlugin(userInput, USER_SCHEMA)))
        .addStage(new ETLStage("purchases", MockSource.getPlugin(purchaseInput, PURCHASE_SCHEMA)))
        .addStage(new ETLStage("interests", MockSource.getPlugin(interestInput, INTEREST_SCHEMA)))
        .addStage(new ETLStage("join", MockAutoJoiner.getPlugin(tablesInOrderToJoin,
            Arrays.asList("region", "user_id"), required, broadcast, Collections.emptyList(), true)))
        .addStage(new ETLStage("sink", MockSink.getPlugin(output)))
        .addConnection("users", "join")
        .addConnection("purchases", "join")
        .addConnection("interests", "join")
        .addConnection("join", "sink")
        .setEngine(engine)
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
    ApplicationId appId = NamespaceId.DEFAULT.app(UUID.randomUUID().toString());
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // write input data
    List<StructuredRecord> userData = Arrays.asList(USER_ALICE, USER_ALYCE, USER_BOB);
    DataSetManager<Table> inputManager = getDataset(userInput);
    MockSource.writeInput(inputManager, userData);
    List<StructuredRecord> purchaseData = new ArrayList<>();
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 0).set("purchase_id", 123).build());
    purchaseData.add(StructuredRecord.builder(PURCHASE_SCHEMA).set("region", "us").set("user_id", 2).set("purchase_id", 456).build());
    inputManager = getDataset(purchaseInput);
    MockSource.writeInput(inputManager, purchaseData);
    List<StructuredRecord> interestData = new ArrayList<>();
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "food").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 0).set("interest", "sports").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 1).set("interest", "gardening").build());
    interestData.add(StructuredRecord.builder(INTEREST_SCHEMA).set("region", "us").set("user_id", 2).set("interest", "gaming").build());
    inputManager = getDataset(interestInput);
    MockSource.writeInput(inputManager, interestData);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.startAndWaitForGoodRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(output);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Set<StructuredRecord> actual = new HashSet<>();
    Schema expectedSchema = expected.isEmpty() ? null : expected.iterator().next().getSchema();
    // compare schemas by value; reference equality (==) would force the reorder path even for identical schemas
    if (expectedSchema == null || expectedSchema.equals(outputRecords.get(0).getSchema())) {
        actual = new HashSet<>(outputRecords);
    } else {
        // reorder the output columns of the join result (actual) to match the column order of expected
        for (StructuredRecord sr : outputRecords) {
            actual.add(StructuredRecord.builder(expectedSchema)
                .set("purchases_region", sr.get("purchases_region"))
                .set("purchases_purchase_id", sr.get("purchases_purchase_id"))
                .set("purchases_user_id", sr.get("purchases_user_id"))
                .set("users_region", sr.get("users_region"))
                .set("users_user_id", sr.get("users_user_id"))
                .set("users_name", sr.get("users_name"))
                .set("interests_region", sr.get("interests_region"))
                .set("interests_user_id", sr.get("interests_user_id"))
                .set("interests_interest", sr.get("interests_interest"))
                .build());
        }
    }
    Assert.assertEquals(expected, actual);
    validateMetric(9, appId, "join.records.in");
    validateMetric(expected.size(), appId, "join.records.out");
}
Also used: ApplicationManager (io.cdap.cdap.test.ApplicationManager), Table (io.cdap.cdap.api.dataset.table.Table), WorkflowManager (io.cdap.cdap.test.WorkflowManager), Schema (io.cdap.cdap.api.data.schema.Schema), ArrayList (java.util.ArrayList), StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord), AppRequest (io.cdap.cdap.proto.artifact.AppRequest), ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig), ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage), ApplicationId (io.cdap.cdap.proto.id.ApplicationId), HashSet (java.util.HashSet)
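
Note the naming convention in the join output: each field is prefixed with the stage it came from (users_user_id, purchases_purchase_id, interests_interest, and so on). When the expected schema lists the same columns in a different order, the test therefore rebuilds each output record field by field against expectedSchema before comparing sets.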

Aggregations

WorkflowManager (io.cdap.cdap.test.WorkflowManager): 80 usages
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 78 usages
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 69 usages
ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 67 usages
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 67 usages
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 63 usages
Table (io.cdap.cdap.api.dataset.table.Table): 59 usages
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 54 usages
Schema (io.cdap.cdap.api.data.schema.Schema): 46 usages
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 45 usages
Test (org.junit.Test): 40 usages
ArrayList (java.util.ArrayList): 23 usages
HashMap (java.util.HashMap): 16 usages
HashSet (java.util.HashSet): 15 usages
ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin): 12 usages
File (java.io.File): 12 usages
RunRecord (io.cdap.cdap.proto.RunRecord): 9 usages
FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 7 usages
IOException (java.io.IOException): 6 usages
ServiceManager (io.cdap.cdap.test.ServiceManager): 5 usages