use of co.cask.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testSimpleConditionWithMultipleInputActions.
@Test
public void testSimpleConditionWithMultipleInputActions() throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* action1--|
* |--> condition --> file ---> trueSink
* action2--| |
* |--->file----> falseSink
*
*/
String appName = "SimpleConditionWithMultipleInputActions";
String trueSource = "true" + appName + "Source";
String falseSource = "false" + appName + "Source";
String trueSink = "true" + appName + "Sink";
String falseSink = "false" + appName + "Sink";
String actionTable = "actionTable" + appName;
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema))).addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema))).addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink))).addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink))).addStage(new ETLStage("condition", MockCondition.getPlugin("condition"))).addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1"))).addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2"))).addConnection("action1", "condition").addConnection("action2", "condition").addConnection("condition", "trueSource", true).addConnection("condition", "falseSource", false).addConnection("trueSource", "trueSink").addConnection("falseSource", "falseSink").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
for (String branch : Arrays.asList("true", "false")) {
// write records to source
String source = branch.equals("true") ? trueSource : falseSource;
String sink = branch.equals("true") ? trueSink : falseSink;
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
if (branch.equals("true")) {
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
} else {
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
}
// check sink
DataSetManager<Table> sinkManager = getDataset(sink);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, branch + "Source.records.out");
validateMetric(2, appId, branch + "Sink.records.in");
// check Action1 and Action2 is executed correctly
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
}
}
use of co.cask.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testScheduledPipelines.
@Test
public void testScheduledPipelines() throws Exception {
// Deploy middle pipeline scheduled to be triggered by the completion of head pipeline
String expectedValue1 = "headArgValue";
String expectedValue2 = "headPluginValue";
WorkflowManager middleWorkflowManagerMR = deployPipelineWithSchedule("middle", Engine.SPARK, "head", new ArgumentMapping("head-arg", "middle-arg"), expectedValue1, new PluginPropertyMapping("action1", "value", "middle-plugin"), expectedValue2);
// Deploy tail pipeline scheduled to be triggered by the completion of middle pipeline
WorkflowManager tailWorkflowManagerMR = deployPipelineWithSchedule("tail", Engine.MAPREDUCE, "middle", new ArgumentMapping("middle-arg", "tail-arg"), expectedValue1, new PluginPropertyMapping("action2", "value", "tail-plugin"), expectedValue2);
// Run the head pipeline and wait for its completion
runHeadTriggeringPipeline(Engine.MAPREDUCE, expectedValue1, expectedValue2);
// After the completion of the head pipeline, verify the results of middle pipeline
assertTriggeredPipelinesResult(middleWorkflowManagerMR, "middle", Engine.SPARK, expectedValue1, expectedValue2);
// After the completion of the middle pipeline, verify the results of tail pipeline
assertTriggeredPipelinesResult(tailWorkflowManagerMR, "tail", Engine.MAPREDUCE, expectedValue1, expectedValue2);
}
use of co.cask.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testMultipleOrderedInputActions.
@Test
public void testMultipleOrderedInputActions() throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* action1--->action2---|
* |--> condition --> file ---> trueSink
* action3--->action4---| |
* |--->file----> falseSink
*
*/
String appName = "MultipleOrderedInputActions";
String trueSource = "true" + appName + "Source";
String falseSource = "false" + appName + "Source";
String trueSink = "true" + appName + "Sink";
String falseSink = "false" + appName + "Sink";
String actionTable = "actionTable" + appName;
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("trueSource", MockSource.getPlugin(trueSource, schema))).addStage(new ETLStage("falseSource", MockSource.getPlugin(falseSource, schema))).addStage(new ETLStage("trueSink", MockSink.getPlugin(trueSink))).addStage(new ETLStage("falseSink", MockSink.getPlugin(falseSink))).addStage(new ETLStage("condition", MockCondition.getPlugin("condition"))).addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1"))).addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2", "row1key1", "val1"))).addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, "row3", "key3", "val3"))).addStage(new ETLStage("action4", MockAction.getPlugin(actionTable, "row4", "key4", "val4", "row3key3", "val3"))).addConnection("action1", "action2").addConnection("action3", "action4").addConnection("action2", "condition").addConnection("action4", "condition").addConnection("condition", "trueSource", true).addConnection("condition", "falseSource", false).addConnection("trueSource", "trueSink").addConnection("falseSource", "falseSink").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
for (String branch : Arrays.asList("true", "false")) {
// write records to source
String source = branch.equals("true") ? trueSource : falseSource;
String sink = branch.equals("true") ? trueSink : falseSink;
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition.branch.to.execute", branch));
if (branch.equals("true")) {
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
} else {
workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 5, TimeUnit.MINUTES);
}
// check sink
DataSetManager<Table> sinkManager = getDataset(sink);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, branch + "Source.records.out");
validateMetric(2, appId, branch + "Sink.records.in");
// check Action1 and Action2 is executed correctly
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
Assert.assertEquals("val3", MockAction.readOutput(actionTableDS, "row3", "key3"));
Assert.assertEquals("val4", MockAction.readOutput(actionTableDS, "row4", "key4"));
}
}
use of co.cask.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testPipelineWithActions.
private void testPipelineWithActions(Engine engine) throws Exception {
String actionTable = "actionTable-" + engine;
String action1RowKey = "action1.row";
String action1ColumnKey = "action1.column";
String action1Value = "action1.value";
String action2RowKey = "action2.row";
String action2ColumnKey = "action2.column";
String action2Value = "action2.value";
String action3RowKey = "action3.row";
String action3ColumnKey = "action3.column";
String action3Value = "action3.value";
String sourceName = "actionSource-" + engine;
String sinkName = "actionSink-" + engine;
String sourceTableName = "actionSourceTable-" + engine;
String sinkTableName = "actionSinkTable-" + engine;
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, action1RowKey, action1ColumnKey, action1Value))).addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, action2RowKey, action2ColumnKey, action2Value))).addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, action3RowKey, action3ColumnKey, action3Value))).addStage(new ETLStage(sourceName, MockSource.getPlugin(sourceTableName, schema))).addStage(new ETLStage(sinkName, MockSink.getPlugin(sinkTableName))).addConnection(sourceName, sinkName).addConnection("action1", "action2").addConnection("action2", sourceName).addConnection(sinkName, "action3").setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("MyApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sinkTableName);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
DataSetManager<Table> actionTableDS = getDataset(actionTable);
Assert.assertEquals(action1Value, MockAction.readOutput(actionTableDS, action1RowKey, action1ColumnKey));
Assert.assertEquals(action2Value, MockAction.readOutput(actionTableDS, action2RowKey, action2ColumnKey));
Assert.assertEquals(action3Value, MockAction.readOutput(actionTableDS, action3RowKey, action3ColumnKey));
validateMetric(2, appId, sourceName + ".records.out");
validateMetric(2, appId, sinkName + ".records.in");
List<RunRecord> history = workflowManager.getHistory(ProgramRunStatus.COMPLETED);
Assert.assertEquals(1, history.size());
String runId = history.get(0).getPid();
for (WorkflowToken.Scope scope : Arrays.asList(WorkflowToken.Scope.SYSTEM, WorkflowToken.Scope.USER)) {
WorkflowTokenDetail token = workflowManager.getToken(runId, scope, null);
for (Map.Entry<String, List<WorkflowTokenDetail.NodeValueDetail>> tokenData : token.getTokenData().entrySet()) {
Assert.assertTrue(!tokenData.getKey().startsWith(co.cask.cdap.etl.common.Constants.StageStatistics.PREFIX));
}
}
}
use of co.cask.cdap.test.WorkflowManager in project cdap by caskdata.
the class DataPipelineTest method testRuntimeArgs.
private void testRuntimeArgs(Engine engine) throws Exception {
String sourceName = "runtimeArgInput-" + engine;
String sinkName = "runtimeArgOutput-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("action", MockAction.getPlugin("dumy", "val", "ue", "dwayne"))).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", StringValueFilterTransform.getPlugin("name", "samuel"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("action", "source").addConnection("source", "filter").addConnection("filter", "sink").setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("RuntimeArgApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordDwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();
// write one record to each source
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordDwayne));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
}
Aggregations