Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
The class DataPipelineTest, method testKVTableLookup.
@Test
public void testKVTableLookup() throws Exception {
addDatasetInstance(KeyValueTable.class.getName(), "ageTable");
DataSetManager<KeyValueTable> lookupTable = getDataset("ageTable");
lookupTable.get().write("samuel".getBytes(Charsets.UTF_8), "12".getBytes(Charsets.UTF_8));
lookupTable.get().write("bob".getBytes(Charsets.UTF_8), "36".getBytes(Charsets.UTF_8));
lookupTable.get().write("jane".getBytes(Charsets.UTF_8), "25".getBytes(Charsets.UTF_8));
lookupTable.flush();
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("inputTable")))
  .addStage(new ETLStage("transform", LookupTransform.getPlugin("person", "age", "ageTable")))
  .addStage(new ETLStage("sink", MockSink.getPlugin("outputTable")))
  .addConnection("source", "transform")
  .addConnection("transform", "sink")
  .build();
ApplicationId appId = NamespaceId.DEFAULT.app("testKVTableLookup");
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationManager appManager = deployApplication(appId, appRequest);
// set up input data
Schema inputSchema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(inputSchema).set("person", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(inputSchema).set("person", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(inputSchema).set("person", "jane").build();
DataSetManager<Table> inputTable = getDataset("inputTable");
MockSource.writeInput(inputTable, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME).start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
Schema schema = Schema.recordOf("person", Schema.Field.of("person", Schema.of(Schema.Type.STRING)), Schema.Field.of("age", Schema.of(Schema.Type.STRING)));
Set<StructuredRecord> expected = new HashSet<>();
expected.add(StructuredRecord.builder(schema).set("person", "samuel").set("age", "12").build());
expected.add(StructuredRecord.builder(schema).set("person", "bob").set("age", "36").build());
expected.add(StructuredRecord.builder(schema).set("person", "jane").set("age", "25").build());
DataSetManager<Table> outputTable = getDataset("outputTable");
Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(outputTable));
Assert.assertEquals(expected, actual);
validateMetric(3, appId, "source.records.out");
validateMetric(3, appId, "sink.records.in");
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("inputTable"));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset("outputTable"));
}
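The essential WorkflowManager interaction in this test is the start-then-wait pattern. A minimal sketch of just that part, assuming the appManager obtained from deployApplication above:
// start the pipeline's SmartWorkflow and block until the run reaches COMPLETED
// (start() returns the manager itself, so the calls can be chained)
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME).start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);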
Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
The class DataPipelineTest, method testNestedCondition.
private void testNestedCondition(Engine engine) throws Exception {
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
 * action1----->|
 *              |
 * action2----->|
 *              |
 *              V
 * source --> condition1 --> sink1
 *              |
 *              |------->condition2 --> sink2
 *                           |
 *                           |-------> condition3----> sink3----->action3----->condition4---->action4
 *                                                                                  |
 *                                                                                  |------>action5
 */
String appName = "NestedCondition-" + engine;
String source = appName + "Source-" + engine;
String sink1 = appName + "Sink1-" + engine;
String sink2 = appName + "Sink2-" + engine;
String sink3 = appName + "Sink3-" + engine;
String actionTable = "actionTable" + appName + "-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin(source, schema)))
  .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1)))
  .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2)))
  .addStage(new ETLStage("sink3", MockSink.getPlugin(sink3)))
  .addStage(new ETLStage("action1", MockAction.getPlugin(actionTable, "row1", "key1", "val1")))
  .addStage(new ETLStage("action2", MockAction.getPlugin(actionTable, "row2", "key2", "val2")))
  .addStage(new ETLStage("action3", MockAction.getPlugin(actionTable, "row3", "key3", "val3")))
  .addStage(new ETLStage("action4", MockAction.getPlugin(actionTable, "row4", "key4", "val4")))
  .addStage(new ETLStage("action5", MockAction.getPlugin(actionTable, "row5", "key5", "val5")))
  .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
  .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
  .addStage(new ETLStage("condition3", MockCondition.getPlugin("condition3")))
  .addStage(new ETLStage("condition4", MockCondition.getPlugin("condition4")))
  .addConnection("action1", "condition1")
  .addConnection("action2", "condition1")
  .addConnection("source", "condition1")
  .addConnection("condition1", "sink1", true)
  .addConnection("condition1", "condition2", false)
  .addConnection("condition2", "sink2", true)
  .addConnection("condition2", "condition3", false)
  .addConnection("condition3", "sink3", true)
  .addConnection("sink3", "action3")
  .addConnection("action3", "condition4")
  .addConnection("condition4", "action4", true)
  .addConnection("condition4", "action5", false)
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app(appName);
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition3.branch.to.execute", "true", "condition4.branch.to.execute", "true"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sink3);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
// other sinks should not have any data in them
sinkManager = getDataset(sink1);
Assert.assertTrue(MockSink.readOutput(sinkManager).isEmpty());
sinkManager = getDataset(sink2);
Assert.assertTrue(MockSink.readOutput(sinkManager).isEmpty());
// check actions
DataSetManager<Table> actionTableDS = getDataset(actionTable);
// action1 is executed
Assert.assertEquals("val1", MockAction.readOutput(actionTableDS, "row1", "key1"));
// action2 is executed
Assert.assertEquals("val2", MockAction.readOutput(actionTableDS, "row2", "key2"));
// action3 is executed
Assert.assertEquals("val3", MockAction.readOutput(actionTableDS, "row3", "key3"));
// action4 is executed
Assert.assertEquals("val4", MockAction.readOutput(actionTableDS, "row4", "key4"));
// action5 should not get executed.
Assert.assertNull(MockAction.readOutput(actionTableDS, "row5", "key5"));
}
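For pipelines with condition stages, the branch each MockCondition takes is driven by runtime arguments passed to start. A minimal sketch of that pattern, assuming the workflowManager above and a condition stage named "condition3":
// "<condition stage name>.branch.to.execute" tells MockCondition which branch to take
workflowManager.start(ImmutableMap.of("condition3.branch.to.execute", "true"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);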
Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
The class DataPipelineTest, method testNoConnectorsForSourceCondition.
@Test
public void testNoConnectorsForSourceCondition() throws Exception {
//
// condition1-->condition2-->source-->sink
//
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", MockSource.getPlugin("simpleNoConnectorConditionSource", schema)))
  .addStage(new ETLStage("trueSink", MockSink.getPlugin("trueOutput")))
  .addStage(new ETLStage("condition1", MockCondition.getPlugin("condition1")))
  .addStage(new ETLStage("condition2", MockCondition.getPlugin("condition2")))
  .addConnection("condition1", "condition2", true)
  .addConnection("condition2", "source", true)
  .addConnection("source", "trueSink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("NoConnectorForSourceConditionApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
// write records to source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("simpleNoConnectorConditionSource"));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start(ImmutableMap.of("condition1.branch.to.execute", "true", "condition2.branch.to.execute", "true"));
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset("trueOutput");
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
}
Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
The class DataPipelineTest, method testFailureToStartIncapableProgram.
@Test
public void testFailureToStartIncapableProgram() throws Exception {
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .addStage(new ETLStage("source", IncapableSource.getPlugin()))
  .addStage(new ETLStage("sink", IncapableSink.getPlugin()))
  .addConnection("source", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("IncapableApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
// starting the workflow should lead to a failed run, since the pipeline contains incapable plugins
workflowManager.start();
// the program should fail as it has incapable plugins
workflowManager.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);
}
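waitForRun accepts any ProgramRunStatus, so a negative test like this one simply waits for FAILED instead of COMPLETED. A minimal sketch, reusing the workflowManager above:
// the run is expected to fail, so wait for FAILED rather than COMPLETED
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.FAILED, 5, TimeUnit.MINUTES);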
Use of io.cdap.cdap.test.WorkflowManager in project cdap by caskdata.
The class DataPipelineConnectionTest, method testUsingConnections.
private void testUsingConnections(Engine engine) throws Exception {
String sourceConnName = "sourceConn " + engine;
String sinkConnName = "sinkConn " + engine;
String srcTableName = "src" + engine;
String sinkTableName = "sink" + engine;
// the connection properties reference a macro (${badval}) that will resolve to a JSON object at runtime
addConnection(sourceConnName, new ConnectionCreationRequest(
  "", new PluginInfo("test", "dummy", null,
                     ImmutableMap.of("tableName", srcTableName, "key1", "${badval}"),
                     new ArtifactSelectorConfig())));
addConnection(sinkConnName, new ConnectionCreationRequest(
  "", new PluginInfo("test", "dummy", null,
                     ImmutableMap.of("tableName", sinkTableName, "key1", "${badval}"),
                     new ArtifactSelectorConfig())));
// add a JSON string to the runtime arguments to ensure the plugin can be instantiated under that condition
Map<String, String> runtimeArguments = Collections.singletonMap("badval", "{\"a\" : 1}");
// source -> sink
ETLBatchConfig config = ETLBatchConfig.builder()
  .setEngine(engine)
  .addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName)))
  .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName)))
  .addConnection("source", "sink")
  .build();
Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();
// add the dataset here in the test; the source won't create it since the table name is macro-enabled
addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
DataSetManager<Table> sourceTable = getDataset(srcTableName);
MockSource.writeInput(sourceTable, ImmutableList.of(samuel, dwayne));
// verify preview can run successfully using connections
PreviewManager previewManager = getPreviewManager();
PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, runtimeArguments, 10);
// Start the preview and get the corresponding PreviewRunner.
ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, new AppRequest<>(APP_ARTIFACT, config, previewConfig));
// Wait for the preview status to go into COMPLETED.
Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {
  @Override
  public PreviewStatus.Status call() throws Exception {
    PreviewStatus status = previewManager.getStatus(previewId);
    return status == null ? null : status.getStatus();
  }
}, 5, TimeUnit.MINUTES);
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
ApplicationId appId = NamespaceId.DEFAULT.app("testApp" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// start the actual pipeline run
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
DataSetManager<Table> sinkTable = getDataset(sinkTableName);
List<StructuredRecord> outputRecords = MockSink.readOutput(sinkTable);
Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(outputRecords));
// modify the connection to use a new table name for source and sink
String newSrcTableName = "new" + srcTableName;
String newSinkTableName = "new" + sinkTableName;
addConnection(sourceConnName, new ConnectionCreationRequest(
  "", new PluginInfo("test", "dummy", null,
                     Collections.singletonMap("tableName", newSrcTableName),
                     new ArtifactSelectorConfig())));
addConnection(sinkConnName, new ConnectionCreationRequest(
  "", new PluginInfo("test", "dummy", null,
                     Collections.singletonMap("tableName", newSinkTableName),
                     new ArtifactSelectorConfig())));
addDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName), Table.class.getName());
StructuredRecord newRecord1 = StructuredRecord.builder(schema).set("name", "john").build();
StructuredRecord newRecord2 = StructuredRecord.builder(schema).set("name", "tom").build();
sourceTable = getDataset(newSrcTableName);
MockSource.writeInput(sourceTable, ImmutableList.of(newRecord1, newRecord2));
// run the program again, it should use the new table to read and write
manager.start(runtimeArguments);
manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);
sinkTable = getDataset(newSinkTableName);
outputRecords = MockSink.readOutput(sinkTable);
Assert.assertEquals(ImmutableSet.of(newRecord1, newRecord2), new HashSet<>(outputRecords));
deleteConnection(sourceConnName);
deleteConnection(sinkConnName);
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName));
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSinkTableName));
}
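For reference, the WorkflowManager entry points this example combines are summarized below as a minimal sketch, assuming the appManager and runtimeArguments from the test above:
WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
// start and wait for a single successful run in one call
manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);
// or start separately and wait until the expected total number of COMPLETED runs is reached
manager.start(runtimeArguments);
manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);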