use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class DataPipelineTest method testPostAction.
@Test
public void testPostAction() throws Exception {
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("source", MockSource.getPlugin("actionInput"))).addStage(new ETLStage("sink", MockSink.getPlugin("actionOutput"))).addPostAction(new ETLStage("tokenWriter", NodeStatesAction.getPlugin("tokenTable"))).addConnection("source", "sink").build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("ActionApp");
ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordJane = StructuredRecord.builder(schema).set("name", "jane").build();
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset("actionInput"));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob, recordJane));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
DataSetManager<Table> tokenTableManager = getDataset(NamespaceId.DEFAULT.dataset("tokenTable"));
Table tokenTable = tokenTableManager.get();
NodeStatus status = NodeStatus.valueOf(Bytes.toString(tokenTable.get(Bytes.toBytes("phase-1"), Bytes.toBytes("status"))));
Assert.assertEquals(NodeStatus.COMPLETED, status);
}
use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PreviewDataPipelineTest method testDataPipelinePreviewRun.
private void testDataPipelinePreviewRun(Engine engine) throws Exception {
PreviewManager previewManager = getPreviewManager();
String sourceTableName = "singleInput";
String sinkTableName = "singleOutput";
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* source --> transform -> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema))).addStage(new ETLStage("transform", IdentityTransform.getPlugin())).addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName))).addConnection("source", "transform").addConnection("transform", "sink").setEngine(engine).setNumOfRecordsPreview(100).build();
// Construct the preview config with the program name and program type
PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
// Create the table for the mock source
addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
// Start the preview and get the corresponding PreviewRunner.
ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
final PreviewRunner previewRunner = previewManager.getRunner(previewId);
// Wait for the preview status go into COMPLETED.
Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {
@Override
public PreviewStatus.Status call() throws Exception {
PreviewStatus status = previewRunner.getStatus();
return status == null ? null : status.getStatus();
}
}, 5, TimeUnit.MINUTES);
// Get the data for stage "source" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "source", 2);
// Get the data for stage "transform" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "transform", 2);
// Get the data for stage "sink" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "sink", 2);
// Validate the metrics for preview
validateMetric(2, previewId, "source.records.in", previewRunner);
validateMetric(2, previewId, "source.records.out", previewRunner);
validateMetric(2, previewId, "transform.records.in", previewRunner);
validateMetric(2, previewId, "transform.records.out", previewRunner);
validateMetric(2, previewId, "sink.records.out", previewRunner);
validateMetric(2, previewId, "sink.records.in", previewRunner);
// Check the sink table is not created in the real space.
DataSetManager<Table> sinkManager = getDataset(sinkTableName);
Assert.assertNull(sinkManager.get());
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PreviewDataPipelineTest method testPreviewFailedRun.
private void testPreviewFailedRun(Engine engine) throws Exception {
PreviewManager previewManager = getPreviewManager();
String sourceTableName = "singleInput";
String sinkTableName = "singleOutput";
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* source --> transform -> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema))).addStage(new ETLStage("transform", ExceptionTransform.getPlugin("name", "samuel"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName))).addConnection("source", "transform").addConnection("transform", "sink").setNumOfRecordsPreview(100).setEngine(engine).build();
// Construct the preview config with the program name and program type.
PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
// Create the table for the mock source
addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
MockSource.writeInput(inputManager, "1", recordSamuel);
MockSource.writeInput(inputManager, "2", recordBob);
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
// Start the preview and get the corresponding PreviewRunner.
ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
final PreviewRunner previewRunner = previewManager.getRunner(previewId);
// Wait for the preview status go into FAILED.
Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, new Callable<PreviewStatus.Status>() {
@Override
public PreviewStatus.Status call() throws Exception {
PreviewStatus status = previewRunner.getStatus();
return status == null ? null : status.getStatus();
}
}, 5, TimeUnit.MINUTES);
// Get the data for stage "source" in the PreviewStore.
checkPreviewStore(previewRunner, "source", 2);
// Get the data for stage "transform" in the PreviewStore, should contain one less record than source.
checkPreviewStore(previewRunner, "transform", 1);
// Get the data for stage "sink" in the PreviewStore, should contain one less record than source.
checkPreviewStore(previewRunner, "sink", 1);
// Validate the metrics for preview
validateMetric(2, previewId, "source.records.in", previewRunner);
validateMetric(2, previewId, "source.records.out", previewRunner);
validateMetric(2, previewId, "transform.records.in", previewRunner);
validateMetric(1, previewId, "transform.records.out", previewRunner);
validateMetric(1, previewId, "sink.records.out", previewRunner);
validateMetric(1, previewId, "sink.records.in", previewRunner);
// Check the sink table is not created in the real space.
DataSetManager<Table> sinkManager = getDataset(sinkTableName);
Assert.assertNull(sinkManager.get());
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class DataPipelineTest method testSimpleMultiSource.
private void testSimpleMultiSource(Engine engine) throws Exception {
/*
* source1 --|
* |--> sleep --> sink
* source2 --|
*/
String source1Name = String.format("simpleMSInput1-%s", engine);
String source2Name = String.format("simpleMSInput2-%s", engine);
String sinkName = String.format("simpleMSOutput-%s", engine);
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("source1", MockSource.getPlugin(source1Name))).addStage(new ETLStage("source2", MockSource.getPlugin(source2Name))).addStage(new ETLStage("sleep", SleepTransform.getPlugin(2L))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("source1", "sleep").addConnection("source2", "sleep").addConnection("sleep", "sink").setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SimpleMultiSourceApp-" + engine);
ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Assert.assertEquals(2, appManager.getInfo().getPrograms().size());
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordVincent = StructuredRecord.builder(schema).set("name", "vincent").build();
// write one record to each source
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source1Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordVincent));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(source2Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob, recordVincent);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, "source1.records.out");
validateMetric(1, appId, "source2.records.out");
validateMetric(3, appId, "sleep.records.in");
validateMetric(3, appId, "sleep.records.out");
validateMetric(3, appId, "sink.records.in");
Assert.assertTrue(getMetric(appId, "sleep." + co.cask.cdap.etl.common.Constants.Metrics.TOTAL_TIME) > 0L);
}
use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class DataPipelineTest method testRuntimeArgs.
private void testRuntimeArgs(Engine engine) throws Exception {
String sourceName = "runtimeArgInput-" + engine;
String sinkName = "runtimeArgOutput-" + engine;
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(new ETLStage("action", MockAction.getPlugin("dumy", "val", "ue", "dwayne"))).addStage(new ETLStage("source", MockSource.getPlugin(sourceName))).addStage(new ETLStage("filter", StringValueFilterTransform.getPlugin("name", "samuel"))).addStage(new ETLStage("sink", MockSink.getPlugin(sinkName))).addConnection("action", "source").addConnection("source", "filter").addConnection("filter", "sink").setEngine(engine).build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("RuntimeArgApp-" + engine);
ApplicationManager appManager = deployApplication(appId.toId(), appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordDwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();
// write one record to each source
DataSetManager<Table> inputManager = getDataset(sourceName);
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordDwayne));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
}
Aggregations