use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
the class DataPipelineTest method testSimpleMultiSource.
private void testSimpleMultiSource(Engine engine) throws Exception {
/*
* source1 --|
* |--> sleep --> sink
* source2 --|
*/
String source1Name = String.format("simpleMSInput1-%s", engine);
String source2Name = String.format("simpleMSInput2-%s", engine);
String sinkName = String.format("simpleMSOutput-%s", engine);
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source1", MockSource.getPlugin(source1Name)))
  .addStage(new ETLStage("source2", MockSource.getPlugin(source2Name)))
  .addStage(new ETLStage("sleep", SleepTransform.getPlugin(2L)))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkName)))
  .addConnection("source1", "sleep")
  .addConnection("source2", "sleep")
  .addConnection("sleep", "sink")
  .setEngine(engine)
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("SimpleMultiSourceApp-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
// there should be only two programs - one workflow and one mapreduce/spark
Assert.assertEquals(2, appManager.getInfo().getPrograms().size());
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
StructuredRecord recordVincent = StructuredRecord.builder(schema).set("name", "vincent").build();
// write two records to source1 and one record to source2
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(source1Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordVincent));
inputManager = getDataset(NamespaceId.DEFAULT.dataset(source2Name));
MockSource.writeInput(inputManager, ImmutableList.of(recordBob));
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// check sink
DataSetManager<Table> sinkManager = getDataset(sinkName);
Set<StructuredRecord> expected = ImmutableSet.of(recordSamuel, recordBob, recordVincent);
Set<StructuredRecord> actual = Sets.newHashSet(MockSink.readOutput(sinkManager));
Assert.assertEquals(expected, actual);
validateMetric(2, appId, "source1.records.out");
validateMetric(1, appId, "source2.records.out");
validateMetric(3, appId, "sleep.records.in");
validateMetric(3, appId, "sleep.records.out");
validateMetric(3, appId, "sink.records.in");
Assert.assertTrue(getMetric(appId, "sleep." + io.cdap.cdap.etl.common.Constants.Metrics.TOTAL_TIME) > 0L);
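// SleepTransform presumably publishes its configured sleep value ("2") to the "sleepTopic" messaging
// topic; the block below verifies that exactly one such message was published, then cleans up the topic.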
try (CloseableIterator<Message> messages = getMessagingContext().getMessageFetcher().fetch(appId.getNamespace(), "sleepTopic", 10, null)) {
Assert.assertTrue(messages.hasNext());
Assert.assertEquals("2", messages.next().getPayloadAsString());
Assert.assertFalse(messages.hasNext());
}
getMessagingAdmin(appId.getNamespace()).deleteTopic("sleepTopic");
}
use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
the class DataPipelineTest method testSecureStorePipeline.
/**
* Tests the secure storage macro function in a pipeline by creating datasets from the secure store data.
*/
private void testSecureStorePipeline(Engine engine, String prefix) throws Exception {
/*
* Trivial pipeline from batch source to batch sink.
*
* source --------- sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("input", "${secure(" + prefix + "source)}")))
  .addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("output", "${secure(" + prefix + "sink)}")))
  .addConnection("source", "sink")
  .setEngine(engine)
  .build();
// place dataset names into secure storage
getSecureStoreManager().putSecureData("default", prefix + "source", prefix + "MockSecureSourceDataset", "secure source dataset name", new HashMap<String, String>());
getSecureStoreManager().putSecureData("default", prefix + "sink", prefix + "MockSecureSinkDataset", "secure dataset name", new HashMap<String, String>());
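// At runtime the ${secure(...)} macros above resolve to the values just stored: the source's dataset
// name becomes prefix + "MockSecureSourceDataset" and the sink's becomes prefix + "MockSecureSinkDataset",
// which is exactly what the assertions below look for.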
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("App-" + engine);
ApplicationManager appManager = deployApplication(appId, appRequest);
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
// make sure the datasets don't exist beforehand
Assert.assertNull(getDataset(prefix + "MockSecureSourceDataset").get());
Assert.assertNull(getDataset(prefix + "MockSecureSinkDataset").get());
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// now the datasets should exist
Assert.assertNotNull(getDataset(prefix + "MockSecureSourceDataset").get());
Assert.assertNotNull(getDataset(prefix + "MockSecureSinkDataset").get());
}
use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
the class DataPipelineTest method testMacrosMapReducePipeline.
@Test
public void testMacrosMapReducePipeline() throws Exception {
/*
* Trivial MapReduce pipeline from batch source to batch sink.
*
* source --------- sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockRuntimeDatasetSource.getPlugin("mrinput", "${runtime${source}}")))
  .addStage(new ETLStage("sink", MockRuntimeDatasetSink.getPlugin("mroutput", "${runtime}${sink}")))
  .addConnection("source", "sink")
  .build();
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
ApplicationId appId = NamespaceId.DEFAULT.app("MRApp");
ApplicationManager appManager = deployApplication(appId, appRequest);
// set runtime arguments for macro substitution
Map<String, String> runtimeArguments = ImmutableMap.of(
  "runtime", "mockRuntime",
  "sink", "MRSinkDataset",
  "source", "Source",
  "runtimeSource", "mockRuntimeMRSourceDataset");
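// The source macro resolves in two steps: ${runtime${source}} -> ${runtimeSource} -> "mockRuntimeMRSourceDataset".
// The sink macro is plain concatenation: ${runtime}${sink} -> "mockRuntime" + "MRSinkDataset" -> "mockRuntimeMRSinkDataset".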
// make sure the datasets don't exist beforehand
Assert.assertNull(getDataset("mockRuntimeMRSourceDataset").get());
Assert.assertNull(getDataset("mockRuntimeMRSinkDataset").get());
WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
workflowManager.setRuntimeArgs(runtimeArguments);
workflowManager.start();
workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
// now the datasets should exist
Assert.assertNotNull(getDataset("mockRuntimeMRSourceDataset").get());
Assert.assertNotNull(getDataset("mockRuntimeMRSinkDataset").get());
}
use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
the class PreviewDataPipelineTest method testPreviewFailedRun.
private void testPreviewFailedRun(Engine engine) throws Exception {
PreviewManager previewManager = getPreviewManager();
String sourceTableName = "singleInput";
String sinkTableName = "singleOutput";
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* source --> transform -> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
  .addStage(new ETLStage("transform", ExceptionTransform.getPlugin("name", "samuel")))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
  .addConnection("source", "transform")
  .addConnection("transform", "sink")
  .setNumOfRecordsPreview(100)
  .setEngine(engine)
  .build();
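// ExceptionTransform appears to throw on records whose "name" field equals "samuel", so the preview run is
// expected to fail and the transform and sink stages should each see one record fewer than the source.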
// Construct the preview config with the program name and program type.
PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
// Create the table for the mock source
addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
MockSource.writeInput(inputManager, "1", recordSamuel);
MockSource.writeInput(inputManager, "2", recordBob);
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig, previewConfig);
// Start the preview and get the corresponding PreviewRunner.
ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
final PreviewRunner previewRunner = previewManager.getRunner(previewId);
// Wait for the preview status to become RUN_FAILED.
Tasks.waitFor(PreviewStatus.Status.RUN_FAILED, new Callable<PreviewStatus.Status>() {
@Override
public PreviewStatus.Status call() throws Exception {
PreviewStatus status = previewRunner.getStatus();
return status == null ? null : status.getStatus();
}
}, 5, TimeUnit.MINUTES);
// Get the data for stage "source" in the PreviewStore.
checkPreviewStore(previewRunner, "source", 2);
// Get the data for stage "transform" in the PreviewStore, should contain one less record than source.
checkPreviewStore(previewRunner, "transform", 1);
// Get the data for stage "sink" in the PreviewStore, should contain one less record than source.
checkPreviewStore(previewRunner, "sink", 1);
// Validate the metrics for preview
validateMetric(2, previewId, "source.records.in", previewRunner);
validateMetric(2, previewId, "source.records.out", previewRunner);
validateMetric(2, previewId, "transform.records.in", previewRunner);
validateMetric(1, previewId, "transform.records.out", previewRunner);
validateMetric(1, previewId, "sink.records.out", previewRunner);
validateMetric(1, previewId, "sink.records.in", previewRunner);
// Check the sink table is not created in the real space.
DataSetManager<Table> sinkManager = getDataset(sinkTableName);
Assert.assertNull(sinkManager.get());
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
}
use of io.cdap.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.
the class PreviewDataPipelineTest method testDataPipelinePreviewRun.
private void testDataPipelinePreviewRun(Engine engine) throws Exception {
PreviewManager previewManager = getPreviewManager();
String sourceTableName = "singleInput";
String sinkTableName = "singleOutput";
Schema schema = Schema.recordOf("testRecord", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
/*
* source --> transform -> sink
*/
ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", MockSource.getPlugin(sourceTableName, schema)))
  .addStage(new ETLStage("transform", IdentityTransform.getPlugin()))
  .addStage(new ETLStage("sink", MockSink.getPlugin(sinkTableName)))
  .addConnection("source", "transform")
  .addConnection("transform", "sink")
  .setEngine(engine)
  .setNumOfRecordsPreview(100)
  .build();
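// IdentityTransform passes records through unchanged, so every stage should see the same two records.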
// Construct the preview config with the program name and program type
PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW, Collections.<String, String>emptyMap(), 10);
// Create the table for the mock source
addDatasetInstance(Table.class.getName(), sourceTableName, DatasetProperties.of(ImmutableMap.of("schema", schema.toString())));
DataSetManager<Table> inputManager = getDataset(NamespaceId.DEFAULT.dataset(sourceTableName));
StructuredRecord recordSamuel = StructuredRecord.builder(schema).set("name", "samuel").build();
StructuredRecord recordBob = StructuredRecord.builder(schema).set("name", "bob").build();
MockSource.writeInput(inputManager, ImmutableList.of(recordSamuel, recordBob));
AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT_RANGE, etlConfig, previewConfig);
// Start the preview and get the corresponding PreviewRunner.
ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT, appRequest);
final PreviewRunner previewRunner = previewManager.getRunner(previewId);
// Wait for the preview status to become COMPLETED.
Tasks.waitFor(PreviewStatus.Status.COMPLETED, new Callable<PreviewStatus.Status>() {
@Override
public PreviewStatus.Status call() throws Exception {
PreviewStatus status = previewRunner.getStatus();
return status == null ? null : status.getStatus();
}
}, 5, TimeUnit.MINUTES);
// Get the data for stage "source" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "source", 2);
// Get the data for stage "transform" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "transform", 2);
// Get the data for stage "sink" in the PreviewStore, should contain two records.
checkPreviewStore(previewRunner, "sink", 2);
// Validate the metrics for preview
validateMetric(2, previewId, "source.records.in", previewRunner);
validateMetric(2, previewId, "source.records.out", previewRunner);
validateMetric(2, previewId, "transform.records.in", previewRunner);
validateMetric(2, previewId, "transform.records.out", previewRunner);
validateMetric(2, previewId, "sink.records.out", previewRunner);
validateMetric(2, previewId, "sink.records.in", previewRunner);
// Check the sink table is not created in the real space.
DataSetManager<Table> sinkManager = getDataset(sinkTableName);
Assert.assertNull(sinkManager.get());
deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sourceTableName));
Assert.assertNotNull(previewRunner.getRunRecord());
}