Example use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap (by caskdata): class PipelineSpecGenerator, method configureStage.
/**
 * Configures a single pipeline stage and produces its spec.
 *
 * @param stageConnections the user provided configuration for the stage along with its connections
 * @param pluginConfigurer configurer used to configure the stage
 * @return the spec for the stage
 */
private ConfiguredStage configureStage(StageConnections stageConnections, DefaultPipelineConfigurer pluginConfigurer) {
  ETLStage etlStage = stageConnections.getStage();
  String name = etlStage.getName();

  // Create the error dataset up front if the stage declared one.
  // NOTE(review): 'configurer', 'errorDatasetClass' and 'errorDatasetProperties' appear to be
  // fields of the enclosing class — confirm against the full source.
  String errorDatasetName = etlStage.getErrorDatasetName();
  if (!Strings.isNullOrEmpty(errorDatasetName)) {
    configurer.createDataset(errorDatasetName, errorDatasetClass, errorDatasetProperties);
  }

  // Configure the plugin itself, then collect the schemas the configurer recorded.
  PluginSpec pluginSpec = configurePlugin(name, etlStage.getPlugin(), pluginConfigurer);
  Schema outputSchema = pluginConfigurer.getStageConfigurer().getOutputSchema();
  Map<String, Schema> inputSchemas = pluginConfigurer.getStageConfigurer().getInputSchemas();

  StageSpec stageSpec = StageSpec.builder(name, pluginSpec)
    .setErrorDatasetName(errorDatasetName)
    .addInputSchemas(inputSchemas)
    .setOutputSchema(outputSchema)
    .setErrorSchema(pluginConfigurer.getStageConfigurer().getErrorSchema())
    .addInputs(stageConnections.getInputs())
    .addOutputs(stageConnections.getOutputs())
    .build();
  return new ConfiguredStage(stageSpec, pluginConfigurer.getPipelineProperties());
}
Example use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap (by caskdata): class ETLWorkerTest, method testDAG.
@Test
public void testDAG() throws Exception {
  // Three input records with x = 1, 2, 3.
  Schema schema = Schema.recordOf("testRecord", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
  StructuredRecord rec1 = StructuredRecord.builder(schema).set("x", 1).build();
  StructuredRecord rec2 = StructuredRecord.builder(schema).set("x", 2).build();
  StructuredRecord rec3 = StructuredRecord.builder(schema).set("x", 3).build();
  List<StructuredRecord> input = ImmutableList.of(rec1, rec2, rec3);
  /*
   * Pipeline DAG under test:
   *
   *          ----- value filter ------- sink1
   *          |
   * source --------- double --------
   *          |                     |---- sink2
   *          ----- identity --------
   */
  File sink1Dir = TMP_FOLDER.newFolder();
  File sink2Dir = TMP_FOLDER.newFolder();
  ETLRealtimeConfig etlConfig = ETLRealtimeConfig.builder()
    .addStage(new ETLStage("source", MockSource.getPlugin(input)))
    .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Dir)))
    .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Dir)))
    .addStage(new ETLStage("valueFilter", IntValueFilterTransform.getPlugin("x", 2)))
    .addStage(new ETLStage("double", DoubleTransform.getPlugin()))
    .addStage(new ETLStage("identity", IdentityTransform.getPlugin()))
    .addConnection("source", "valueFilter")
    .addConnection("source", "double")
    .addConnection("source", "identity")
    .addConnection("valueFilter", "sink1")
    .addConnection("double", "sink2")
    .addConnection("identity", "sink2")
    .build();
  ApplicationId appId = NamespaceId.DEFAULT.app("dagTest");
  AppRequest<ETLRealtimeConfig> appRequest = new AppRequest<>(APP_ARTIFACT, etlConfig);
  ApplicationManager appManager = deployApplication(appId, appRequest);
  Assert.assertNotNull(appManager);

  WorkerManager workerManager = appManager.getWorkerManager(ETLWorker.NAME);
  workerManager.start();
  workerManager.waitForStatus(true, 10, 1);
  try {
    // The value filter drops x == 2, so sink1 sees records 1 and 3.
    List<StructuredRecord> sink1Records = MockSink.getRecords(sink1Dir, 0, 10, TimeUnit.SECONDS);
    Assert.assertEquals(ImmutableList.of(rec1, rec3), sink1Records);
    // sink2 receives 6 records from 'double' plus 3 from 'identity'.
    List<StructuredRecord> sink2Records = MockSink.getRecords(sink2Dir, 0, 10, TimeUnit.SECONDS);
    Assert.assertEquals(9, sink2Records.size());
  } finally {
    stopWorker(workerManager);
  }

  // Per-stage record-count metrics for the whole DAG.
  validateMetric(3, appId, "source.records.out");
  validateMetric(3, appId, "valueFilter.records.in");
  validateMetric(2, appId, "valueFilter.records.out");
  validateMetric(3, appId, "double.records.in");
  validateMetric(6, appId, "double.records.out");
  validateMetric(3, appId, "identity.records.in");
  validateMetric(3, appId, "identity.records.out");
  validateMetric(2, appId, "sink1.records.in");
  validateMetric(9, appId, "sink2.records.in");
}
Example use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap (by caskdata): class PipelineSpecGeneratorTest, method testSingleAction.
@Test
public void testSingleAction() {
  // A pipeline consisting of nothing but a single action stage.
  ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("action", MOCK_ACTION))
    .build();
  PipelineSpec actual = specGenerator.generateSpec(config);

  Map<String, String> emptyProperties = ImmutableMap.of();
  PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyProperties, ARTIFACT_ID);
  PipelineSpec expected = BatchPipelineSpec.builder()
    .addStage(StageSpec.builder("action", actionPlugin).build())
    .setResources(config.getResources())
    .setDriverResources(config.getDriverResources())
    .setClientResources(config.getClientResources())
    .setStageLoggingEnabled(config.isStageLoggingEnabled())
    .build();
  Assert.assertEquals(expected, actual);
}
Example use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap (by caskdata): class PipelineSpecGeneratorTest, method testNestedConditionWithCrossConnection.
@Test(expected = IllegalArgumentException.class)
public void testNestedConditionWithCrossConnection() {
  /*
   * Pipeline under test — 'anothersource' connects into a nested-condition
   * branch from outside it, which spec generation must reject:
   *
   * anothersource-------------
   *                          |
   * source--condition1-----t1-----condition2------t11------sink1
   *             |                     |                     |
   *             |                     |-----------t12--------
   *             t2---------sink2
   */
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("anothersource", MOCK_SOURCE))
    .addStage(new ETLStage("condition1", MOCK_CONDITION))
    .addStage(new ETLStage("condition2", MOCK_CONDITION))
    .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t11", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t12", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t2", MOCK_TRANSFORM_B))
    .addStage(new ETLStage("sink1", MOCK_SINK))
    .addStage(new ETLStage("sink2", MOCK_SINK))
    .addConnection("source", "condition1")
    .addConnection("condition1", "t1", true)
    .addConnection("t1", "condition2")
    .addConnection("condition2", "t11", false)
    .addConnection("condition2", "t12", true)
    .addConnection("condition1", "t2", false)
    .addConnection("t11", "sink1")
    .addConnection("t12", "sink1")
    .addConnection("anothersource", "sink1")
    .addConnection("t2", "sink2")
    .build();
  // Expected to throw IllegalArgumentException (see @Test annotation).
  specGenerator.generateSpec(etlConfig);
}
Example use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap (by caskdata): class PipelineSpecGeneratorTest, method testConflictingPipelineProperties.
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
  // Register two mock action plugins that set the same pipeline property
  // to different values; spec generation must reject the conflict.
  MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
  Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
  pluginConfigurer.addMockPlugin(
    Action.PLUGIN_TYPE, "action1",
    MockPlugin.builder().putPipelineProperty("prop1", "val1").build(), artifactIds);
  pluginConfigurer.addMockPlugin(
    Action.PLUGIN_TYPE, "action2",
    MockPlugin.builder().putPipelineProperty("prop1", "val2").build(), artifactIds);

  Map<String, String> empty = ImmutableMap.of();
  ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, empty)))
    .addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, empty)))
    .addConnection("a1", "a2")
    .setEngine(Engine.MAPREDUCE)
    .build();
  // Expected to throw IllegalArgumentException (see @Test annotation).
  new BatchPipelineSpecGenerator<>(pluginConfigurer,
                                  ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                  ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                  Engine.MAPREDUCE).generateSpec(config);
}
Aggregations