use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PipelineTest method testWordCount.
/**
 * Deploys a three-stage batch pipeline (mock source, a "WordCount" plugin, mock sink), runs its
 * workflow once and checks the word counts that end up in the sink.
 *
 * @param pluginType plugin type passed to the "WordCount" stage; also used as the suffix of the
 *                   dataset names and of the application name
 * @throws Exception if deploying, running or reading back the pipeline fails
 */
public void testWordCount(String pluginType) throws Exception {
  String sourceDataset = "wcInput-" + pluginType;
  String sinkDataset = "wcOutput-" + pluginType;

  // assemble the pipeline config: wcInput -> middle -> wcOutput
  ETLStage sourceStage = new ETLStage("wcInput", MockSource.getPlugin(sourceDataset));
  ETLStage sinkStage = new ETLStage("wcOutput", MockSink.getPlugin(sinkDataset));
  Map<String, String> wordCountProps = new HashMap<>();
  wordCountProps.put("field", "text");
  ETLStage countStage = new ETLStage("middle", new ETLPlugin("WordCount", pluginType, wordCountProps, null));
  ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(sourceStage)
    .addStage(sinkStage)
    .addStage(countStage)
    .addConnection(sourceStage.getName(), countStage.getName())
    .addConnection(countStage.getName(), sinkStage.getName())
    .build();

  // deploy the pipeline as an application
  ApplicationId appId = NamespaceId.DEFAULT.app("wcTestPipeline-" + pluginType);
  ApplicationManager appManager = deployApplication(appId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));

  // feed three sentences to the source, one record per sentence
  Schema recordSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
  DataSetManager<Table> sourceManager = getDataset(sourceDataset);
  String[] sentences = {"Hello World", "Hello my name is Hal", "Hello my name is Sam"};
  List<StructuredRecord> records = new ArrayList<>();
  for (String sentence : sentences) {
    records.add(StructuredRecord.builder(recordSchema).set("text", sentence).build());
  }
  MockSource.writeInput(sourceManager, records);

  // run the workflow and wait (up to 4 minutes) for it to complete
  WorkflowManager workflow = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflow.start();
  workflow.waitForRun(ProgramRunStatus.COMPLETED, 4, TimeUnit.MINUTES);

  // collect what the sink received; compared as a set, so output order is irrelevant
  DataSetManager<Table> sinkManager = getDataset(sinkDataset);
  Set<StructuredRecord> actual = new HashSet<>(MockSink.readOutput(sinkManager));

  // expected (word, count) pairs; words[i] pairs with counts[i]
  String[] words = {"Hello", "World", "my", "name", "is", "Hal", "Sam"};
  long[] counts = {3L, 1L, 2L, 2L, 2L, 1L, 1L};
  Set<StructuredRecord> expected = new HashSet<>();
  for (int i = 0; i < words.length; i++) {
    expected.add(StructuredRecord.builder(WordCountAggregator.OUTPUT_SCHEMA)
                   .set("word", words[i])
                   .set("count", counts[i])
                   .build());
  }

  Assert.assertEquals(expected, actual);
}
use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testSingleAction.
/**
 * Verifies that a config holding a single action stage and no connections yields a spec with just
 * that stage, and with the resource and stage-logging settings taken from the config.
 */
@Test
public void testSingleAction() {
  ETLStage actionStage = new ETLStage("action", MOCK_ACTION);
  ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
    .addStage(actionStage)
    .build();
  PipelineSpec actual = specGenerator.generateSpec(config);

  // the expected plugin spec carries no properties
  Map<String, String> noProperties = ImmutableMap.of();
  PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
  PipelineSpec expected = BatchPipelineSpec.builder()
    .addStage(StageSpec.builder("action", actionPlugin).build())
    .setResources(config.getResources())
    .setDriverResources(config.getDriverResources())
    .setClientResources(config.getClientResources())
    .setStageLoggingEnabled(config.isStageLoggingEnabled())
    .build();

  Assert.assertEquals(expected, actual);
}
use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testNestedConditionWithCrossConnection.
/**
 * Verifies that spec generation throws an IllegalArgumentException for a pipeline with nested
 * conditions where a second source is wired directly into a sink fed by the condition branches.
 */
@Test(expected = IllegalArgumentException.class)
public void testNestedConditionWithCrossConnection() {
  // Connections built below:
  //   source -> condition1
  //   condition1 --true--> t1 -> condition2
  //   condition1 --false-> t2 -> sink2
  //   condition2 --false-> t11 -> sink1
  //   condition2 --true--> t12 -> sink1
  //   anothersource -> sink1   (presumably the "cross connection" that gets rejected)
  ETLBatchConfig crossConnected = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("anothersource", MOCK_SOURCE))
    .addStage(new ETLStage("condition1", MOCK_CONDITION))
    .addStage(new ETLStage("condition2", MOCK_CONDITION))
    .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t11", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t12", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t2", MOCK_TRANSFORM_B))
    .addStage(new ETLStage("sink1", MOCK_SINK))
    .addStage(new ETLStage("sink2", MOCK_SINK))
    .addConnection("source", "condition1")
    .addConnection("condition1", "t1", true)
    .addConnection("t1", "condition2")
    .addConnection("condition2", "t11", false)
    .addConnection("condition2", "t12", true)
    .addConnection("condition1", "t2", false)
    .addConnection("t11", "sink1")
    .addConnection("t12", "sink1")
    .addConnection("anothersource", "sink1")
    .addConnection("t2", "sink2")
    .build();

  specGenerator.generateSpec(crossConnected);
}
use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testConflictingPipelineProperties.
/**
 * Verifies that spec generation throws an IllegalArgumentException when two plugins in the same
 * pipeline set the pipeline property "prop1" to different values.
 */
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
  // register two mock action plugins that disagree on the value of "prop1"
  MockPluginConfigurer configurer = new MockPluginConfigurer();
  Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);
  configurer.addMockPlugin(Action.PLUGIN_TYPE, "action1",
                           MockPlugin.builder().putPipelineProperty("prop1", "val1").build(),
                           artifacts);
  configurer.addMockPlugin(Action.PLUGIN_TYPE, "action2",
                           MockPlugin.builder().putPipelineProperty("prop1", "val2").build(),
                           artifacts);

  // pipeline: a1 -> a2, one stage per conflicting plugin
  Map<String, String> noProperties = ImmutableMap.of();
  ETLStage firstAction = new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties));
  ETLStage secondAction = new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties));
  ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
    .addStage(firstAction)
    .addStage(secondAction)
    .addConnection("a1", "a2")
    .setEngine(Engine.MAPREDUCE)
    .build();

  // uses a dedicated generator backed by the mock configurer set up above
  new BatchPipelineSpecGenerator<>(configurer,
                                   ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                   ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                   Engine.MAPREDUCE)
    .generateSpec(config);
}
use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testCycle.
/**
 * Verifies that spec generation throws an IllegalStateException when the connections form a cycle
 * (t1 -> t2 -> t1).
 */
@Test(expected = IllegalStateException.class)
public void testCycle() {
  ETLBatchConfig cyclicConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("source", MOCK_SOURCE))
    .addStage(new ETLStage("sink", MOCK_SINK))
    .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
    .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
    .addConnection("source", "t1")
    .addConnection("t1", "t2")
    // back edge that closes the loop
    .addConnection("t2", "t1")
    .addConnection("t2", "sink")
    .build();

  specGenerator.generateSpec(cyclicConfig);
}
Aggregations