Search in sources :

Example 91 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

The class PipelineTest, method testWordCount.

/**
 * Runs a three-stage batch pipeline (mock source -> "WordCount" plugin -> mock sink) once
 * and asserts on the word/count records read back from the sink.
 *
 * @param pluginType plugin type given to the "WordCount" stage; it is also appended to the
 *                   input/output dataset names and to the application name
 * @throws Exception propagated from the deploy, run, or dataset calls made by this test
 */
public void testWordCount(String pluginType) throws Exception {
    String inputName = "wcInput-" + pluginType;
    String outputName = "wcOutput-" + pluginType;

    // Stages: the counting stage reads the "text" field of each input record.
    ETLStage source = new ETLStage("wcInput", MockSource.getPlugin(inputName));
    ETLStage sink = new ETLStage("wcOutput", MockSink.getPlugin(outputName));
    Map<String, String> wordCountProperties = new HashMap<>();
    wordCountProperties.put("field", "text");
    ETLPlugin wordCountPlugin = new ETLPlugin("WordCount", pluginType, wordCountProperties, null);
    ETLStage agg = new ETLStage("middle", wordCountPlugin);

    // Wire the stages as source -> agg -> sink.
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
            .addStage(source)
            .addStage(sink)
            .addStage(agg)
            .addConnection(source.getName(), agg.getName())
            .addConnection(agg.getName(), sink.getName())
            .build();

    // Deploy the pipeline as an application in the default namespace.
    ApplicationId pipelineId = NamespaceId.DEFAULT.app("wcTestPipeline-" + pluginType);
    AppRequest<ETLBatchConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager appManager = deployApplication(pipelineId, deployRequest);

    // Write three single-field records into the source dataset.
    Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
    DataSetManager<Table> inputManager = getDataset(inputName);
    List<StructuredRecord> inputRecords = new ArrayList<>();
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello World").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Hal").build());
    inputRecords.add(StructuredRecord.builder(inputSchema).set("text", "Hello my name is Sam").build());
    MockSource.writeInput(inputManager, inputRecords);

    // Run the workflow once and wait (up to 4 minutes) for it to complete.
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 4, TimeUnit.MINUTES);

    // Collect the sink output as a set so record order does not matter.
    DataSetManager<Table> outputManager = getDataset(outputName);
    Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));

    // Expected number of occurrences of each word across the three inputs.
    Map<String, Long> expectedCounts = new HashMap<>();
    expectedCounts.put("Hello", 3L);
    expectedCounts.put("World", 1L);
    expectedCounts.put("my", 2L);
    expectedCounts.put("name", 2L);
    expectedCounts.put("is", 2L);
    expectedCounts.put("Hal", 1L);
    expectedCounts.put("Sam", 1L);

    Set<StructuredRecord> expected = new HashSet<>();
    for (Map.Entry<String, Long> wordCount : expectedCounts.entrySet()) {
        expected.add(StructuredRecord.builder(WordCountAggregator.OUTPUT_SCHEMA)
                .set("word", wordCount.getKey())
                .set("count", wordCount.getValue())
                .build());
    }
    Assert.assertEquals(expected, outputRecords);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) Schema(io.cdap.cdap.api.data.schema.Schema) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 92 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

The class PipelineSpecGeneratorTest, method testSingleAction.

/**
 * Generates a spec for a pipeline whose only stage is a single action and compares it
 * with a hand-built {@code BatchPipelineSpec} containing that one stage.
 */
@Test
public void testSingleAction() {
    ETLStage actionStage = new ETLStage("action", MOCK_ACTION);
    ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
            .addStage(actionStage)
            .build();
    PipelineSpec actual = specGenerator.generateSpec(config);

    // The expected stage carries no plugin properties.
    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    StageSpec expectedStage = StageSpec.builder("action", actionPlugin).build();

    // Resource and logging settings are copied from the config itself.
    PipelineSpec expected = BatchPipelineSpec.builder()
            .addStage(expectedStage)
            .setResources(config.getResources())
            .setDriverResources(config.getDriverResources())
            .setClientResources(config.getClientResources())
            .setStageLoggingEnabled(config.isStageLoggingEnabled())
            .build();
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 93 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

The class PipelineSpecGeneratorTest, method testNestedConditionWithCrossConnection.

/**
 * Builds a pipeline with two nested conditions plus a second source wired straight into
 * a sink that is also reached through the condition branches, and expects an
 * {@link IllegalArgumentException} from building the config or generating the spec.
 */
@Test(expected = IllegalArgumentException.class)
public void testNestedConditionWithCrossConnection() {
    // Topology (edges as wired below):
    //   source        -> condition1
    //   condition1    -> t1          (true branch)
    //   condition1    -> t2          (false branch)
    //   t1            -> condition2
    //   condition2    -> t11         (false branch)
    //   condition2    -> t12         (true branch)
    //   t11, t12      -> sink1
    //   anothersource -> sink1       (connects directly, outside both conditions)
    //   t2            -> sink2
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
            .addStage(new ETLStage("source", MOCK_SOURCE))
            .addStage(new ETLStage("anothersource", MOCK_SOURCE))
            .addStage(new ETLStage("condition1", MOCK_CONDITION))
            .addStage(new ETLStage("condition2", MOCK_CONDITION))
            .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
            .addStage(new ETLStage("t11", MOCK_TRANSFORM_A))
            .addStage(new ETLStage("t12", MOCK_TRANSFORM_A))
            .addStage(new ETLStage("t2", MOCK_TRANSFORM_B))
            .addStage(new ETLStage("sink1", MOCK_SINK))
            .addStage(new ETLStage("sink2", MOCK_SINK))
            .addConnection("source", "condition1")
            .addConnection("condition1", "t1", true)
            .addConnection("t1", "condition2")
            .addConnection("condition2", "t11", false)
            .addConnection("condition2", "t12", true)
            .addConnection("condition1", "t2", false)
            .addConnection("t11", "sink1")
            .addConnection("t12", "sink1")
            .addConnection("anothersource", "sink1")
            .addConnection("t2", "sink2")
            .build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 94 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

The class PipelineSpecGeneratorTest, method testConflictingPipelineProperties.

/**
 * Registers two mock action plugins that set the same pipeline property ("prop1") to
 * different values, chains them in one pipeline, and expects an
 * {@link IllegalArgumentException} from building the config or generating the spec.
 */
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
    // Register the two mock plugins; they differ only in the value given to "prop1".
    MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
    pluginConfigurer.addMockPlugin(
            Action.PLUGIN_TYPE,
            "action1",
            MockPlugin.builder().putPipelineProperty("prop1", "val1").build(),
            artifactIds);
    pluginConfigurer.addMockPlugin(
            Action.PLUGIN_TYPE,
            "action2",
            MockPlugin.builder().putPipelineProperty("prop1", "val2").build(),
            artifactIds);

    // Pipeline: a1 -> a2, each stage backed by one of the plugins above.
    Map<String, String> noProperties = ImmutableMap.of();
    ETLStage firstAction = new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties));
    ETLStage secondAction = new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties));
    ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
            .addStage(firstAction)
            .addStage(secondAction)
            .addConnection("a1", "a2")
            .setEngine(Engine.MAPREDUCE)
            .build();

    // Use a generator bound to the mock configurer rather than the shared specGenerator.
    new BatchPipelineSpecGenerator<>(
            pluginConfigurer,
            ImmutableSet.of(BatchSource.PLUGIN_TYPE),
            ImmutableSet.of(BatchSink.PLUGIN_TYPE),
            Engine.MAPREDUCE)
            .generateSpec(config);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ArtifactId(co.cask.cdap.api.artifact.ArtifactId) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) MockPluginConfigurer(co.cask.cdap.etl.common.MockPluginConfigurer) Test(org.junit.Test)

Example 95 with ETLBatchConfig

Use of io.cdap.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

The class PipelineSpecGeneratorTest, method testCycle.

/**
 * Builds a pipeline whose connections contain the loop t1 -> t2 -> t1 and expects an
 * {@link IllegalStateException} from building the config or generating the spec.
 */
@Test(expected = IllegalStateException.class)
public void testCycle() {
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
            .addStage(new ETLStage("source", MOCK_SOURCE))
            .addStage(new ETLStage("sink", MOCK_SINK))
            .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
            .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
            .addConnection("source", "t1")
            .addConnection("t1", "t2")
            // This back edge, together with t1 -> t2 above, forms the cycle.
            .addConnection("t2", "t1")
            .addConnection("t2", "sink")
            .build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Aggregations

ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 121
ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 121
Test (org.junit.Test): 117
ApplicationId (io.cdap.cdap.proto.id.ApplicationId): 79
AppRequest (io.cdap.cdap.proto.artifact.AppRequest): 76
ApplicationManager (io.cdap.cdap.test.ApplicationManager): 74
ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 72
WorkflowManager (io.cdap.cdap.test.WorkflowManager): 72
ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 70
Table (io.cdap.cdap.api.dataset.table.Table): 68
StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 64
Schema (io.cdap.cdap.api.data.schema.Schema): 58
KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 46
AppRequest (co.cask.cdap.proto.artifact.AppRequest): 43
ApplicationId (co.cask.cdap.proto.id.ApplicationId): 43
ApplicationManager (co.cask.cdap.test.ApplicationManager): 40
WorkflowManager (co.cask.cdap.test.WorkflowManager): 39
Table (co.cask.cdap.api.dataset.table.Table): 36
KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 33
ArrayList (java.util.ArrayList): 33