Search in sources:

Example 21 with ETLBatchConfig

use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testBadErrorTransformInput.

/**
 * Expects spec generation to throw {@link IllegalArgumentException} for a pipeline in which
 * the error stage takes its input from the joiner stage.
 */
@Test(expected = IllegalArgumentException.class)
public void testBadErrorTransformInput() {
    // Pipeline under test: source --> joiner --> error --> sink
    ETLBatchConfig pipeline = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("joiner", MOCK_JOINER))
        .addStage(new ETLStage("error", MOCK_ERROR))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "joiner")
        .addConnection("joiner", "error")
        .addConnection("error", "sink")
        .build();

    // The expected exception must come from here.
    specGenerator.generateSpec(pipeline);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 22 with ETLBatchConfig

use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineTest method testWordCount.

/**
 * Deploys and runs a three-stage pipeline (mock source, a "WordCount" plugin of the given
 * type, mock sink), feeds it three sentences and asserts the per-word counts read back from
 * the sink.
 *
 * @param pluginType plugin type used for the "WordCount" stage; it is also appended to the
 *                   dataset and application names
 * @throws Exception propagated from the deploy/run/read calls made by the test
 */
public void testWordCount(String pluginType) throws Exception {
    String inputName = "wcInput-" + pluginType;
    String outputName = "wcOutput-" + pluginType;

    // Pipeline definition: wcInput --> middle (WordCount on field "text") --> wcOutput
    ETLStage source = new ETLStage("wcInput", MockSource.getPlugin(inputName));
    ETLStage sink = new ETLStage("wcOutput", MockSink.getPlugin(outputName));
    Map<String, String> wordCountProperties = new HashMap<>();
    wordCountProperties.put("field", "text");
    ETLStage wordCount = new ETLStage("middle", new ETLPlugin("WordCount", pluginType, wordCountProperties, null));
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addStage(wordCount)
        .addConnection(source.getName(), wordCount.getName())
        .addConnection(wordCount.getName(), sink.getName())
        .build();

    // Deploy the pipeline as an application
    ApplicationId pipelineId = NamespaceId.DEFAULT.app("wcTestPipeline-" + pluginType);
    ApplicationManager appManager = deployApplication(pipelineId, new AppRequest<>(APP_ARTIFACT, pipelineConfig));

    // Write one single-field record per input sentence
    Schema inputSchema = Schema.recordOf("text", Schema.Field.of("text", Schema.of(Schema.Type.STRING)));
    DataSetManager<Table> inputManager = getDataset(inputName);
    List<StructuredRecord> inputRecords = new ArrayList<>();
    String[] sentences = {"Hello World", "Hello my name is Hal", "Hello my name is Sam"};
    for (String sentence : sentences) {
        inputRecords.add(StructuredRecord.builder(inputSchema).set("text", sentence).build());
    }
    MockSource.writeInput(inputManager, inputRecords);

    // Run the workflow, waiting up to four minutes for it to finish
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForFinish(4, TimeUnit.MINUTES);

    // Collect the sink output as a set so that record order does not matter
    DataSetManager<Table> outputManager = getDataset(outputName);
    Set<StructuredRecord> outputRecords = new HashSet<>(MockSink.readOutput(outputManager));

    // Expected counts; words[i] pairs with counts[i]
    String[] words = {"Hello", "World", "my", "name", "is", "Hal", "Sam"};
    long[] counts = {3L, 1L, 2L, 2L, 2L, 1L, 1L};
    Set<StructuredRecord> expected = new HashSet<>();
    for (int i = 0; i < words.length; i++) {
        expected.add(StructuredRecord.builder(WordCountAggregator.OUTPUT_SCHEMA)
            .set("word", words[i])
            .set("count", counts[i])
            .build());
    }

    Assert.assertEquals(expected, outputRecords);
}
Also used : ApplicationManager(co.cask.cdap.test.ApplicationManager) Table(co.cask.cdap.api.dataset.table.Table) KeyValueTable(co.cask.cdap.api.dataset.lib.KeyValueTable) HashMap(java.util.HashMap) Schema(co.cask.cdap.api.data.schema.Schema) WorkflowManager(co.cask.cdap.test.WorkflowManager) ArrayList(java.util.ArrayList) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ApplicationId(co.cask.cdap.proto.id.ApplicationId) HashSet(java.util.HashSet)

Example 23 with ETLBatchConfig

use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testDifferentInputSchemasForAction.

/**
 * Checks that a spec is generated for a pipeline whose final action stage is fed by two
 * sinks that receive different schemas, and that the spec matches the expected one.
 */
@Test
public void testDifferentInputSchemasForAction() {
    /*
     *           ---- tA ---- sinkA ----
     *           |                     |
     * source ---                      |--- action
     *           |                     |
     *           ---- tB ---- sinkB ----
     *
     * sinkA receives SCHEMA_A and sinkB receives SCHEMA_B. Both sinks connect to the action;
     * the expected action stage lists only its inputs and no input schemas, and the test
     * asserts that a spec is produced (no exception is expected).
     */
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("tA", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("tB", MOCK_TRANSFORM_B))
        .addStage(new ETLStage("sinkA", MOCK_SINK))
        .addStage(new ETLStage("sinkB", MOCK_SINK))
        .addStage(new ETLStage("action", MOCK_ACTION))
        .addConnection("source", "tA")
        .addConnection("source", "tB")
        .addConnection("tA", "sinkA")
        .addConnection("tB", "sinkB")
        .addConnection("sinkA", "action")
        .addConnection("sinkB", "action")
        .build();
    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Map<String, String> noProperties = ImmutableMap.of();
    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(
            StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", noProperties, ARTIFACT_ID))
                .setOutputSchema(SCHEMA_A)
                .addOutputs("tA", "tB")
                .build())
        .addStage(
            StageSpec.builder("sinkA", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
                .addInputSchema("tA", SCHEMA_A)
                .addInputs("tA")
                .addOutputs("action")
                .setErrorSchema(SCHEMA_A)
                .build())
        .addStage(
            StageSpec.builder("sinkB", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
                .addInputSchema("tB", SCHEMA_B)
                .addInputs("tB")
                .addOutputs("action")
                .setErrorSchema(SCHEMA_B)
                .build())
        .addStage(
            StageSpec.builder("tA", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", noProperties, ARTIFACT_ID))
                .addInputSchema("source", SCHEMA_A)
                .setOutputSchema(SCHEMA_A)
                .addInputs("source")
                .addOutputs("sinkA")
                .setErrorSchema(SCHEMA_B)
                .build())
        .addStage(
            StageSpec.builder("tB", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", noProperties, ARTIFACT_ID))
                .addInputSchema("source", SCHEMA_A)
                .setOutputSchema(SCHEMA_B)
                .addInputs("source")
                .addOutputs("sinkB")
                .setErrorSchema(SCHEMA_A)
                .build())
        .addStage(
            StageSpec.builder("action", new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID))
                .addInputs("sinkA", "sinkB")
                .build())
        // Pipeline-level settings are taken straight from the input config
        .addConnections(pipelineConfig.getConnections())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .build();

    Assert.assertEquals(expected, generated);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 24 with ETLBatchConfig

use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testUniqueStageNames.

/**
 * Expects spec generation to throw {@link IllegalArgumentException} when two stages are
 * registered under the same name ("t1").
 */
@Test(expected = IllegalArgumentException.class)
public void testUniqueStageNames() {
    ETLBatchConfig pipeline = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        // Two different transforms share the name "t1"
        .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t1", MOCK_TRANSFORM_B))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "sink")
        .build();

    specGenerator.generateSpec(pipeline);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 25 with ETLBatchConfig

use of co.cask.cdap.etl.proto.v2.ETLBatchConfig in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testConnectionIntoSource.

/**
 * Expects spec generation to throw {@link IllegalArgumentException} when a connection
 * points into the source stage.
 */
@Test(expected = IllegalArgumentException.class)
public void testConnectionIntoSource() {
    ETLBatchConfig pipeline = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addStage(new ETLStage("transform", MOCK_TRANSFORM_A))
        .addConnection("source", "sink")
        // Connection that ends at the source stage
        .addConnection("transform", "source")
        .build();

    specGenerator.generateSpec(pipeline);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Aggregations

ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 47, ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 46, ApplicationId (co.cask.cdap.proto.id.ApplicationId): 32, ApplicationManager (co.cask.cdap.test.ApplicationManager): 30, WorkflowManager (co.cask.cdap.test.WorkflowManager): 30, AppRequest (co.cask.cdap.proto.artifact.AppRequest): 27, Test (org.junit.Test): 27, Table (co.cask.cdap.api.dataset.table.Table): 26, StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord): 25, KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable): 24, Schema (co.cask.cdap.api.data.schema.Schema): 22, ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 9, ArrayList (java.util.ArrayList): 7, HashMap (java.util.HashMap): 7, HashSet (java.util.HashSet): 6, BatchPipelineSpec (co.cask.cdap.etl.batch.BatchPipelineSpec): 4, Resources (co.cask.cdap.api.Resources): 2, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 2, PreviewManager (co.cask.cdap.app.preview.PreviewManager): 2, PreviewRunner (co.cask.cdap.app.preview.PreviewRunner): 2