Search in sources :

Example 46 with ETLStage

use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

The method configureStage of the class PipelineSpecGenerator.

/**
 * Produces the specification for a single pipeline stage.
 *
 * <p>When the stage names an error dataset, that dataset is first created through {@code configurer}
 * using {@code errorDatasetClass} and {@code errorDatasetProperties}. The stage's plugin is then
 * configured, and the input, output and error schemas held by the stage configurer are combined with
 * the stage's inputs and outputs into a {@link StageSpec}.
 *
 * @param stageConnections the user provided configuration for the stage along with its connections
 * @param pluginConfigurer configurer used to configure the stage
 * @return the stage spec paired with the pipeline properties reported by {@code pluginConfigurer}
 */
private ConfiguredStage configureStage(StageConnections stageConnections, DefaultPipelineConfigurer pluginConfigurer) {
    ETLStage etlStage = stageConnections.getStage();
    String name = etlStage.getName();
    ETLPlugin etlPlugin = etlStage.getPlugin();
    String errorDatasetName = etlStage.getErrorDatasetName();

    // Only stages that declare an error dataset need one created.
    boolean hasErrorDataset = !Strings.isNullOrEmpty(errorDatasetName);
    if (hasErrorDataset) {
        configurer.createDataset(errorDatasetName, errorDatasetClass, errorDatasetProperties);
    }

    PluginSpec configuredPlugin = configurePlugin(name, etlPlugin, pluginConfigurer);

    // Schemas are read only after configurePlugin has run (presumably that call is what
    // populates them on the stage configurer -- confirm against configurePlugin).
    Schema stageOutputSchema = pluginConfigurer.getStageConfigurer().getOutputSchema();
    Map<String, Schema> stageInputSchemas = pluginConfigurer.getStageConfigurer().getInputSchemas();

    StageSpec spec = StageSpec.builder(name, configuredPlugin)
        .setErrorDatasetName(errorDatasetName)
        .addInputSchemas(stageInputSchemas)
        .setOutputSchema(stageOutputSchema)
        .setErrorSchema(pluginConfigurer.getStageConfigurer().getErrorSchema())
        .addInputs(stageConnections.getInputs())
        .addOutputs(stageConnections.getOutputs())
        .build();
    return new ConfiguredStage(spec, pluginConfigurer.getPipelineProperties());
}
Also used : ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin)

Example 47 with ETLStage

use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

The method testDAG of the class ETLWorkerTest.

/**
 * Deploys a realtime pipeline shaped as a DAG (one source fanning out to three transforms that feed
 * two sinks), runs the worker, and checks both the records written to each sink and the per-stage
 * record metrics.
 */
@Test
public void testDAG() throws Exception {
    Schema recordSchema = Schema.recordOf("testRecord", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    StructuredRecord one = StructuredRecord.builder(recordSchema).set("x", 1).build();
    StructuredRecord two = StructuredRecord.builder(recordSchema).set("x", 2).build();
    StructuredRecord three = StructuredRecord.builder(recordSchema).set("x", 3).build();
    List<StructuredRecord> sourceRecords = ImmutableList.of(one, two, three);

    /*
     *           +---- valueFilter ---- sink1
     *           |
     * source ---+---- double ------+
     *           |                  +-- sink2
     *           +---- identity ----+
     */
    File sink1Dir = TMP_FOLDER.newFolder();
    File sink2Dir = TMP_FOLDER.newFolder();
    ETLRealtimeConfig pipelineConfig = ETLRealtimeConfig.builder()
        .addStage(new ETLStage("source", MockSource.getPlugin(sourceRecords)))
        .addStage(new ETLStage("sink1", MockSink.getPlugin(sink1Dir)))
        .addStage(new ETLStage("sink2", MockSink.getPlugin(sink2Dir)))
        .addStage(new ETLStage("valueFilter", IntValueFilterTransform.getPlugin("x", 2)))
        .addStage(new ETLStage("double", DoubleTransform.getPlugin()))
        .addStage(new ETLStage("identity", IdentityTransform.getPlugin()))
        .addConnection("source", "valueFilter")
        .addConnection("source", "double")
        .addConnection("source", "identity")
        .addConnection("valueFilter", "sink1")
        .addConnection("double", "sink2")
        .addConnection("identity", "sink2")
        .build();

    ApplicationId dagAppId = NamespaceId.DEFAULT.app("dagTest");
    AppRequest<ETLRealtimeConfig> deployRequest = new AppRequest<>(APP_ARTIFACT, pipelineConfig);
    ApplicationManager application = deployApplication(dagAppId, deployRequest);
    Assert.assertNotNull(application);

    WorkerManager worker = application.getWorkerManager(ETLWorker.NAME);
    worker.start();
    worker.waitForStatus(true, 10, 1);
    try {
        // sink1 sits behind the value filter configured with ("x", 2): records 1 and 3 are expected.
        List<StructuredRecord> sink1Records = MockSink.getRecords(sink1Dir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(ImmutableList.of(one, three), sink1Records);

        // sink2 receives 6 records from "double" plus 3 from "identity" (see the metrics below).
        List<StructuredRecord> sink2Records = MockSink.getRecords(sink2Dir, 0, 10, TimeUnit.SECONDS);
        Assert.assertEquals(9, sink2Records.size());
    } finally {
        // Stop the worker whether or not the assertions above passed.
        stopWorker(worker);
    }

    // Per-stage record counts, checked once the worker has been stopped.
    validateMetric(3, dagAppId, "source.records.out");
    validateMetric(3, dagAppId, "valueFilter.records.in");
    validateMetric(2, dagAppId, "valueFilter.records.out");
    validateMetric(3, dagAppId, "double.records.in");
    validateMetric(6, dagAppId, "double.records.out");
    validateMetric(3, dagAppId, "identity.records.in");
    validateMetric(3, dagAppId, "identity.records.out");
    validateMetric(2, dagAppId, "sink1.records.in");
    validateMetric(9, dagAppId, "sink2.records.in");
}
Also used : WorkerManager(co.cask.cdap.test.WorkerManager) ApplicationManager(co.cask.cdap.test.ApplicationManager) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Schema(co.cask.cdap.api.data.schema.Schema) ETLRealtimeConfig(co.cask.cdap.etl.proto.v2.ETLRealtimeConfig) ApplicationId(co.cask.cdap.proto.id.ApplicationId) File(java.io.File) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) AppRequest(co.cask.cdap.proto.artifact.AppRequest) Test(org.junit.Test)

Example 48 with ETLStage

use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

The method testSingleAction of the class PipelineSpecGeneratorTest.

/**
 * Checks that a batch config containing only a single action stage yields a spec with exactly that
 * stage, carrying over the resource and stage-logging settings from the config.
 */
@Test
public void testSingleAction() {
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .build();
    PipelineSpec generated = specGenerator.generateSpec(etlConfig);

    // The expected action plugin has no properties.
    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();

    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(actionStage)
        .setResources(etlConfig.getResources())
        .setDriverResources(etlConfig.getDriverResources())
        .setClientResources(etlConfig.getClientResources())
        .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
        .build();
    Assert.assertEquals(expected, generated);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 49 with ETLStage

use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

The method testNestedConditionWithCrossConnection of the class PipelineSpecGeneratorTest.

/**
 * Expects an {@link IllegalArgumentException} for a pipeline with nested conditions in which a
 * second source ("anothersource") connects directly to a sink that is also fed by the branches of
 * the nested condition.
 */
@Test(expected = IllegalArgumentException.class)
public void testNestedConditionWithCrossConnection() {
    //
    //                 anothersource -----------------------------------+
    //                                                                  |
    // source -- condition1 --(true)-- t1 -- condition2 --(false)-- t11 +-- sink1
    //               |                           |                      |
    //               |                           +-----(true)--- t12 ---+
    //               |
    //               +--(false)-- t2 -- sink2
    //
    ETLBatchConfig crossConnected = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("anothersource", MOCK_SOURCE))
        .addStage(new ETLStage("condition1", MOCK_CONDITION))
        .addStage(new ETLStage("condition2", MOCK_CONDITION))
        .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t11", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t12", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t2", MOCK_TRANSFORM_B))
        .addStage(new ETLStage("sink1", MOCK_SINK))
        .addStage(new ETLStage("sink2", MOCK_SINK))
        .addConnection("source", "condition1")
        .addConnection("condition1", "t1", true)
        .addConnection("t1", "condition2")
        .addConnection("condition2", "t11", false)
        .addConnection("condition2", "t12", true)
        .addConnection("condition1", "t2", false)
        .addConnection("t11", "sink1")
        .addConnection("t12", "sink1")
        .addConnection("anothersource", "sink1")
        .addConnection("t2", "sink2")
        .build();

    // The test passes only if an IllegalArgumentException is thrown.
    specGenerator.generateSpec(crossConnected);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 50 with ETLStage

use of co.cask.cdap.etl.proto.v2.ETLStage in project cdap by caskdata.

The method testConflictingPipelineProperties of the class PipelineSpecGeneratorTest.

/**
 * Expects an {@link IllegalArgumentException} when two action plugins in the same pipeline set the
 * same pipeline property ("prop1") to different values ("val1" and "val2").
 */
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
    // Register two mock action plugins that disagree on the value of "prop1".
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action1",
        MockPlugin.builder().putPipelineProperty("prop1", "val1").build(),
        artifacts);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action2",
        MockPlugin.builder().putPipelineProperty("prop1", "val2").build(),
        artifacts);

    Map<String, String> noProperties = ImmutableMap.of();
    ETLPlugin firstAction = new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties);
    ETLPlugin secondAction = new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties);
    ETLBatchConfig conflicting = ETLBatchConfig.builder("* * * * *")
        .addStage(new ETLStage("a1", firstAction))
        .addStage(new ETLStage("a2", secondAction))
        .addConnection("a1", "a2")
        .setEngine(Engine.MAPREDUCE)
        .build();

    // The test passes only if an IllegalArgumentException is thrown.
    new BatchPipelineSpecGenerator<>(
        mockConfigurer,
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE),
        Engine.MAPREDUCE)
        .generateSpec(conflicting);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) ArtifactId(co.cask.cdap.api.artifact.ArtifactId) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) MockPluginConfigurer(co.cask.cdap.etl.common.MockPluginConfigurer) Test(org.junit.Test)

Aggregations

ETLStage (co.cask.cdap.etl.proto.v2.ETLStage)94 ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig)75 Test (org.junit.Test)64 ApplicationId (co.cask.cdap.proto.id.ApplicationId)62 ApplicationManager (co.cask.cdap.test.ApplicationManager)58 AppRequest (co.cask.cdap.proto.artifact.AppRequest)57 Schema (co.cask.cdap.api.data.schema.Schema)51 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)50 Table (co.cask.cdap.api.dataset.table.Table)49 WorkflowManager (co.cask.cdap.test.WorkflowManager)44 KeyValueTable (co.cask.cdap.api.dataset.lib.KeyValueTable)39 HashSet (java.util.HashSet)16 ArrayList (java.util.ArrayList)15 ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin)14 HashMap (java.util.HashMap)14 DataStreamsConfig (co.cask.cdap.etl.proto.v2.DataStreamsConfig)11 TimeoutException (java.util.concurrent.TimeoutException)11 TopicNotFoundException (co.cask.cdap.api.messaging.TopicNotFoundException)7 SparkManager (co.cask.cdap.test.SparkManager)7 BatchPipelineSpec (co.cask.cdap.etl.batch.BatchPipelineSpec)6