Search in sources :

Example 56 with ETLConfig

use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by cdapio.

the class PipelineSpecGeneratorTest method testConflictingInputSchemas.

@Test(expected = IllegalArgumentException.class)
public void testConflictingInputSchemas() throws ValidationException {
    /*
     *           ---- transformA ----
     *           |                  |
     * source ---                   |--- sink
     *           |                  |
     *           ---- transformB ----
     *
     * sink gets schema A and schema B as input, should fail
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("sink", MOCK_SINK)).addStage(new ETLStage("tA", MOCK_TRANSFORM_A)).addStage(new ETLStage("tB", MOCK_TRANSFORM_B)).addConnection("source", "tA").addConnection("source", "tB").addConnection("tA", "sink").addConnection("tB", "sink").build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 57 with ETLConfig

use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by cdapio.

the class PipelineSpecGeneratorTest method testGenerateSpec.

@Test
public void testGenerateSpec() throws ValidationException {
    /*
     *           ---- t1 ------------
     *           |            |      |
     * source ---             |      |--- t3 --- sink1
     *           |            |      |
     *           ------------ t2 --------------- sink2
     *           |                        |
     *           |                        |
     *           -------------------------
     */
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_A)).addStage(new ETLStage("t3", MOCK_TRANSFORM_B)).addConnection("source", "t1").addConnection("source", "t2").addConnection("source", "sink2").addConnection("t1", "t2").addConnection("t1", "t3").addConnection("t1", "sink2").addConnection("t2", "sink2").addConnection("t2", "t3").addConnection("t3", "sink1").setNumOfRecordsPreview(100).build();
    // test the spec generated is correct, with the right input and output schemas and artifact information.
    BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
    Map<String, String> emptyMap = ImmutableMap.of();
    PipelineSpec expected = BatchPipelineSpec.builder().addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID)).addOutput(SCHEMA_A, "t1", "t2", "sink2").build()).addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchema("t3", SCHEMA_B).setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A)).setErrorSchema(SCHEMA_A).build()).addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchema("source", SCHEMA_A).addOutput(SCHEMA_A, "t2", "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A)).addOutput(SCHEMA_A, "t3", "sink2").setErrorSchema(SCHEMA_B).build()).addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID)).addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A)).addOutput(SCHEMA_B, "sink1").setErrorSchema(SCHEMA_A).build()).addConnections(etlConfig.getConnections()).setResources(etlConfig.getResources()).setDriverResources(new Resources(1024, 1)).setClientResources(new Resources(1024, 1)).setStageLoggingEnabled(etlConfig.isStageLoggingEnabled()).setNumOfRecordsPreview(etlConfig.getNumOfRecordsPreview()).build();
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Resources(io.cdap.cdap.api.Resources) Test(org.junit.Test)

Example 58 with ETLConfig

use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by cdapio.

the class PipelineSpecGeneratorTest method testNestedValidCondition.

@Test
public void testNestedValidCondition() throws ValidationException {
    // source--condition1-----t1-----condition2------t11------sink1
    // |                      |                     |
    // |                      |-----------t12--------
    // t2---------sink2
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("condition1", MOCK_CONDITION)).addStage(new ETLStage("condition2", MOCK_CONDITION)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t11", MOCK_TRANSFORM_A)).addStage(new ETLStage("t12", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_B)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addConnection("source", "condition1").addConnection("condition1", "t1", true).addConnection("t1", "condition2").addConnection("condition2", "t11", false).addConnection("condition2", "t12", true).addConnection("condition1", "t2", false).addConnection("t11", "sink1").addConnection("t12", "sink1").addConnection("t2", "sink2").build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 59 with ETLConfig

use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by cdapio.

the class PipelineSpecGeneratorTest method testSimpleValidCondition.

@Test
public void testSimpleValidCondition() throws ValidationException {
    // source--condition-----t1-----sink
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("condition", MOCK_CONDITION)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("sink", MOCK_SINK)).addConnection("source", "condition").addConnection("condition", "t1", true).addConnection("t1", "sink").build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Example 60 with ETLConfig

use of io.cdap.cdap.etl.proto.v2.ETLConfig in project cdap by cdapio.

the class PipelineSpecGeneratorTest method testTwoConditionsGoingToSameStage.

@Test(expected = IllegalArgumentException.class)
public void testTwoConditionsGoingToSameStage() throws ValidationException {
    // source--condition1-----t1-----condition2------t11------sink1
    // |                      |                     |
    // |                      |-----------t12--------
    // t2---------sink2                   |
    // |
    // |
    // anotherSource--------->condition3----------------------sink3
    ETLBatchConfig etlConfig = ETLBatchConfig.builder().setTimeSchedule("* * * * *").addStage(new ETLStage("source", MOCK_SOURCE)).addStage(new ETLStage("anotherSource", MOCK_SOURCE)).addStage(new ETLStage("condition1", MOCK_CONDITION)).addStage(new ETLStage("condition2", MOCK_CONDITION)).addStage(new ETLStage("condition3", MOCK_CONDITION)).addStage(new ETLStage("t1", MOCK_TRANSFORM_A)).addStage(new ETLStage("t11", MOCK_TRANSFORM_A)).addStage(new ETLStage("t12", MOCK_TRANSFORM_A)).addStage(new ETLStage("t2", MOCK_TRANSFORM_B)).addStage(new ETLStage("sink1", MOCK_SINK)).addStage(new ETLStage("sink2", MOCK_SINK)).addStage(new ETLStage("sink3", MOCK_SINK)).addConnection("source", "condition1").addConnection("condition1", "t1", true).addConnection("t1", "condition2").addConnection("condition2", "t11", false).addConnection("condition2", "t12", true).addConnection("condition1", "t2", false).addConnection("t11", "sink1").addConnection("t12", "sink1").addConnection("t2", "sink2").addConnection("anotherSource", "condition3").addConnection("condition3", "sink3", false).addConnection("condition3", "t12", true).build();
    specGenerator.generateSpec(etlConfig);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) Test(org.junit.Test)

Aggregations

ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)220 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)207 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)152 ApplicationManager (io.cdap.cdap.test.ApplicationManager)152 Test (org.junit.Test)152 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)151 Table (io.cdap.cdap.api.dataset.table.Table)119 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)111 WorkflowManager (io.cdap.cdap.test.WorkflowManager)111 Schema (io.cdap.cdap.api.data.schema.Schema)98 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)76 ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)69 HashSet (java.util.HashSet)36 HashMap (java.util.HashMap)32 ImmutableMap (com.google.common.collect.ImmutableMap)29 ArrayList (java.util.ArrayList)28 TimeoutException (java.util.concurrent.TimeoutException)22 Map (java.util.Map)18 SpamMessage (io.cdap.cdap.datapipeline.mock.SpamMessage)16 Lineage (io.cdap.cdap.data2.metadata.lineage.Lineage)14