Search in sources:

Example 1 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

In the class FieldLineageProcessorTest, method testGeneratedOperations.

/**
 * Feeds per-stage field operations for a four-stage linear pipeline into
 * {@code FieldLineageProcessor.validateAndConvert} and checks the returned
 * operations against a hand-built expected set.
 *
 * <p>Only the source and sink declare an operation; both transforms supply an
 * empty list, and the expected set contains a {@code <stage>.Transform}
 * operation for each of them.
 */
@Test
public void testGeneratedOperations() throws Exception {
    // Pipeline shape: src -> transform1 -> transform2 -> sink
    Schema srcSchema = Schema.recordOf(
        "srcSchema",
        Schema.Field.of("body", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("offset", Schema.of(Schema.Type.INT)));
    Schema transform1Schema = Schema.recordOf(
        "trans1Schema",
        Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
    Schema transform2Schema = Schema.recordOf(
        "trans2Schema",
        Schema.Field.of("id", Schema.of(Schema.Type.INT)),
        Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    // One stage spec per node, each wired to its neighbour's schema.
    StageSpec srcStage = StageSpec.builder("src", DUMMY_PLUGIN)
        .addOutput(srcSchema, "transform1")
        .build();
    StageSpec transform1Stage = StageSpec.builder("transform1", DUMMY_PLUGIN)
        .addInputSchema("src", srcSchema)
        .addOutput(transform1Schema, "transform2")
        .build();
    StageSpec transform2Stage = StageSpec.builder("transform2", DUMMY_PLUGIN)
        .addInputSchema("transform1", transform1Schema)
        .addOutput(transform2Schema, "sink")
        .build();
    StageSpec sinkStage = StageSpec.builder("sink", DUMMY_PLUGIN)
        .addInputSchema("transform2", transform2Schema)
        .build();
    Set<StageSpec> stageSpecs = ImmutableSet.of(srcStage, transform1Stage, transform2Stage, sinkStage);

    Set<Connection> connections = ImmutableSet.of(
        new Connection("src", "transform1"),
        new Connection("transform1", "transform2"),
        new Connection("transform2", "sink"));

    PipelineSpec pipelineSpec = PipelineSpec.builder()
        .addStages(stageSpecs)
        .addConnections(connections)
        .build();
    FieldLineageProcessor processor = new FieldLineageProcessor(pipelineSpec);

    // Field operations as reported by each stage; the transforms report none.
    List<FieldOperation> srcOps = Collections.singletonList(
        new FieldReadOperation("Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset")));
    List<FieldOperation> transform1Ops = Collections.emptyList();
    List<FieldOperation> transform2Ops = Collections.emptyList();
    List<FieldOperation> sinkOps = Collections.singletonList(
        new FieldWriteOperation("Write", "4th operation", EndPoint.of("sink"), ImmutableList.of("id", "name")));
    Map<String, List<FieldOperation>> fieldOperations = ImmutableMap.of(
        "src", srcOps,
        "transform1", transform1Ops,
        "transform2", transform2Ops,
        "sink", sinkOps);

    Set<Operation> operations = processor.validateAndConvert(fieldOperations);

    // Expected operations, named "<stage>.<operation>".
    Operation expectedRead = new ReadOperation(
        "src.Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset"));
    Operation expectedTransform1 = new TransformOperation(
        "transform1.Transform",
        "",
        ImmutableList.of(InputField.of("src.Read", "body"), InputField.of("src.Read", "offset")),
        "body");
    Operation expectedTransform2 = new TransformOperation(
        "transform2.Transform",
        "",
        ImmutableList.of(InputField.of("transform1.Transform", "body")),
        ImmutableList.of("id", "name"));
    Operation expectedWrite = new WriteOperation(
        "sink.Write",
        "4th operation",
        EndPoint.of("sink"),
        ImmutableList.of(InputField.of("transform2.Transform", "id"), InputField.of("transform2.Transform", "name")));
    Set<Operation> expected = ImmutableSet.of(expectedRead, expectedTransform1, expectedTransform2, expectedWrite);

    Assert.assertEquals(expected, operations);
}
Also used : ReadOperation(io.cdap.cdap.api.lineage.field.ReadOperation) FieldReadOperation(io.cdap.cdap.etl.api.lineage.field.FieldReadOperation) Schema(io.cdap.cdap.api.data.schema.Schema) Connection(io.cdap.cdap.etl.proto.Connection) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) FieldWriteOperation(io.cdap.cdap.etl.api.lineage.field.FieldWriteOperation) TransformOperation(io.cdap.cdap.api.lineage.field.TransformOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) WriteOperation(io.cdap.cdap.api.lineage.field.WriteOperation) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) Test(org.junit.Test)

Example 2 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

In the class PipelineSpecGeneratorTest, method testSQLEngineNotEnabled.

/**
 * Generates a spec from a single-action config that configures a
 * transformation pushdown but sets pushdown enabled to {@code false}, and
 * compares it with an expected spec whose SQL engine stage spec is
 * {@code null}.
 */
@Test
public void testSQLEngineNotEnabled() throws ValidationException {
    ETLBatchConfig config = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .setPushdownEnabled(false)
        .setTransformationPushdown(new ETLTransformationPushdown(MOCK_SQL_ENGINE))
        .build();
    PipelineSpec actual = specGenerator.generateSpec(config);

    // The expected spec holds only the action stage and copies resource settings from the config.
    Map<String, String> emptyMap = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyMap, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();
    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(actionStage)
        .setResources(config.getResources())
        .setDriverResources(config.getDriverResources())
        .setClientResources(config.getClientResources())
        .setStageLoggingEnabled(config.isStageLoggingEnabled())
        .setSqlEngineStageSpec(null)
        .build();

    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLTransformationPushdown(io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 3 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

In the class PipelineSpecGeneratorTest, method testInputSchemasWithDifferentName.

/**
 * Generates a spec for two sources ("s1" and "s2") that both feed one sink,
 * where the expected sink stage receives {@code SCHEMA_A} from "s1" and
 * {@code SCHEMA_A2} from "s2", and compares it with a hand-built expected
 * spec.
 */
@Test
public void testInputSchemasWithDifferentName() {
    // s1 --\
    //       +--> sink
    // s2 --/
    ETLBatchConfig etlConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("s1", MOCK_SOURCE))
        .addStage(new ETLStage("s2", MOCK_SOURCE2))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("s1", "sink")
        .addConnection("s2", "sink")
        .setNumOfRecordsPreview(100)
        .build();

    Map<String, String> emptyMap = Collections.emptyMap();

    StageSpec firstSource = StageSpec.builder(
            "s1", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "sink")
        .build();
    StageSpec secondSource = StageSpec.builder(
            "s2", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource2", emptyMap, ARTIFACT_ID))
        .addOutput(SCHEMA_A2, "sink")
        .build();
    StageSpec sinkStage = StageSpec.builder(
            "sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
        .addInputSchemas(ImmutableMap.of("s1", SCHEMA_A, "s2", SCHEMA_A2))
        .setErrorSchema(SCHEMA_A)
        .build();

    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(firstSource)
        .addStage(secondSource)
        .addStage(sinkStage)
        .addConnections(etlConfig.getConnections())
        .setResources(etlConfig.getResources())
        .setDriverResources(new Resources(1024, 1))
        .setClientResources(new Resources(1024, 1))
        .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
        .setNumOfRecordsPreview(etlConfig.getNumOfRecordsPreview())
        .build();

    PipelineSpec actual = specGenerator.generateSpec(etlConfig);
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Resources(io.cdap.cdap.api.Resources) Test(org.junit.Test)

Example 4 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

In the class PipelineSpecGeneratorTest, method testConditionSchemaPropagation.

/**
 * Generates a spec for a source, a condition and a sink connected in a line,
 * and compares it with an expected spec in which {@code SCHEMA_A} is the
 * source output, the condition's input and output, and the sink's input.
 */
@Test
public void testConditionSchemaPropagation() throws ValidationException {
    // Pipeline shape: source --> condition --> sink
    ETLBatchConfig config = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("cond", MOCK_CONDITION))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "cond")
        .addConnection("cond", "sink", true)
        .setNumOfRecordsPreview(100)
        .build();

    StageSpec sourceStage = StageSpec.builder(
            "source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", EMPTY_MAP, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "cond")
        .build();
    StageSpec conditionStage = StageSpec.builder(
            "cond", new PluginSpec(Condition.PLUGIN_TYPE, "mockcondition", EMPTY_MAP, ARTIFACT_ID))
        .addInputSchema("source", SCHEMA_A)
        .addOutput("sink", null, SCHEMA_A)
        .setErrorSchema(SCHEMA_A)
        .build();
    StageSpec sinkStage = StageSpec.builder(
            "sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", EMPTY_MAP, ARTIFACT_ID))
        .addInputSchema("cond", SCHEMA_A)
        .setErrorSchema(SCHEMA_A)
        .build();

    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(sourceStage)
        .addStage(conditionStage)
        .addStage(sinkStage)
        .addConnections(config.getConnections())
        .setResources(config.getResources())
        .setDriverResources(config.getDriverResources())
        .setClientResources(config.getClientResources())
        .setStageLoggingEnabled(config.isStageLoggingEnabled())
        .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
        .build();

    PipelineSpec actual = specGenerator.generateSpec(config);
    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 5 with PipelineSpec

use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.

In the class PipelineSpecGeneratorTest, method testSingleAction.

/**
 * Generates a spec from a config containing a single action stage and
 * compares it with an expected spec that holds only that stage plus the
 * resource and stage-logging settings read back from the config.
 */
@Test
public void testSingleAction() throws ValidationException {
    ETLBatchConfig config = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .build();
    PipelineSpec actual = specGenerator.generateSpec(config);

    Map<String, String> emptyMap = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyMap, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();
    PipelineSpec expected = BatchPipelineSpec.builder()
        .addStage(actionStage)
        .setResources(config.getResources())
        .setDriverResources(config.getDriverResources())
        .setClientResources(config.getClientResources())
        .setStageLoggingEnabled(config.isStageLoggingEnabled())
        .build();

    Assert.assertEquals(expected, actual);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Aggregations

PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec): 18; Test (org.junit.Test): 18; BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec): 11; ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig): 11; ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 11; PluginSpec (io.cdap.cdap.etl.proto.v2.spec.PluginSpec): 11; StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 9; PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase): 8; Connection (io.cdap.cdap.etl.proto.Connection): 8; HashMap (java.util.HashMap): 8; HashSet (java.util.HashSet): 7; Resources (io.cdap.cdap.api.Resources): 3; Schema (io.cdap.cdap.api.data.schema.Schema): 3; ETLTransformationPushdown (io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown): 2; ImmutableList (com.google.common.collect.ImmutableList): 1; ImmutableMap (com.google.common.collect.ImmutableMap): 1; ImmutableSet (com.google.common.collect.ImmutableSet): 1; Admin (io.cdap.cdap.api.Admin): 1; TxRunnable (io.cdap.cdap.api.TxRunnable): 1; ArtifactId (io.cdap.cdap.api.artifact.ArtifactId): 1