use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.
the class FieldLineageProcessorTest method testGeneratedOperations.
/**
 * Runs {@code FieldLineageProcessor.validateAndConvert} on a linear four-stage pipeline in which
 * only the source and the sink report field operations, and compares the returned operations
 * against a read, two "Transform" operations (one per transform stage) and a write.
 */
@Test
public void testGeneratedOperations() throws Exception {
    // src -> transform1 -> transform2 -> sink
    Schema sourceOut = Schema.recordOf(
        "srcSchema",
        Schema.Field.of("body", Schema.of(Schema.Type.STRING)),
        Schema.Field.of("offset", Schema.of(Schema.Type.INT)));
    Schema firstTransformOut = Schema.recordOf(
        "trans1Schema",
        Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
    Schema secondTransformOut = Schema.recordOf(
        "trans2Schema",
        Schema.Field.of("id", Schema.of(Schema.Type.INT)),
        Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    // One stage spec per node, each wired to the output schema of its predecessor.
    StageSpec srcStage = StageSpec.builder("src", DUMMY_PLUGIN)
        .addOutput(sourceOut, "transform1")
        .build();
    StageSpec firstTransformStage = StageSpec.builder("transform1", DUMMY_PLUGIN)
        .addInputSchema("src", sourceOut)
        .addOutput(firstTransformOut, "transform2")
        .build();
    StageSpec secondTransformStage = StageSpec.builder("transform2", DUMMY_PLUGIN)
        .addInputSchema("transform1", firstTransformOut)
        .addOutput(secondTransformOut, "sink")
        .build();
    StageSpec sinkStage = StageSpec.builder("sink", DUMMY_PLUGIN)
        .addInputSchema("transform2", secondTransformOut)
        .build();
    Set<StageSpec> stages =
        ImmutableSet.of(srcStage, firstTransformStage, secondTransformStage, sinkStage);

    Set<Connection> links = ImmutableSet.of(
        new Connection("src", "transform1"),
        new Connection("transform1", "transform2"),
        new Connection("transform2", "sink"));

    PipelineSpec spec = PipelineSpec.builder()
        .addStages(stages)
        .addConnections(links)
        .build();
    FieldLineageProcessor lineageProcessor = new FieldLineageProcessor(spec);

    // Only the two ends of the pipeline record operations; both transforms record none.
    List<FieldOperation> sourceOps = Collections.singletonList(
        new FieldReadOperation("Read", "1st operation", EndPoint.of("file"),
                               ImmutableList.of("body", "offset")));
    List<FieldOperation> sinkOps = Collections.singletonList(
        new FieldWriteOperation("Write", "4th operation", EndPoint.of("sink"),
                                ImmutableList.of("id", "name")));
    Map<String, List<FieldOperation>> recordedOps = ImmutableMap.of(
        "src", sourceOps,
        "transform1", Collections.emptyList(),
        "transform2", Collections.emptyList(),
        "sink", sinkOps);

    Set<Operation> converted = lineageProcessor.validateAndConvert(recordedOps);

    Operation expectedRead = new ReadOperation(
        "src.Read", "1st operation", EndPoint.of("file"), ImmutableList.of("body", "offset"));
    Operation expectedFirstTransform = new TransformOperation(
        "transform1.Transform", "",
        ImmutableList.of(InputField.of("src.Read", "body"), InputField.of("src.Read", "offset")),
        "body");
    Operation expectedSecondTransform = new TransformOperation(
        "transform2.Transform", "",
        ImmutableList.of(InputField.of("transform1.Transform", "body")),
        ImmutableList.of("id", "name"));
    Operation expectedWrite = new WriteOperation(
        "sink.Write", "4th operation", EndPoint.of("sink"),
        ImmutableList.of(InputField.of("transform2.Transform", "id"),
                         InputField.of("transform2.Transform", "name")));
    Set<Operation> expected = ImmutableSet.of(
        expectedRead, expectedFirstTransform, expectedSecondTransform, expectedWrite);

    Assert.assertEquals(expected, converted);
}
use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testSQLEngineNotEnabled.
/**
 * Generates a spec for a single-action pipeline that configures a transformation pushdown but
 * has pushdown disabled, and expects the spec to carry a {@code null} SQL engine stage spec.
 */
@Test
public void testSQLEngineNotEnabled() throws ValidationException {
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .setPushdownEnabled(false)
        .setTransformationPushdown(new ETLTransformationPushdown(MOCK_SQL_ENGINE))
        .build();

    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin =
        new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(actionStage)
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setSqlEngineStageSpec(null)
        .build();

    Assert.assertEquals(wanted, generated);
}
use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testInputSchemasWithDifferentName.
/**
 * Generates a spec for two sources ("s1" and "s2") feeding one sink and expects the sink stage
 * to list {@code SCHEMA_A} as the input schema from "s1" and {@code SCHEMA_A2} from "s2".
 */
@Test
public void testInputSchemasWithDifferentName() {
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .addStage(new ETLStage("s1", MOCK_SOURCE))
        .addStage(new ETLStage("s2", MOCK_SOURCE2))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("s1", "sink")
        .addConnection("s2", "sink")
        .setNumOfRecordsPreview(100)
        .build();

    Map<String, String> noProperties = Collections.emptyMap();

    StageSpec firstSource = StageSpec
        .builder("s1", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", noProperties, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "sink")
        .build();
    StageSpec secondSource = StageSpec
        .builder("s2", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource2", noProperties, ARTIFACT_ID))
        .addOutput(SCHEMA_A2, "sink")
        .build();
    StageSpec sinkStage = StageSpec
        .builder("sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
        .addInputSchemas(ImmutableMap.of("s1", SCHEMA_A, "s2", SCHEMA_A2))
        .setErrorSchema(SCHEMA_A)
        .build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(firstSource)
        .addStage(secondSource)
        .addStage(sinkStage)
        .addConnections(pipelineConfig.getConnections())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(new Resources(1024, 1))
        .setClientResources(new Resources(1024, 1))
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setNumOfRecordsPreview(pipelineConfig.getNumOfRecordsPreview())
        .build();

    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Assert.assertEquals(wanted, generated);
}
use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testConditionSchemaPropagation.
/**
 * Generates a spec for a source, condition and sink chain and expects {@code SCHEMA_A} to show
 * up as the condition's input and output schema and as the sink's input schema.
 */
@Test
public void testConditionSchemaPropagation() throws ValidationException {
    /*
     * source --> condition --> sink
     */
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("cond", MOCK_CONDITION))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "cond")
        .addConnection("cond", "sink", true)
        .setNumOfRecordsPreview(100)
        .build();

    StageSpec sourceStage = StageSpec
        .builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", EMPTY_MAP, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "cond")
        .build();
    StageSpec conditionStage = StageSpec
        .builder("cond", new PluginSpec(Condition.PLUGIN_TYPE, "mockcondition", EMPTY_MAP, ARTIFACT_ID))
        .addInputSchema("source", SCHEMA_A)
        .addOutput("sink", null, SCHEMA_A)
        .setErrorSchema(SCHEMA_A)
        .build();
    StageSpec sinkStage = StageSpec
        .builder("sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", EMPTY_MAP, ARTIFACT_ID))
        .addInputSchema("cond", SCHEMA_A)
        .setErrorSchema(SCHEMA_A)
        .build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(sourceStage)
        .addStage(conditionStage)
        .addStage(sinkStage)
        .addConnections(pipelineConfig.getConnections())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setNumOfRecordsPreview(pipelineConfig.getNumOfRecordsPreview())
        .build();

    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Assert.assertEquals(wanted, generated);
}
use of io.cdap.cdap.etl.proto.v2.spec.PipelineSpec in project cdap by caskdata.
the class PipelineSpecGeneratorTest method testSingleAction.
/**
 * Generates a spec for a pipeline whose only stage is an action and expects a batch spec with
 * that one stage and the resource and stage-logging settings taken from the config.
 */
@Test
public void testSingleAction() throws ValidationException {
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .build();

    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin =
        new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    StageSpec actionStage = StageSpec.builder("action", actionPlugin).build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(actionStage)
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .build();

    Assert.assertEquals(wanted, generated);
}
Aggregations