Use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.
From the class PipelineSpecGeneratorTest, method testSQLEngine.
@Test
public void testSQLEngine() throws ValidationException {
ETLBatchConfig config = ETLBatchConfig.builder()
  .setTimeSchedule("* * * * *")
  .addStage(new ETLStage("action", MOCK_ACTION))
  .setPushdownEnabled(true)
  .setTransformationPushdown(new ETLTransformationPushdown(MOCK_SQL_ENGINE))
  .build();
PipelineSpec actual = specGenerator.generateSpec(config);
Map<String, String> emptyMap = ImmutableMap.of();
PipelineSpec expected = BatchPipelineSpec.builder()
  .addStage(StageSpec.builder("action", new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyMap, ARTIFACT_ID))
              .build())
  .setResources(config.getResources())
  .setDriverResources(config.getDriverResources())
  .setClientResources(config.getClientResources())
  .setStageLoggingEnabled(config.isStageLoggingEnabled())
  .setSqlEngineStageSpec(StageSpec.builder("sqlengine_mocksqlengine",
                                           new PluginSpec(BatchSQLEngine.PLUGIN_TYPE, "mocksqlengine", emptyMap, ARTIFACT_ID))
                           .build())
  .build();
Assert.assertEquals(expected, actual);
}
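For reference, the PluginSpec constructor used throughout these examples takes the plugin type, the plugin name, a map of raw plugin properties, and the artifact that provides the plugin. A minimal stand-alone sketch, using only identifiers from the test above (the stage name and empty property map are illustrative; ARTIFACT_ID is the test's artifact constant):

// Sketch only: PluginSpec pairs a plugin type/name with its properties and artifact,
// and StageSpec binds that plugin to a named stage in the pipeline.
PluginSpec pluginSpec = new PluginSpec(Action.PLUGIN_TYPE, "mockaction",
    ImmutableMap.of(), ARTIFACT_ID);
StageSpec stageSpec = StageSpec.builder("action", pluginSpec).build();

Note also that the SQL engine stage in the expected spec is named "sqlengine_mocksqlengine", i.e. the plugin name prefixed with sqlengine_.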
Use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.
From the class PipelineSpecGeneratorTest, method testGenerateSpec.
@Test
public void testGenerateSpec() throws ValidationException {
/*
 * Pipeline DAG (each edge matches an addConnection call below):
 *
 *              |---> t1 ---+------->|
 *              |           |        |
 * source ------|           v        |---> t3 ---> sink1
 *              |---> t2 ----------->|
 *              |      |
 *              |------+-----------------> sink2   (sink2 is fed by source, t1, and t2)
 */
ETLBatchConfig etlConfig = ETLBatchConfig.builder()
  .setTimeSchedule("* * * * *")
  .addStage(new ETLStage("source", MOCK_SOURCE))
  .addStage(new ETLStage("sink1", MOCK_SINK))
  .addStage(new ETLStage("sink2", MOCK_SINK))
  .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
  .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
  .addStage(new ETLStage("t3", MOCK_TRANSFORM_B))
  .addConnection("source", "t1")
  .addConnection("source", "t2")
  .addConnection("source", "sink2")
  .addConnection("t1", "t2")
  .addConnection("t1", "t3")
  .addConnection("t1", "sink2")
  .addConnection("t2", "sink2")
  .addConnection("t2", "t3")
  .addConnection("t3", "sink1")
  .setNumOfRecordsPreview(100)
  .build();
// Verify the generated spec is correct, with the right input/output schemas and artifact information.
BatchPipelineSpec actual = specGenerator.generateSpec(etlConfig);
Map<String, String> emptyMap = ImmutableMap.of();
PipelineSpec expected = BatchPipelineSpec.builder()
  .addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID))
              .addOutput(SCHEMA_A, "t1", "t2", "sink2")
              .build())
  .addStage(StageSpec.builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
              .addInputSchema("t3", SCHEMA_B)
              .setErrorSchema(SCHEMA_B)
              .build())
  .addStage(StageSpec.builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
              .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A))
              .setErrorSchema(SCHEMA_A)
              .build())
  .addStage(StageSpec.builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
              .addInputSchema("source", SCHEMA_A)
              .addOutput(SCHEMA_A, "t2", "t3", "sink2")
              .setErrorSchema(SCHEMA_B)
              .build())
  .addStage(StageSpec.builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
              .addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A))
              .addOutput(SCHEMA_A, "t3", "sink2")
              .setErrorSchema(SCHEMA_B)
              .build())
  .addStage(StageSpec.builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID))
              .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A))
              .addOutput(SCHEMA_B, "sink1")
              .setErrorSchema(SCHEMA_A)
              .build())
  .addConnections(etlConfig.getConnections())
  .setResources(etlConfig.getResources())
  .setDriverResources(new Resources(1024, 1))
  .setClientResources(new Resources(1024, 1))
  .setStageLoggingEnabled(etlConfig.isStageLoggingEnabled())
  .setNumOfRecordsPreview(etlConfig.getNumOfRecordsPreview())
  .build();
Assert.assertEquals(expected, actual);
}
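The expected spec asserts the generator's schema propagation stage by stage: addInputSchema/addInputSchemas record what each stage receives from each upstream stage, and addOutput records the schema it sends to each downstream stage. Pulling one mid-pipeline stage out of the chain above as an isolated sketch (all constants are the test's own):

StageSpec t3Spec = StageSpec
  .builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", emptyMap, ARTIFACT_ID))
  .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A)) // schemas received from upstream stages
  .addOutput(SCHEMA_B, "sink1")                                     // schema emitted to the downstream stage
  .setErrorSchema(SCHEMA_A)                                         // schema used for error records from this stage
  .build();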
Use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.
From the class PipelineSpecGeneratorTest, method testAutoJoin.
@Test
public void testAutoJoin() {
/*
 *            ---- transformA --------|
 *            |                       |
 * source ----|                       |-- autojoin --- sink
 *            |                       |
 *            ---- transformABC ------|
 */
ETLBatchConfig config = ETLBatchConfig.builder()
  .setTimeSchedule("* * * * *")
  .addStage(new ETLStage("source", MOCK_SOURCE))
  .addStage(new ETLStage("tA", MOCK_TRANSFORM_A))
  .addStage(new ETLStage("tABC", MOCK_TRANSFORM_ABC))
  .addStage(new ETLStage("autojoin", MOCK_AUTO_JOINER))
  .addStage(new ETLStage("sink", MOCK_SINK))
  .addConnection("source", "tA")
  .addConnection("source", "tABC")
  .addConnection("tA", "autojoin")
  .addConnection("tABC", "autojoin")
  .addConnection("autojoin", "sink")
  .setNumOfRecordsPreview(100)
  .build();
joinDefinition = JoinDefinition.builder()
  .select(new JoinField("tA", "a"), new JoinField("tABC", "b"), new JoinField("tABC", "c"))
  .from(JoinStage.builder("tA", SCHEMA_A).isRequired().build(),
        JoinStage.builder("tABC", SCHEMA_ABC).isOptional().build())
  .on(JoinCondition.onKeys()
        .addKey(new JoinKey("tA", Collections.singletonList("a")))
        .addKey(new JoinKey("tABC", Collections.singletonList("a")))
        .build())
  .setOutputSchemaName("abc.joined")
  .build();
Schema joinSchema = Schema.recordOf(
  "abc.joined",
  Schema.Field.of("a", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("b", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
  Schema.Field.of("c", Schema.nullableOf(Schema.of(Schema.Type.INT))));
Map<String, String> emptyMap = new HashMap<>();
PipelineSpec expected = BatchPipelineSpec.builder()
  .addStage(StageSpec.builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", emptyMap, ARTIFACT_ID))
              .addOutput(SCHEMA_A, "tA", "tABC")
              .build())
  .addStage(StageSpec.builder("tA", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", emptyMap, ARTIFACT_ID))
              .addInputSchema("source", SCHEMA_A)
              .addOutput(SCHEMA_A, "autojoin")
              .setErrorSchema(SCHEMA_B)
              .build())
  .addStage(StageSpec.builder("tABC", new PluginSpec(Transform.PLUGIN_TYPE, "mockABC", emptyMap, ARTIFACT_ID))
              .addInputSchema("source", SCHEMA_A)
              .addOutput(SCHEMA_ABC, "autojoin")
              .setErrorSchema(SCHEMA_A)
              .build())
  .addStage(StageSpec.builder("autojoin", new PluginSpec(BatchJoiner.PLUGIN_TYPE, "mockautojoiner", emptyMap, ARTIFACT_ID))
              .addInputSchema("tA", SCHEMA_A)
              .addInputSchema("tABC", SCHEMA_ABC)
              .addOutput(joinSchema, "sink")
              .setErrorSchema(SCHEMA_ABC)
              .build())
  .addStage(StageSpec.builder("sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", emptyMap, ARTIFACT_ID))
              .addInputSchema("autojoin", joinSchema)
              .setErrorSchema(joinSchema)
              .build())
  .addConnections(config.getConnections())
  .setResources(config.getResources())
  .setDriverResources(config.getDriverResources())
  .setClientResources(config.getClientResources())
  .setStageLoggingEnabled(config.isStageLoggingEnabled())
  .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
  .build();
PipelineSpec actual = specGenerator.generateSpec(config);
Assert.assertEquals(expected, actual);
}
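Note the nullability in the expected join schema: tA is declared required while tABC is optional, so the join is effectively an outer join with respect to tABC, and the fields selected from it ("b" and "c") become nullable in abc.joined, while "a" from the required tA side stays non-nullable.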
Use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.
From the class PipelineSpecGeneratorTest, method testSQLEngineEnabledButNotConfigured.
@Test
public void testSQLEngineEnabledButNotConfigured() throws ValidationException {
ETLBatchConfig config = ETLBatchConfig.builder()
  .setTimeSchedule("* * * * *")
  .addStage(new ETLStage("action", MOCK_ACTION))
  .setPushdownEnabled(true)
  .setTransformationPushdown(null)
  .build();
PipelineSpec actual = specGenerator.generateSpec(config);
Map<String, String> emptyMap = ImmutableMap.of();
PipelineSpec expected = BatchPipelineSpec.builder()
  .addStage(StageSpec.builder("action", new PluginSpec(Action.PLUGIN_TYPE, "mockaction", emptyMap, ARTIFACT_ID))
              .build())
  .setResources(config.getResources())
  .setDriverResources(config.getDriverResources())
  .setClientResources(config.getClientResources())
  .setStageLoggingEnabled(config.isStageLoggingEnabled())
  .setSqlEngineStageSpec(null)
  .build();
Assert.assertEquals(expected, actual);
}
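This is the counterpart to testSQLEngine above: pushdown is enabled but no ETLTransformationPushdown is configured, so the generator is expected to produce a spec with no SQL engine stage (setSqlEngineStageSpec(null)) rather than fail validation.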
Use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.
From the class PipelineSpecGeneratorTest, method testPipelineProperties.
@Test
public void testPipelineProperties() throws ValidationException {
// populate some mock plugins.
MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
pluginConfigurer.addMockPlugin(
  Action.PLUGIN_TYPE, "action1",
  MockPlugin.builder()
    .putPipelineProperty("prop1", "val1")
    .putPipelineProperty("prop2", "val2")
    .build(),
  artifactIds);
pluginConfigurer.addMockPlugin(
  Action.PLUGIN_TYPE, "action2",
  MockPlugin.builder().putPipelineProperty("prop2", "val2").build(),
  artifactIds);
Map<String, String> empty = ImmutableMap.of();
ETLBatchConfig config = ETLBatchConfig.builder()
  .setProperties(ImmutableMap.of("system.spark.spark.test", "abc",
                                 "system.mapreduce.prop3", "val3"))
  .addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, empty)))
  .addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, empty)))
  .addConnection("a1", "a2")
  .setEngine(Engine.MAPREDUCE)
  .setNumOfRecordsPreview(100)
  .build();
PipelineSpec actual = new BatchPipelineSpecGenerator(
  NamespaceId.DEFAULT.getNamespace(), pluginConfigurer, null,
  ImmutableSet.of(BatchSource.PLUGIN_TYPE), ImmutableSet.of(BatchSink.PLUGIN_TYPE),
  Engine.MAPREDUCE, MOCK_FEATURE_FLAGS_PROVIDER).generateSpec(config);
PipelineSpec expected = BatchPipelineSpec.builder()
  .addConnection("a1", "a2")
  .setProperties(ImmutableMap.of("prop1", "val1", "prop2", "val2", "prop3", "val3"))
  .addStage(StageSpec.builder("a1", new PluginSpec(Action.PLUGIN_TYPE, "action1", empty, ARTIFACT_ID))
              .addOutput(null, "a2")
              .build())
  .addStage(StageSpec.builder("a2", new PluginSpec(Action.PLUGIN_TYPE, "action2", empty, ARTIFACT_ID))
              .addInputSchema("a1", null)
              .build())
  .setResources(new Resources(1024))
  .setDriverResources(new Resources(1024))
  .setClientResources(new Resources(1024))
  .setNumOfRecordsPreview(config.getNumOfRecordsPreview())
  .build();
Assert.assertEquals(expected, actual);
}
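The expected properties show how pipeline properties are resolved: properties declared by the plugins themselves (prop1 and prop2, the duplicate prop2 values agreeing) are merged with config properties whose system.&lt;engine&gt;. prefix matches the configured engine, stripped of that prefix. Since the engine is MAPREDUCE, system.mapreduce.prop3 survives as prop3, while the Spark-scoped system.spark.spark.test is dropped.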