Search in sources :

Example 11 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testSQLEngine.

/**
 * Generates a spec for a single-action pipeline that has pushdown enabled and a SQL engine
 * plugin configured, and checks that the result carries a SQL engine stage spec named
 * "sqlengine_mocksqlengine" alongside the action stage.
 */
@Test
public void testSQLEngine() throws ValidationException {
    ETLTransformationPushdown pushdown = new ETLTransformationPushdown(MOCK_SQL_ENGINE);
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .setPushdownEnabled(true)
        .setTransformationPushdown(pushdown)
        .build();
    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    // Both plugins are expected to be resolved with no properties.
    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);
    PluginSpec sqlEnginePlugin =
        new PluginSpec(BatchSQLEngine.PLUGIN_TYPE, "mocksqlengine", noProperties, ARTIFACT_ID);

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(StageSpec.builder("action", actionPlugin).build())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setSqlEngineStageSpec(StageSpec.builder("sqlengine_mocksqlengine", sqlEnginePlugin).build())
        .build();

    Assert.assertEquals(wanted, generated);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLTransformationPushdown(io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 12 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testGenerateSpec.

/**
 * Generates a spec for a branching batch pipeline (one source, three transforms, two sinks)
 * and checks that every stage carries the expected input schemas, output schema, error
 * schema and artifact information.
 */
@Test
public void testGenerateSpec() throws ValidationException {
    /*
     * Connections of the pipeline under test:
     *
     *   source -> t1, t2, sink2
     *   t1     -> t2, t3, sink2
     *   t2     -> t3, sink2
     *   t3     -> sink1
     */
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("sink1", MOCK_SINK))
        .addStage(new ETLStage("sink2", MOCK_SINK))
        .addStage(new ETLStage("t1", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t2", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("t3", MOCK_TRANSFORM_B))
        .addConnection("source", "t1")
        .addConnection("source", "t2")
        .addConnection("source", "sink2")
        .addConnection("t1", "t2")
        .addConnection("t1", "t3")
        .addConnection("t1", "sink2")
        .addConnection("t2", "sink2")
        .addConnection("t2", "t3")
        .addConnection("t3", "sink1")
        .setNumOfRecordsPreview(100)
        .build();

    // The generated spec must have the right input and output schemas and artifact information.
    BatchPipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Map<String, String> noProperties = ImmutableMap.of();

    StageSpec sourceSpec = StageSpec
        .builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", noProperties, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "t1", "t2", "sink2")
        .build();
    StageSpec sink1Spec = StageSpec
        .builder("sink1", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
        .addInputSchema("t3", SCHEMA_B)
        .setErrorSchema(SCHEMA_B)
        .build();
    StageSpec sink2Spec = StageSpec
        .builder("sink2", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
        .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A, "source", SCHEMA_A))
        .setErrorSchema(SCHEMA_A)
        .build();
    StageSpec t1Spec = StageSpec
        .builder("t1", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", noProperties, ARTIFACT_ID))
        .addInputSchema("source", SCHEMA_A)
        .addOutput(SCHEMA_A, "t2", "t3", "sink2")
        .setErrorSchema(SCHEMA_B)
        .build();
    StageSpec t2Spec = StageSpec
        .builder("t2", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", noProperties, ARTIFACT_ID))
        .addInputSchemas(ImmutableMap.of("source", SCHEMA_A, "t1", SCHEMA_A))
        .addOutput(SCHEMA_A, "t3", "sink2")
        .setErrorSchema(SCHEMA_B)
        .build();
    StageSpec t3Spec = StageSpec
        .builder("t3", new PluginSpec(Transform.PLUGIN_TYPE, "mockB", noProperties, ARTIFACT_ID))
        .addInputSchemas(ImmutableMap.of("t1", SCHEMA_A, "t2", SCHEMA_A))
        .addOutput(SCHEMA_B, "sink1")
        .setErrorSchema(SCHEMA_A)
        .build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(sourceSpec)
        .addStage(sink1Spec)
        .addStage(sink2Spec)
        .addStage(t1Spec)
        .addStage(t2Spec)
        .addStage(t3Spec)
        .addConnections(pipelineConfig.getConnections())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(new Resources(1024, 1))
        .setClientResources(new Resources(1024, 1))
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setNumOfRecordsPreview(pipelineConfig.getNumOfRecordsPreview())
        .build();

    Assert.assertEquals(wanted, generated);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) Resources(io.cdap.cdap.api.Resources) Test(org.junit.Test)

Example 13 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testAutoJoin.

/**
 * Generates a spec for a pipeline in which two transforms fed by the same source are joined
 * by an auto-joiner, and checks that the joiner's output schema (and the sink's input schema)
 * is the record "abc.joined" with fields a, b (nullable) and c (nullable).
 */
@Test
public void testAutoJoin() {
    /*
     * Connections of the pipeline under test:
     *
     *   source   -> tA, tABC
     *   tA       -> autojoin
     *   tABC     -> autojoin
     *   autojoin -> sink
     */
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("source", MOCK_SOURCE))
        .addStage(new ETLStage("tA", MOCK_TRANSFORM_A))
        .addStage(new ETLStage("tABC", MOCK_TRANSFORM_ABC))
        .addStage(new ETLStage("autojoin", MOCK_AUTO_JOINER))
        .addStage(new ETLStage("sink", MOCK_SINK))
        .addConnection("source", "tA")
        .addConnection("source", "tABC")
        .addConnection("tA", "autojoin")
        .addConnection("tABC", "autojoin")
        .addConnection("autojoin", "sink")
        .setNumOfRecordsPreview(100)
        .build();

    // NOTE(review): joinDefinition is not declared in this method; presumably it is a field that
    // the mock auto-joiner reads, so it must be assigned before generateSpec runs. Confirm.
    joinDefinition = JoinDefinition.builder()
        .select(new JoinField("tA", "a"), new JoinField("tABC", "b"), new JoinField("tABC", "c"))
        .from(JoinStage.builder("tA", SCHEMA_A).isRequired().build(),
              JoinStage.builder("tABC", SCHEMA_ABC).isOptional().build())
        .on(JoinCondition.onKeys()
            .addKey(new JoinKey("tA", Collections.singletonList("a")))
            .addKey(new JoinKey("tABC", Collections.singletonList("a")))
            .build())
        .setOutputSchemaName("abc.joined")
        .build();

    // Expected schema of the joined record.
    Schema.Field fieldA = Schema.Field.of("a", Schema.of(Schema.Type.STRING));
    Schema.Field fieldB = Schema.Field.of("b", Schema.nullableOf(Schema.of(Schema.Type.STRING)));
    Schema.Field fieldC = Schema.Field.of("c", Schema.nullableOf(Schema.of(Schema.Type.INT)));
    Schema joinedSchema = Schema.recordOf("abc.joined", fieldA, fieldB, fieldC);

    Map<String, String> noProperties = new HashMap<>();

    StageSpec sourceSpec = StageSpec
        .builder("source", new PluginSpec(BatchSource.PLUGIN_TYPE, "mocksource", noProperties, ARTIFACT_ID))
        .addOutput(SCHEMA_A, "tA", "tABC")
        .build();
    StageSpec tASpec = StageSpec
        .builder("tA", new PluginSpec(Transform.PLUGIN_TYPE, "mockA", noProperties, ARTIFACT_ID))
        .addInputSchema("source", SCHEMA_A)
        .addOutput(SCHEMA_A, "autojoin")
        .setErrorSchema(SCHEMA_B)
        .build();
    StageSpec tABCSpec = StageSpec
        .builder("tABC", new PluginSpec(Transform.PLUGIN_TYPE, "mockABC", noProperties, ARTIFACT_ID))
        .addInputSchema("source", SCHEMA_A)
        .addOutput(SCHEMA_ABC, "autojoin")
        .setErrorSchema(SCHEMA_A)
        .build();
    StageSpec joinSpec = StageSpec
        .builder("autojoin", new PluginSpec(BatchJoiner.PLUGIN_TYPE, "mockautojoiner", noProperties, ARTIFACT_ID))
        .addInputSchema("tA", SCHEMA_A)
        .addInputSchema("tABC", SCHEMA_ABC)
        .addOutput(joinedSchema, "sink")
        .setErrorSchema(SCHEMA_ABC)
        .build();
    StageSpec sinkSpec = StageSpec
        .builder("sink", new PluginSpec(BatchSink.PLUGIN_TYPE, "mocksink", noProperties, ARTIFACT_ID))
        .addInputSchema("autojoin", joinedSchema)
        .setErrorSchema(joinedSchema)
        .build();

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(sourceSpec)
        .addStage(tASpec)
        .addStage(tABCSpec)
        .addStage(joinSpec)
        .addStage(sinkSpec)
        .addConnections(pipelineConfig.getConnections())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setNumOfRecordsPreview(pipelineConfig.getNumOfRecordsPreview())
        .build();

    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);
    Assert.assertEquals(wanted, generated);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) JoinKey(io.cdap.cdap.etl.api.join.JoinKey) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) HashMap(java.util.HashMap) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Schema(io.cdap.cdap.api.data.schema.Schema) JoinField(io.cdap.cdap.etl.api.join.JoinField) Test(org.junit.Test)

Example 14 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testSQLEngineEnabledButNotConfigured.

/**
 * Generates a spec for a single-action pipeline that has pushdown enabled but a null
 * transformation pushdown, and checks that the result has a null SQL engine stage spec.
 */
@Test
public void testSQLEngineEnabledButNotConfigured() throws ValidationException {
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setTimeSchedule("* * * * *")
        .addStage(new ETLStage("action", MOCK_ACTION))
        .setPushdownEnabled(true)
        // Pushdown is switched on, yet no SQL engine is supplied.
        .setTransformationPushdown(null)
        .build();
    PipelineSpec generated = specGenerator.generateSpec(pipelineConfig);

    Map<String, String> noProperties = ImmutableMap.of();
    PluginSpec actionPlugin = new PluginSpec(Action.PLUGIN_TYPE, "mockaction", noProperties, ARTIFACT_ID);

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addStage(StageSpec.builder("action", actionPlugin).build())
        .setResources(pipelineConfig.getResources())
        .setDriverResources(pipelineConfig.getDriverResources())
        .setClientResources(pipelineConfig.getClientResources())
        .setStageLoggingEnabled(pipelineConfig.isStageLoggingEnabled())
        .setSqlEngineStageSpec(null)
        .build();

    Assert.assertEquals(wanted, generated);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) Test(org.junit.Test)

Example 15 with PluginSpec

use of io.cdap.cdap.etl.proto.v2.spec.PluginSpec in project cdap by caskdata.

the class PipelineSpecGeneratorTest method testPipelineProperties.

/**
 * Registers two mock action plugins that contribute pipeline properties, generates a spec
 * with a MapReduce-engine generator, and checks the properties of the resulting spec:
 * "prop1" and "prop2" from the plugins plus "prop3" from the config are expected, while the
 * config's "system.spark.spark.test" entry is not.
 */
@Test
public void testPipelineProperties() throws ValidationException {
    // Register the mock plugins the pipeline will use.
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE,
        "action1",
        MockPlugin.builder()
            .putPipelineProperty("prop1", "val1")
            .putPipelineProperty("prop2", "val2")
            .build(),
        artifacts);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE,
        "action2",
        MockPlugin.builder().putPipelineProperty("prop2", "val2").build(),
        artifacts);

    Map<String, String> noProperties = ImmutableMap.of();
    ETLBatchConfig pipelineConfig = ETLBatchConfig.builder()
        .setProperties(ImmutableMap.of("system.spark.spark.test", "abc", "system.mapreduce.prop3", "val3"))
        .addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties)))
        .addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties)))
        .addConnection("a1", "a2")
        .setEngine(Engine.MAPREDUCE)
        .setNumOfRecordsPreview(100)
        .build();

    BatchPipelineSpecGenerator generator = new BatchPipelineSpecGenerator(
        NamespaceId.DEFAULT.getNamespace(),
        mockConfigurer,
        null,
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE),
        Engine.MAPREDUCE,
        MOCK_FEATURE_FLAGS_PROVIDER);
    PipelineSpec generated = generator.generateSpec(pipelineConfig);

    PipelineSpec wanted = BatchPipelineSpec.builder()
        .addConnection("a1", "a2")
        .setProperties(ImmutableMap.of("prop1", "val1", "prop2", "val2", "prop3", "val3"))
        .addStage(
            StageSpec.builder("a1", new PluginSpec(Action.PLUGIN_TYPE, "action1", noProperties, ARTIFACT_ID))
                .addOutput(null, "a2")
                .build())
        .addStage(
            StageSpec.builder("a2", new PluginSpec(Action.PLUGIN_TYPE, "action2", noProperties, ARTIFACT_ID))
                .addInputSchema("a1", null)
                .build())
        .setResources(new Resources(1024))
        .setDriverResources(new Resources(1024))
        .setClientResources(new Resources(1024))
        .setNumOfRecordsPreview(pipelineConfig.getNumOfRecordsPreview())
        .build();

    Assert.assertEquals(wanted, generated);
}
Also used : ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) PluginSpec(io.cdap.cdap.etl.proto.v2.spec.PluginSpec) ArtifactId(io.cdap.cdap.api.artifact.ArtifactId) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) BatchPipelineSpecGenerator(io.cdap.cdap.etl.batch.BatchPipelineSpecGenerator) PipelineSpec(io.cdap.cdap.etl.proto.v2.spec.PipelineSpec) BatchPipelineSpec(io.cdap.cdap.etl.batch.BatchPipelineSpec) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) Resources(io.cdap.cdap.api.Resources) MockPluginConfigurer(io.cdap.cdap.etl.common.MockPluginConfigurer) Test(org.junit.Test)

Aggregations

PluginSpec (io.cdap.cdap.etl.proto.v2.spec.PluginSpec)16 Test (org.junit.Test)12 BatchPipelineSpec (io.cdap.cdap.etl.batch.BatchPipelineSpec)11 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)11 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)11 PipelineSpec (io.cdap.cdap.etl.proto.v2.spec.PipelineSpec)11 Resources (io.cdap.cdap.api.Resources)4 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)3 HashMap (java.util.HashMap)3 ArtifactId (io.cdap.cdap.api.artifact.ArtifactId)2 ArtifactVersion (io.cdap.cdap.api.artifact.ArtifactVersion)2 PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)2 ETLTransformationPushdown (io.cdap.cdap.etl.proto.v2.ETLTransformationPushdown)2 ArtifactVersionRange (io.cdap.cdap.api.artifact.ArtifactVersionRange)1 Schema (io.cdap.cdap.api.data.schema.Schema)1 MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator)1 MacroParserOptions (io.cdap.cdap.api.macro.MacroParserOptions)1 InvalidPluginConfigException (io.cdap.cdap.api.plugin.InvalidPluginConfigException)1 PluginProperties (io.cdap.cdap.api.plugin.PluginProperties)1 AbstractSpark (io.cdap.cdap.api.spark.AbstractSpark)1