Search in sources:

Example 1 with BatchPipelineSpecGenerator

use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.

In class PipelineSpecGeneratorTest, method setupTests.

/**
 * Registers one mock plugin per plugin type used here and stores a MapReduce-engine
 * {@link BatchPipelineSpecGenerator} built on top of them in {@code specGenerator}.
 */
@BeforeClass
public static void setupTests() {
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);

    // The source emits SCHEMA_A.
    mockConfigurer.addMockPlugin(
        BatchSource.PLUGIN_TYPE, "mocksource",
        MockPlugin.builder().setOutputSchema(SCHEMA_A).build(),
        artifacts);

    // Transforms: mockA outputs SCHEMA_A with SCHEMA_B as its error schema, mockB outputs SCHEMA_B.
    mockConfigurer.addMockPlugin(
        Transform.PLUGIN_TYPE, "mockA",
        MockPlugin.builder().setOutputSchema(SCHEMA_A).setErrorSchema(SCHEMA_B).build(),
        artifacts);
    mockConfigurer.addMockPlugin(
        Transform.PLUGIN_TYPE, "mockB",
        MockPlugin.builder().setOutputSchema(SCHEMA_B).build(),
        artifacts);

    // The remaining plugins are registered without any schema.
    mockConfigurer.addMockPlugin(
        BatchSink.PLUGIN_TYPE, "mocksink", MockPlugin.builder().build(), artifacts);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "mockaction", MockPlugin.builder().build(), artifacts);
    mockConfigurer.addMockPlugin(
        BatchJoiner.PLUGIN_TYPE, "mockjoiner", MockPlugin.builder().build(), artifacts);
    mockConfigurer.addMockPlugin(
        ErrorTransform.PLUGIN_TYPE, "mockerror", MockPlugin.builder().build(), artifacts);

    specGenerator = new BatchPipelineSpecGenerator(
        mockConfigurer,
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE),
        FileSet.class,
        DatasetProperties.EMPTY,
        Engine.MAPREDUCE);
}
Also used : ArtifactId(co.cask.cdap.api.artifact.ArtifactId) FileSet(co.cask.cdap.api.dataset.lib.FileSet) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) MockPluginConfigurer(co.cask.cdap.etl.common.MockPluginConfigurer) BeforeClass(org.junit.BeforeClass)

Example 2 with BatchPipelineSpecGenerator

use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.

In class PipelineSpecGeneratorTest, method testPipelineProperties.

/**
 * Generates a spec for a two-action pipeline whose plugins register pipeline properties
 * and whose config carries "system."-prefixed properties, then compares it with the
 * expected spec. The expected properties are prop1, prop2 and prop3; the
 * "system.spark.spark.test" entry is not among them.
 */
@Test
public void testPipelineProperties() {
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);

    // Both actions register prop2 with the same value.
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action1",
        MockPlugin.builder()
            .putPipelineProperty("prop1", "val1")
            .putPipelineProperty("prop2", "val2")
            .build(),
        artifacts);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action2",
        MockPlugin.builder().putPipelineProperty("prop2", "val2").build(),
        artifacts);

    PipelineSpecGenerator generator = new BatchPipelineSpecGenerator(
        mockConfigurer,
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE),
        FileSet.class,
        DatasetProperties.EMPTY,
        Engine.MAPREDUCE);

    Map<String, String> noProperties = ImmutableMap.of();

    // Input: a1 -> a2, running on MapReduce.
    ETLStage firstStage = new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties));
    ETLStage secondStage = new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties));
    ETLConfig config = ETLBatchConfig.builder("* * * * *")
        .setProperties(ImmutableMap.of("system.spark.spark.test", "abc", "system.mapreduce.prop3", "val3"))
        .addStage(firstStage)
        .addStage(secondStage)
        .addConnection("a1", "a2")
        .setEngine(Engine.MAPREDUCE)
        .build();

    PipelineSpec generated = generator.generateSpec(config);

    // Expected output.
    PluginSpec firstPlugin = new PluginSpec(Action.PLUGIN_TYPE, "action1", noProperties, ARTIFACT_ID);
    PluginSpec secondPlugin = new PluginSpec(Action.PLUGIN_TYPE, "action2", noProperties, ARTIFACT_ID);
    PipelineSpec expected = BatchPipelineSpec.builder()
        .addConnection("a1", "a2")
        .setProperties(ImmutableMap.of("prop1", "val1", "prop2", "val2", "prop3", "val3"))
        .addStage(StageSpec.builder("a1", firstPlugin).addOutputs("a2").build())
        .addStage(StageSpec.builder("a2", secondPlugin).addInputs("a1").build())
        .setResources(new Resources(1024))
        .setDriverResources(new Resources(1024))
        .setClientResources(new Resources(1024))
        .build();

    Assert.assertEquals(expected, generated);
}
Also used : ArtifactId(co.cask.cdap.api.artifact.ArtifactId) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) MockPluginConfigurer(co.cask.cdap.etl.common.MockPluginConfigurer) ETLConfig(co.cask.cdap.etl.proto.v2.ETLConfig) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) Resources(co.cask.cdap.api.Resources) Test(org.junit.Test)

Example 3 with BatchPipelineSpecGenerator

use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.

In class PipelineSpecGeneratorTest, method testConflictingPipelineProperties.

/**
 * Expects an {@link IllegalArgumentException} for a pipeline whose two action plugins
 * register the pipeline property "prop1" with different values ("val1" and "val2").
 */
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
    MockPluginConfigurer mockConfigurer = new MockPluginConfigurer();
    Set<ArtifactId> artifacts = ImmutableSet.of(ARTIFACT_ID);

    // Same property key, different values.
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action1",
        MockPlugin.builder().putPipelineProperty("prop1", "val1").build(),
        artifacts);
    mockConfigurer.addMockPlugin(
        Action.PLUGIN_TYPE, "action2",
        MockPlugin.builder().putPipelineProperty("prop1", "val2").build(),
        artifacts);

    PipelineSpecGenerator generator = new BatchPipelineSpecGenerator(
        mockConfigurer,
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE),
        FileSet.class,
        DatasetProperties.EMPTY,
        Engine.MAPREDUCE);

    Map<String, String> noProperties = ImmutableMap.of();
    ETLStage firstStage = new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, noProperties));
    ETLStage secondStage = new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, noProperties));
    ETLConfig config = ETLBatchConfig.builder("* * * * *")
        .addStage(firstStage)
        .addStage(secondStage)
        .addConnection("a1", "a2")
        .setEngine(Engine.MAPREDUCE)
        .build();

    generator.generateSpec(config);
}
Also used : ArtifactId(co.cask.cdap.api.artifact.ArtifactId) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) ETLStage(co.cask.cdap.etl.proto.v2.ETLStage) ETLPlugin(co.cask.cdap.etl.proto.v2.ETLPlugin) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) MockPluginConfigurer(co.cask.cdap.etl.common.MockPluginConfigurer) ETLConfig(co.cask.cdap.etl.proto.v2.ETLConfig) Test(org.junit.Test)

Example 4 with BatchPipelineSpecGenerator

use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.

In class DataPipelineApp, method configure.

/**
 * Generates the batch pipeline spec from the application config, adds a
 * {@link SmartWorkflow} built from that spec, and schedules the workflow using the
 * time schedule from the config.
 */
@Override
public void configure() {
    ETLBatchConfig pipelineConfig = getConfig();
    setDescription(DEFAULT_DESCRIPTION);

    // The dataset class and properties handed to the generator describe an
    // Avro-backed TimePartitionedFileSet using the error schema.
    PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> generator = new BatchPipelineSpecGenerator(
        getConfigurer(),
        ImmutableSet.of(BatchSource.PLUGIN_TYPE),
        ImmutableSet.of(BatchSink.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE),
        TimePartitionedFileSet.class,
        FileSetProperties.builder()
            .setInputFormat(AvroKeyInputFormat.class)
            .setOutputFormat(AvroKeyOutputFormat.class)
            .setEnableExploreOnCreate(true)
            .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
            .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
            .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
            .setTableProperty("avro.schema.literal", Constants.ERROR_SCHEMA.toString())
            .build(),
        pipelineConfig.getEngine());

    BatchPipelineSpec pipelineSpec = generator.generateSpec(pipelineConfig);
    SmartWorkflow workflow =
        new SmartWorkflow(pipelineSpec, supportedPluginTypes, getConfigurer(), pipelineConfig.getEngine());
    addWorkflow(workflow);

    Schedules.Builder schedule = Schedules.builder(SCHEDULE_NAME).setDescription("Data pipeline schedule");
    // Only set a concurrency limit when the config provides one.
    Integer concurrencyLimit = pipelineConfig.getMaxConcurrentRuns();
    if (concurrencyLimit != null) {
        schedule.setMaxConcurrentRuns(concurrencyLimit);
    }
    scheduleWorkflow(schedule.createTimeSchedule(pipelineConfig.getSchedule()), SmartWorkflow.NAME);
}
Also used : ETLBatchConfig(co.cask.cdap.etl.proto.v2.ETLBatchConfig) BatchPipelineSpec(co.cask.cdap.etl.batch.BatchPipelineSpec) Schedules(co.cask.cdap.api.schedule.Schedules) BatchPipelineSpecGenerator(co.cask.cdap.etl.batch.BatchPipelineSpecGenerator) AvroKeyOutputFormat(org.apache.avro.mapreduce.AvroKeyOutputFormat)

Aggregations

BatchPipelineSpecGenerator (co.cask.cdap.etl.batch.BatchPipelineSpecGenerator): 4; ArtifactId (co.cask.cdap.api.artifact.ArtifactId): 3; MockPluginConfigurer (co.cask.cdap.etl.common.MockPluginConfigurer): 3; BatchPipelineSpec (co.cask.cdap.etl.batch.BatchPipelineSpec): 2; ETLConfig (co.cask.cdap.etl.proto.v2.ETLConfig): 2; ETLPlugin (co.cask.cdap.etl.proto.v2.ETLPlugin): 2; ETLStage (co.cask.cdap.etl.proto.v2.ETLStage): 2; Test (org.junit.Test): 2; Resources (co.cask.cdap.api.Resources): 1; FileSet (co.cask.cdap.api.dataset.lib.FileSet): 1; Schedules (co.cask.cdap.api.schedule.Schedules): 1; ETLBatchConfig (co.cask.cdap.etl.proto.v2.ETLBatchConfig): 1; AvroKeyOutputFormat (org.apache.avro.mapreduce.AvroKeyOutputFormat): 1; BeforeClass (org.junit.BeforeClass): 1