Use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.
The class PipelineSpecGeneratorTest, method setupTests:
@BeforeClass
public static void setupTests() {
  // Populate some mock plugins.
  MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
  Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
  pluginConfigurer.addMockPlugin(BatchSource.PLUGIN_TYPE, "mocksource",
                                 MockPlugin.builder().setOutputSchema(SCHEMA_A).build(), artifactIds);
  pluginConfigurer.addMockPlugin(Transform.PLUGIN_TYPE, "mockA",
                                 MockPlugin.builder().setOutputSchema(SCHEMA_A).setErrorSchema(SCHEMA_B).build(),
                                 artifactIds);
  pluginConfigurer.addMockPlugin(Transform.PLUGIN_TYPE, "mockB",
                                 MockPlugin.builder().setOutputSchema(SCHEMA_B).build(), artifactIds);
  pluginConfigurer.addMockPlugin(BatchSink.PLUGIN_TYPE, "mocksink", MockPlugin.builder().build(), artifactIds);
  pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "mockaction", MockPlugin.builder().build(), artifactIds);
  pluginConfigurer.addMockPlugin(BatchJoiner.PLUGIN_TYPE, "mockjoiner", MockPlugin.builder().build(), artifactIds);
  pluginConfigurer.addMockPlugin(ErrorTransform.PLUGIN_TYPE, "mockerror", MockPlugin.builder().build(), artifactIds);
  specGenerator = new BatchPipelineSpecGenerator(pluginConfigurer,
                                                 ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                                 ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                                 FileSet.class, DatasetProperties.EMPTY, Engine.MAPREDUCE);
}
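A minimal sketch (not from the source) of how a test could use this fixture: wire the registered "mocksource" and "mocksink" plugins into a two-stage pipeline and generate its spec, reusing only the ETLBatchConfig, ETLStage, and ETLPlugin builder calls that appear in the tests below. The stage names "source" and "sink" are hypothetical.

// Sketch only; assumes the builder APIs shown in the surrounding tests.
Map<String, String> empty = ImmutableMap.of();
ETLBatchConfig config = ETLBatchConfig.builder("* * * * *")
  .addStage(new ETLStage("source", new ETLPlugin("mocksource", BatchSource.PLUGIN_TYPE, empty)))  // hypothetical stage name
  .addStage(new ETLStage("sink", new ETLPlugin("mocksink", BatchSink.PLUGIN_TYPE, empty)))        // hypothetical stage name
  .addConnection("source", "sink")
  .setEngine(Engine.MAPREDUCE)
  .build();
PipelineSpec spec = specGenerator.generateSpec(config);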
Use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.
The class PipelineSpecGeneratorTest, method testPipelineProperties:
@Test
public void testPipelineProperties() {
  // Populate some mock plugins.
  MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
  Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
  pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action1",
                                 MockPlugin.builder()
                                   .putPipelineProperty("prop1", "val1")
                                   .putPipelineProperty("prop2", "val2")
                                   .build(),
                                 artifactIds);
  pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action2",
                                 MockPlugin.builder().putPipelineProperty("prop2", "val2").build(), artifactIds);
  PipelineSpecGenerator specGenerator =
    new BatchPipelineSpecGenerator(pluginConfigurer,
                                   ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                   ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                   FileSet.class, DatasetProperties.EMPTY, Engine.MAPREDUCE);
  Map<String, String> empty = ImmutableMap.of();
  ETLConfig config = ETLBatchConfig.builder("* * * * *")
    .setProperties(ImmutableMap.of("system.spark.spark.test", "abc", "system.mapreduce.prop3", "val3"))
    .addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, empty)))
    .addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, empty)))
    .addConnection("a1", "a2")
    .setEngine(Engine.MAPREDUCE)
    .build();
  PipelineSpec actual = specGenerator.generateSpec(config);
  PipelineSpec expected = BatchPipelineSpec.builder()
    .addConnection("a1", "a2")
    .setProperties(ImmutableMap.of("prop1", "val1", "prop2", "val2", "prop3", "val3"))
    .addStage(StageSpec.builder("a1", new PluginSpec(Action.PLUGIN_TYPE, "action1", empty, ARTIFACT_ID))
                .addOutputs("a2")
                .build())
    .addStage(StageSpec.builder("a2", new PluginSpec(Action.PLUGIN_TYPE, "action2", empty, ARTIFACT_ID))
                .addInputs("a1")
                .build())
    .setResources(new Resources(1024))
    .setDriverResources(new Resources(1024))
    .setClientResources(new Resources(1024))
    .build();
  Assert.assertEquals(expected, actual);
}
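A note on what the expected spec encodes: pipeline properties registered by the plugins (prop1, prop2) are merged with config properties scoped to the active engine, so under Engine.MAPREDUCE the key "system.mapreduce.prop3" is unscoped to "prop3" while "system.spark.spark.test", scoped to the other engine, is dropped. (By symmetry one would expect the spark-scoped key to survive under Engine.SPARK instead, though this test does not exercise that case.)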
Use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.
The class PipelineSpecGeneratorTest, method testConflictingPipelineProperties:
@Test(expected = IllegalArgumentException.class)
public void testConflictingPipelineProperties() {
  // Populate some mock plugins.
  MockPluginConfigurer pluginConfigurer = new MockPluginConfigurer();
  Set<ArtifactId> artifactIds = ImmutableSet.of(ARTIFACT_ID);
  pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action1",
                                 MockPlugin.builder().putPipelineProperty("prop1", "val1").build(), artifactIds);
  pluginConfigurer.addMockPlugin(Action.PLUGIN_TYPE, "action2",
                                 MockPlugin.builder().putPipelineProperty("prop1", "val2").build(), artifactIds);
  PipelineSpecGenerator specGenerator =
    new BatchPipelineSpecGenerator(pluginConfigurer,
                                   ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                   ImmutableSet.of(BatchSink.PLUGIN_TYPE),
                                   FileSet.class, DatasetProperties.EMPTY, Engine.MAPREDUCE);
  Map<String, String> empty = ImmutableMap.of();
  ETLConfig config = ETLBatchConfig.builder("* * * * *")
    .addStage(new ETLStage("a1", new ETLPlugin("action1", Action.PLUGIN_TYPE, empty)))
    .addStage(new ETLStage("a2", new ETLPlugin("action2", Action.PLUGIN_TYPE, empty)))
    .addConnection("a1", "a2")
    .setEngine(Engine.MAPREDUCE)
    .build();
  specGenerator.generateSpec(config);
}
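A hedged variant (not in the source): outside the @Test(expected = ...) idiom, the same conflict could be asserted explicitly, which also documents why generation fails here: both actions set "prop1", but to different values.

// Sketch only; equivalent check written with an explicit try/catch.
try {
  specGenerator.generateSpec(config);
  Assert.fail("Expected conflicting values for pipeline property 'prop1' to be rejected");
} catch (IllegalArgumentException e) {
  // expected: action1 sets prop1=val1 while action2 sets prop1=val2
}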
Use of co.cask.cdap.etl.batch.BatchPipelineSpecGenerator in project cdap by caskdata.
The class DataPipelineApp, method configure:
@Override
public void configure() {
  ETLBatchConfig config = getConfig();
  setDescription(DEFAULT_DESCRIPTION);
  PipelineSpecGenerator<ETLBatchConfig, BatchPipelineSpec> specGenerator =
    new BatchPipelineSpecGenerator(getConfigurer(),
                                   ImmutableSet.of(BatchSource.PLUGIN_TYPE),
                                   ImmutableSet.of(BatchSink.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE),
                                   TimePartitionedFileSet.class,
                                   FileSetProperties.builder()
                                     .setInputFormat(AvroKeyInputFormat.class)
                                     .setOutputFormat(AvroKeyOutputFormat.class)
                                     .setEnableExploreOnCreate(true)
                                     .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
                                     .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
                                     .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
                                     .setTableProperty("avro.schema.literal", Constants.ERROR_SCHEMA.toString())
                                     .build(),
                                   config.getEngine());
  BatchPipelineSpec spec = specGenerator.generateSpec(config);
  addWorkflow(new SmartWorkflow(spec, supportedPluginTypes, getConfigurer(), config.getEngine()));
  Schedules.Builder scheduleBuilder = Schedules.builder(SCHEDULE_NAME).setDescription("Data pipeline schedule");
  Integer maxConcurrentRuns = config.getMaxConcurrentRuns();
  if (maxConcurrentRuns != null) {
    scheduleBuilder.setMaxConcurrentRuns(maxConcurrentRuns);
  }
  scheduleWorkflow(scheduleBuilder.createTimeSchedule(config.getSchedule()), SmartWorkflow.NAME);
}
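For orientation, a sketch (not from the source) of a config this configure() method could receive, assembled with the same builder the tests above use; the stage names and plugin names here are hypothetical placeholders.

// Sketch only; "ingest"/"store" and their plugin names are hypothetical.
ETLBatchConfig config = ETLBatchConfig.builder("0 * * * *")  // cron expression consumed by createTimeSchedule()
  .addStage(new ETLStage("ingest", new ETLPlugin("someSource", BatchSource.PLUGIN_TYPE, ImmutableMap.<String, String>of())))
  .addStage(new ETLStage("store", new ETLPlugin("someSink", BatchSink.PLUGIN_TYPE, ImmutableMap.<String, String>of())))
  .addConnection("ingest", "store")
  .setEngine(Engine.MAPREDUCE)
  .build();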