Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.
In class ExternalSparkProgram, the configure method:
@Override
protected void configure() {
  PluginSpec pluginSpec = stageSpec.getPlugin();
  PluginProperties pluginProperties = PluginProperties.builder()
    .addAll(pluginSpec.getProperties())
    .build();
  // Use a UUID as the plugin ID so that it doesn't clash with anything. The class is only
  // used here to check which main class is needed.
  // TODO: clean this up so that we only get the class once and store it in the PluginSpec,
  // instead of getting it both in the pipeline spec generator and here.
  Object sparkPlugin = usePlugin(pluginSpec.getType(), pluginSpec.getName(),
                                 UUID.randomUUID().toString(), pluginProperties);
  if (sparkPlugin == null) {
    // Should never happen; this should have been checked by the pipeline spec generator.
    throw new IllegalStateException(
      String.format("No plugin found of type %s and name %s for stage %s",
                    pluginSpec.getType(), pluginSpec.getName(), STAGE_NAME));
  }
  if (Spark.class.isAssignableFrom(sparkPlugin.getClass())) {
    // TODO: pass in a forwarding configurer so that we can capture the properties set by the
    // plugin. However, the usage is very limited, as the plugin can always use its plugin
    // config to preserve properties.
    ((Spark) sparkPlugin).configure(getConfigurer());
  } else if (SparkMain.class.isAssignableFrom(sparkPlugin.getClass())) {
    setMainClass(ScalaSparkMainWrapper.class);
  } else {
    setMainClass(JavaSparkMainWrapper.class);
  }

  setName(phaseSpec.getPhaseName());
  Map<String, String> properties = new HashMap<>();
  properties.put(STAGE_NAME, stageSpec.getName());
  properties.put(Constants.PIPELINEID, GSON.toJson(phaseSpec, BatchPhaseSpec.class));
  setProperties(properties);
}
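The two load-bearing ideas above are the throwaway UUID plugin ID, which keeps repeated registrations from colliding, and the class-based dispatch that picks the main class. Below is a minimal, self-contained sketch of that dispatch using hypothetical stand-in interfaces rather than the real CDAP Spark and SparkMain types; the wrapper names are echoed as strings only for illustration.

import java.util.UUID;

// Hypothetical stand-ins for the CDAP Spark and SparkMain interfaces.
interface Spark { void configure(Object configurer); }
interface SparkMain { void run(); }

final class MainClassDispatch {
  // Mirrors the configure() dispatch above: a Spark plugin configures itself,
  // anything else runs through a language-specific wrapper main class.
  static String mainClassFor(Object sparkPlugin) {
    if (Spark.class.isAssignableFrom(sparkPlugin.getClass())) {
      return "(plugin configures itself)";
    } else if (SparkMain.class.isAssignableFrom(sparkPlugin.getClass())) {
      return "ScalaSparkMainWrapper";
    } else {
      return "JavaSparkMainWrapper";
    }
  }

  public static void main(String[] args) {
    // A fresh UUID per registration keeps the plugin ID unique, as in configure().
    String pluginId = UUID.randomUUID().toString();
    SparkMain scalaStyle = () -> { };
    System.out.println(pluginId + " -> " + mainClassFor(scalaStyle));
    System.out.println(pluginId + " -> " + mainClassFor(new Object()));
  }
}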
Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.
In class BatchPhaseSpecTest, the testDescription method:
@Test
public void testDescription() throws Exception {
  /*
   * source1 --|
   *           |--> sink.connector
   * source2 --|
   */
  Map<String, String> props = new HashMap<>();
  PluginSpec connectorSpec =
    new PluginSpec(Constants.Connector.PLUGIN_TYPE, "connector", ImmutableMap.<String, String>of(), null);
  ArtifactId artifactId = new ArtifactId("art", new ArtifactVersion("1.0.0"), ArtifactScope.USER);
  PipelinePhase.Builder builder =
    PipelinePhase.builder(ImmutableSet.of(BatchSource.PLUGIN_TYPE, Constants.Connector.PLUGIN_TYPE))
      .addStage(StageSpec.builder("source1",
                                  new PluginSpec(BatchSource.PLUGIN_TYPE, "src", props, artifactId)).build())
      .addStage(StageSpec.builder("source2",
                                  new PluginSpec(BatchSource.PLUGIN_TYPE, "src", props, artifactId))
                  .addInputSchema("a", Schema.recordOf("stuff",
                                                       Schema.Field.of("x", Schema.of(Schema.Type.INT))))
                  .build())
      .addStage(StageSpec.builder("sink.connector", connectorSpec).build())
      .addConnection("source1", "sink.connector")
      .addConnection("source2", "sink.connector");
  BatchPhaseSpec phaseSpec =
    new BatchPhaseSpec("phase-1", builder.build(), new Resources(), new Resources(), new Resources(),
                       false, false, Collections.<String, String>emptyMap(), 0,
                       Collections.<String, String>emptyMap(), false);
  Assert.assertEquals("Sources 'source1', 'source2' to sinks 'sink.connector'.", phaseSpec.getDescription());
}
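The asserted description simply quotes and lists the phase's sources and sinks. An illustrative implementation of that formatting (not CDAP's actual code, which lives inside BatchPhaseSpec/PipelinePhase) that reproduces the expected string:

import java.util.Arrays;
import java.util.Collection;
import java.util.stream.Collectors;

final class PhaseDescription {
  // Illustrative only: quote each stage name and join with commas, producing
  // strings like the one asserted in testDescription().
  static String describe(Collection<String> sources, Collection<String> sinks) {
    return String.format("Sources %s to sinks %s.", quoteJoin(sources), quoteJoin(sinks));
  }

  private static String quoteJoin(Collection<String> names) {
    return names.stream().map(n -> "'" + n + "'").collect(Collectors.joining(", "));
  }

  public static void main(String[] args) {
    // Prints: Sources 'source1', 'source2' to sinks 'sink.connector'.
    System.out.println(describe(Arrays.asList("source1", "source2"),
                                Arrays.asList("sink.connector")));
  }
}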
Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.
In class PipelinePlanner, the dagToPipeline method:
/**
 * Converts a Dag into a PipelinePhase, using what we know about the plugin type of each node in the dag.
 * The PipelinePhase is what programs will take as input, and keeps track of sources, transforms, sinks, etc.
 *
 * @param dag the dag to convert
 * @param connectors connector nodes across all dags
 * @param specs specifications for every stage
 * @param conditionConnectors connector nodes created for condition stages
 * @return the PipelinePhase converted from the dag
 */
private PipelinePhase dagToPipeline(Dag dag, Map<String, String> connectors,
                                    Map<String, StageSpec> specs, Map<String, String> conditionConnectors) {
  PipelinePhase.Builder phaseBuilder = PipelinePhase.builder(supportedPluginTypes);
  for (String stageName : dag.getTopologicalOrder()) {
    Set<String> outputs = dag.getNodeOutputs(stageName);
    if (!outputs.isEmpty()) {
      phaseBuilder.addConnections(stageName, outputs);
    }

    // add connectors
    String originalName = connectors.get(stageName);
    if (originalName != null || conditionConnectors.values().contains(stageName)) {
      String connectorType = dag.getSources().contains(stageName)
        ? Constants.Connector.SOURCE_TYPE : Constants.Connector.SINK_TYPE;
      PluginSpec connectorSpec =
        new PluginSpec(Constants.Connector.PLUGIN_TYPE, "connector",
                       ImmutableMap.of(Constants.Connector.ORIGINAL_NAME,
                                       originalName != null ? originalName : stageName,
                                       Constants.Connector.TYPE, connectorType),
                       null);
      phaseBuilder.addStage(StageSpec.builder(stageName, connectorSpec).build());
      continue;
    }

    // add other plugin types
    StageSpec spec = specs.get(stageName);
    phaseBuilder.addStage(spec);
  }

  return phaseBuilder.build();
}
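The only non-obvious branch is how a connector stage is rebuilt: its plugin properties record which original stage it stands in for, and whether it acts as a source or a sink within this phase. A small sketch isolating that property-map construction; the literal keys and type values below are placeholders, as the real ones come from Constants.Connector.

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

final class ConnectorPropertiesSketch {
  // Mirrors the connector branch of dagToPipeline(): a connector at the front
  // of a phase (a dag source) is a source connector; otherwise it is a sink.
  static Map<String, String> connectorProperties(String stageName, String originalName,
                                                 Set<String> dagSources) {
    String connectorType = dagSources.contains(stageName) ? "source.connector" : "sink.connector";
    Map<String, String> props = new LinkedHashMap<>();
    props.put("original.name", originalName != null ? originalName : stageName);
    props.put("type", connectorType);
    return props;
  }

  public static void main(String[] args) {
    Set<String> sources = new HashSet<>(Arrays.asList("source1.connector"));
    // Prints: {original.name=source1, type=source.connector}
    System.out.println(connectorProperties("source1.connector", "source1", sources));
  }
}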
Use of co.cask.cdap.etl.spec.PluginSpec in project cdap by caskdata.
In class BatchPipelineSpecGenerator, the generateSpec method:
@Override
public BatchPipelineSpec generateSpec(ETLBatchConfig config) {
  BatchPipelineSpec.Builder specBuilder = BatchPipelineSpec.builder();
  for (ETLStage endingAction : config.getPostActions()) {
    String name = endingAction.getName();
    DefaultPipelineConfigurer<T> pipelineConfigurer = new DefaultPipelineConfigurer<>(configurer, name, engine);
    PluginSpec pluginSpec = configurePlugin(name, endingAction.getPlugin(), pipelineConfigurer);
    specBuilder.addAction(new ActionSpec(name, pluginSpec));
  }
  configureStages(config, specBuilder);
  return specBuilder.build();
}
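generateSpec() has a simple shape: give every post-action its own DefaultPipelineConfigurer so each plugin is configured in isolation, record one ActionSpec per action, then hand the remaining stages to configureStages(). A stripped-down sketch of that shape with stand-in types; none of these classes are CDAP's.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

final class PostActionsSketch {
  // Minimal stand-in for ETLStage; not the CDAP class.
  static final class Stage {
    final String name;
    final String plugin;
    Stage(String name, String plugin) { this.name = name; this.plugin = plugin; }
  }

  // Mirrors the generateSpec() loop: configure each post-action independently,
  // collecting one spec per action. Plugin validation is elided here.
  static List<String> configurePostActions(List<Stage> postActions) {
    List<String> actionSpecs = new ArrayList<>();
    for (Stage action : postActions) {
      // In the real code, a fresh DefaultPipelineConfigurer is created per stage.
      actionSpecs.add(action.name + ":" + action.plugin);
    }
    return actionSpecs;
  }

  public static void main(String[] args) {
    // Prints: [emailOnFinish:email]
    System.out.println(configurePostActions(
      Arrays.asList(new Stage("emailOnFinish", "email"))));
  }
}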