Use of co.cask.cdap.etl.planner.ControlDag in the cdap project by caskdata.
Class SmartWorkflow, method configure.
/**
 * Configures this workflow from the pipeline spec.
 *
 * <p>Sets the workflow name and description, stores the JSON-serialized pipeline spec as a workflow
 * property, records every stage spec by stage name, decides whether the pipeline uses Spark, plans
 * the pipeline into phases and then adds the programs for those phases:
 * <ul>
 *   <li>exactly one phase: the phase is added directly through a {@code TrunkProgramAdder};</li>
 *   <li>several phases without any phase connections: every phase is added as a branch of a single
 *       fork, which is then joined;</li>
 *   <li>otherwise: a {@code ControlDag} is built from the phase connections, flattened, and programs
 *       are added starting from its first source.</li>
 * </ul>
 */
@Override
protected void configure() {
  setName(NAME);
  setDescription(DESCRIPTION);

  // set the pipeline spec as a property in case somebody like the UI wants to read it
  Map<String, String> properties = new HashMap<>();
  properties.put(Constants.PIPELINE_SPEC_KEY, GSON.toJson(spec));
  setProperties(properties);

  stageSpecs = new HashMap<>();
  useSpark = engine == Engine.SPARK;
  for (StageSpec stageSpec : spec.getStages()) {
    // Always record the stage spec. Previously the map was only filled while scanning for Spark
    // plugins, so it stayed empty for the Spark engine and was cut short at the first Spark stage.
    // NOTE(review): stageSpecs is a field, presumably looked up by stage name when programs are
    // added - confirm against the rest of the class.
    stageSpecs.put(stageSpec.getName(), stageSpec);

    // the plugin type only matters until we know the pipeline needs spark
    if (!useSpark) {
      String pluginType = stageSpec.getPlugin().getType();
      if (SparkCompute.PLUGIN_TYPE.equals(pluginType) || SparkSink.PLUGIN_TYPE.equals(pluginType)) {
        useSpark = true;
      }
    }
  }

  PipelinePlanner planner;
  Set<String> actionTypes = ImmutableSet.of(Action.PLUGIN_TYPE, Constants.SPARK_PROGRAM_PLUGIN_TYPE);
  if (useSpark) {
    // if the pipeline uses spark, we don't need to break the pipeline up into phases, we can just have
    // a single phase.
    planner = new PipelinePlanner(supportedPluginTypes, ImmutableSet.<String>of(),
                                  ImmutableSet.<String>of(), actionTypes);
  } else {
    planner = new PipelinePlanner(supportedPluginTypes,
                                  ImmutableSet.of(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE),
                                  ImmutableSet.of(SparkCompute.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE),
                                  actionTypes);
  }
  plan = planner.plan(spec);

  // single phase, just add the program directly
  if (plan.getPhases().size() == 1) {
    addProgram(plan.getPhases().keySet().iterator().next(), new TrunkProgramAdder(getConfigurer()));
    return;
  }

  // Dag classes don't allow a 'dag' without connections
  if (plan.getPhaseConnections().isEmpty()) {
    // multiple unconnected phases, do a fork then join
    WorkflowForkConfigurer forkConfigurer = getConfigurer().fork();
    WorkflowProgramAdder programAdder = new BranchProgramAdder(forkConfigurer);
    for (String phaseName : plan.getPhases().keySet()) {
      addProgram(phaseName, programAdder);
    }
    forkConfigurer.join();
    return;
  }

  dag = new ControlDag(plan.getPhaseConnections());
  // after flattening, there is guaranteed to be just one source
  dag.flatten();
  String start = dag.getSources().iterator().next();
  addPrograms(start, getConfigurer());
}
Aggregations