Example 1 with ControlDag

Use of co.cask.cdap.etl.planner.ControlDag in project cdap by caskdata.

The configure() method of the class SmartWorkflow:

@Override
protected void configure() {
    setName(NAME);
    setDescription(DESCRIPTION);
    // set the pipeline spec as a property in case somebody like the UI wants to read it
    Map<String, String> properties = new HashMap<>();
    properties.put(Constants.PIPELINE_SPEC_KEY, GSON.toJson(spec));
    setProperties(properties);
    stageSpecs = new HashMap<>();
    useSpark = engine == Engine.SPARK;
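    // even when the configured engine is not Spark, a SparkCompute or SparkSink
    // stage anywhere in the pipeline forces execution on Spark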
    if (!useSpark) {
        for (StageSpec stageSpec : spec.getStages()) {
            stageSpecs.put(stageSpec.getName(), stageSpec);
            String pluginType = stageSpec.getPlugin().getType();
            if (SparkCompute.PLUGIN_TYPE.equals(pluginType) || SparkSink.PLUGIN_TYPE.equals(pluginType)) {
                useSpark = true;
                break;
            }
        }
    }
    PipelinePlanner planner;
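    // plugin types that the planner treats as actions rather than data stages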
    Set<String> actionTypes = ImmutableSet.of(Action.PLUGIN_TYPE, Constants.SPARK_PROGRAM_PLUGIN_TYPE);
    if (useSpark) {
        // if the pipeline uses spark, we don't need to break the pipeline up into phases, we can just have
        // a single phase.
        planner = new PipelinePlanner(supportedPluginTypes, ImmutableSet.<String>of(), ImmutableSet.<String>of(), actionTypes);
    } else {
        planner = new PipelinePlanner(supportedPluginTypes,
                                      ImmutableSet.of(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE),
                                      ImmutableSet.of(SparkCompute.PLUGIN_TYPE, SparkSink.PLUGIN_TYPE),
                                      actionTypes);
    }
    plan = planner.plan(spec);
    // single phase, just add the program directly
    if (plan.getPhases().size() == 1) {
        addProgram(plan.getPhases().keySet().iterator().next(), new TrunkProgramAdder(getConfigurer()));
        return;
    }
    // Dag classes don't allow a 'dag' without connections
    if (plan.getPhaseConnections().isEmpty()) {
        WorkflowProgramAdder programAdder;
        // multiple phases, do a fork then join
        WorkflowForkConfigurer forkConfigurer = getConfigurer().fork();
        programAdder = new BranchProgramAdder(forkConfigurer);
        for (String phaseName : plan.getPhases().keySet()) {
            addProgram(phaseName, programAdder);
        }
        if (forkConfigurer != null) {
            forkConfigurer.join();
        }
        return;
    }
    dag = new ControlDag(plan.getPhaseConnections());
    // after flattening, there is guaranteed to be just one source
    dag.flatten();
    String start = dag.getSources().iterator().next();
    addPrograms(start, getConfigurer());
}
Also used:
WorkflowForkConfigurer (co.cask.cdap.api.workflow.WorkflowForkConfigurer)
ControlDag (co.cask.cdap.etl.planner.ControlDag)
PipelinePlanner (co.cask.cdap.etl.planner.PipelinePlanner)
HashMap (java.util.HashMap)
LinkedHashMap (java.util.LinkedHashMap)
StageSpec (co.cask.cdap.etl.spec.StageSpec)
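
For the multi-phase case with connections, ControlDag does the real work. The sketch below is not from the CDAP sources; it isolates that last branch of configure() under the assumption that the Connection type lives at co.cask.cdap.etl.proto.Connection with a (from, to) constructor, which is what plan.getPhaseConnections() appears to yield:

import java.util.Set;

import co.cask.cdap.etl.planner.ControlDag;
import co.cask.cdap.etl.proto.Connection; // assumed package for Connection
import com.google.common.collect.ImmutableSet;

public class ControlDagSketch {
    public static void main(String[] args) {
        // two independent phases fan in to a third, so the raw dag has two sources
        Set<Connection> phaseConnections = ImmutableSet.of(
            new Connection("phase-1", "phase-3"),
            new Connection("phase-2", "phase-3"));
        ControlDag dag = new ControlDag(phaseConnections);
        // per the comment in configure(), flattening leaves exactly one source
        dag.flatten();
        String start = dag.getSources().iterator().next();
        System.out.println("workflow entry point: " + start);
    }
}

After flatten(), getSources() returns exactly one node, so the workflow can be built by walking the dag from that start node, as the final addPrograms call in configure() does.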
