Use of io.cdap.cdap.etl.spark.batch.ETLSpark in project cdap by caskdata.
From the class SmartWorkflow, method addProgram.
/**
 * Adds the program that runs the named phase to the workflow being built.
 *
 * <p>The phase is looked up in the plan. When the plan has no phase under that name, nothing is
 * added. Otherwise a program name of the form {@code phase-<n>} is generated from the running
 * phase counter, and the kind of program added depends on the plugin types found in the phase
 * spec: an action, a condition, an external Spark program, an ETL Spark program (when
 * {@code useSpark} is set), or an ETL MapReduce program.
 *
 * @param phaseName name of the phase to look up in the plan
 * @param programAdder adder that the program for this phase is appended to
 * @return the adder to keep building on: the result of {@code programAdder.condition(...)} for a
 *     condition phase, otherwise the adder that was passed in
 */
private WorkflowProgramAdder addProgram(String phaseName, WorkflowProgramAdder programAdder) {
  PipelinePhase phase = plan.getPhase(phaseName);
  if (phase == null) {
    // The plan has no phase with this name: the node was artificially added by the
    // control dag flattening process, so there is nothing to add and it is skipped.
    return programAdder;
  }

  // The phase name might contain characters that are invalid in a program name,
  // so the program is named after the running phase counter instead.
  String programName = "phase-" + phaseNum;
  phaseNum++;

  BatchPhaseSpec batchPhaseSpec = getPhaseSpec(programName, phase);
  Set<String> pluginTypes = batchPhaseSpec.getPhase().getPluginTypes();

  // An action is always alone in its phase.
  if (pluginTypes.contains(Action.PLUGIN_TYPE)) {
    programAdder.addAction(new PipelineAction(batchPhaseSpec));
    return programAdder;
  }

  // A condition is always alone in its phase; continue building on the adder it returns.
  if (pluginTypes.contains(Condition.PLUGIN_TYPE)) {
    return programAdder.condition(new PipelineCondition(batchPhaseSpec));
  }

  // A spark program is always alone in its phase, so the first stage of that type is the one to run.
  if (pluginTypes.contains(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
    String stageName =
        phase.getStagesOfType(Constants.SPARK_PROGRAM_PLUGIN_TYPE).iterator().next().getName();
    StageSpec stageSpec = stageSpecs.get(stageName);
    applicationConfigurer.addSpark(
        new ExternalSparkProgram(
            batchPhaseSpec,
            stageSpec,
            applicationConfigurer.getRuntimeConfigurer(),
            applicationConfigurer.getDeployedNamespace()));
    programAdder.addSpark(programName);
    return programAdder;
  }

  // Regular ETL phase: run it on Spark when requested, otherwise on MapReduce.
  if (useSpark) {
    applicationConfigurer.addSpark(
        new ETLSpark(
            batchPhaseSpec,
            applicationConfigurer.getRuntimeConfigurer(),
            applicationConfigurer.getDeployedNamespace()));
    programAdder.addSpark(programName);
    return programAdder;
  }

  applicationConfigurer.addMapReduce(
      new ETLMapReduce(
          batchPhaseSpec,
          new HashSet<>(connectorDatasets.values()),
          applicationConfigurer.getRuntimeConfigurer(),
          applicationConfigurer.getDeployedNamespace()));
  programAdder.addMapReduce(programName);
  return programAdder;
}
Aggregations