Search in sources :

Example 1 with ETLSpark

Use of io.cdap.cdap.etl.spark.batch.ETLSpark in project cdap by caskdata.

From the class SmartWorkflow, method addProgram.

/**
 * Adds the program that runs the named pipeline phase to the given workflow program adder.
 *
 * <p>The kind of program is chosen from the plugin types present in the phase, checked in this
 * order: action, condition, external Spark program, then either an ETL Spark program (when
 * {@code useSpark} is set) or an ETL MapReduce program.
 *
 * @param phaseName name of the phase to look up in the plan
 * @param programAdder adder the program is attached to
 * @return the adder to continue building on; for a condition phase this is the adder returned by
 *     {@code programAdder.condition(...)}, otherwise it is the {@code programAdder} passed in
 */
private WorkflowProgramAdder addProgram(String phaseName, WorkflowProgramAdder programAdder) {
    PipelinePhase pipelinePhase = plan.getPhase(phaseName);
    if (pipelinePhase == null) {
        // The plan has no phase for this name: it was artificially added by the control dag
        // flattening process, so there is nothing to add.
        return programAdder;
    }

    // The phase name might contain invalid characters, so the program gets a generated name.
    String name = "phase-" + phaseNum++;
    BatchPhaseSpec spec = getPhaseSpec(name, pipelinePhase);
    Set<String> types = spec.getPhase().getPluginTypes();

    // An action is always alone in its phase.
    if (types.contains(Action.PLUGIN_TYPE)) {
        programAdder.addAction(new PipelineAction(spec));
        return programAdder;
    }

    // A condition is always alone in its phase; continue on the adder it returns.
    if (types.contains(Condition.PLUGIN_TYPE)) {
        return programAdder.condition(new PipelineCondition(spec));
    }

    // A spark program is always alone in its phase, so the first stage of that type is the one.
    if (types.contains(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
        StageSpec sparkStage = stageSpecs.get(
            pipelinePhase.getStagesOfType(Constants.SPARK_PROGRAM_PLUGIN_TYPE).iterator().next().getName());
        applicationConfigurer.addSpark(
            new ExternalSparkProgram(
                spec,
                sparkStage,
                applicationConfigurer.getRuntimeConfigurer(),
                applicationConfigurer.getDeployedNamespace()));
        programAdder.addSpark(name);
        return programAdder;
    }

    // Regular ETL phase: run it on the configured engine.
    if (useSpark) {
        applicationConfigurer.addSpark(
            new ETLSpark(
                spec,
                applicationConfigurer.getRuntimeConfigurer(),
                applicationConfigurer.getDeployedNamespace()));
        programAdder.addSpark(name);
        return programAdder;
    }

    applicationConfigurer.addMapReduce(
        new ETLMapReduce(
            spec,
            new HashSet<>(connectorDatasets.values()),
            applicationConfigurer.getRuntimeConfigurer(),
            applicationConfigurer.getDeployedNamespace()));
    programAdder.addMapReduce(name);
    return programAdder;
}
Also used : ETLSpark(io.cdap.cdap.etl.spark.batch.ETLSpark) ETLMapReduce(io.cdap.cdap.etl.batch.mapreduce.ETLMapReduce) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) PipelineCondition(io.cdap.cdap.etl.batch.condition.PipelineCondition) PipelineAction(io.cdap.cdap.etl.batch.customaction.PipelineAction)

Aggregations

BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec): 1; PipelineCondition (io.cdap.cdap.etl.batch.condition.PipelineCondition): 1; PipelineAction (io.cdap.cdap.etl.batch.customaction.PipelineAction): 1; ETLMapReduce (io.cdap.cdap.etl.batch.mapreduce.ETLMapReduce): 1; PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase): 1; StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec): 1; ETLSpark (io.cdap.cdap.etl.spark.batch.ETLSpark): 1