Search in sources :

Example 6 with BatchPhaseSpec

use of co.cask.cdap.etl.batch.BatchPhaseSpec in project cdap by caskdata.

the class SmartWorkflow method addProgram.

private void addProgram(String phaseName, WorkflowProgramAdder programAdder) {
    PipelinePhase phase = plan.getPhase(phaseName);
    // artificially added by the control dag flattening process. So nothing to add, skip it
    if (phase == null) {
        return;
    }
    // can't use phase name as a program name because it might contain invalid characters
    String programName = "phase-" + phaseNum;
    phaseNum++;
    // if this phase uses connectors, add the local dataset for that connector if we haven't already
    for (StageInfo connectorInfo : phase.getStagesOfType(Constants.CONNECTOR_TYPE)) {
        String connectorName = connectorInfo.getName();
        String datasetName = connectorDatasets.get(connectorName);
        if (datasetName == null) {
            datasetName = "conn-" + connectorNum++;
            connectorDatasets.put(connectorName, datasetName);
            // add the local dataset
            ConnectorSource connectorSource = new ConnectorSource(datasetName, null);
            connectorSource.configure(getConfigurer());
        }
    }
    Map<String, String> phaseConnectorDatasets = new HashMap<>();
    for (StageInfo connectorStage : phase.getStagesOfType(Constants.CONNECTOR_TYPE)) {
        phaseConnectorDatasets.put(connectorStage.getName(), connectorDatasets.get(connectorStage.getName()));
    }
    BatchPhaseSpec batchPhaseSpec = new BatchPhaseSpec(programName, phase, spec.getResources(), spec.getDriverResources(), spec.getClientResources(), spec.isStageLoggingEnabled(), spec.isProcessTimingEnabled(), phaseConnectorDatasets, spec.getNumOfRecordsPreview(), spec.getProperties());
    Set<String> pluginTypes = batchPhaseSpec.getPhase().getPluginTypes();
    if (pluginTypes.contains(Action.PLUGIN_TYPE)) {
        // actions will be all by themselves in a phase
        programAdder.addAction(new PipelineAction(batchPhaseSpec));
    } else if (pluginTypes.contains(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
        // spark programs will be all by themselves in a phase
        String stageName = phase.getStagesOfType(Constants.SPARK_PROGRAM_PLUGIN_TYPE).iterator().next().getName();
        StageSpec stageSpec = stageSpecs.get(stageName);
        applicationConfigurer.addSpark(new ExternalSparkProgram(batchPhaseSpec, stageSpec));
        programAdder.addSpark(programName);
    } else if (useSpark) {
        applicationConfigurer.addSpark(new ETLSpark(batchPhaseSpec));
        programAdder.addSpark(programName);
    } else {
        applicationConfigurer.addMapReduce(new ETLMapReduce(batchPhaseSpec));
        programAdder.addMapReduce(programName);
    }
}
Also used : ETLMapReduce(co.cask.cdap.etl.batch.mapreduce.ETLMapReduce) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) StageInfo(co.cask.cdap.etl.planner.StageInfo) PipelineAction(co.cask.cdap.etl.batch.customaction.PipelineAction) ConnectorSource(co.cask.cdap.etl.batch.connector.ConnectorSource) ETLSpark(co.cask.cdap.etl.spark.batch.ETLSpark) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) BatchPhaseSpec(co.cask.cdap.etl.batch.BatchPhaseSpec)

Aggregations

BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec)6 DefaultMacroEvaluator (co.cask.cdap.etl.common.DefaultMacroEvaluator)4 StageInfo (co.cask.cdap.etl.planner.StageInfo)4 HashMap (java.util.HashMap)4 MacroEvaluator (co.cask.cdap.api.macro.MacroEvaluator)3 PipelinePhase (co.cask.cdap.etl.common.PipelinePhase)3 PipelinePluginContext (co.cask.cdap.etl.common.plugin.PipelinePluginContext)3 Map (java.util.Map)3 PluginContext (co.cask.cdap.api.plugin.PluginContext)2 BatchAggregator (co.cask.cdap.etl.api.batch.BatchAggregator)2 BatchJoiner (co.cask.cdap.etl.api.batch.BatchJoiner)2 BatchSinkContext (co.cask.cdap.etl.api.batch.BatchSinkContext)2 BatchSourceContext (co.cask.cdap.etl.api.batch.BatchSourceContext)2 DefaultAggregatorContext (co.cask.cdap.etl.batch.DefaultAggregatorContext)2 DefaultJoinerContext (co.cask.cdap.etl.batch.DefaultJoinerContext)2 CompositeFinisher (co.cask.cdap.etl.common.CompositeFinisher)2 SparkPipelinePluginContext (co.cask.cdap.etl.spark.plugin.SparkPipelinePluginContext)2 AbstractCustomAction (co.cask.cdap.api.customaction.AbstractCustomAction)1 CustomAction (co.cask.cdap.api.customaction.CustomAction)1 CustomActionContext (co.cask.cdap.api.customaction.CustomActionContext)1