Search in sources :

Example 1 with ConnectorSource

Use of co.cask.cdap.etl.batch.connector.ConnectorSource in the project cdap by caskdata.

From the class SmartWorkflow, method addProgram:

/**
 * Creates the program that runs one pipeline phase and hands it to the given program adder.
 *
 * <p>A phase name the plan has no phase for is skipped. Otherwise the phase gets a generated
 * program name of the form {@code phase-N}; every connector stage in the phase is mapped to a
 * {@code conn-N} dataset, with a {@link ConnectorSource} configured the first time a connector
 * name is seen; and the resulting {@link BatchPhaseSpec} is added as a pipeline action, an
 * external Spark program, an ETL Spark program, or an ETL MapReduce program.
 *
 * @param phaseName name used to look the phase up in the plan
 * @param programAdder receives the action or program created for the phase
 */
private void addProgram(String phaseName, WorkflowProgramAdder programAdder) {
    PipelinePhase phase = plan.getPhase(phaseName);
    if (phase == null) {
        // names introduced artificially by the control dag flattening process have no phase to add
        return;
    }

    // the phase name might contain characters that are invalid in a program name, so number it instead
    String programName = "phase-" + phaseNum++;

    // make sure each connector used by this phase has a local dataset, and remember the mapping
    // for this phase as we go
    Map<String, String> phaseConnectorDatasets = new HashMap<>();
    for (StageInfo connector : phase.getStagesOfType(Constants.CONNECTOR_TYPE)) {
        String name = connector.getName();
        String dataset = connectorDatasets.get(name);
        if (dataset == null) {
            // first time this connector is seen: allocate a dataset name and add the local dataset
            dataset = "conn-" + connectorNum;
            connectorNum++;
            connectorDatasets.put(name, dataset);
            new ConnectorSource(dataset, null).configure(getConfigurer());
        }
        phaseConnectorDatasets.put(name, dataset);
    }

    BatchPhaseSpec batchPhaseSpec = new BatchPhaseSpec(
        programName,
        phase,
        spec.getResources(),
        spec.getDriverResources(),
        spec.getClientResources(),
        spec.isStageLoggingEnabled(),
        spec.isProcessTimingEnabled(),
        phaseConnectorDatasets,
        spec.getNumOfRecordsPreview(),
        spec.getProperties());
    Set<String> pluginTypes = batchPhaseSpec.getPhase().getPluginTypes();

    // actions will be all by themselves in a phase
    if (pluginTypes.contains(Action.PLUGIN_TYPE)) {
        programAdder.addAction(new PipelineAction(batchPhaseSpec));
        return;
    }

    // spark programs will be all by themselves in a phase
    if (pluginTypes.contains(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
        StageInfo sparkStage = phase.getStagesOfType(Constants.SPARK_PROGRAM_PLUGIN_TYPE).iterator().next();
        StageSpec stageSpec = stageSpecs.get(sparkStage.getName());
        applicationConfigurer.addSpark(new ExternalSparkProgram(batchPhaseSpec, stageSpec));
        programAdder.addSpark(programName);
        return;
    }

    // everything else runs on the configured engine
    if (useSpark) {
        applicationConfigurer.addSpark(new ETLSpark(batchPhaseSpec));
        programAdder.addSpark(programName);
        return;
    }
    applicationConfigurer.addMapReduce(new ETLMapReduce(batchPhaseSpec));
    programAdder.addMapReduce(programName);
}
Also used : ETLMapReduce(co.cask.cdap.etl.batch.mapreduce.ETLMapReduce) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) StageInfo(co.cask.cdap.etl.planner.StageInfo) PipelineAction(co.cask.cdap.etl.batch.customaction.PipelineAction) ConnectorSource(co.cask.cdap.etl.batch.connector.ConnectorSource) ETLSpark(co.cask.cdap.etl.spark.batch.ETLSpark) PipelinePhase(co.cask.cdap.etl.common.PipelinePhase) StageSpec(co.cask.cdap.etl.spec.StageSpec) BatchPhaseSpec(co.cask.cdap.etl.batch.BatchPhaseSpec)

Aggregations

BatchPhaseSpec (co.cask.cdap.etl.batch.BatchPhaseSpec) 1, ConnectorSource (co.cask.cdap.etl.batch.connector.ConnectorSource) 1, PipelineAction (co.cask.cdap.etl.batch.customaction.PipelineAction) 1, ETLMapReduce (co.cask.cdap.etl.batch.mapreduce.ETLMapReduce) 1, PipelinePhase (co.cask.cdap.etl.common.PipelinePhase) 1, StageInfo (co.cask.cdap.etl.planner.StageInfo) 1, ETLSpark (co.cask.cdap.etl.spark.batch.ETLSpark) 1, StageSpec (co.cask.cdap.etl.spec.StageSpec) 1, HashMap (java.util.HashMap) 1, LinkedHashMap (java.util.LinkedHashMap) 1