Search in sources :

Example 1 with PipeStage

use of co.cask.cdap.etl.batch.PipeStage in project cdap by caskdata.

The method getPipeStage of the class MapReduceTransformExecutorFactory.

/**
 * Builds the {@link PipeStage} for a single stage of the given pipeline phase.
 *
 * <p>A stage listed among the phase's sinks is wired to a {@code SinkEmitter} over the output writer.
 * Any other stage is wired to a {@code PipeEmitter} that forwards to the pipe stages of its direct
 * outputs in the phase's DAG, registered as error, alert, or (optionally port-specific) output consumers.
 *
 * @param pipeline     the phase that supplies stage specs, sources, sinks and the DAG
 * @param stageName    name of the stage to build
 * @param pipeStages   pipe stages keyed by stage name; looked up for every direct output of this stage
 *                     (presumably already populated for those outputs; no null check is done here)
 * @param outputWriter writer handed to the {@code SinkEmitter} when the stage is a sink
 * @return the pipe stage for {@code stageName}
 * @throws Exception if building the stage fails (declared by the signature; nothing is thrown
 *                   directly by this body)
 */
private PipeStage getPipeStage(PipelinePhase pipeline, String stageName, Map<String, PipeStage> pipeStages, OutputWriter<?, ?> outputWriter) throws Exception {
    StageSpec spec = pipeline.getStage(stageName);
    String type = spec.getPluginType();
    boolean isConnector = Constants.Connector.PLUGIN_TYPE.equals(type);

    // Ending stages do not use a PipeEmitter; they write through a SinkEmitter.
    if (pipeline.getSinks().contains(stageName)) {
        if (isConnector || BatchJoiner.PLUGIN_TYPE.equals(type)) {
            // Connectors and joiners need the RecordInfo itself rather than the value unwrapped from it.
            Transformation<RecordInfo<Object>, Object> directSink = getTransformation(spec);
            return new DirectOutputPipeStage<>(stageName, directSink, new SinkEmitter<>(stageName, outputWriter));
        }
        // Everything else (batch sinks, aggregators, alert publishers) only needs the value inside the RecordInfo.
        return new UnwrapPipeStage<>(stageName, getTransformation(spec), new SinkEmitter<>(stageName, outputWriter));
    }

    // The PipeEmitter holds every output PipeStage it must write to and wraps whatever it receives
    // into a RecordInfo. Connector sources get a dedicated emitter because they have to build the
    // RecordInfo from the temporary dataset.
    PipeEmitter.Builder builder;
    if (isConnector && pipeline.getSources().contains(stageName)) {
        builder = ConnectorSourceEmitter.builder(stageName);
    } else {
        builder = PipeEmitter.builder(stageName);
    }

    Map<String, StageSpec.Port> portsByOutput = spec.getOutputPorts();
    for (String downstreamName : pipeline.getDag().getNodeOutputs(stageName)) {
        StageSpec downstreamSpec = pipeline.getStage(downstreamName);
        String downstreamType = downstreamSpec.getPluginType();
        PipeStage downstream = pipeStages.get(downstreamName);

        if (ErrorTransform.PLUGIN_TYPE.equals(downstreamType)) {
            builder.addErrorConsumer(downstream);
            continue;
        }
        if (AlertPublisher.PLUGIN_TYPE.equals(downstreamType)) {
            builder.addAlertConsumer(downstream);
            continue;
        }
        if (isConnector) {
            // A connector only ever has a single output, so no port is involved.
            builder.addOutputConsumer(downstream);
            continue;
        }

        // When the output is a connector such as 'agg5.connector', the port map is keyed by the
        // original stage name ('agg5'), which is recorded in the connector's plugin properties.
        String portKey = downstreamName;
        if (Constants.Connector.PLUGIN_TYPE.equals(downstreamType)) {
            portKey = downstreamSpec.getPlugin().getProperties().get(Constants.Connector.ORIGINAL_NAME);
        }

        String port = null;
        if (portsByOutput.containsKey(portKey)) {
            port = portsByOutput.get(portKey).getPort();
        }

        if (port == null) {
            builder.addOutputConsumer(downstream);
        } else {
            builder.addOutputConsumer(downstream, port);
        }
    }

    PipeEmitter emitter = builder.build();
    if (SplitterTransform.PLUGIN_TYPE.equals(type)) {
        // A splitter has to route each record to the right outputs based on its port.
        return new MultiOutputTransformPipeStage<>(stageName, getMultiOutputTransform(spec), emitter);
    }
    return new UnwrapPipeStage<>(stageName, getTransformation(spec), emitter);
}
Also used : UnwrapPipeStage(co.cask.cdap.etl.batch.UnwrapPipeStage) RecordInfo(co.cask.cdap.etl.common.RecordInfo) UnwrapPipeStage(co.cask.cdap.etl.batch.UnwrapPipeStage) MultiOutputTransformPipeStage(co.cask.cdap.etl.batch.MultiOutputTransformPipeStage) PipeStage(co.cask.cdap.etl.batch.PipeStage) DirectOutputPipeStage(co.cask.cdap.etl.batch.DirectOutputPipeStage) PipeEmitter(co.cask.cdap.etl.batch.PipeEmitter) DirectOutputPipeStage(co.cask.cdap.etl.batch.DirectOutputPipeStage) StageSpec(co.cask.cdap.etl.spec.StageSpec) MultiOutputTransformPipeStage(co.cask.cdap.etl.batch.MultiOutputTransformPipeStage)

Aggregations

DirectOutputPipeStage (co.cask.cdap.etl.batch.DirectOutputPipeStage)1 MultiOutputTransformPipeStage (co.cask.cdap.etl.batch.MultiOutputTransformPipeStage)1 PipeEmitter (co.cask.cdap.etl.batch.PipeEmitter)1 PipeStage (co.cask.cdap.etl.batch.PipeStage)1 UnwrapPipeStage (co.cask.cdap.etl.batch.UnwrapPipeStage)1 RecordInfo (co.cask.cdap.etl.common.RecordInfo)1 StageSpec (co.cask.cdap.etl.spec.StageSpec)1