Usage of co.cask.cdap.etl.batch.mapreduce.PipeTransformExecutor in the cdap project by caskdata.
The example below is the create method of the TransformExecutorFactory class.
/**
* Create a transform executor for the specified pipeline. Will instantiate and initialize all sources,
* transforms, and sinks in the pipeline.
*
* @param pipeline the pipeline to create a transform executor for
* @return executor for the pipeline
* @throws InstantiationException if there was an error instantiating a plugin
* @throws Exception if there was an error initializing a plugin
*/
public <KEY_OUT, VAL_OUT> PipeTransformExecutor<T> create(
    PipelinePhase pipeline, OutputWriter<KEY_OUT, VAL_OUT> outputWriter,
    Map<String, ErrorOutputWriter<Object, Object>> transformErrorSinkMap) throws Exception {

  Map<String, PipeTransformDetail> transformations = new HashMap<>();
  Set<String> sources = pipeline.getSources();

  // Record the input and output schemas for every stage in the pipeline
  for (String pluginType : pipeline.getPluginTypes()) {
    for (StageInfo stageInfo : pipeline.getStagesOfType(pluginType)) {
      String stageName = stageInfo.getName();
      outputSchemas.put(stageName, stageInfo.getOutputSchema());
      perStageInputSchemas.put(stageName, stageInfo.getInputSchemas());
    }
  }

  // Recursively set the PipeTransformDetail for all stages, starting from the sources
  for (String source : sources) {
    setPipeTransformDetail(pipeline, source, transformations, transformErrorSinkMap, outputWriter);
  }

  // sourceStageName is null in reducers, so fall back to the pipeline sources in that case
  Set<String> startingPoints = (sourceStageName == null)
    ? pipeline.getSources() : Sets.newHashSet(sourceStageName);
  return new PipeTransformExecutor<>(transformations, startingPoints);
}
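
To make the recursive wiring concrete, here is a minimal self-contained sketch of the same pattern. It is not cdap code: StageDetail, DAG, and wire are hypothetical stand-ins for PipeTransformDetail, the pipeline phase, and setPipeTransformDetail, showing how each source starts a depth-first walk that registers every reachable stage exactly once.

import java.util.*;

// Illustrative sketch only (not from the cdap repository): mimics how create()
// walks the pipeline DAG from its sources and registers a detail per stage.
public class PipelineWiringSketch {

  // Hypothetical stand-in for PipeTransformDetail: a stage plus its consumers.
  record StageDetail(String stageName, Set<String> outputs) {}

  // Edges of the pipeline DAG: stage name -> downstream stage names.
  static final Map<String, Set<String>> DAG = Map.of(
      "source", Set.of("parse"),
      "parse", Set.of("filter"),
      "filter", Set.of("sink"),
      "sink", Set.of());

  // Analogous to setPipeTransformDetail: depth-first from a starting stage,
  // recording a detail for every reachable stage exactly once.
  static void wire(String stage, Map<String, StageDetail> details) {
    if (details.containsKey(stage)) {
      return; // already wired via another path through the DAG
    }
    Set<String> downstream = DAG.getOrDefault(stage, Set.of());
    details.put(stage, new StageDetail(stage, downstream));
    for (String next : downstream) {
      wire(next, details);
    }
  }

  public static void main(String[] args) {
    Map<String, StageDetail> details = new LinkedHashMap<>();
    // Like create(), start the recursion from every source of the phase.
    for (String source : Set.of("source")) {
      wire(source, details);
    }
    details.values().forEach(System.out::println);
  }
}

In the real factory, the per-stage detail additionally carries the instantiated plugin along with the schemas, error sinks, and output writer collected in the method above.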