use of co.cask.cdap.etl.planner.StageInfo in project cdap by caskdata.
the class TransformExecutorFactory method setPipeTransformDetail.
private <KEY_OUT, VAL_OUT> void setPipeTransformDetail(PipelinePhase pipeline, String stageName,
                                                       Map<String, PipeTransformDetail> transformations,
                                                       Map<String, ErrorOutputWriter<Object, Object>> transformErrorSinkMap,
                                                       OutputWriter<KEY_OUT, VAL_OUT> outputWriter) throws Exception {
  if (pipeline.getSinks().contains(stageName)) {
    StageInfo stageInfo = pipeline.getStage(stageName);
    // If there is a connector sink/joiner at the end of the pipeline, do not remove the stage name. This is needed
    // to save the stageName along with the record in the connector sink, and the joiner takes its input along with
    // the stageName.
    String pluginType = stageInfo.getPluginType();
    boolean removeStageName = !(pluginType.equals(Constants.CONNECTOR_TYPE) || pluginType.equals(BatchJoiner.PLUGIN_TYPE));
    boolean isErrorConsumer = pluginType.equals(ErrorTransform.PLUGIN_TYPE);
    transformations.put(stageName,
                        new PipeTransformDetail(stageName, removeStageName, isErrorConsumer,
                                                getTransformation(stageInfo),
                                                new SinkEmitter<>(stageName, outputWriter)));
    return;
  }
  try {
    addTransformation(pipeline, stageName, transformations, transformErrorSinkMap);
  } catch (Exception e) {
    // Catch the exception to generate a user error log for the pipeline
    PIPELINE_LOG.error("Failed to start pipeline stage '{}' with the error: {}. Please review your pipeline "
                         + "configuration and check the system logs for more details.",
                       stageName, Throwables.getRootCause(e).getMessage(), Throwables.getRootCause(e));
    throw e;
  }
  for (String output : pipeline.getDag().getNodeOutputs(stageName)) {
    setPipeTransformDetail(pipeline, output, transformations, transformErrorSinkMap, outputWriter);
    transformations.get(stageName).addTransformation(output, transformations.get(output));
  }
}
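For context, the recursion above has to be seeded from the upstream end of the phase. A minimal sketch of a hypothetical caller, assuming PipelinePhase exposes its source stage names via getSources(); the helper name buildTransformDetails is an illustration, not the project's actual entry point:

private <KEY_OUT, VAL_OUT> Map<String, PipeTransformDetail> buildTransformDetails(
    PipelinePhase pipeline,
    Map<String, ErrorOutputWriter<Object, Object>> transformErrorSinkMap,
    OutputWriter<KEY_OUT, VAL_OUT> outputWriter) throws Exception {
  Map<String, PipeTransformDetail> transformations = new HashMap<>();
  // walk the phase DAG once from every source stage; setPipeTransformDetail
  // recurses through getNodeOutputs() until it reaches the sinks
  for (String source : pipeline.getSources()) {
    setPipeTransformDetail(pipeline, source, transformations, transformErrorSinkMap, outputWriter);
  }
  return transformations;
}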
use of co.cask.cdap.etl.planner.StageInfo in project cdap by caskdata.
the class SmartWorkflow method destroy.
@Override
public void destroy() {
  WorkflowContext workflowContext = getContext();
  // Execute the post actions only if the pipeline is not running in preview mode.
  if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
    BasicArguments arguments = new BasicArguments(workflowContext.getToken(), workflowContext.getRuntimeArguments());
    for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
      String name = endingActionEntry.getKey();
      PostAction action = endingActionEntry.getValue();
      StageInfo stageInfo = StageInfo.builder(name, PostAction.PLUGIN_TYPE)
        .setStageLoggingEnabled(spec.isStageLoggingEnabled())
        .setProcessTimingEnabled(spec.isProcessTimingEnabled())
        .build();
      BatchActionContext context = new WorkflowBackedActionContext(workflowContext, workflowMetrics, stageInfo, arguments);
      try {
        action.run(context);
      } catch (Throwable t) {
        LOG.error("Error while running post action {}.", name, t);
      }
    }
  }
  ProgramStatus status = getContext().getState().getStatus();
  if (status == ProgramStatus.FAILED) {
    WRAPPERLOGGER.error("Pipeline '{}' failed.", getContext().getApplicationSpecification().getName());
  } else {
    WRAPPERLOGGER.info("Pipeline '{}' {}.", getContext().getApplicationSpecification().getName(),
                       status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase());
  }
}
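Post actions are run in destroy() rather than inside any pipeline phase, so a StageInfo is built for each one on the fly. A minimal standalone sketch of that builder call, using a placeholder stage name and hard-coded flag values where the method above reads them from the pipeline spec:

// illustrative only: "emailOnFailure" and the boolean flags are placeholders
StageInfo postActionInfo = StageInfo.builder("emailOnFailure", PostAction.PLUGIN_TYPE)
  .setStageLoggingEnabled(true)      // in the real code: spec.isStageLoggingEnabled()
  .setProcessTimingEnabled(false)    // in the real code: spec.isProcessTimingEnabled()
  .build();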
use of co.cask.cdap.etl.planner.StageInfo in project cdap by caskdata.
the class SmartWorkflow method addProgram.
private void addProgram(String phaseName, WorkflowProgramAdder programAdder) {
  PipelinePhase phase = plan.getPhase(phaseName);
  // a null phase means this node was artificially added by the control dag flattening process.
  // So nothing to add, skip it
  if (phase == null) {
    return;
  }
  // can't use the phase name as a program name because it might contain invalid characters
  String programName = "phase-" + phaseNum;
  phaseNum++;
  // if this phase uses connectors, add the local dataset for each connector if we haven't already
  for (StageInfo connectorInfo : phase.getStagesOfType(Constants.CONNECTOR_TYPE)) {
    String connectorName = connectorInfo.getName();
    String datasetName = connectorDatasets.get(connectorName);
    if (datasetName == null) {
      datasetName = "conn-" + connectorNum++;
      connectorDatasets.put(connectorName, datasetName);
      // add the local dataset
      ConnectorSource connectorSource = new ConnectorSource(datasetName, null);
      connectorSource.configure(getConfigurer());
    }
  }
  Map<String, String> phaseConnectorDatasets = new HashMap<>();
  for (StageInfo connectorStage : phase.getStagesOfType(Constants.CONNECTOR_TYPE)) {
    phaseConnectorDatasets.put(connectorStage.getName(), connectorDatasets.get(connectorStage.getName()));
  }
  BatchPhaseSpec batchPhaseSpec = new BatchPhaseSpec(programName, phase, spec.getResources(),
                                                     spec.getDriverResources(), spec.getClientResources(),
                                                     spec.isStageLoggingEnabled(), spec.isProcessTimingEnabled(),
                                                     phaseConnectorDatasets, spec.getNumOfRecordsPreview(),
                                                     spec.getProperties());
  Set<String> pluginTypes = batchPhaseSpec.getPhase().getPluginTypes();
  if (pluginTypes.contains(Action.PLUGIN_TYPE)) {
    // actions will be all by themselves in a phase
    programAdder.addAction(new PipelineAction(batchPhaseSpec));
  } else if (pluginTypes.contains(Constants.SPARK_PROGRAM_PLUGIN_TYPE)) {
    // spark programs will be all by themselves in a phase
    String stageName = phase.getStagesOfType(Constants.SPARK_PROGRAM_PLUGIN_TYPE).iterator().next().getName();
    StageSpec stageSpec = stageSpecs.get(stageName);
    applicationConfigurer.addSpark(new ExternalSparkProgram(batchPhaseSpec, stageSpec));
    programAdder.addSpark(programName);
  } else if (useSpark) {
    applicationConfigurer.addSpark(new ETLSpark(batchPhaseSpec));
    programAdder.addSpark(programName);
  } else {
    applicationConfigurer.addMapReduce(new ETLMapReduce(batchPhaseSpec));
    programAdder.addMapReduce(programName);
  }
}
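The first connector loop above is effectively a get-or-create lookup keyed by connector name. The same logic pulled out into a standalone helper, purely as an illustration (the helper name getOrCreateConnectorDataset is not part of the project):

private String getOrCreateConnectorDataset(String connectorName) {
  String datasetName = connectorDatasets.get(connectorName);
  if (datasetName == null) {
    // first time this connector is seen: assign a dataset name and register the local dataset
    datasetName = "conn-" + connectorNum++;
    connectorDatasets.put(connectorName, datasetName);
    new ConnectorSource(datasetName, null).configure(getConfigurer());
  }
  return datasetName;
}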
use of co.cask.cdap.etl.planner.StageInfo in project cdap by caskdata.
the class TransformRunner method getSinkWriter.
// this is needed because we need to write to the context differently depending on the number of outputs
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context,
                                                   PipelinePhase pipelinePhase, Configuration hConf) {
  Set<StageInfo> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  JobContext hadoopContext = context.getHadoopContext();
  if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
    return new SingleOutputWriter<>(context);
  }
  String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
  // should never happen, this is set in initialize
  Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
  Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
  return hasSingleOutput(pipelinePhase.getStagesOfType(Transform.PLUGIN_TYPE), sinkOutputs)
    ? new SingleOutputWriter<>(context)
    : new MultiOutputWriter<>(context, sinkOutputs);
}
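hasSingleOutput is not shown in this snippet. A plausible sketch of the check it might perform, assuming StageInfo exposes an optional error dataset name and SinkOutput exposes its output names; both accessor names are assumptions, not the project's confirmed API:

private boolean hasSingleOutput(Set<StageInfo> transformInfos, Map<String, SinkOutput> sinkOutputs) {
  // any error dataset implies more than one output: the sink output plus the error output
  for (StageInfo transformInfo : transformInfos) {
    if (transformInfo.getErrorDatasetName() != null) {
      return false;
    }
  }
  Set<String> allOutputs = new HashSet<>();
  for (SinkOutput sinkOutput : sinkOutputs.values()) {
    if (sinkOutput.getErrorDatasetName() != null) {
      return false;
    }
    allOutputs.addAll(sinkOutput.getSinkOutputs());
  }
  // a single distinct output means the simpler SingleOutputWriter can be used
  return allOutputs.size() == 1;
}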