Use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by caskdata.
The class PipelineAction, method run().
@Override
public void run() throws Exception {
  CustomActionContext context = getContext();
  Map<String, String> properties = context.getSpecification().getProperties();
  BatchPhaseSpec phaseSpec = GSON.fromJson(properties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
  PipelinePhase phase = phaseSpec.getPhase();
  StageSpec stageSpec = phase.iterator().next();
  PluginContext pluginContext = new PipelinePluginContext(context, metrics,
                                                          phaseSpec.isStageLoggingEnabled(),
                                                          phaseSpec.isProcessTimingEnabled());
  PipelineRuntime pipelineRuntime = new PipelineRuntime(context, metrics);
  Action action = pluginContext.newPluginInstance(stageSpec.getName(),
                                                  new DefaultMacroEvaluator(pipelineRuntime.getArguments(),
                                                                            context.getLogicalStartTime(),
                                                                            context, context,
                                                                            context.getNamespace()));
  ActionContext actionContext = new BasicActionContext(context, pipelineRuntime, stageSpec);
  // run the action only when the pipeline is not in preview mode
  if (!context.getDataTracer(stageSpec.getName()).isEnabled()) {
    action.run(actionContext);
  }
  WorkflowToken token = context.getWorkflowToken();
  if (token == null) {
    throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
  }
  // copy any runtime arguments added by the action into the workflow token
  // so that downstream workflow nodes can see them
  for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
    token.put(entry.getKey(), entry.getValue());
  }
}
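The final loop illustrates the publishing half of a common pattern: values put into the token by one node can be read back by any later node. As a minimal sketch of the consuming side (the helper class and key name are hypothetical, not part of the CDAP source above):

  import io.cdap.cdap.api.workflow.Value;
  import io.cdap.cdap.api.workflow.WorkflowToken;

  public final class TokenReadSketch {
    // Returns the most recently written value for the key, or null if
    // no upstream node has put the key into the token yet.
    static String readPublishedArgument(WorkflowToken token, String key) {
      Value value = token.get(key);
      return value == null ? null : value.toString();
    }
  }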
Use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by caskdata.
The class MapReducePreparer, method prepare().
public List<Finisher> prepare(BatchPhaseSpec phaseSpec, Job job)
  throws TransactionFailureException, InstantiationException, IOException {
  this.job = job;
  this.hConf = job.getConfiguration();
  hConf.setBoolean("mapreduce.map.speculative", false);
  hConf.setBoolean("mapreduce.reduce.speculative", false);
  sinkOutputs = new HashMap<>();
  inputAliasToStage = new HashMap<>();
  // collect the field operations emitted by the various stages in this MapReduce program
  stageOperations = new HashMap<>();
  List<Finisher> finishers = prepare(phaseSpec);
  hConf.set(ETLMapReduce.SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));
  hConf.set(ETLMapReduce.INPUT_ALIAS_KEY, GSON.toJson(inputAliasToStage));
  // the token is null when just the MapReduce job is run rather than the entire
  // workflow; things should still work in that case
  WorkflowToken token = context.getWorkflowToken();
  if (token != null) {
    for (Map.Entry<String, String> entry : pipelineRuntime.getArguments().getAddedArguments().entrySet()) {
      token.put(entry.getKey(), entry.getValue());
    }
    // put the collected field operations into the workflow token
    token.put(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN, GSON.toJson(stageOperations));
  }
  hConf.set(ETLMapReduce.RUNTIME_ARGS_KEY, GSON.toJson(pipelineRuntime.getArguments().asMap()));
  return finishers;
}
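Unlike PipelineAction.run(), which fails fast on a null token, this preparer treats a missing token as a supported case: getWorkflowToken() returns null when the MapReduce program runs standalone instead of inside a workflow. A minimal sketch of the same guard, with a hypothetical method name and key/value pair:

  import io.cdap.cdap.api.workflow.WorkflowToken;

  // Hypothetical guard: publish into the token only when running inside a workflow.
  static void publishIfInWorkflow(WorkflowToken token) {
    if (token == null) {
      // standalone run: nothing to publish, rely on job configuration instead
      return;
    }
    token.put("my.stage.marker", "prepared"); // hypothetical key and value
  }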
Use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by cdapio.
The class BasicConditionContext, method createStageStatistics().
private Map<String, StageStatistics> createStageStatistics(WorkflowContext context) {
  Map<String, StageStatistics> stageStatistics = new HashMap<>();
  WorkflowToken token = context.getToken();
  for (WorkflowToken.Scope scope : Arrays.asList(WorkflowToken.Scope.SYSTEM, WorkflowToken.Scope.USER)) {
    Map<String, List<NodeValue>> all = token.getAll(scope);
    for (Map.Entry<String, List<NodeValue>> entry : all.entrySet()) {
      if (!entry.getKey().startsWith(Constants.StageStatistics.PREFIX + ".")) {
        continue;
      }
      String stageKey = entry.getKey().substring(Constants.StageStatistics.PREFIX.length() + 1);
      String stageName;
      if (stageKey.endsWith(Constants.StageStatistics.INPUT_RECORDS)) {
        stageName = stageKey.substring(0, stageKey.length() - Constants.StageStatistics.INPUT_RECORDS.length() - 1);
      } else if (stageKey.endsWith(Constants.StageStatistics.OUTPUT_RECORDS)) {
        stageName = stageKey.substring(0, stageKey.length() - Constants.StageStatistics.OUTPUT_RECORDS.length() - 1);
      } else if (stageKey.endsWith(Constants.StageStatistics.ERROR_RECORDS)) {
        stageName = stageKey.substring(0, stageKey.length() - Constants.StageStatistics.ERROR_RECORDS.length() - 1);
      } else {
        // should not happen
        LOG.warn(String.format("Ignoring key '%s' in the workflow token while generating stage statistics "
                                 + "because it is not in the form "
                                 + "'stage.statistics.<stage_name>.<input|output|error>.records'.", stageKey));
        continue;
      }
      // Since stage names are unique and the tracked properties for each stage are
      // unique (input, output, and error records), only a single node should have
      // added this particular key to the workflow token.
      long value = entry.getValue().get(0).getValue().getAsLong();
      StageStatistics statistics = stageStatistics.get(stageName);
      if (statistics == null) {
        statistics = new BasicStageStatistics(0, 0, 0);
        stageStatistics.put(stageName, statistics);
      }
      long numOfInputRecords = statistics.getInputRecordsCount();
      long numOfOutputRecords = statistics.getOutputRecordsCount();
      long numOfErrorRecords = statistics.getErrorRecordsCount();
      if (stageKey.endsWith(Constants.StageStatistics.INPUT_RECORDS)) {
        numOfInputRecords = value;
      } else if (stageKey.endsWith(Constants.StageStatistics.OUTPUT_RECORDS)) {
        numOfOutputRecords = value;
      } else {
        numOfErrorRecords = value;
      }
      stageStatistics.put(stageName, new BasicStageStatistics(numOfInputRecords, numOfOutputRecords, numOfErrorRecords));
    }
  }
  return stageStatistics;
}
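The keys parsed here follow the pattern named in the warning message: stage.statistics.<stage_name>.<input|output|error>.records. As a minimal sketch of the producing side, a hypothetical emitter could write one such counter into the token like this (the literal prefix stands in for Constants.StageStatistics.PREFIX, and the method is an illustration, not CDAP source):

  import io.cdap.cdap.api.workflow.Value;
  import io.cdap.cdap.api.workflow.WorkflowToken;

  // Hypothetical emitter for the key format parsed by createStageStatistics():
  // stage.statistics.<stage_name>.<input|output|error>.records
  static void emitInputRecordCount(WorkflowToken token, String stageName, long count) {
    token.put("stage.statistics." + stageName + ".input.records", Value.of(count));
  }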
Use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by cdapio.
The class SmartWorkflow, method destroy().
@Override
public void destroy() {
  WorkflowContext workflowContext = getContext();
  PipelineRuntime pipelineRuntime = new PipelineRuntime(workflowContext, workflowMetrics);
  // execute the post actions only if the pipeline is not running in preview mode
  if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
    for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
      String name = endingActionEntry.getKey();
      PostAction action = endingActionEntry.getValue();
      StageSpec stageSpec = stageSpecs.get(name);
      BatchActionContext context = new WorkflowBackedActionContext(workflowContext, pipelineRuntime, stageSpec);
      try {
        action.run(context);
      } catch (Throwable t) {
        LOG.error("Error while running post action {}.", name, t);
      }
    }
  }
  Map<String, String> connectorDatasets = GSON.fromJson(
    workflowContext.getWorkflowSpecification().getProperty(Constants.CONNECTOR_DATASETS), STAGE_DATASET_MAP);
  // publish all alerts
  for (Map.Entry<String, AlertPublisher> alertPublisherEntry : alertPublishers.entrySet()) {
    String stageName = alertPublisherEntry.getKey();
    AlertPublisher alertPublisher = alertPublisherEntry.getValue();
    FileSet alertConnector = workflowContext.getDataset(connectorDatasets.get(stageName));
    try (CloseableIterator<Alert> alerts = new AlertReader(alertConnector)) {
      if (!alerts.hasNext()) {
        continue;
      }
      StageMetrics stageMetrics = new DefaultStageMetrics(workflowMetrics, stageName);
      StageSpec stageSpec = stageSpecs.get(stageName);
      AlertPublisherContext alertContext =
        new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, workflowContext, workflowContext.getAdmin());
      alertPublisher.initialize(alertContext);
      TrackedIterator<Alert> trackedIterator =
        new TrackedIterator<>(alerts, stageMetrics, Constants.Metrics.RECORDS_IN);
      alertPublisher.publish(trackedIterator);
    } catch (Exception e) {
      LOG.warn("Stage {} had errors publishing alerts. Alerts may not have been published.", stageName, e);
    } finally {
      try {
        alertPublisher.destroy();
      } catch (Exception e) {
        LOG.warn("Error destroying alert publisher for stage {}", stageName, e);
      }
    }
  }
  ProgramStatus status = getContext().getState().getStatus();
  if (status == ProgramStatus.FAILED) {
    WRAPPERLOGGER.error("Pipeline '{}' failed.", getContext().getApplicationSpecification().getName());
  } else {
    WRAPPERLOGGER.info("Pipeline '{}' {}.", getContext().getApplicationSpecification().getName(),
                       status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase());
  }
  MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(),
                                                            workflowContext.getLogicalStartTime(),
                                                            workflowContext, workflowContext,
                                                            workflowContext.getNamespace());
  // get the resolved plugin properties
  Map<String, Map<String, String>> resolvedProperties = new HashMap<>();
  for (StageSpec spec : stageSpecs.values()) {
    String stageName = spec.getName();
    resolvedProperties.put(stageName,
                           workflowContext.getPluginProperties(stageName, macroEvaluator).getProperties());
  }
  // add the resolved plugin properties to the workflow token as a JSON string
  workflowContext.getToken().put(RESOLVED_PLUGIN_PROPERTIES_MAP, GSON.toJson(resolvedProperties));
  // record field lineage only if the workflow completed successfully
  if (status != ProgramStatus.COMPLETED) {
    return;
  }
  // collect the field operations from each phase
  WorkflowToken token = workflowContext.getToken();
  List<NodeValue> allNodeValues = token.getAll(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN);
  if (allNodeValues.isEmpty()) {
    // no field lineage was recorded by any stage
    return;
  }
  Map<String, List<FieldOperation>> allStageOperations = new HashMap<>();
  for (StageSpec stageSpec : stageSpecs.values()) {
    allStageOperations.put(stageSpec.getName(), new ArrayList<>());
  }
  for (NodeValue nodeValue : allNodeValues) {
    Map<String, List<FieldOperation>> stageOperations =
      GSON.fromJson(nodeValue.getValue().toString(), STAGE_OPERATIONS_MAP);
    for (Map.Entry<String, List<FieldOperation>> entry : stageOperations.entrySet()) {
      // ignore operations recorded for stages that are not part of this pipeline
      if (allStageOperations.containsKey(entry.getKey())) {
        allStageOperations.get(entry.getKey()).addAll(entry.getValue());
      }
    }
  }
  FieldLineageProcessor processor = new FieldLineageProcessor(spec);
  Set<Operation> processedOperations = processor.validateAndConvert(allStageOperations);
  if (!processedOperations.isEmpty()) {
    workflowContext.record(processedOperations);
  }
}
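Because destroy() serializes the resolved plugin properties into the token as a JSON string, any later consumer with access to the token can deserialize them again. A minimal sketch, assuming the same Gson map shape; the reader method and its key parameter are hypothetical:

  import java.lang.reflect.Type;
  import java.util.Collections;
  import java.util.List;
  import java.util.Map;
  import com.google.gson.Gson;
  import com.google.gson.reflect.TypeToken;
  import io.cdap.cdap.api.workflow.NodeValue;
  import io.cdap.cdap.api.workflow.WorkflowToken;

  // Hypothetical reader for the resolved-properties JSON written in destroy()
  static Map<String, Map<String, String>> readResolvedProperties(WorkflowToken token, String key) {
    List<NodeValue> values = token.getAll(key);
    if (values.isEmpty()) {
      return Collections.emptyMap();
    }
    // the workflow node wrote the map once; take the most recent entry
    Type type = new TypeToken<Map<String, Map<String, String>>>() { }.getType();
    return new Gson().fromJson(values.get(values.size() - 1).getValue().toString(), type);
  }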