Search in sources:

Example 6 with WorkflowToken

use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by caskdata.

Method run of the class PipelineAction.

/**
 * Executes the action plugin of this pipeline phase and copies the arguments it added into the workflow token.
 *
 * <p>The phase specification is deserialized from the {@code Constants.PIPELINEID} property of this custom
 * action's specification, and the first stage returned by the phase's iterator is the one that is run. The action
 * itself is skipped when the data tracer for that stage is enabled. The workflow token is looked up only after
 * that point, so a missing token is reported after the action has already run.
 *
 * @throws IllegalStateException if the context has no workflow token
 * @throws Exception anything propagated from plugin instantiation or from the action itself
 */
@Override
public void run() throws Exception {
    CustomActionContext actionCtx = getContext();
    String phaseJson = actionCtx.getSpecification().getProperties().get(Constants.PIPELINEID);
    BatchPhaseSpec batchPhaseSpec = GSON.fromJson(phaseJson, BatchPhaseSpec.class);
    StageSpec firstStage = batchPhaseSpec.getPhase().iterator().next();
    String stageName = firstStage.getName();

    PluginContext plugins = new PipelinePluginContext(actionCtx, metrics, batchPhaseSpec.isStageLoggingEnabled(), batchPhaseSpec.isProcessTimingEnabled());
    PipelineRuntime runtime = new PipelineRuntime(actionCtx, metrics);
    DefaultMacroEvaluator evaluator = new DefaultMacroEvaluator(runtime.getArguments(), actionCtx.getLogicalStartTime(), actionCtx, actionCtx, actionCtx.getNamespace());
    Action action = plugins.newPluginInstance(stageName, evaluator);
    ActionContext actionContext = new BasicActionContext(actionCtx, runtime, firstStage);

    // The action only runs while the stage's data tracer is disabled.
    boolean tracerEnabled = actionCtx.getDataTracer(stageName).isEnabled();
    if (!tracerEnabled) {
        action.run(actionContext);
    }

    WorkflowToken workflowToken = actionCtx.getWorkflowToken();
    if (workflowToken == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }
    // Publish every argument added during this run into the workflow token.
    Map<String, String> addedArguments = runtime.getArguments().getAddedArguments();
    for (Map.Entry<String, String> added : addedArguments.entrySet()) {
        workflowToken.put(added.getKey(), added.getValue());
    }
}
Also used : Action(io.cdap.cdap.etl.api.action.Action) AbstractCustomAction(io.cdap.cdap.api.customaction.AbstractCustomAction) CustomAction(io.cdap.cdap.api.customaction.CustomAction) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) ActionContext(io.cdap.cdap.etl.api.action.ActionContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext)

Example 7 with WorkflowToken

use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by caskdata.

Method prepare of the class MapReducePreparer.

/**
 * Prepares the given MapReduce job for the pipeline phase and returns the finishers produced while doing so.
 *
 * <p>Disables speculative execution for map and reduce tasks, resets the per-run bookkeeping maps, delegates to
 * {@code prepare(phaseSpec)}, and then serializes the sink outputs, input aliases and runtime arguments into the
 * job configuration. When a workflow token is available, the arguments added during preparation and the collected
 * field operations are also written to it.
 *
 * @param phaseSpec specification of the phase to prepare
 * @param job the MapReduce job whose configuration is populated
 * @return the finishers returned by {@code prepare(phaseSpec)}
 */
public List<Finisher> prepare(BatchPhaseSpec phaseSpec, Job job) throws TransactionFailureException, InstantiationException, IOException {
    this.job = job;
    this.hConf = job.getConfiguration();
    hConf.setBoolean("mapreduce.map.speculative", false);
    hConf.setBoolean("mapreduce.reduce.speculative", false);

    // Fresh bookkeeping for this run; stageOperations gathers the field operations
    // emitted by the stages of this MapReduce program.
    sinkOutputs = new HashMap<>();
    inputAliasToStage = new HashMap<>();
    stageOperations = new HashMap<>();

    List<Finisher> preparedFinishers = prepare(phaseSpec);

    hConf.set(ETLMapReduce.SINK_OUTPUTS_KEY, GSON.toJson(sinkOutputs));
    hConf.set(ETLMapReduce.INPUT_ALIAS_KEY, GSON.toJson(inputAliasToStage));

    // The token is null when just the MapReduce job is run rather than the entire workflow;
    // everything else must still work in that case, so the token updates are simply skipped.
    WorkflowToken workflowToken = context.getWorkflowToken();
    if (workflowToken != null) {
        Map<String, String> addedArguments = pipelineRuntime.getArguments().getAddedArguments();
        for (Map.Entry<String, String> added : addedArguments.entrySet()) {
            workflowToken.put(added.getKey(), added.getValue());
        }
        // Put the collected field operations in the workflow token.
        workflowToken.put(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN, GSON.toJson(stageOperations));
    }

    hConf.set(ETLMapReduce.RUNTIME_ARGS_KEY, GSON.toJson(pipelineRuntime.getArguments().asMap()));
    return preparedFinishers;
}
Also used : Finisher(io.cdap.cdap.etl.common.submit.Finisher) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with WorkflowToken

use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by cdapio.

Method createStageStatistics of the class BasicConditionContext.

/**
 * Builds per-stage record statistics from the counters stored in the workflow token.
 *
 * <p>Both the SYSTEM and USER scopes of the token are scanned, in that order, for keys of the form
 * {@code <PREFIX>.<stage_name>.<INPUT_RECORDS|OUTPUT_RECORDS|ERROR_RECORDS>}, using the constants from
 * {@code Constants.StageStatistics}. Keys without the prefix are skipped silently; keys with the prefix but
 * without a recognised, dot-separated suffix are skipped with a warning. For each accepted key, the value of the
 * first node in the token's list is read as a long and stored in the matching counter of that stage; counters
 * that never appear stay at 0.
 *
 * @param context workflow context whose token is read
 * @return statistics keyed by stage name; empty when the token holds no stage statistics keys
 */
private Map<String, StageStatistics> createStageStatistics(WorkflowContext context) {
    Map<String, StageStatistics> stageStatistics = new HashMap<>();
    String keyPrefix = Constants.StageStatistics.PREFIX + ".";
    // The suffixes include the '.' that separates them from the stage name. Matching on the bare suffix and
    // then stripping one extra character threw StringIndexOutOfBoundsException for a key with no stage name
    // and silently chopped the last character off the stage name when the separator was missing.
    String inputSuffix = "." + Constants.StageStatistics.INPUT_RECORDS;
    String outputSuffix = "." + Constants.StageStatistics.OUTPUT_RECORDS;
    String errorSuffix = "." + Constants.StageStatistics.ERROR_RECORDS;
    WorkflowToken token = context.getToken();
    for (WorkflowToken.Scope scope : Arrays.asList(WorkflowToken.Scope.SYSTEM, WorkflowToken.Scope.USER)) {
        for (Map.Entry<String, List<NodeValue>> entry : token.getAll(scope).entrySet()) {
            String key = entry.getKey();
            if (!key.startsWith(keyPrefix)) {
                continue;
            }
            String stageKey = key.substring(keyPrefix.length());
            String matchedSuffix;
            if (stageKey.endsWith(inputSuffix)) {
                matchedSuffix = inputSuffix;
            } else if (stageKey.endsWith(outputSuffix)) {
                matchedSuffix = outputSuffix;
            } else if (stageKey.endsWith(errorSuffix)) {
                matchedSuffix = errorSuffix;
            } else {
                // should not happen
                LOG.warn(String.format("Ignoring key '%s' in the Workflow token while generating stage statistics " + "because it is not in the form " + "'stage.statistics.<stage_name>.<input|output|error>.records'.", stageKey));
                continue;
            }
            String stageName = stageKey.substring(0, stageKey.length() - matchedSuffix.length());
            // Since stage names are unique and the properties tracked for each stage are unique (input, output
            // and error), there should only be a single node that added this particular key in the Workflow.
            long value = entry.getValue().get(0).getValue().getAsLong();
            // Start from whatever was already collected for this stage so the other two counters are kept.
            StageStatistics existing = stageStatistics.get(stageName);
            long numOfInputRecords = existing == null ? 0 : existing.getInputRecordsCount();
            long numOfOutputRecords = existing == null ? 0 : existing.getOutputRecordsCount();
            long numOfErrorRecords = existing == null ? 0 : existing.getErrorRecordsCount();
            if (matchedSuffix.equals(inputSuffix)) {
                numOfInputRecords = value;
            } else if (matchedSuffix.equals(outputSuffix)) {
                numOfOutputRecords = value;
            } else {
                numOfErrorRecords = value;
            }
            stageStatistics.put(stageName, new BasicStageStatistics(numOfInputRecords, numOfOutputRecords, numOfErrorRecords));
        }
    }
    return stageStatistics;
}
Also used : HashMap(java.util.HashMap) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) List(java.util.List) StageStatistics(io.cdap.cdap.etl.api.condition.StageStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 9 with WorkflowToken

use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by cdapio.

Method run of the class PipelineAction.

/**
 * Runs the single action stage described by this custom action and pushes its added arguments to the workflow token.
 *
 * <p>The {@code BatchPhaseSpec} is read as JSON from the {@code Constants.PIPELINEID} property of the action's
 * specification; the stage to run is the first one produced by the phase's iterator. The plugin is instantiated
 * with a macro evaluator built from the pipeline runtime arguments, and is run only if the data tracer for the
 * stage is not enabled. The null check on the workflow token happens after that, so the action may already have
 * run when the exception is thrown.
 *
 * @throws IllegalStateException if the context has no workflow token
 * @throws Exception anything propagated from plugin instantiation or from the action itself
 */
@Override
public void run() throws Exception {
    CustomActionContext customContext = getContext();
    Map<String, String> specProperties = customContext.getSpecification().getProperties();
    BatchPhaseSpec spec = GSON.fromJson(specProperties.get(Constants.PIPELINEID), BatchPhaseSpec.class);
    StageSpec stage = spec.getPhase().iterator().next();
    String stageName = stage.getName();

    PluginContext pluginCtx = new PipelinePluginContext(customContext, metrics, spec.isStageLoggingEnabled(), spec.isProcessTimingEnabled());
    PipelineRuntime runtime = new PipelineRuntime(customContext, metrics);
    DefaultMacroEvaluator macros = new DefaultMacroEvaluator(runtime.getArguments(), customContext.getLogicalStartTime(), customContext, customContext, customContext.getNamespace());
    Action plugin = pluginCtx.newPluginInstance(stageName, macros);
    ActionContext pluginRunContext = new BasicActionContext(customContext, runtime, stage);

    // Skip the actual execution while the stage's data tracer is enabled.
    if (!customContext.getDataTracer(stageName).isEnabled()) {
        plugin.run(pluginRunContext);
    }

    WorkflowToken workflowToken = customContext.getWorkflowToken();
    if (workflowToken == null) {
        throw new IllegalStateException("WorkflowToken cannot be null when action is executed through Workflow.");
    }
    // Copy each argument added during this run into the workflow token.
    for (Map.Entry<String, String> argument : runtime.getArguments().getAddedArguments().entrySet()) {
        workflowToken.put(argument.getKey(), argument.getValue());
    }
}
Also used : Action(io.cdap.cdap.etl.api.action.Action) AbstractCustomAction(io.cdap.cdap.api.customaction.AbstractCustomAction) CustomAction(io.cdap.cdap.api.customaction.CustomAction) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext) PluginContext(io.cdap.cdap.api.plugin.PluginContext) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) ActionContext(io.cdap.cdap.etl.api.action.ActionContext) PipelinePhase(io.cdap.cdap.etl.common.PipelinePhase) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) CustomActionContext(io.cdap.cdap.api.customaction.CustomActionContext) BatchPhaseSpec(io.cdap.cdap.etl.batch.BatchPhaseSpec) HashMap(java.util.HashMap) Map(java.util.Map) PipelinePluginContext(io.cdap.cdap.etl.common.plugin.PipelinePluginContext)

Example 10 with WorkflowToken

use of io.cdap.cdap.api.workflow.WorkflowToken in project cdap by cdapio.

Method destroy of the class SmartWorkflow.

/**
 * Finishes the workflow run. In order, this method:
 * <ol>
 *   <li>runs every post action, unless the data tracer for {@code PostAction.PLUGIN_TYPE} is enabled (preview
 *       mode); a failing post action is logged and does not stop the others;</li>
 *   <li>publishes the alerts read from each alert publisher's connector dataset, always destroying the
 *       publisher afterwards, even when there were no alerts or publishing failed;</li>
 *   <li>logs the final pipeline status;</li>
 *   <li>stores the macro-resolved plugin properties of every stage in the workflow token as JSON;</li>
 *   <li>if the run completed successfully and field operations were recorded in the token, validates them and
 *       records the resulting lineage operations on the workflow context.</li>
 * </ol>
 */
@Override
public void destroy() {
    WorkflowContext workflowContext = getContext();
    PipelineRuntime pipelineRuntime = new PipelineRuntime(workflowContext, workflowMetrics);
    // Execute the post actions only if pipeline is not running in preview mode.
    if (!workflowContext.getDataTracer(PostAction.PLUGIN_TYPE).isEnabled()) {
        for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
            String name = endingActionEntry.getKey();
            PostAction action = endingActionEntry.getValue();
            StageSpec stageSpec = stageSpecs.get(name);
            BatchActionContext context = new WorkflowBackedActionContext(workflowContext, pipelineRuntime, stageSpec);
            try {
                action.run(context);
            } catch (Throwable t) {
                // Best effort: one failing post action must not prevent the remaining ones from running.
                LOG.error("Error while running post action {}.", name, t);
            }
        }
    }
    Map<String, String> connectorDatasets = GSON.fromJson(workflowContext.getWorkflowSpecification().getProperty(Constants.CONNECTOR_DATASETS), STAGE_DATASET_MAP);
    // publish all alerts
    for (Map.Entry<String, AlertPublisher> alertPublisherEntry : alertPublishers.entrySet()) {
        String stageName = alertPublisherEntry.getKey();
        AlertPublisher alertPublisher = alertPublisherEntry.getValue();
        FileSet alertConnector = workflowContext.getDataset(connectorDatasets.get(stageName));
        try (CloseableIterator<Alert> alerts = new AlertReader(alertConnector)) {
            if (!alerts.hasNext()) {
                // Nothing to publish for this stage; the finally block below still destroys the publisher.
                continue;
            }
            StageMetrics stageMetrics = new DefaultStageMetrics(workflowMetrics, stageName);
            StageSpec stageSpec = stageSpecs.get(stageName);
            AlertPublisherContext alertContext = new DefaultAlertPublisherContext(pipelineRuntime, stageSpec, workflowContext, workflowContext.getAdmin());
            alertPublisher.initialize(alertContext);
            TrackedIterator<Alert> trackedIterator = new TrackedIterator<>(alerts, stageMetrics, Constants.Metrics.RECORDS_IN);
            alertPublisher.publish(trackedIterator);
        } catch (Exception e) {
            LOG.warn("Stage {} had errors publishing alerts. Alerts may not have been published.", stageName, e);
        } finally {
            try {
                alertPublisher.destroy();
            } catch (Exception e) {
                LOG.warn("Error destroying alert publisher for stage {}", stageName, e);
            }
        }
    }
    ProgramStatus status = workflowContext.getState().getStatus();
    String pipelineName = workflowContext.getApplicationSpecification().getName();
    if (status == ProgramStatus.FAILED) {
        WRAPPERLOGGER.error("Pipeline '{}' failed.", pipelineName);
    } else {
        // Lower-case with a fixed locale: the default-locale overload mangles enum names under locales with
        // special casing rules (e.g. 'I' becomes a dotless 'ı' in Turkish).
        WRAPPERLOGGER.info("Pipeline '{}' {}.", pipelineName, status == ProgramStatus.COMPLETED ? "succeeded" : status.name().toLowerCase(java.util.Locale.ROOT));
    }
    MacroEvaluator macroEvaluator = new DefaultMacroEvaluator(pipelineRuntime.getArguments(), workflowContext.getLogicalStartTime(), workflowContext, workflowContext, workflowContext.getNamespace());
    // Get resolved plugin properties
    Map<String, Map<String, String>> resolvedProperties = new HashMap<>();
    for (StageSpec spec : stageSpecs.values()) {
        String stageName = spec.getName();
        resolvedProperties.put(stageName, workflowContext.getPluginProperties(stageName, macroEvaluator).getProperties());
    }
    // Add resolved plugin properties to workflow token as a JSON String
    WorkflowToken token = workflowContext.getToken();
    token.put(RESOLVED_PLUGIN_PROPERTIES_MAP, GSON.toJson(resolvedProperties));
    // record only if the Workflow is successful
    if (status != ProgramStatus.COMPLETED) {
        return;
    }
    // Collect field operations from each phase
    List<NodeValue> allNodeValues = token.getAll(Constants.FIELD_OPERATION_KEY_IN_WORKFLOW_TOKEN);
    if (allNodeValues.isEmpty()) {
        // no field lineage recorded by any stage
        return;
    }
    Map<String, List<FieldOperation>> allStageOperations = new HashMap<>();
    for (StageSpec stageSpec : stageSpecs.values()) {
        allStageOperations.put(stageSpec.getName(), new ArrayList<>());
    }
    for (NodeValue nodeValue : allNodeValues) {
        Map<String, List<FieldOperation>> stageOperations = GSON.fromJson(nodeValue.getValue().toString(), STAGE_OPERATIONS_MAP);
        for (Map.Entry<String, List<FieldOperation>> entry : stageOperations.entrySet()) {
            // Operations reported under a name that is not one of this pipeline's stage specs are ignored.
            List<FieldOperation> collected = allStageOperations.get(entry.getKey());
            if (collected != null) {
                collected.addAll(entry.getValue());
            }
        }
    }
    FieldLineageProcessor processor = new FieldLineageProcessor(spec);
    Set<Operation> processedOperations = processor.validateAndConvert(allStageOperations);
    if (!processedOperations.isEmpty()) {
        workflowContext.record(processedOperations);
    }
}
Also used : NodeValue(io.cdap.cdap.api.workflow.NodeValue) PipelineRuntime(io.cdap.cdap.etl.common.PipelineRuntime) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) MacroEvaluator(io.cdap.cdap.api.macro.MacroEvaluator) BatchActionContext(io.cdap.cdap.etl.api.batch.BatchActionContext) WorkflowBackedActionContext(io.cdap.cdap.etl.batch.WorkflowBackedActionContext) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) WorkflowToken(io.cdap.cdap.api.workflow.WorkflowToken) FieldOperation(io.cdap.cdap.etl.api.lineage.field.FieldOperation) Operation(io.cdap.cdap.api.lineage.field.Operation) AlertReader(io.cdap.cdap.etl.batch.connector.AlertReader) StageSpec(io.cdap.cdap.etl.proto.v2.spec.StageSpec) DefaultMacroEvaluator(io.cdap.cdap.etl.common.DefaultMacroEvaluator) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) StageMetrics(io.cdap.cdap.etl.api.StageMetrics) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) AlertPublisherContext(io.cdap.cdap.etl.api.AlertPublisherContext) DefaultAlertPublisherContext(io.cdap.cdap.etl.common.DefaultAlertPublisherContext) FieldLineageProcessor(io.cdap.cdap.etl.lineage.FieldLineageProcessor) AlertPublisher(io.cdap.cdap.etl.api.AlertPublisher) FileSet(io.cdap.cdap.api.dataset.lib.FileSet) TrackedIterator(io.cdap.cdap.etl.common.TrackedIterator) WorkflowContext(io.cdap.cdap.api.workflow.WorkflowContext) DisjointConnectionsException(io.cdap.cdap.etl.planner.DisjointConnectionsException) ValidationException(io.cdap.cdap.etl.api.validation.ValidationException) Alert(io.cdap.cdap.etl.api.Alert) PostAction(io.cdap.cdap.etl.api.batch.PostAction) DefaultAlertPublisherContext(io.cdap.cdap.etl.common.DefaultAlertPublisherContext) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DefaultStageMetrics(io.cdap.cdap.etl.common.DefaultStageMetrics) ProgramStatus(io.cdap.cdap.api.ProgramStatus)

Aggregations

WorkflowToken (io.cdap.cdap.api.workflow.WorkflowToken)21 Map (java.util.Map)14 HashMap (java.util.HashMap)12 Value (io.cdap.cdap.api.workflow.Value)6 DefaultMacroEvaluator (io.cdap.cdap.etl.common.DefaultMacroEvaluator)6 PipelineRuntime (io.cdap.cdap.etl.common.PipelineRuntime)6 StageSpec (io.cdap.cdap.etl.proto.v2.spec.StageSpec)6 MacroEvaluator (io.cdap.cdap.api.macro.MacroEvaluator)4 PluginContext (io.cdap.cdap.api.plugin.PluginContext)4 NodeValue (io.cdap.cdap.api.workflow.NodeValue)4 BatchPhaseSpec (io.cdap.cdap.etl.batch.BatchPhaseSpec)4 PipelinePhase (io.cdap.cdap.etl.common.PipelinePhase)4 PipelinePluginContext (io.cdap.cdap.etl.common.plugin.PipelinePluginContext)4 Finisher (io.cdap.cdap.etl.common.submit.Finisher)3 List (java.util.List)3 ProgramStatus (io.cdap.cdap.api.ProgramStatus)2 AbstractCustomAction (io.cdap.cdap.api.customaction.AbstractCustomAction)2 CustomAction (io.cdap.cdap.api.customaction.CustomAction)2 CustomActionContext (io.cdap.cdap.api.customaction.CustomActionContext)2 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)2