Search in sources :

Example 1 with FAILED_WITH_TERMINAL_ERROR

use of com.netflix.conductor.common.metadata.tasks.Task.Status.FAILED_WITH_TERMINAL_ERROR in project conductor by Netflix.

The retry method of the class WorkflowExecutor.

/**
 * Retries a workflow that has already reached a terminal status.
 *
 * <p>For every task reference name, the latest task instance decides whether the task is retried:
 * FAILED, FAILED_WITH_TERMINAL_ERROR and TIMED_OUT instances are rescheduled, as are CANCELED
 * instances other than JOIN and DO_WHILE (those two are flipped back to IN_PROGRESS instead).
 * The workflow is then marked RUNNING, pushed onto the decider queue, and decided.
 *
 * @param workflowId the id of the workflow to be retried
 * @param resumeSubworkflowTasks when {@code true}, the first task accepted by
 *     {@code findLastFailedOrTimeOutTask} is handed to {@code findLastFailedSubWorkflow} and the
 *     workflow it returns is the one that gets retried; parents are then updated through
 *     {@code updateParentWorkflowRecursively}
 * @throws ApplicationException with code CONFLICT if the workflow is not in a terminal status,
 *     has no tasks, or has no retriable tasks
 */
public void retry(String workflowId, boolean resumeSubworkflowTasks) {
    Workflow workflow = executionDAOFacade.getWorkflowById(workflowId, true);

    // Preconditions: only a finished workflow that actually ran something can be retried.
    if (!workflow.getStatus().isTerminal()) {
        throw new ApplicationException(CONFLICT, "Workflow is still running.  status=" + workflow.getStatus());
    }
    if (workflow.getTasks().isEmpty()) {
        throw new ApplicationException(CONFLICT, "Workflow has not started yet");
    }

    if (resumeSubworkflowTasks) {
        Task lastFailedTask = workflow.getTasks().stream()
                .filter(this::findLastFailedOrTimeOutTask)
                .findFirst()
                .orElse(null);
        if (lastFailedTask != null) {
            workflow = findLastFailedSubWorkflow(lastFailedTask, workflow);
        }
    }

    // Collect, per reference name, the task to retry. A later instance of the same reference
    // name that is neither failed, timed out nor canceled evicts the earlier entry.
    // Eg: for the sequence task1.CANCELED, task1.COMPLETED, task1 must not be retried.
    Map<String, Task> retriableByRef = new HashMap<>();
    for (Task candidate : workflow.getTasks()) {
        String refName = candidate.getReferenceTaskName();
        switch (candidate.getStatus()) {
            case FAILED:
            case FAILED_WITH_TERMINAL_ERROR:
            case TIMED_OUT:
                retriableByRef.put(refName, candidate);
                break;
            case CANCELED: {
                String taskType = candidate.getTaskType();
                boolean resumesInPlace = taskType.equalsIgnoreCase(TaskType.JOIN.toString())
                        || taskType.equalsIgnoreCase(TaskType.DO_WHILE.toString());
                if (!resumesInPlace) {
                    retriableByRef.put(refName, candidate);
                } else {
                    // Not persisted here; it is written with the other workflow tasks further down.
                    candidate.setStatus(IN_PROGRESS);
                }
                break;
            }
            default:
                retriableByRef.remove(refName);
                break;
        }
    }
    if (retriableByRef.isEmpty()) {
        throw new ApplicationException(CONFLICT, "There are no retriable tasks! Use restart if you want to attempt entire workflow execution again.");
    }

    // Flip the workflow back to RUNNING. Updating the workflow should load it from archive, if archived.
    workflow.setStatus(WorkflowStatus.RUNNING);
    workflow.setLastRetriedTime(System.currentTimeMillis());
    // Add to decider queue
    queueDAO.push(DECIDER_QUEUE, workflow.getWorkflowId(), workflow.getPriority(), config.getSweepFrequency());
    executionDAOFacade.updateWorkflow(workflow);

    // taskToBeRescheduled marks the source task as `retried`, so the task copies must be
    // obtained before updateTasks is called.
    List<Task> bySeq = new ArrayList<>(retriableByRef.values());
    bySeq.sort(Comparator.comparingInt(Task::getSeq));
    List<Task> rescheduledTasks = new ArrayList<>();
    for (Task failedTask : bySeq) {
        rescheduledTasks.add(taskToBeRescheduled(workflow, failedTask));
    }
    dedupAndAddTasks(workflow, rescheduledTasks);

    // Note: updateTasks before updateWorkflow might fail when Workflow is archived and doesn't exist in primary store.
    executionDAOFacade.updateTasks(workflow.getTasks());
    scheduleTask(workflow, rescheduledTasks);
    decide(workflowId);

    if (resumeSubworkflowTasks) {
        updateParentWorkflowRecursively(workflow);
    } else if (StringUtils.isNotEmpty(workflow.getParentWorkflowId())) {
        updateParentWorkflow(workflow);
        decide(workflow.getParentWorkflowId());
    }
}
Also used : TaskUtils(com.netflix.conductor.common.utils.TaskUtils) SubWorkflow(com.netflix.conductor.core.execution.tasks.SubWorkflow) ExecutionLockService(com.netflix.conductor.service.ExecutionLockService) Arrays(java.util.Arrays) INVALID_INPUT(com.netflix.conductor.core.execution.ApplicationException.Code.INVALID_INPUT) NOT_FOUND(com.netflix.conductor.core.execution.ApplicationException.Code.NOT_FOUND) QueueUtils(com.netflix.conductor.core.utils.QueueUtils) LoggerFactory(org.slf4j.LoggerFactory) MetadataDAO(com.netflix.conductor.dao.MetadataDAO) CANCELED(com.netflix.conductor.common.metadata.tasks.Task.Status.CANCELED) Task(com.netflix.conductor.common.metadata.tasks.Task) ExecutionDAOFacade(com.netflix.conductor.core.orchestration.ExecutionDAOFacade) StringUtils(org.apache.commons.lang3.StringUtils) WorkflowSystemTask(com.netflix.conductor.core.execution.tasks.WorkflowSystemTask) RerunWorkflowRequest(com.netflix.conductor.common.metadata.workflow.RerunWorkflowRequest) Workflow(com.netflix.conductor.common.run.Workflow) IN_PROGRESS(com.netflix.conductor.common.metadata.tasks.Task.Status.IN_PROGRESS) Map(java.util.Map) PollData(com.netflix.conductor.common.metadata.tasks.PollData) SKIPPED(com.netflix.conductor.common.metadata.tasks.Task.Status.SKIPPED) Terminate(com.netflix.conductor.core.execution.tasks.Terminate) Code(com.netflix.conductor.core.execution.ApplicationException.Code) TaskDef(com.netflix.conductor.common.metadata.tasks.TaskDef) Status.valueOf(com.netflix.conductor.common.metadata.tasks.Task.Status.valueOf) Trace(com.netflix.conductor.annotations.Trace) WorkflowStatus(com.netflix.conductor.common.run.Workflow.WorkflowStatus) MetadataMapperService(com.netflix.conductor.core.metadata.MetadataMapperService) Predicate(java.util.function.Predicate) Collectors(java.util.stream.Collectors) WorkflowTask(com.netflix.conductor.common.metadata.workflow.WorkflowTask) SCHEDULED(com.netflix.conductor.common.metadata.tasks.Task.Status.SCHEDULED) 
Objects(java.util.Objects) QueueDAO(com.netflix.conductor.dao.QueueDAO) List(java.util.List) TIMED_OUT(com.netflix.conductor.common.metadata.tasks.Task.Status.TIMED_OUT) Optional(java.util.Optional) Status(com.netflix.conductor.common.metadata.tasks.TaskResult.Status) Configuration(com.netflix.conductor.core.config.Configuration) CONFLICT(com.netflix.conductor.core.execution.ApplicationException.Code.CONFLICT) WorkflowContext(com.netflix.conductor.core.WorkflowContext) IDGenerator(com.netflix.conductor.core.utils.IDGenerator) HashMap(java.util.HashMap) TaskResult(com.netflix.conductor.common.metadata.tasks.TaskResult) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Inject(javax.inject.Inject) SUB_WORKFLOW(com.netflix.conductor.common.metadata.workflow.TaskType.SUB_WORKFLOW) SkipTaskRequest(com.netflix.conductor.common.metadata.workflow.SkipTaskRequest) FAILED_WITH_TERMINAL_ERROR(com.netflix.conductor.common.metadata.tasks.Task.Status.FAILED_WITH_TERMINAL_ERROR) Logger(org.slf4j.Logger) RetryUtil(com.netflix.conductor.common.utils.RetryUtil) WorkflowDef(com.netflix.conductor.common.metadata.workflow.WorkflowDef) FAILED(com.netflix.conductor.common.metadata.tasks.Task.Status.FAILED) Monitors(com.netflix.conductor.metrics.Monitors) TERMINATE(com.netflix.conductor.common.metadata.workflow.TaskType.TERMINATE) Preconditions(com.google.common.base.Preconditions) TaskType(com.netflix.conductor.common.metadata.workflow.TaskType) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) Task(com.netflix.conductor.common.metadata.tasks.Task) WorkflowSystemTask(com.netflix.conductor.core.execution.tasks.WorkflowSystemTask) WorkflowTask(com.netflix.conductor.common.metadata.workflow.WorkflowTask) HashMap(java.util.HashMap) SubWorkflow(com.netflix.conductor.core.execution.tasks.SubWorkflow) Workflow(com.netflix.conductor.common.run.Workflow)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 Trace (com.netflix.conductor.annotations.Trace)1 PollData (com.netflix.conductor.common.metadata.tasks.PollData)1 Task (com.netflix.conductor.common.metadata.tasks.Task)1 CANCELED (com.netflix.conductor.common.metadata.tasks.Task.Status.CANCELED)1 FAILED (com.netflix.conductor.common.metadata.tasks.Task.Status.FAILED)1 FAILED_WITH_TERMINAL_ERROR (com.netflix.conductor.common.metadata.tasks.Task.Status.FAILED_WITH_TERMINAL_ERROR)1 IN_PROGRESS (com.netflix.conductor.common.metadata.tasks.Task.Status.IN_PROGRESS)1 SCHEDULED (com.netflix.conductor.common.metadata.tasks.Task.Status.SCHEDULED)1 SKIPPED (com.netflix.conductor.common.metadata.tasks.Task.Status.SKIPPED)1 TIMED_OUT (com.netflix.conductor.common.metadata.tasks.Task.Status.TIMED_OUT)1 Status.valueOf (com.netflix.conductor.common.metadata.tasks.Task.Status.valueOf)1 TaskDef (com.netflix.conductor.common.metadata.tasks.TaskDef)1 TaskResult (com.netflix.conductor.common.metadata.tasks.TaskResult)1 Status (com.netflix.conductor.common.metadata.tasks.TaskResult.Status)1 RerunWorkflowRequest (com.netflix.conductor.common.metadata.workflow.RerunWorkflowRequest)1 SkipTaskRequest (com.netflix.conductor.common.metadata.workflow.SkipTaskRequest)1 TaskType (com.netflix.conductor.common.metadata.workflow.TaskType)1 SUB_WORKFLOW (com.netflix.conductor.common.metadata.workflow.TaskType.SUB_WORKFLOW)1