Use of com.netflix.conductor.common.metadata.tasks.Task.Status.TIMED_OUT in the Netflix Conductor project.
Class DeciderService, method decide.
/**
 * Evaluates the current state of a workflow and works out what has to happen next.
 *
 * <p>The returned outcome carries the tasks to be scheduled, the tasks whose state changed and
 * must be updated, and a flag telling whether the workflow is complete. A workflow that is
 * already terminal, or that is paused, yields an empty outcome.
 *
 * @param workflow the workflow to evaluate
 * @param preScheduledTasks tasks that are considered for scheduling up front, keyed by their
 *     reference task name
 * @return the outcome of the evaluation
 * @throws TerminateWorkflowException declared by this method; presumably raised by the timeout and
 *     retry helpers called below (their bodies are not visible here - confirm)
 */
private DeciderOutcome decide(final Workflow workflow, List<Task> preScheduledTasks) throws TerminateWorkflowException {
    DeciderOutcome outcome = new DeciderOutcome();

    if (workflow.getStatus().isTerminal()) {
        // you cannot evaluate a terminal workflow
        LOGGER.debug("Workflow {} is already finished. Reason: {}", workflow, workflow.getReasonForIncompletion());
        return outcome;
    }

    checkWorkflowTimeout(workflow);

    if (workflow.getStatus().equals(WorkflowStatus.PAUSED)) {
        // Parameterized logging: the message is only built when debug is enabled
        LOGGER.debug("Workflow {} is paused", workflow.getWorkflowId());
        return outcome;
    }

    // Filter the list of tasks and include only tasks that are not retried, not executed
    // marked to be skipped and not part of System tasks that is DECISION, FORK, JOIN
    // (as described by the original note; the isNonPendingTask predicate is defined elsewhere).
    // This list will be empty for a new workflow being started
    List<Task> pendingTasks = workflow.getTasks().stream()
            .filter(isNonPendingTask)
            .collect(Collectors.toList());

    // Reference names of the tasks that are already flagged as executed.
    // Entries are removed below when a task has to be scheduled again.
    Set<String> executedTaskRefNames = workflow.getTasks().stream()
            .filter(Task::isExecuted)
            .map(Task::getReferenceTaskName)
            .collect(Collectors.toSet());

    // Insertion order is preserved; a later entry with the same reference name replaces the earlier one
    Map<String, Task> tasksToBeScheduled = new LinkedHashMap<>();
    for (Task preScheduledTask : preScheduledTasks) {
        tasksToBeScheduled.put(preScheduledTask.getReferenceTaskName(), preScheduledTask);
    }

    // Nothing to iterate when pendingTasks is empty (per the note above, the case for a new workflow)
    for (Task pendingTask : pendingTasks) {

        // A system task that has not reached a terminal status is scheduled again
        if (SystemTaskType.is(pendingTask.getTaskType()) && !pendingTask.getStatus().isTerminal()) {
            tasksToBeScheduled.putIfAbsent(pendingTask.getReferenceTaskName(), pendingTask);
            executedTaskRefNames.remove(pendingTask.getReferenceTaskName());
        }

        // Prefer the definition attached to the task; fall back to the workflow definition
        Optional<TaskDef> taskDefinition = pendingTask.getTaskDefinition();
        if (!taskDefinition.isPresent()) {
            taskDefinition = Optional.ofNullable(workflow.getWorkflowDefinition().getTaskByRefName(pendingTask.getReferenceTaskName()))
                    .map(WorkflowTask::getTaskDefinition);
        }

        if (taskDefinition.isPresent()) {
            checkTaskTimeout(taskDefinition.get(), pendingTask);
            checkTaskPollTimeout(taskDefinition.get(), pendingTask);
            // If the task has not been updated for "responseTimeoutSeconds" then mark task as TIMED_OUT
            if (isResponseTimedOut(taskDefinition.get(), pendingTask)) {
                timeoutTask(taskDefinition.get(), pendingTask);
            }
        }

        if (!pendingTask.getStatus().isSuccessful()) {
            WorkflowTask workflowTask = pendingTask.getWorkflowTask();
            if (workflowTask == null) {
                workflowTask = workflow.getWorkflowDefinition().getTaskByRefName(pendingTask.getReferenceTaskName());
            }

            Optional<Task> retryTask = retry(taskDefinition.orElse(null), workflowTask, pendingTask, workflow);
            if (retryTask.isPresent()) {
                // The retry replaces whatever was queued under the same reference name
                tasksToBeScheduled.put(retryTask.get().getReferenceTaskName(), retryTask.get());
                executedTaskRefNames.remove(retryTask.get().getReferenceTaskName());
                outcome.tasksToBeUpdated.add(pendingTask);
            } else {
                // No retry task was produced: the task is recorded as COMPLETED_WITH_ERRORS
                pendingTask.setStatus(COMPLETED_WITH_ERRORS);
            }
        }

        // A terminal task that was neither executed nor retried is flagged as executed
        // and the tasks that follow it are looked up
        if (!pendingTask.isExecuted() && !pendingTask.isRetried() && pendingTask.getStatus().isTerminal()) {
            pendingTask.setExecuted(true);
            List<Task> nextTasks = getNextTask(workflow, pendingTask);
            if (pendingTask.isLoopOverTask() && !TaskType.DO_WHILE.name().equals(pendingTask.getTaskType()) && !nextTasks.isEmpty()) {
                nextTasks = filterNextLoopOverTasks(nextTasks, pendingTask, workflow);
            }
            nextTasks.forEach(nextTask -> tasksToBeScheduled.putIfAbsent(nextTask.getReferenceTaskName(), nextTask));
            outcome.tasksToBeUpdated.add(pendingTask);
            LOGGER.debug("Scheduling Tasks from {}, next = {} for workflowId: {}", pendingTask.getTaskDefName(),
                    nextTasks.stream().map(Task::getTaskDefName).collect(Collectors.toList()), workflow.getWorkflowId());
        }
    }

    // Tasks that need to be scheduled are added to the outcome, except those whose
    // reference name is still present in the executed set
    List<Task> unScheduledTasks = tasksToBeScheduled.values().stream()
            .filter(task -> !executedTaskRefNames.contains(task.getReferenceTaskName()))
            .collect(Collectors.toList());
    if (!unScheduledTasks.isEmpty()) {
        LOGGER.debug("Scheduling Tasks: {} for workflow: {}",
                unScheduledTasks.stream().map(Task::getTaskDefName).collect(Collectors.toList()), workflow.getWorkflowId());
        outcome.tasksToBeScheduled.addAll(unScheduledTasks);
    }

    // Complete when the terminate-task predicate matches, or when nothing is left to schedule
    // and checkForWorkflowCompletion reports completion
    if (containsSuccessfulTerminateTask.test(workflow) || (outcome.tasksToBeScheduled.isEmpty() && checkForWorkflowCompletion(workflow))) {
        LOGGER.debug("Marking workflow: {} as complete.", workflow);
        outcome.isComplete = true;
    }

    return outcome;
}
Use of com.netflix.conductor.common.metadata.tasks.Task.Status.TIMED_OUT in the Netflix Conductor project.
Class AbstractWorkflowServiceTest, method testSimpleWorkflowWithResponseTimeout.
/**
 * Runs the "RTOWF" workflow through a response timeout: the first attempt of the first task is
 * polled and then left untouched until the decider times it out, the replacement attempt is
 * pushed back with a callback delay, and finally both tasks are completed so that the workflow
 * ends up COMPLETED.
 */
@Test
public void testSimpleWorkflowWithResponseTimeout() throws Exception {
    createWFWithResponseTimeout();

    // --- start the workflow ---
    Map<String, Object> input = new HashMap<>();
    input.put("param1", "p1 value");
    input.put("param2", "p2 value");
    String wfId = startOrLoadWorkflowExecution("RTOWF", 1, "unit_test_1", input, null, null);
    logger.debug("testSimpleWorkflowWithResponseTimeout.wfid=" + wfId);
    assertNotNull(wfId);

    Workflow execution = workflowExecutionService.getExecutionStatus(wfId, true);
    assertNotNull(execution);
    assertEquals(RUNNING, execution.getStatus());
    // Only the very first task is expected to be scheduled at this point.
    assertEquals(1, execution.getTasks().size());
    assertEquals(1, queueDAO.getSize("task_rt"));

    // --- first attempt: poll, ack, then never respond ---
    Task firstAttempt = workflowExecutionService.poll("task_rt", "task1.junit.worker.testTimeout");
    assertNotNull(firstAttempt);
    assertEquals("task_rt", firstAttempt.getTaskType());
    assertTrue(workflowExecutionService.ackTaskReceived(firstAttempt.getTaskId()));
    assertEquals(wfId, firstAttempt.getWorkflowInstanceId());

    // task_rt has left the queue, so another poll must come back empty.
    Task emptyPoll = workflowExecutionService.poll("task_rt", "task1.junit.worker.testTimeout");
    assertNull(emptyPoll);

    Thread.sleep(10000);
    workflowExecutor.decide(wfId);
    assertEquals(1, queueDAO.getSize("task_rt"));

    // The first attempt should now be timed out, with a fresh attempt scheduled next to it.
    execution = workflowExecutionService.getExecutionStatus(wfId, true);
    assertNotNull(execution);
    assertEquals(RUNNING, execution.getStatus());
    assertEquals(2, execution.getTasks().size());
    boolean everyTaskIsFirstStep = execution.getTasks().stream()
            .allMatch(attempt -> attempt.getReferenceTaskName().equals("task_rt_t1"));
    assertTrue(everyTaskIsFirstStep);
    assertEquals(TIMED_OUT, execution.getTasks().get(0).getStatus());
    assertEquals(SCHEDULED, execution.getTasks().get(1).getStatus());

    // --- second attempt: report progress with a callback delay ---
    // The task should be available again because it was put back in the queue.
    Task secondAttempt = workflowExecutionService.poll("task_rt", "task1.junit.worker");
    assertNotNull(secondAttempt);

    // Ask to be called back after a delay greater than the response timeout.
    secondAttempt.setStatus(IN_PROGRESS);
    secondAttempt.setCallbackAfterSeconds(2);
    workflowExecutionService.updateTask(secondAttempt);

    execution = workflowExecutionService.getExecutionStatus(wfId, true);
    assertNotNull(execution);
    assertEquals(WorkflowStatus.RUNNING, execution.getStatus());
    assertEquals(2, execution.getTasks().size());
    assertEquals(SCHEDULED, execution.getTasks().get(1).getStatus());

    // Wait out the callback delay (longer than the response timeout) before deciding again.
    Thread.sleep(2010);
    // Make sure unacked messages are processed.
    queueDAO.processUnacks(secondAttempt.getTaskDefName());
    workflowExecutor.decide(wfId);

    execution = workflowExecutionService.getExecutionStatus(wfId, true);
    assertNotNull(execution);

    // --- complete the first step ---
    secondAttempt = workflowExecutionService.poll("task_rt", "task1.junit.worker");
    assertNotNull(secondAttempt);
    secondAttempt.getOutputData().put("op", "task1.Done");
    secondAttempt.setStatus(COMPLETED);
    workflowExecutionService.updateTask(secondAttempt);

    // --- complete the second step ---
    Task secondStep = workflowExecutionService.poll("junit_task_2", "task2.junit.worker.testTimeout");
    assertNotNull(secondStep);
    assertEquals("junit_task_2", secondStep.getTaskType());
    assertTrue(workflowExecutionService.ackTaskReceived(secondStep.getTaskId()));
    secondStep.setStatus(COMPLETED);
    secondStep.setReasonForIncompletion("unit test failure");
    workflowExecutionService.updateTask(secondStep);

    // --- the workflow must have finished ---
    execution = workflowExecutionService.getExecutionStatus(wfId, true);
    assertNotNull(execution);
    assertEquals(WorkflowStatus.COMPLETED, execution.getStatus());
}
Use of com.netflix.conductor.common.metadata.tasks.Task.Status.TIMED_OUT in the Netflix Conductor project.
Class WorkflowExecutor, method retry.
/**
 * Retries a workflow that has reached a terminal state.
 *
 * <p>Gets the last instance of each failed or timed out task and reschedules each.
 * Gets all cancelled tasks and schedules all of them except JOIN and DO_WHILE
 * (those are switched back to IN_PROGRESS instead).
 * Switches the workflow back to RUNNING status and calls the decider.
 *
 * @param workflowId the id of the workflow to be retried
 * @param resumeSubworkflowTasks when {@code true}, the first task accepted by
 *     {@code findLastFailedOrTimeOutTask} is handed to {@code findLastFailedSubWorkflow} and the
 *     workflow it returns is the one that gets retried; parent workflows are then updated through
 *     {@code updateParentWorkflowRecursively}. When {@code false}, only the direct parent (if any)
 *     is updated and decided.
 * @throws ApplicationException with code CONFLICT when the workflow is not in a terminal state,
 *     has no tasks, or has no retriable task
 */
public void retry(String workflowId, boolean resumeSubworkflowTasks) {
    Workflow workflow = executionDAOFacade.getWorkflowById(workflowId, true);
    if (!workflow.getStatus().isTerminal()) {
        throw new ApplicationException(CONFLICT, "Workflow is still running. status=" + workflow.getStatus());
    }
    if (workflow.getTasks().isEmpty()) {
        throw new ApplicationException(CONFLICT, "Workflow has not started yet");
    }

    if (resumeSubworkflowTasks) {
        Optional<Task> lTask = workflow.getTasks().stream().filter(this::findLastFailedOrTimeOutTask).findFirst();
        if (lTask.isPresent()) {
            // From here on "workflow" may refer to a different workflow than workflowId
            workflow = findLastFailedSubWorkflow(lTask.get(), workflow);
        }
    }

    // Get all FAILED or CANCELED tasks that are not COMPLETED (or reach other terminal states) on further executions.
    // Eg: for Seq of tasks task1.CANCELED, task1.COMPLETED, task1 shouldn't be retried.
    // Throw an exception if there are no FAILED tasks.
    // Handle JOIN task CANCELED status as special case.
    Map<String, Task> retriableMap = new HashMap<>();
    for (Task task : workflow.getTasks()) {
        switch (task.getStatus()) {
            case FAILED:
            case FAILED_WITH_TERMINAL_ERROR:
            case TIMED_OUT:
                // A later task with the same reference name overwrites the earlier entry
                retriableMap.put(task.getReferenceTaskName(), task);
                break;
            case CANCELED:
                if (task.getTaskType().equalsIgnoreCase(TaskType.JOIN.toString())
                        || task.getTaskType().equalsIgnoreCase(TaskType.DO_WHILE.toString())) {
                    task.setStatus(IN_PROGRESS);
                    // Task doesn't have to be updated yet. Will be updated along with other Workflow tasks downstream.
                } else {
                    retriableMap.put(task.getReferenceTaskName(), task);
                }
                break;
            default:
                // Any other status for the same reference name cancels a previously recorded retry candidate
                retriableMap.remove(task.getReferenceTaskName());
                break;
        }
    }

    if (retriableMap.isEmpty()) {
        throw new ApplicationException(CONFLICT, "There are no retriable tasks! Use restart if you want to attempt entire workflow execution again.");
    }

    // Update Workflow with new status.
    // This should load Workflow from archive, if archived.
    workflow.setStatus(WorkflowStatus.RUNNING);
    workflow.setLastRetriedTime(System.currentTimeMillis());
    // Add to decider queue
    queueDAO.push(DECIDER_QUEUE, workflow.getWorkflowId(), workflow.getPriority(), config.getSweepFrequency());
    executionDAOFacade.updateWorkflow(workflow);

    // taskToBeRescheduled would set task `retried` to true, and hence it's important to updateTasks after obtaining task copy from taskToBeRescheduled.
    // The lambda needs an effectively final reference because "workflow" may have been reassigned above.
    final Workflow finalWorkflow = workflow;
    List<Task> retriableTasks = retriableMap.values().stream()
            .sorted(Comparator.comparingInt(Task::getSeq))
            .map(task -> taskToBeRescheduled(finalWorkflow, task))
            .collect(Collectors.toList());

    dedupAndAddTasks(workflow, retriableTasks);
    // Note: updateTasks before updateWorkflow might fail when Workflow is archived and doesn't exist in primary store.
    executionDAOFacade.updateTasks(workflow.getTasks());
    scheduleTask(workflow, retriableTasks);

    // NOTE(review): this decides on the id passed in, which differs from workflow.getWorkflowId()
    // when a sub-workflow was selected above - confirm this is intended.
    decide(workflowId);

    if (resumeSubworkflowTasks) {
        updateParentWorkflowRecursively(workflow);
    } else if (StringUtils.isNotEmpty(workflow.getParentWorkflowId())) {
        updateParentWorkflow(workflow);
        decide(workflow.getParentWorkflowId());
    }
}
Aggregations