use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.
the class LiveJobs method preemptTask.
/**
 * Preempts the named task of the given job and schedules it to be restarted.
 *
 * <p>The job is locked for the duration of the call and always unlocked on exit.
 * If the task is not alive an empty result is returned; otherwise its running
 * data is removed, a result carrying a {@code TaskPreemptedException} is created,
 * and the task is handed to {@code restartTaskOnError} with status
 * {@code PENDING}.
 *
 * @param jobId        id of the job owning the task
 * @param taskName     name of the task to preempt
 * @param restartDelay delay before restart; multiplied by 1000 to obtain the wait
 *                     time given to {@code restartTaskOnError} (presumably seconds
 *                     converted to milliseconds - confirm against callers)
 * @return the termination data built for the preempted task, or an empty result
 *         when the task is not alive
 * @throws UnknownJobException   if no job could be locked for {@code jobId}
 * @throws UnknownTaskException  declared for the task lookup by name
 * @throws IllegalStateException if the task is alive but has no running data
 */
TerminationData preemptTask(JobId jobId, String taskName, int restartDelay)
        throws UnknownJobException, UnknownTaskException {
    final JobData lockedJob = lockJob(jobId);
    if (lockedJob == null) {
        throw new UnknownJobException(jobId);
    }

    try {
        final InternalTask target = lockedJob.job.getTask(taskName);
        final TaskId targetId = target.getId();
        tlogger.info(targetId, "preempting task " + targetId);

        // Nothing to preempt when the task has already left the alive states.
        if (!target.getStatus().isTaskAlive()) {
            tlogger.info(targetId, "task isn't alive: " + target.getStatus());
            return emptyResult(targetId);
        }

        final RunningTaskData running = runningTasksData.remove(TaskIdWrapper.wrap(targetId));
        if (running == null) {
            throw new IllegalStateException("Task " + targetId + " is not running.");
        }

        final TaskPreemptedException preemption = new TaskPreemptedException("Preempted by admin");
        final SimpleTaskLogs preemptionLogs = new SimpleTaskLogs("", "Preempted by admin");
        final TaskResultImpl preemptedResult = taskResultCreator.getTaskResult(dbManager,
                                                                               lockedJob.job,
                                                                               target,
                                                                               preemption,
                                                                               preemptionLogs);

        final TerminationData termination = createAndFillTerminationData(preemptedResult,
                                                                         running,
                                                                         lockedJob.job,
                                                                         TerminationData.TerminationStatus.ABORTED);

        // Long arithmetic so the multiplication cannot overflow an int.
        final long delayBeforeRestart = 1000L * restartDelay;
        restartTaskOnError(lockedJob, target, TaskStatus.PENDING, preemptedResult, delayBeforeRestart, termination);
        return termination;
    } finally {
        lockedJob.unlock();
    }
}
use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.
the class LiveJobs method taskTerminatedWithResult.
/**
 * Handles the termination of a task that produced the given result.
 *
 * <p>The owning job is locked for the duration of the call and always unlocked
 * on exit. An empty result is returned when the job cannot be locked, when the
 * task is unknown to the job, or when the task is no longer in the RUNNING
 * status. Otherwise termination data is created and, depending on whether the
 * result carries an exception, the number of executions left and the on-error
 * policies of the task, the job is canceled, the task is scheduled for a
 * restart, the task (and possibly the job) is suspended/paused, or the task is
 * terminated.
 *
 * @param taskId id of the task that terminated
 * @param result result reported for the task
 * @return the termination data for the task, or an empty result in the cases
 *         listed above
 */
public TerminationData taskTerminatedWithResult(TaskId taskId, TaskResultImpl result) {
JobData jobData = lockJob(taskId.getJobId());
if (jobData == null) {
return emptyResult(taskId);
}
try {
InternalTask task;
try {
task = jobData.job.getTask(taskId);
} catch (UnknownTaskException e) {
logger.error("Unexpected exception", e);
return emptyResult(taskId);
}
// Ignore terminations reported for a task that is not RUNNING any more.
if (task.getStatus() != TaskStatus.RUNNING) {
tlogger.info(taskId, "task isn't running anymore");
return emptyResult(taskId);
}
TaskIdWrapper taskIdWrapper = TaskIdWrapper.wrap(taskId);
RunningTaskData taskData = runningTasksData.remove(taskIdWrapper);
// No running data registered for this task: rebuild it from the task and job.
if (taskData == null) {
tlogger.info(taskId, "Task " + taskId + " terminates after a recovery of the scheduler");
taskData = new RunningTaskData(task, jobData.job.getOwner(), jobData.job.getCredentials(), task.getExecuterInformation().getLauncher());
}
TerminationData terminationData = createAndFillTerminationData(result, taskData, jobData.job, TerminationData.TerminationStatus.NORMAL);
boolean errorOccurred = result.hadException();
tlogger.info(taskId, "finished with" + (errorOccurred ? "" : "out") + " errors");
if (errorOccurred) {
tlogger.error(taskId, "task has terminated with an error", result.getException());
// This failed run consumes one of the task's executions.
task.decreaseNumberOfExecutionLeft();
boolean requiresPauseJobOnError = onErrorPolicyInterpreter.requiresPauseJobOnError(task);
int numberOfExecutionLeft = task.getNumberOfExecutionLeft();
// Case 1: no execution left and the policy asks to cancel the job.
if (numberOfExecutionLeft <= 0 && onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
tlogger.info(taskId, "no retry left and task is tagged with cancel job on error");
jobData.job.increaseNumberOfFaultyTasks(taskId);
endJob(jobData, terminationData, task, result, "An error occurred in your task and the maximum number of executions has been reached. " + "You also ask to cancel the job in such a situation!", JobStatus.CANCELED);
jlogger.info(taskId.getJobId(), "job has been canceled");
return terminationData;
// Case 2: executions remain, so the task is scheduled for a restart.
} else if (numberOfExecutionLeft > 0) {
tlogger.info(taskId, "number of execution left is " + numberOfExecutionLeft);
// Both branches below schedule the same restart; they differ only in that the
// second one also counts the task as faulty.
if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task) || requiresPauseJobOnError) {
long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
tlogger.info(taskId, "new restart is scheduled");
return terminationData;
} else {
jobData.job.increaseNumberOfFaultyTasks(taskId);
long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
tlogger.info(taskId, "new restart is scheduled");
return terminationData;
}
// Case 3: no execution left and the cancel-job case above did not apply.
} else if (numberOfExecutionLeft <= 0) {
// No pause/cancel policy applies: count the task as faulty and fall through to
// terminateTask below.
if (!onErrorPolicyInterpreter.requiresPauseTaskOnError(task) && !onErrorPolicyInterpreter.requiresPauseJobOnError(task) && !onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
jobData.job.increaseNumberOfFaultyTasks(taskId);
// remove the parent tasks results if task fails and job is canceled
task.removeParentTasksResults();
} else if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task)) {
suspendTaskOnError(jobData, task, result.getTaskDuration());
tlogger.info(taskId, "Task always contains errors after automatic restart, so it stays in In_Error state");
return terminationData;
} else if (requiresPauseJobOnError) {
suspendTaskOnError(jobData, task, result.getTaskDuration());
pauseJob(task.getJobId());
logger.info("Task always contains errors after automatic restart, so Job is always paused on error");
return terminationData;
}
// NOTE(review): this check looks unreachable with requiresPauseJobOnError == true,
// since the pause-job branch above returns and the first branch requires the
// pause-job policy to be false - assuming the policy interpreter gives stable
// answers across calls. Confirm before removing.
if (requiresPauseJobOnError) {
pauseJob(task.getJobId());
}
}
} else {
// remove the parent tasks results if task finished with no error
task.removeParentTasksResults();
}
// Reached on success, or on error when no early return above applied.
terminateTask(jobData, task, errorOccurred, result, terminationData);
return terminationData;
} finally {
jobData.unlock();
}
}
use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.
the class LiveJobs method restartWaitingTask.
/**
 * Restarts the given task if its job can be locked and the task is still alive.
 *
 * <p>Does nothing when the job cannot be locked. A task that is not alive is
 * only reported with a warning, and an unknown task is logged as an error. The
 * job is always unlocked before returning.
 *
 * @param taskId id of the task to restart
 */
void restartWaitingTask(TaskId taskId) {
    final JobData lockedJob = lockJob(taskId.getJobId());
    if (lockedJob != null) {
        try {
            final InternalTask waitingTask = lockedJob.job.getTask(taskId);
            if (waitingTask.getStatus().isTaskAlive()) {
                lockedJob.job.reStartTask(waitingTask);
            } else {
                tlogger.warn(taskId, "task to be restarted isn't alive " + waitingTask.getStatus());
            }
        } catch (UnknownTaskException unknownTask) {
            logger.error("Unexpected exception", unknownTask);
        } finally {
            lockedJob.unlock();
        }
    }
}
use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.
the class LiveJobs method restartAllInErrorTasks.
/**
 * Calls {@code restartInErrorTask} for every task of the given job, then
 * refreshes the job status, persists the job and its tasks, and publishes a
 * {@code JOB_RESTARTED_FROM_ERROR} event.
 *
 * <p>A task reported as unknown is logged on the general, job and task loggers
 * and does not stop the loop. The job is always unlocked before returning.
 *
 * @param jobId id of the job whose tasks should be restarted
 * @return {@code false} when the job could not be locked, {@code true} otherwise
 */
public Boolean restartAllInErrorTasks(JobId jobId) {
    final JobData lockedJob = lockJob(jobId);
    if (lockedJob == null) {
        return Boolean.FALSE;
    }

    try {
        final InternalJob currentJob = lockedJob.job;

        for (TaskState state : currentJob.getTasks()) {
            try {
                restartInErrorTask(jobId, state.getName());
            } catch (UnknownTaskException unknownTask) {
                logger.error("", unknownTask);
                jlogger.error(jobId, "", unknownTask);
                tlogger.error(state.getId(), "", unknownTask);
            }
        }

        setJobStatusToInErrorIfNotPaused(currentJob);
        dbManager.updateJobAndTasksState(currentJob);
        updateJobInSchedulerState(currentJob, SchedulerEvent.JOB_RESTARTED_FROM_ERROR);

        return Boolean.TRUE;
    } finally {
        lockedJob.unlock();
    }
}
use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.
the class LiveJobs method updateTasksInSchedulerState.
/**
 * Notifies the listener with a {@code TASK_RUNNING_TO_FINISHED} event for each
 * of the given tasks of the job.
 *
 * <p>For every task id, the task is looked up in the job and a new
 * {@code TaskInfoImpl} is built from the task's current info before being sent
 * to the listener. A task id unknown to the job is logged and skipped, so the
 * remaining tasks are still processed.
 *
 * @param job           job owning the tasks
 * @param tasksToUpdate ids of the tasks to publish
 */
private void updateTasksInSchedulerState(InternalJob job, Set<TaskId> tasksToUpdate) {
    for (TaskId tid : tasksToUpdate) {
        try {
            InternalTask t = job.getTask(tid);
            TaskInfo ti = new TaskInfoImpl((TaskInfoImpl) t.getTaskInfo());
            listener.taskStateUpdated(job.getOwner(),
                                      new NotificationData<>(SchedulerEvent.TASK_RUNNING_TO_FINISHED, ti));
        } catch (UnknownTaskException e) {
            // Pass the exception as the throwable argument (not as the message) so the
            // stack trace is kept, matching how this exception is logged elsewhere in
            // this class.
            logger.error("Unexpected exception", e);
        }
    }
}
Aggregations