use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method preemptTask.
/**
 * Preempts a task of the given job and schedules it to be restarted.
 * <p>
 * The job is locked for the duration of the call. If the task is not alive,
 * an empty result is returned and nothing else happens. Otherwise the task's
 * running data is removed, a result carrying a {@code TaskPreemptedException}
 * is created, and the task is handed to {@code restartTaskOnError} with the
 * {@code PENDING} status.
 *
 * @param jobId        id of the job owning the task
 * @param taskName     name of the task to preempt
 * @param restartDelay delay before the restart, multiplied by 1000 before being
 *                     passed on (presumably seconds converted to milliseconds)
 * @return the termination data built for the preempted task, or an empty result
 *         when the task is not alive
 * @throws UnknownJobException   if no job data could be locked for {@code jobId}
 * @throws UnknownTaskException  declared by the task lookup on the job
 * @throws IllegalStateException if the task is alive but has no running data
 */
TerminationData preemptTask(JobId jobId, String taskName, int restartDelay)
        throws UnknownJobException, UnknownTaskException {
    final JobData lockedJob = lockJob(jobId);
    if (lockedJob == null) {
        throw new UnknownJobException(jobId);
    }
    try {
        final InternalTask target = lockedJob.job.getTask(taskName);
        tlogger.info(target.getId(), "preempting task " + target.getId());

        if (target.getStatus().isTaskAlive()) {
            final RunningTaskData runningData = runningTasksData.remove(TaskIdWrapper.wrap(target.getId()));
            if (runningData == null) {
                throw new IllegalStateException("Task " + target.getId() + " is not running.");
            }

            // The same message is used for the exception and the task logs.
            final String reason = "Preempted by admin";
            final TaskPreemptedException cause = new TaskPreemptedException(reason);
            final SimpleTaskLogs logs = new SimpleTaskLogs("", reason);
            final TaskResultImpl preemptedResult = taskResultCreator.getTaskResult(dbManager,
                                                                                   lockedJob.job,
                                                                                   target,
                                                                                   cause,
                                                                                   logs);

            final TerminationData outcome = createAndFillTerminationData(preemptedResult,
                                                                         runningData,
                                                                         lockedJob.job,
                                                                         TerminationData.TerminationStatus.ABORTED);
            restartTaskOnError(lockedJob, target, TaskStatus.PENDING, preemptedResult, restartDelay * 1000L, outcome);
            return outcome;
        }

        tlogger.info(target.getId(), "task isn't alive: " + target.getStatus());
        return emptyResult(target.getId());
    } finally {
        lockedJob.unlock();
    }
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method taskTerminatedWithResult.
/**
 * Handles the termination of a task for which a result has been received.
 * <p>
 * The job owning the task is locked for the duration of the call. An empty
 * result ({@code emptyResult(taskId)}) is returned when no job data could be
 * locked, when the job does not know the task, or when the task status is not
 * {@code RUNNING}.
 * <p>
 * When the result carries an exception, one execution attempt is consumed and
 * the on-error policy of the task decides what happens next: the job is
 * canceled, the task is scheduled for a restart, the task (and possibly the
 * job) is suspended in error, or the task is terminated as faulty. When the
 * result carries no exception, the task is terminated normally.
 *
 * @param taskId id of the task that terminated
 * @param result result reported for the task
 * @return the termination data built for the task, or an empty result in the
 *         cases listed above
 */
public TerminationData taskTerminatedWithResult(TaskId taskId, TaskResultImpl result) {
JobData jobData = lockJob(taskId.getJobId());
// no job data could be locked for this task's job: nothing to do
if (jobData == null) {
return emptyResult(taskId);
}
try {
InternalTask task;
try {
task = jobData.job.getTask(taskId);
} catch (UnknownTaskException e) {
// the job does not know this task: log and ignore the termination
logger.error("Unexpected exception", e);
return emptyResult(taskId);
}
// only a task currently in RUNNING status is processed
if (task.getStatus() != TaskStatus.RUNNING) {
tlogger.info(taskId, "task isn't running anymore");
return emptyResult(taskId);
}
TaskIdWrapper taskIdWrapper = TaskIdWrapper.wrap(taskId);
RunningTaskData taskData = runningTasksData.remove(taskIdWrapper);
if (taskData == null) {
// no running data registered for the task: rebuild it from the job's owner and
// credentials and the launcher stored in the task's executer information
tlogger.info(taskId, "Task " + taskId + " terminates after a recovery of the scheduler");
taskData = new RunningTaskData(task, jobData.job.getOwner(), jobData.job.getCredentials(), task.getExecuterInformation().getLauncher());
}
TerminationData terminationData = createAndFillTerminationData(result, taskData, jobData.job, TerminationData.TerminationStatus.NORMAL);
boolean errorOccurred = result.hadException();
tlogger.info(taskId, "finished with" + (errorOccurred ? "" : "out") + " errors");
if (errorOccurred) {
tlogger.error(taskId, "task has terminated with an error", result.getException());
// this failed run consumes one execution attempt
task.decreaseNumberOfExecutionLeft();
boolean requiresPauseJobOnError = onErrorPolicyInterpreter.requiresPauseJobOnError(task);
int numberOfExecutionLeft = task.getNumberOfExecutionLeft();
// case 1: no attempt left and the policy asks to cancel the job
if (numberOfExecutionLeft <= 0 && onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
tlogger.info(taskId, "no retry left and task is tagged with cancel job on error");
jobData.job.increaseNumberOfFaultyTasks(taskId);
endJob(jobData, terminationData, task, result, "An error occurred in your task and the maximum number of executions has been reached. " + "You also ask to cancel the job in such a situation!", JobStatus.CANCELED);
jlogger.info(taskId.getJobId(), "job has been canceled");
return terminationData;
// case 2: attempts remain, so a restart is scheduled in WAITING_ON_ERROR status
} else if (numberOfExecutionLeft > 0) {
tlogger.info(taskId, "number of execution left is " + numberOfExecutionLeft);
// both branches below schedule the same restart; the only difference is that the
// faulty-task counter is increased when neither pause policy applies
if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task) || requiresPauseJobOnError) {
// the waiting time is derived from (max executions - executions left)
long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
tlogger.info(taskId, "new restart is scheduled");
return terminationData;
} else {
jobData.job.increaseNumberOfFaultyTasks(taskId);
long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
tlogger.info(taskId, "new restart is scheduled");
return terminationData;
}
// case 3: no attempt left and case 1 did not apply; this condition always holds
// here because the previous branch already excluded numberOfExecutionLeft > 0
} else if (numberOfExecutionLeft <= 0) {
if (!onErrorPolicyInterpreter.requiresPauseTaskOnError(task) && !onErrorPolicyInterpreter.requiresPauseJobOnError(task) && !onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
// none of the pause-task, pause-job or cancel-job policies applies: count the task
// as faulty and fall through to terminateTask below
jobData.job.increaseNumberOfFaultyTasks(taskId);
// remove the parent tasks results: the task failed and will not be restarted
task.removeParentTasksResults();
} else if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task)) {
// pause-task policy: suspend the task in error; terminateTask is not called
suspendTaskOnError(jobData, task, result.getTaskDuration());
tlogger.info(taskId, "Task always contains errors after automatic restart, so it stays in In_Error state");
return terminationData;
} else if (requiresPauseJobOnError) {
// pause-job policy: suspend the task in error and pause the whole job;
// terminateTask is not called
suspendTaskOnError(jobData, task, result.getTaskDuration());
pauseJob(task.getJobId());
logger.info("Task always contains errors after automatic restart, so Job is always paused on error");
return terminationData;
}
// NOTE(review): every path above on which requiresPauseJobOnError is true appears to
// have returned already, provided onErrorPolicyInterpreter answers consistently for
// the same task; this call looks unreachable - confirm before removing
if (requiresPauseJobOnError) {
pauseJob(task.getJobId());
}
}
} else {
// remove the parent tasks results if task finished with no error
task.removeParentTasksResults();
}
// reached on success, or on a final failure that did not return above
terminateTask(jobData, task, errorOccurred, result, terminationData);
return terminationData;
} finally {
// always release the job lock taken by lockJob
jobData.unlock();
}
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method endJob.
/**
 * Ends a job: removes it from the live jobs, aborts its remaining running
 * tasks and publishes the resulting state.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>the job is removed from {@code jobs} and registered in
 * {@code terminationData} as a job to terminate;</li>
 * <li>every entry of {@code runningTasksData} whose task belongs to the job is
 * removed, its progress is reset to 0 and it is added to
 * {@code terminationData} with the {@code ABORTED} status;</li>
 * <li>for {@code KILLED}, the job is marked failed without a causing task and
 * the kill is persisted; for any other status except {@code FINISHED}, the job
 * is marked failed because of {@code task} and the failure is persisted (a
 * result built from {@code errorMsg} replaces {@code taskResult} when the
 * status is {@code FAILED}, or {@code CANCELED} without a result); for
 * {@code FINISHED} nothing is persisted here;</li>
 * <li>the job state is pushed with the event chosen from the job status
 * observed before step 3.</li>
 * </ol>
 *
 * @param jobData         locked data of the job to end
 * @param terminationData collector receiving the job and its aborted tasks
 * @param task            task that caused the end of the job; may be {@code null}
 *                        only when {@code jobStatus} is {@code KILLED} or
 *                        {@code FINISHED}
 * @param taskResult      result of the causing task, may be {@code null}
 * @param errorMsg        message used when a result has to be created
 * @param jobStatus       final status of the job
 * @throws NullPointerException if {@code task} is {@code null} while
 *                              {@code jobStatus} is neither {@code KILLED} nor
 *                              {@code FINISHED}
 */
private void endJob(JobData jobData, TerminationData terminationData, InternalTask task, TaskResultImpl taskResult,
        String errorMsg, JobStatus jobStatus) {
    // The failed/canceled path below dereferences the task. Check it before touching
    // any state, so that a bad call cannot leave the job half-removed.
    boolean taskRequired = jobStatus != JobStatus.KILLED && jobStatus != JobStatus.FINISHED;
    if (taskRequired && task == null) {
        throw new NullPointerException("task must not be null when ending a job with status " + jobStatus);
    }

    JobId jobId = jobData.job.getId();
    jobs.remove(jobId);
    terminationData.addJobToTerminate(jobId);

    InternalJob job = jobData.job;

    // The event depends on the status the job has before it is marked failed below.
    SchedulerEvent event = job.getStatus() == JobStatus.PENDING ? SchedulerEvent.JOB_PENDING_TO_FINISHED
                                                                : SchedulerEvent.JOB_RUNNING_TO_FINISHED;

    if (task != null) {
        jlogger.info(job.getId(), "ending request caused by task " + task.getId());
    } else {
        jlogger.info(job.getId(), "ending request");
    }

    // Explicit iterator: entries are removed while iterating.
    for (Iterator<RunningTaskData> i = runningTasksData.values().iterator(); i.hasNext();) {
        RunningTaskData taskData = i.next();
        if (taskData.getTask().getJobId().equals(jobId)) {
            i.remove();
            // remove previous read progress
            taskData.getTask().setProgress(0);
            terminationData.addTaskData(job, taskData, TerminationData.TerminationStatus.ABORTED, taskResult);
        }
    }

    if (jobStatus == JobStatus.KILLED) {
        // job has been killed: no causing task
        Set<TaskId> tasksToUpdate = job.failed(null, jobStatus);
        dbManager.updateAfterJobKilled(job, tasksToUpdate);
        updateTasksInSchedulerState(job, tasksToUpdate);
    } else if (jobStatus != JobStatus.FINISHED) {
        // finished state (failed/canceled)
        Set<TaskId> tasksToUpdate = job.failed(task.getId(), jobStatus);

        // store the exception into jobResult / to prevent an empty
        // task result (when job canceled), create one
        boolean noResult = (jobStatus == JobStatus.CANCELED && taskResult == null);
        if (jobStatus == JobStatus.FAILED || noResult) {
            taskResult = new TaskResultImpl(task.getId(),
                                            new Exception(errorMsg),
                                            new SimpleTaskLogs("", errorMsg),
                                            -1);
        }

        dbManager.updateAfterJobFailed(job, task, taskResult, tasksToUpdate);
        updateTasksInSchedulerState(job, tasksToUpdate);
    }

    // update job and tasks events list and send it to front-end
    updateJobInSchedulerState(job, event);
    jlogger.info(job.getId(), "finished (" + jobStatus + ")");
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method restartWaitingTask.
/**
 * Restarts a task that was waiting for its restart, if it is still alive.
 * <p>
 * Nothing happens when no job data could be locked for the task's job. A task
 * that is not alive is only reported with a warning. A task unknown to the job
 * is reported as an error and otherwise ignored.
 *
 * @param taskId id of the task to restart
 */
void restartWaitingTask(TaskId taskId) {
    final JobData lockedJob = lockJob(taskId.getJobId());
    if (lockedJob != null) {
        try {
            final InternalTask waitingTask = lockedJob.job.getTask(taskId);
            if (waitingTask.getStatus().isTaskAlive()) {
                lockedJob.job.reStartTask(waitingTask);
            } else {
                tlogger.warn(taskId, "task to be restarted isn't alive " + waitingTask.getStatus());
            }
        } catch (UnknownTaskException e) {
            logger.error("Unexpected exception", e);
        } finally {
            // release the lock taken by lockJob whatever happened above
            lockedJob.unlock();
        }
    }
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method suspendTaskOnError.
/**
 * Records that a task is suspended because of an error and publishes the
 * resulting job state.
 * <p>
 * The job's in-error time is set to the current time, the task is registered
 * on the job as paused on error, and the job's count of in-error tasks is
 * increased by one. The task's own in-error time is set to its start time plus
 * {@code taskDuration}. The job and its tasks are then persisted through
 * {@code dbManager}, and the task and job updates are pushed, the latter with
 * the {@code JOB_IN_ERROR} event.
 *
 * @param jobData      data of the job owning the task
 * @param task         task to suspend
 * @param taskDuration duration of the failed run, added to the task start time
 */
private void suspendTaskOnError(JobData jobData, InternalTask task, long taskDuration) {
    final InternalJob owningJob = jobData.job;

    // job-side bookkeeping
    final long now = System.currentTimeMillis();
    owningJob.setInErrorTime(now);
    owningJob.setTaskPausedOnError(task);
    setJobStatusToInErrorIfNotPaused(owningJob);
    owningJob.incrementNumberOfInErrorTasksBy(1);

    // task-side bookkeeping
    final long taskInErrorTime = task.getStartTime() + taskDuration;
    task.setInErrorTime(taskInErrorTime);

    // persist, then notify
    dbManager.updateJobAndTasksState(owningJob);
    updateTaskPausedOnerrorState(owningJob, task.getId());
    updateJobInSchedulerState(owningJob, SchedulerEvent.JOB_IN_ERROR);
}
Aggregations