use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.
the class LiveJobs method restartWaitingTask.
/**
 * Asks the owning job to restart the task identified by {@code taskId}.
 *
 * <p>Returns silently when {@code lockJob} yields no job data for the task's job. When the
 * task's status reports that it is not alive, a warning is logged and the task is not
 * restarted. An {@code UnknownTaskException} raised inside the guarded section is logged
 * and not propagated.
 *
 * @param taskId identifier of the task to restart
 */
void restartWaitingTask(TaskId taskId) {
    final JobData lockedJob = lockJob(taskId.getJobId());
    if (lockedJob == null) {
        return;
    }

    try {
        final InternalTask target = lockedJob.job.getTask(taskId);
        if (target.getStatus().isTaskAlive()) {
            lockedJob.job.reStartTask(target);
        } else {
            tlogger.warn(taskId, "task to be restarted isn't alive " + target.getStatus());
        }
    } catch (UnknownTaskException e) {
        logger.error("Unexpected exception", e);
    } finally {
        // Unlock on every path: success, warning, or logged exception.
        lockedJob.unlock();
    }
}
use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.
the class LiveJobs method suspendTaskOnError.
/**
 * Marks {@code task} as paused on error within its job, then persists and publishes the
 * resulting job state.
 *
 * <p>The job's in-error time is set to the current wall-clock time, while the task's
 * in-error time is computed as its start time plus {@code taskDuration}.
 *
 * @param jobData      holder of the job that owns the task
 * @param task         the task being paused on error
 * @param taskDuration value added to the task's start time to obtain its in-error time
 *                     (presumably the execution duration in milliseconds; confirm with callers)
 */
private void suspendTaskOnError(JobData jobData, InternalTask task, long taskDuration) {
    final InternalJob owningJob = jobData.job;

    // Job-side bookkeeping.
    owningJob.setInErrorTime(System.currentTimeMillis());
    owningJob.setTaskPausedOnError(task);
    setJobStatusToInErrorIfNotPaused(owningJob);
    owningJob.incrementNumberOfInErrorTasksBy(1);

    // Task-side bookkeeping.
    final long taskInErrorTime = task.getStartTime() + taskDuration;
    task.setInErrorTime(taskInErrorTime);

    // Persist first, then update the in-memory views and emit the JOB_IN_ERROR event.
    dbManager.updateJobAndTasksState(owningJob);
    updateTaskPausedOnerrorState(owningJob, task.getId());
    updateJobInSchedulerState(owningJob, SchedulerEvent.JOB_IN_ERROR);
}
use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.
the class LiveJobs method restartTaskOnNodeFailure.
/**
 * Handles a node failure for {@code task}: consumes one "execution on failure" attempt and
 * either schedules the task for restart or ends the job as failed.
 *
 * <p>The task's progress is reset to 0 and its remaining-attempts counter is decreased
 * before the decision is made. If attempts remain, the task is put in
 * {@code WAITING_ON_FAILURE}, listeners are notified with
 * {@code TASK_WAITING_FOR_RESTART}, and the restart is recorded through {@code dbManager}.
 * Otherwise the job's failed-task count is incremented and {@code endJob} is invoked with
 * {@code JobStatus.FAILED}.
 *
 * @param task            the task that was running on the failed node
 * @param jobData         holder of the job that owns the task
 * @param terminationData termination data handed to {@code endJob} when no retry is left
 */
private void restartTaskOnNodeFailure(InternalTask task, JobData jobData, TerminationData terminationData) {
    task.setProgress(0);
    task.decreaseNumberOfExecutionOnFailureLeft();
    tlogger.info(task.getId(), "number of retry on failure left " + task.getNumberOfExecutionOnFailureLeft());

    final InternalJob owningJob = jobData.job;

    // No attempt left: the job is ended as failed.
    if (task.getNumberOfExecutionOnFailureLeft() <= 0) {
        final String errorMsg = "An error has occurred due to a node failure and the maximum amount of retries property has been reached.";
        owningJob.incrementNumberOfFailedTasksBy(1);
        endJob(jobData, terminationData, task, null, errorMsg, JobStatus.FAILED);
        return;
    }

    // Attempts remain: flag the task as waiting, notify listeners, then restart it.
    task.setStatus(TaskStatus.WAITING_ON_FAILURE);
    owningJob.newWaitingTask();
    listener.taskStateUpdated(
        owningJob.getOwner(),
        new NotificationData<TaskInfo>(
            SchedulerEvent.TASK_WAITING_FOR_RESTART,
            new TaskInfoImpl((TaskInfoImpl) task.getTaskInfo())));
    owningJob.reStartTask(task);
    dbManager.taskRestarted(owningJob, task, null);
    tlogger.info(task.getId(), " is waiting for restart");
}
use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.
the class LiveJobs method jobRecovered.
/**
 * Registers a recovered job and re-creates the running-task entries for its tasks.
 *
 * <p>The job is stored in {@code jobs} under its id. Every task whose status is
 * {@code TaskStatus.RUNNING} is logged and added to {@code runningTasksData}, using the
 * launcher taken from the task's executer information.
 *
 * @param job the job that has been recovered
 */
void jobRecovered(InternalJob job) {
    jobs.put(job.getId(), new JobData(job));

    for (InternalTask candidate : job.getITasks()) {
        // Only tasks that were running need a running-task entry.
        if (candidate.getStatus() != TaskStatus.RUNNING) {
            continue;
        }

        final String message = "Recover task " + candidate.getId() + " (" + candidate.getName() + ") of job " + job.getId() + " (" + job.getName() + ")";
        logger.info(message);

        final TaskIdWrapper key = TaskIdWrapper.wrap(candidate.getId());
        final RunningTaskData recovered = new RunningTaskData(
            candidate,
            job.getOwner(),
            job.getCredentials(),
            candidate.getExecuterInformation().getLauncher());
        runningTasksData.put(key, recovered);
    }
}
use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.
the class LiveJobs method restartTaskOnNodeFailure.
/**
 * Entry point for handling a node failure that affected {@code task}.
 *
 * <p>Returns {@code emptyResult(...)} when no job data can be locked for the task's job or
 * when the task's status is not {@code TaskStatus.RUNNING}. Otherwise the task's entry is
 * removed from {@code runningTasksData}, recorded in a new {@code TerminationData} with
 * status {@code NODEFAILED}, and the restart-or-fail decision is delegated to the
 * three-argument overload.
 *
 * @param task the task that was running on the failed node
 * @return the termination data built for the task, or an empty result when nothing was done
 * @throws IllegalStateException if the task's status is {@code RUNNING} but it has no entry
 *                               in {@code runningTasksData}
 */
TerminationData restartTaskOnNodeFailure(InternalTask task) {
    final JobData lockedJob = lockJob(task.getJobId());
    if (lockedJob == null) {
        return emptyResult(task.getId());
    }

    try {
        final TaskId failedTaskId = task.getId();

        if (task.getStatus() != TaskStatus.RUNNING) {
            return emptyResult(failedTaskId);
        }

        final RunningTaskData removedEntry = runningTasksData.remove(TaskIdWrapper.wrap(failedTaskId));
        if (removedEntry == null) {
            throw new IllegalStateException("Task " + task.getId() + " is not running.");
        }

        final TerminationData termination = TerminationData.newTerminationData();
        termination.addTaskData(lockedJob.job, removedEntry, TerminationData.TerminationStatus.NODEFAILED, null);
        restartTaskOnNodeFailure(task, lockedJob, termination);
        return termination;
    } finally {
        // Unlock on every exit path, including the IllegalStateException above.
        lockedJob.unlock();
    }
}
Aggregations