use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method restartTaskOnNodeFailure.
private void restartTaskOnNodeFailure(InternalTask task, JobData jobData, TerminationData terminationData) {
final String errorMsg = "An error has occurred due to a node failure and the maximum amount of retries property has been reached.";
task.setProgress(0);
task.decreaseNumberOfExecutionOnFailureLeft();
tlogger.info(task.getId(), "number of retry on failure left " + task.getNumberOfExecutionOnFailureLeft());
InternalJob job = jobData.job;
if (task.getNumberOfExecutionOnFailureLeft() > 0) {
task.setStatus(TaskStatus.WAITING_ON_FAILURE);
job.newWaitingTask();
listener.taskStateUpdated(job.getOwner(), new NotificationData<TaskInfo>(SchedulerEvent.TASK_WAITING_FOR_RESTART, new TaskInfoImpl((TaskInfoImpl) task.getTaskInfo())));
job.reStartTask(task);
dbManager.taskRestarted(job, task, null);
tlogger.info(task.getId(), " is waiting for restart");
} else {
job.incrementNumberOfFailedTasksBy(1);
endJob(jobData, terminationData, task, null, errorMsg, JobStatus.FAILED);
}
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method jobRecovered.
void jobRecovered(InternalJob job) {
jobs.put(job.getId(), new JobData(job));
for (InternalTask task : job.getITasks()) {
if (task.getStatus() == TaskStatus.RUNNING) {
logger.info("Recover task " + task.getId() + " (" + task.getName() + ") of job " + job.getId() + " (" + job.getName() + ")");
runningTasksData.put(TaskIdWrapper.wrap(task.getId()), new RunningTaskData(task, job.getOwner(), job.getCredentials(), task.getExecuterInformation().getLauncher()));
}
}
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method restartTaskOnNodeFailure.
TerminationData restartTaskOnNodeFailure(InternalTask task) {
JobData jobData = lockJob(task.getJobId());
if (jobData == null) {
return emptyResult(task.getId());
}
try {
TaskId taskId = task.getId();
if (task.getStatus() != TaskStatus.RUNNING) {
return emptyResult(taskId);
}
RunningTaskData taskData = runningTasksData.remove(TaskIdWrapper.wrap(taskId));
if (taskData == null) {
throw new IllegalStateException("Task " + task.getId() + " is not running.");
}
TerminationData result = TerminationData.newTerminationData();
result.addTaskData(jobData.job, taskData, TerminationData.TerminationStatus.NODEFAILED, null);
restartTaskOnNodeFailure(task, jobData, result);
return result;
} finally {
jobData.unlock();
}
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class LiveJobs method updateTasksInSchedulerState.
private void updateTasksInSchedulerState(InternalJob job, Set<TaskId> tasksToUpdate) {
for (TaskId tid : tasksToUpdate) {
try {
InternalTask t = job.getTask(tid);
TaskInfo ti = new TaskInfoImpl((TaskInfoImpl) t.getTaskInfo());
listener.taskStateUpdated(job.getOwner(), new NotificationData<>(SchedulerEvent.TASK_RUNNING_TO_FINISHED, ti));
} catch (UnknownTaskException e) {
logger.error(e);
}
}
}
use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.
the class SchedulingMethodImpl method selectAndStartTasks.
private int selectAndStartTasks(Policy currentPolicy, Map<JobId, JobDescriptor> jobMap, Set<String> freeResources, LinkedList<EligibleTaskDescriptor> fullListOfTaskRetrievedFromPolicy) {
int numberOfTaskStarted = 0;
VariableBatchSizeIterator progressiveIterator = new VariableBatchSizeIterator(fullListOfTaskRetrievedFromPolicy);
while (progressiveIterator.hasMoreElements() && !freeResources.isEmpty()) {
LinkedList<EligibleTaskDescriptor> taskRetrievedFromPolicy = new LinkedList<>(progressiveIterator.getNextElements(freeResources.size()));
if (logger.isDebugEnabled()) {
loggingEligibleTasksDetails(fullListOfTaskRetrievedFromPolicy, taskRetrievedFromPolicy);
}
updateVariablesForTasksToSchedule(taskRetrievedFromPolicy);
for (EligibleTaskDescriptor etd : taskRetrievedFromPolicy) {
// load and Initialize the executable container
loadAndInit(((EligibleTaskDescriptorImpl) etd).getInternal());
}
while (!taskRetrievedFromPolicy.isEmpty()) {
if (freeResources.isEmpty()) {
break;
}
// get the next compatible tasks from the whole returned policy tasks
LinkedList<EligibleTaskDescriptor> tasksToSchedule = new LinkedList<>();
int neededResourcesNumber = 0;
while (!taskRetrievedFromPolicy.isEmpty() && neededResourcesNumber == 0) {
// the loop will search for next compatible task until it find something
neededResourcesNumber = getNextcompatibleTasks(jobMap, taskRetrievedFromPolicy, freeResources.size(), tasksToSchedule);
}
if (logger.isDebugEnabled()) {
logger.debug("tasksToSchedule : " + tasksToSchedule);
}
logger.debug("required number of nodes : " + neededResourcesNumber);
if (neededResourcesNumber == 0 || tasksToSchedule.isEmpty()) {
break;
}
NodeSet nodeSet = getRMNodes(jobMap, neededResourcesNumber, tasksToSchedule, freeResources);
if (nodeSet != null) {
freeResources.removeAll(nodeSet.getAllNodesUrls());
}
// start selected tasks
Node node = null;
InternalJob currentJob = null;
try {
while (nodeSet != null && !nodeSet.isEmpty()) {
EligibleTaskDescriptor taskDescriptor = tasksToSchedule.removeFirst();
currentJob = ((JobDescriptorImpl) jobMap.get(taskDescriptor.getJobId())).getInternal();
InternalTask internalTask = ((EligibleTaskDescriptorImpl) taskDescriptor).getInternal();
if (currentPolicy.isTaskExecutable(nodeSet, taskDescriptor)) {
// create launcher and try to start the task
node = nodeSet.get(0);
if (createExecution(nodeSet, node, currentJob, internalTask, taskDescriptor)) {
numberOfTaskStarted++;
}
}
// if every task that should be launched have been removed
if (tasksToSchedule.isEmpty()) {
// get back unused nodes to the RManager
if (!nodeSet.isEmpty()) {
releaseNodes(currentJob, nodeSet);
freeResources.addAll(nodeSet.getAllNodesUrls());
}
// and leave the loop
break;
}
}
} catch (ActiveObjectCreationException e1) {
// Something goes wrong with the active object creation (createLauncher)
logger.warn("An exception occured while creating the task launcher.", e1);
// so try to get back every remaining nodes to the resource manager
try {
releaseNodes(currentJob, nodeSet);
freeResources.addAll(nodeSet.getAllNodesUrls());
} catch (Exception e2) {
logger.info("Unable to get back the nodeSet to the RM", e2);
}
if (--activeObjectCreationRetryTimeNumber == 0) {
break;
}
} catch (Exception e1) {
// if we are here, it is that something append while launching the current task.
logger.warn("An exception occured while starting task.", e1);
// so try to get back every remaining nodes to the resource manager
try {
releaseNodes(currentJob, nodeSet);
freeResources.addAll(nodeSet.getAllNodesUrls());
} catch (Exception e2) {
logger.info("Unable to get back the nodeSet to the RM", e2);
}
}
}
if (freeResources.isEmpty()) {
break;
}
if (activeObjectCreationRetryTimeNumber == 0) {
break;
}
}
return numberOfTaskStarted;
}
Aggregations