Search in sources :

Example 76 with Task

use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.

the class LiveJobs method restartWaitingTask.

/**
 * Asks the owning job to restart the given task, provided the task is still alive.
 * <p>
 * The job data is obtained through {@code lockJob}; when that returns {@code null} the
 * method does nothing. A task whose status is not alive is only reported with a warning.
 * An {@link UnknownTaskException} raised while resolving the task is logged and not
 * propagated. {@code unlock()} is always called on the job data once it was obtained.
 *
 * @param taskId identifier of the task to restart
 */
void restartWaitingTask(TaskId taskId) {
    final JobData lockedJob = lockJob(taskId.getJobId());
    if (lockedJob != null) {
        try {
            final InternalTask waitingTask = lockedJob.job.getTask(taskId);
            if (waitingTask.getStatus().isTaskAlive()) {
                lockedJob.job.reStartTask(waitingTask);
            } else {
                tlogger.warn(taskId, "task to be restarted isn't alive " + waitingTask.getStatus());
            }
        } catch (UnknownTaskException e) {
            logger.error("Unexpected exception", e);
        } finally {
            // Release the job data whatever happened above.
            lockedJob.unlock();
        }
    }
}
Also used : UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 77 with Task

use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.

the class LiveJobs method suspendTaskOnError.

/**
 * Marks a task as paused on error and propagates the resulting state.
 * <p>
 * The steps run in a fixed order: the job is updated first (in-error time, paused task,
 * status, in-error task counter), then the task's in-error time is set, and finally the
 * new state is pushed to the database manager and to the scheduler state.
 *
 * @param jobData      holder of the job that owns the task
 * @param task         the task to flag as paused on error
 * @param taskDuration duration added to the task start time to compute its in-error time
 */
private void suspendTaskOnError(JobData jobData, InternalTask task, long taskDuration) {
    final InternalJob erroredJob = jobData.job;

    // Job-side bookkeeping.
    final long now = System.currentTimeMillis();
    erroredJob.setInErrorTime(now);
    erroredJob.setTaskPausedOnError(task);
    setJobStatusToInErrorIfNotPaused(erroredJob);
    erroredJob.incrementNumberOfInErrorTasksBy(1);

    // Task-side bookkeeping: the in-error time is derived from start time plus duration.
    final long taskInErrorTime = task.getStartTime() + taskDuration;
    task.setInErrorTime(taskInErrorTime);

    // Publish the new state.
    dbManager.updateJobAndTasksState(erroredJob);
    updateTaskPausedOnerrorState(erroredJob, task.getId());
    updateJobInSchedulerState(erroredJob, SchedulerEvent.JOB_IN_ERROR);
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob)

Example 78 with Task

use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.

the class LiveJobs method restartTaskOnNodeFailure.

/**
 * Handles a task whose node failed: either schedules it for another attempt or ends the job.
 * <p>
 * The task progress is reset and its "executions on failure left" counter is decreased.
 * If the counter is still positive, the task is set to {@code WAITING_ON_FAILURE}, listeners
 * are notified, and the task is restarted on the job and recorded through the database
 * manager. Otherwise the job's failed-task counter is incremented and the job is ended
 * with status {@code FAILED}.
 *
 * @param task            the task that was running on the failed node
 * @param jobData         holder of the job that owns the task
 * @param terminationData termination data handed to {@code endJob} when no retry is left
 */
private void restartTaskOnNodeFailure(InternalTask task, JobData jobData, TerminationData terminationData) {
    task.setProgress(0);
    task.decreaseNumberOfExecutionOnFailureLeft();
    tlogger.info(task.getId(), "number of retry on failure left " + task.getNumberOfExecutionOnFailureLeft());

    final InternalJob owningJob = jobData.job;

    if (task.getNumberOfExecutionOnFailureLeft() <= 0) {
        // No attempt left: the whole job is terminated as failed.
        final String failureMessage = "An error has occurred due to a node failure and the maximum amount of retries property has been reached.";
        owningJob.incrementNumberOfFailedTasksBy(1);
        endJob(jobData, terminationData, task, null, failureMessage, JobStatus.FAILED);
        return;
    }

    // At least one attempt left: put the task back in line for a restart.
    task.setStatus(TaskStatus.WAITING_ON_FAILURE);
    owningJob.newWaitingTask();
    listener.taskStateUpdated(owningJob.getOwner(),
                              new NotificationData<TaskInfo>(SchedulerEvent.TASK_WAITING_FOR_RESTART,
                                                             new TaskInfoImpl((TaskInfoImpl) task.getTaskInfo())));
    owningJob.reStartTask(task);
    dbManager.taskRestarted(owningJob, task, null);
    tlogger.info(task.getId(), " is waiting for restart");
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskInfoImpl(org.ow2.proactive.scheduler.task.TaskInfoImpl)

Example 79 with Task

use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.

the class LiveJobs method jobRecovered.

/**
 * Registers a recovered job and the running-task data of each of its running tasks.
 * <p>
 * The job is stored in {@code jobs} under its id. For every task of the job whose status
 * is {@code RUNNING}, an info message is logged and a {@code RunningTaskData} entry is
 * added to {@code runningTasksData}, built from the task, the job owner, the job
 * credentials and the launcher found in the task's executer information.
 *
 * @param job the recovered job
 */
void jobRecovered(InternalJob job) {
    jobs.put(job.getId(), new JobData(job));

    for (InternalTask candidate : job.getITasks()) {
        if (candidate.getStatus() != TaskStatus.RUNNING) {
            // Only running tasks need a running-task entry.
            continue;
        }

        final String recoveryMessage = "Recover task " + candidate.getId() + " (" + candidate.getName() +
                                       ") of job " + job.getId() + " (" + job.getName() + ")";
        logger.info(recoveryMessage);

        final RunningTaskData recoveredData = new RunningTaskData(candidate,
                                                                  job.getOwner(),
                                                                  job.getCredentials(),
                                                                  candidate.getExecuterInformation().getLauncher());
        runningTasksData.put(TaskIdWrapper.wrap(candidate.getId()), recoveredData);
    }
}
Also used : InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 80 with Task

use of org.ow2.proactive.scheduler.common.task.Task in project scheduling by ow2-proactive.

the class LiveJobs method restartTaskOnNodeFailure.

/**
 * Entry point for handling a node failure affecting the given task.
 * <p>
 * The job data is obtained through {@code lockJob}; when it is {@code null}, or when the
 * task status is not {@code RUNNING}, the result of {@code emptyResult} is returned.
 * Otherwise the task's entry is removed from {@code runningTasksData}, recorded in a new
 * {@link TerminationData} with status {@code NODEFAILED}, and the three-argument
 * {@code restartTaskOnNodeFailure} overload is invoked. {@code unlock()} is always called
 * on the job data once it was obtained.
 *
 * @param task the task that was running on the failed node
 * @return the termination data describing the failed execution, or an empty result
 * @throws IllegalStateException if the task is {@code RUNNING} but has no entry in
 *                               {@code runningTasksData}
 */
TerminationData restartTaskOnNodeFailure(InternalTask task) {
    final JobData lockedJob = lockJob(task.getJobId());
    if (lockedJob == null) {
        return emptyResult(task.getId());
    }

    try {
        final TaskId failedTaskId = task.getId();
        final boolean stillRunning = task.getStatus() == TaskStatus.RUNNING;
        if (!stillRunning) {
            return emptyResult(failedTaskId);
        }

        final RunningTaskData runningData = runningTasksData.remove(TaskIdWrapper.wrap(failedTaskId));
        if (runningData == null) {
            throw new IllegalStateException("Task " + task.getId() + " is not running.");
        }

        final TerminationData termination = TerminationData.newTerminationData();
        termination.addTaskData(lockedJob.job, runningData, TerminationData.TerminationStatus.NODEFAILED, null);
        restartTaskOnNodeFailure(task, lockedJob, termination);
        return termination;
    } finally {
        // Release the job data on every exit path, including the exception above.
        lockedJob.unlock();
    }
}
Also used : TaskId(org.ow2.proactive.scheduler.common.task.TaskId)

Aggregations

Test (org.junit.Test): 152, InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask): 112, TaskFlowJob (org.ow2.proactive.scheduler.common.job.TaskFlowJob): 100, JobId (org.ow2.proactive.scheduler.common.job.JobId): 73, InternalJob (org.ow2.proactive.scheduler.job.InternalJob): 69, JavaTask (org.ow2.proactive.scheduler.common.task.JavaTask): 68, TaskResult (org.ow2.proactive.scheduler.common.task.TaskResult): 60, ArrayList (java.util.ArrayList): 49, TaskId (org.ow2.proactive.scheduler.common.task.TaskId): 48, TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl): 43, Task (org.ow2.proactive.scheduler.common.task.Task): 41, JobIdImpl (org.ow2.proactive.scheduler.job.JobIdImpl): 41, File (java.io.File): 37, Scheduler (org.ow2.proactive.scheduler.common.Scheduler): 34, HashMap (java.util.HashMap): 33, UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException): 33, SimpleScript (org.ow2.proactive.scripting.SimpleScript): 33, InternalScriptTask (org.ow2.proactive.scheduler.task.internal.InternalScriptTask): 27, TaskScript (org.ow2.proactive.scripting.TaskScript): 27, UnknownTaskException (org.ow2.proactive.scheduler.common.exception.UnknownTaskException): 26