Search in sources:

Example 6 with UnknownTaskException

use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.

The method preemptTask of the class LiveJobs.

/**
 * Preempts a task of a live job and schedules it to be restarted.
 *
 * <p>The job is locked through {@code lockJob} and is always unlocked before
 * returning. If the task is not alive, nothing is preempted and an empty
 * result is returned.
 *
 * @param jobId        identifier of the job that owns the task
 * @param taskName     name of the task to preempt
 * @param restartDelay delay before the restart; it is multiplied by 1000 before
 *                     being passed on (presumably seconds converted to
 *                     milliseconds - confirm against the caller)
 * @return the termination data built for the preempted task, or the value of
 *         {@code emptyResult} when the task is not alive
 * @throws UnknownJobException   if the job cannot be locked ({@code lockJob} returned {@code null})
 * @throws UnknownTaskException  declared by this method; presumably raised by the
 *                               task lookup when the name is unknown - confirm
 * @throws IllegalStateException if the task is alive but has no running-task data registered
 */
TerminationData preemptTask(JobId jobId, String taskName, int restartDelay) throws UnknownJobException, UnknownTaskException {
    final JobData lockedJob = lockJob(jobId);
    if (lockedJob == null) {
        throw new UnknownJobException(jobId);
    }
    try {
        final InternalTask target = lockedJob.job.getTask(taskName);
        tlogger.info(target.getId(), "preempting task " + target.getId());

        final boolean alive = target.getStatus().isTaskAlive();
        if (!alive) {
            tlogger.info(target.getId(), "task isn't alive: " + target.getStatus());
            return emptyResult(target.getId());
        }

        // The running entry is removed first; its absence means the task is not actually running.
        final RunningTaskData runningEntry = runningTasksData.remove(TaskIdWrapper.wrap(target.getId()));
        if (runningEntry == null) {
            throw new IllegalStateException("Task " + target.getId() + " is not running.");
        }

        // Build a result that records the preemption as the cause of termination.
        final TaskPreemptedException preemptionCause = new TaskPreemptedException("Preempted by admin");
        final SimpleTaskLogs preemptionLogs = new SimpleTaskLogs("", "Preempted by admin");
        final TaskResultImpl preemptedResult = taskResultCreator.getTaskResult(dbManager, lockedJob.job, target, preemptionCause, preemptionLogs);

        final TerminationData termination = createAndFillTerminationData(preemptedResult, runningEntry, lockedJob.job, TerminationData.TerminationStatus.ABORTED);
        restartTaskOnError(lockedJob, target, TaskStatus.PENDING, preemptedResult, restartDelay * 1000L, termination);
        return termination;
    } finally {
        lockedJob.unlock();
    }
}
Also used : TaskPreemptedException(org.ow2.proactive.scheduler.common.exception.TaskPreemptedException) SimpleTaskLogs(org.ow2.proactive.scheduler.common.task.SimpleTaskLogs) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) UnknownJobException(org.ow2.proactive.scheduler.common.exception.UnknownJobException) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 7 with UnknownTaskException

use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.

The method taskTerminatedWithResult of the class LiveJobs.

/**
 * Handles the termination of a task for which a result has been received.
 *
 * <p>The owning job is locked for the duration of the call and always unlocked
 * before returning. When the result carries an exception, the number of
 * executions left is decreased and the on-error policy of the task decides
 * whether the job is canceled, the task is restarted, the task (and possibly
 * the job) is suspended, or the task is simply terminated as faulty.
 *
 * @param taskId identifier of the task that terminated
 * @param result result produced by the task execution
 * @return the termination data created for the task, or the value of
 *         {@code emptyResult(taskId)} when the job cannot be locked, the task
 *         is unknown to the job, or the task is no longer in RUNNING status
 */
public TerminationData taskTerminatedWithResult(TaskId taskId, TaskResultImpl result) {
    JobData jobData = lockJob(taskId.getJobId());
    if (jobData == null) {
        // The job could not be locked: nothing to terminate.
        return emptyResult(taskId);
    }
    try {
        InternalTask task;
        try {
            task = jobData.job.getTask(taskId);
        } catch (UnknownTaskException e) {
            logger.error("Unexpected exception", e);
            return emptyResult(taskId);
        }
        if (task.getStatus() != TaskStatus.RUNNING) {
            tlogger.info(taskId, "task isn't running anymore");
            return emptyResult(taskId);
        }
        TaskIdWrapper taskIdWrapper = TaskIdWrapper.wrap(taskId);
        RunningTaskData taskData = runningTasksData.remove(taskIdWrapper);
        if (taskData == null) {
            // No running entry was registered for this task (the log message attributes this
            // to a scheduler recovery): rebuild the entry from the task and job information.
            tlogger.info(taskId, "Task " + taskId + " terminates after a recovery of the scheduler");
            taskData = new RunningTaskData(task, jobData.job.getOwner(), jobData.job.getCredentials(), task.getExecuterInformation().getLauncher());
        }
        TerminationData terminationData = createAndFillTerminationData(result, taskData, jobData.job, TerminationData.TerminationStatus.NORMAL);
        boolean errorOccurred = result.hadException();
        tlogger.info(taskId, "finished with" + (errorOccurred ? "" : "out") + " errors");
        if (errorOccurred) {
            tlogger.error(taskId, "task has terminated with an error", result.getException());
            // The counter is decreased BEFORE the policy is evaluated and before it is read below.
            task.decreaseNumberOfExecutionLeft();
            boolean requiresPauseJobOnError = onErrorPolicyInterpreter.requiresPauseJobOnError(task);
            int numberOfExecutionLeft = task.getNumberOfExecutionLeft();
            if (numberOfExecutionLeft <= 0 && onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
                // Case 1: no execution left and the policy asks to cancel the job.
                tlogger.info(taskId, "no retry left and task is tagged with cancel job on error");
                jobData.job.increaseNumberOfFaultyTasks(taskId);
                endJob(jobData, terminationData, task, result, "An error occurred in your task and the maximum number of executions has been reached. " + "You also ask to cancel the job in such a situation!", JobStatus.CANCELED);
                jlogger.info(taskId.getJobId(), "job has been canceled");
                return terminationData;
            } else if (numberOfExecutionLeft > 0) {
                // Case 2: executions remain, so a restart is scheduled.
                // NOTE(review): both branches below schedule the same restart; the only
                // difference is that the else-branch also increases the faulty task count.
                tlogger.info(taskId, "number of execution left is " + numberOfExecutionLeft);
                if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task) || requiresPauseJobOnError) {
                    long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
                    restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
                    tlogger.info(taskId, "new restart is scheduled");
                    return terminationData;
                } else {
                    jobData.job.increaseNumberOfFaultyTasks(taskId);
                    long waitTime = jobData.job.getNextWaitingTime(task.getMaxNumberOfExecution() - numberOfExecutionLeft);
                    restartTaskOnError(jobData, task, TaskStatus.WAITING_ON_ERROR, result, waitTime, terminationData);
                    tlogger.info(taskId, "new restart is scheduled");
                    return terminationData;
                }
            } else if (numberOfExecutionLeft <= 0) {
                // Case 3: no execution left and the job was not canceled above.
                // NOTE(review): this condition is always true here, since the previous
                // branch already handled numberOfExecutionLeft > 0.
                if (!onErrorPolicyInterpreter.requiresPauseTaskOnError(task) && !onErrorPolicyInterpreter.requiresPauseJobOnError(task) && !onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
                    // No pause/cancel policy applies: count the task as faulty and fall
                    // through to terminateTask at the end of the method.
                    jobData.job.increaseNumberOfFaultyTasks(taskId);
                    // remove the parent tasks results if task fails and job is canceled
                    task.removeParentTasksResults();
                } else if (onErrorPolicyInterpreter.requiresPauseTaskOnError(task)) {
                    // Only the task is suspended; the task is not terminated.
                    suspendTaskOnError(jobData, task, result.getTaskDuration());
                    tlogger.info(taskId, "Task always contains errors after automatic restart, so it stays in In_Error state");
                    return terminationData;
                } else if (requiresPauseJobOnError) {
                    // The task is suspended and the whole job is paused.
                    suspendTaskOnError(jobData, task, result.getTaskDuration());
                    pauseJob(task.getJobId());
                    logger.info("Task always contains errors after automatic restart, so Job is always paused on error");
                    return terminationData;
                }
                // NOTE(review): this looks unreachable with requiresPauseJobOnError == true if
                // the policy interpreter returns stable answers for the same task, because the
                // chain above returns in that case - confirm before removing.
                if (requiresPauseJobOnError) {
                    pauseJob(task.getJobId());
                }
            }
        } else {
            // remove the parent tasks results if task finished with no error
            task.removeParentTasksResults();
        }
        // Reached on success, or on error when no retry is left and no pause/cancel policy applied.
        terminateTask(jobData, task, errorOccurred, result, terminationData);
        return terminationData;
    } finally {
        jobData.unlock();
    }
}
Also used : UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) TaskIdWrapper(org.ow2.proactive.utils.TaskIdWrapper) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 8 with UnknownTaskException

use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.

The method restartWaitingTask of the class LiveJobs.

/**
 * Restarts a task of a live job, provided the task is still alive.
 *
 * <p>Does nothing when the job cannot be locked. When the task is not alive a
 * warning is logged instead of restarting it. An {@link UnknownTaskException}
 * is logged and not propagated. The job is always unlocked before returning.
 *
 * @param taskId identifier of the task to restart
 */
void restartWaitingTask(TaskId taskId) {
    final JobData lockedJob = lockJob(taskId.getJobId());
    if (lockedJob != null) {
        try {
            final InternalTask waitingTask = lockedJob.job.getTask(taskId);
            if (waitingTask.getStatus().isTaskAlive()) {
                lockedJob.job.reStartTask(waitingTask);
            } else {
                tlogger.warn(taskId, "task to be restarted isn't alive " + waitingTask.getStatus());
            }
        } catch (UnknownTaskException e) {
            logger.error("Unexpected exception", e);
        } finally {
            lockedJob.unlock();
        }
    }
}
Also used : UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 9 with UnknownTaskException

use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.

The method restartAllInErrorTasks of the class LiveJobs.

/**
 * Requests a restart of every task of the given job through
 * {@code restartInErrorTask}, then updates the job status, persists the job
 * and its tasks, and publishes a {@code JOB_RESTARTED_FROM_ERROR} event.
 *
 * <p>A task that cannot be found is logged (global, job and task loggers) and
 * skipped; the remaining tasks are still processed. The job is always unlocked
 * before returning.
 *
 * @param jobId identifier of the job whose tasks must be restarted
 * @return {@code Boolean.FALSE} when the job cannot be locked,
 *         {@code Boolean.TRUE} otherwise
 */
public Boolean restartAllInErrorTasks(JobId jobId) {
    JobData jobData = lockJob(jobId);
    if (jobData == null) {
        return Boolean.FALSE;
    }
    try {
        InternalJob job = jobData.job;
        for (TaskState taskState : job.getTasks()) {
            try {
                restartInErrorTask(jobId, taskState.getName());
            } catch (UnknownTaskException e) {
                // Give the log entries a meaningful message instead of an empty one,
                // so the failing task and job can be identified without the stack trace.
                String message = "Unable to restart in-error task " + taskState.getName() + " of job " + jobId;
                logger.error(message, e);
                jlogger.error(jobId, message, e);
                tlogger.error(taskState.getId(), message, e);
            }
        }
        setJobStatusToInErrorIfNotPaused(job);
        dbManager.updateJobAndTasksState(job);
        updateJobInSchedulerState(job, SchedulerEvent.JOB_RESTARTED_FROM_ERROR);
        return Boolean.TRUE;
    } finally {
        jobData.unlock();
    }
}
Also used : UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskState(org.ow2.proactive.scheduler.common.task.TaskState)

Example 10 with UnknownTaskException

use of org.ow2.proactive.scheduler.common.exception.UnknownTaskException in project scheduling by ow2-proactive.

The method updateTasksInSchedulerState of the class LiveJobs.

/**
 * Notifies the listener that each of the given tasks went from running to
 * finished.
 *
 * <p>For every task id, a copy of the task's {@code TaskInfoImpl} is sent in a
 * {@code TASK_RUNNING_TO_FINISHED} notification addressed to the job owner.
 * A task id unknown to the job is logged and skipped.
 *
 * @param job           job owning the tasks
 * @param tasksToUpdate identifiers of the tasks to publish
 */
private void updateTasksInSchedulerState(InternalJob job, Set<TaskId> tasksToUpdate) {
    for (TaskId tid : tasksToUpdate) {
        try {
            InternalTask t = job.getTask(tid);
            // A copy is published so the notification does not share the task's own info object.
            TaskInfo ti = new TaskInfoImpl((TaskInfoImpl) t.getTaskInfo());
            listener.taskStateUpdated(job.getOwner(), new NotificationData<>(SchedulerEvent.TASK_RUNNING_TO_FINISHED, ti));
        } catch (UnknownTaskException e) {
            // Pass the exception as the throwable argument (not as the message) so the
            // stack trace is logged, matching the other UnknownTaskException handlers.
            logger.error("Unexpected exception", e);
        }
    }
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) TaskId(org.ow2.proactive.scheduler.common.task.TaskId) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskInfoImpl(org.ow2.proactive.scheduler.task.TaskInfoImpl)

Aggregations

UnknownTaskException (org.ow2.proactive.scheduler.common.exception.UnknownTaskException): 28, Test (org.junit.Test): 27, JobId (org.ow2.proactive.scheduler.common.job.JobId): 25, InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask): 25, UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException): 21, TaskId (org.ow2.proactive.scheduler.common.task.TaskId): 21, TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl): 21, TaskResult (org.ow2.proactive.scheduler.common.task.TaskResult): 18, InternalJob (org.ow2.proactive.scheduler.job.InternalJob): 18, ArrayList (java.util.ArrayList): 16, InternalScriptTask (org.ow2.proactive.scheduler.task.internal.InternalScriptTask): 16, InternalTaskFlowJob (org.ow2.proactive.scheduler.job.InternalTaskFlowJob): 14, JobIdImpl (org.ow2.proactive.scheduler.job.JobIdImpl): 14, ExecuterInformation (org.ow2.proactive.scheduler.task.internal.ExecuterInformation): 10, HashMap (java.util.HashMap): 8, TaskState (org.ow2.proactive.scheduler.common.task.TaskState): 8, SchedulerDBManager (org.ow2.proactive.scheduler.core.db.SchedulerDBManager): 8, TaskInfoImpl (org.ow2.proactive.scheduler.task.TaskInfoImpl): 8, List (java.util.List): 7, NotConnectedException (org.ow2.proactive.scheduler.common.exception.NotConnectedException): 7