Search in sources :

Example 1 with TaskRestartedException

use of org.ow2.proactive.scheduler.common.exception.TaskRestartedException in project scheduling by ow2-proactive.

In class TestPreemptRestartKillTask, method TestPreemtRestartKillTask.

/**
 * Submits the job described by {@code jobDescriptorPath} and exercises preempt, restart and kill
 * on its tasks t1..t4, then checks the captured task infos, task results and final statuses.
 *
 * Sequence: wait until t1..t4 are running, preempt t1, restart t2 twice, wait until t1 and t2 are
 * running again, kill t3, kill t4, and wait for the job to finish. All values are captured while
 * the scenario runs and asserted together at the end of the method.
 *
 * @param jobDescriptorPath path of the job descriptor handed to {@code schedulerHelper.submitJob}
 * @throws Exception propagated from the scheduler helper / scheduler interface calls
 */
private void TestPreemtRestartKillTask(String jobDescriptorPath) throws Exception {
    log("Submitting job");
    // log the concrete class of the scheduler interface in use
    log(schedulerHelper.getSchedulerInterface().getClass().toString());
    // NOTE(review): presumably the extra nodes let t1..t4 run concurrently - confirm against the job descriptor
    schedulerHelper.addExtraNodes(3);
    JobId id = schedulerHelper.submitJob(jobDescriptorPath);
    log("Wait for event job submitted");
    schedulerHelper.waitForEventJobSubmitted(id);
    // all four tasks must be running before any of them is preempted, restarted or killed
    log("Wait for event t1 running");
    schedulerHelper.waitForEventTaskRunning(id, "t1");
    log("Wait for event t2 running");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Wait for event t3 running");
    schedulerHelper.waitForEventTaskRunning(id, "t3");
    log("Wait for event t4 running");
    schedulerHelper.waitForEventTaskRunning(id, "t4");
    log("Preempt t1");
    // NOTE(review): the third argument is presumably a restart delay - confirm against the scheduler API
    schedulerHelper.getSchedulerInterface().preemptTask(id, "t1", 1);
    log("Wait for event t1 waiting for restart");
    // capture t1's info at the waiting-for-restart event; it is asserted below to have 2 executions left
    TaskInfo ti1 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t1");
    // t1's result is fetched now and asserted below to carry a TaskPreemptedException
    TaskResult tr1 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t1");
    log("Restart t2");
    schedulerHelper.getSchedulerInterface().restartTask(id, "t2", 1);
    log("Wait for event t2 waiting for restart");
    // capture t2's info after the first restart; it is asserted below to have 3 executions left
    TaskInfo ti2 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t2");
    // t2's result is fetched now and asserted below to carry a TaskRestartedException
    TaskResult tr2 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t2");
    log("Wait for event t2 running");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Restart t2 again");
    schedulerHelper.getSchedulerInterface().restartTask(id, "t2", 1);
    log("Wait for event t2 waiting for restart again");
    // capture t2's info after the second restart; it is asserted below to have 2 executions left
    TaskInfo ti3 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t2");
    // t2's result after the second restart is asserted below to carry a TaskRestartedException
    TaskResult tr3 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t2");
    // wait until t1 and t2 are running again before killing t3 and t4
    schedulerHelper.waitForEventTaskRunning(id, "t1");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Kill t3");
    schedulerHelper.getSchedulerInterface().killTask(id, "t3");
    log("Wait for event t3 finished");
    schedulerHelper.waitForEventTaskFinished(id, "t3");
    // t3's result is fetched now and asserted below to carry a TaskAbortedException
    TaskResult tr4 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t3");
    log("Kill t4");
    schedulerHelper.getSchedulerInterface().killTask(id, "t4");
    log("Wait for event job finished");
    // block until the job-finished event; the returned JobInfo is asserted below to be CANCELED
    JobInfo ji4 = schedulerHelper.waitForEventJobFinished(id);
    // final job state (used for the per-task status checks) and t4's result
    JobState j4 = schedulerHelper.getSchedulerInterface().getJobState(id);
    TaskResult tr5 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t4");
    // t1 after preemption
    assertEquals(2, ti1.getNumberOfExecutionLeft());
    assertTrue(tr1.getException() instanceof TaskPreemptedException);
    // t2 after the first restart
    assertEquals(3, ti2.getNumberOfExecutionLeft());
    assertTrue(tr2.getException() instanceof TaskRestartedException);
    // t2 after the second restart
    assertEquals(2, ti3.getNumberOfExecutionLeft());
    assertTrue(tr3.getException() instanceof TaskRestartedException);
    // t3 after being killed
    assertTrue(tr4.getException() instanceof TaskAbortedException);
    // final job status and per-task statuses
    assertEquals(JobStatus.CANCELED, ji4.getStatus());
    // t1 and t2 may end up either ABORTED or NOT_RESTARTED; both outcomes are accepted
    TaskStatus t1Status = getTask(j4, "t1").getStatus();
    assertTrue(t1Status.equals(TaskStatus.ABORTED) || t1Status.equals(TaskStatus.NOT_RESTARTED));
    TaskStatus t2Status = getTask(j4, "t2").getStatus();
    assertTrue(t2Status.equals(TaskStatus.ABORTED) || t2Status.equals(TaskStatus.NOT_RESTARTED));
    assertEquals(TaskStatus.FAULTY, getTask(j4, "t3").getStatus());
    assertEquals(TaskStatus.FAULTY, getTask(j4, "t4").getStatus());
    // t4 after being killed: only checks that the result carries some Exception
    // NOTE(review): this does not pin a specific exception type - confirm whether a stricter check is intended
    assertTrue(tr5.getException() instanceof Exception);
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) TaskPreemptedException(org.ow2.proactive.scheduler.common.exception.TaskPreemptedException) JobInfo(org.ow2.proactive.scheduler.common.job.JobInfo) TaskRestartedException(org.ow2.proactive.scheduler.common.exception.TaskRestartedException) TaskResult(org.ow2.proactive.scheduler.common.task.TaskResult) JobState(org.ow2.proactive.scheduler.common.job.JobState) TaskStatus(org.ow2.proactive.scheduler.common.task.TaskStatus) TaskAbortedException(org.ow2.proactive.scheduler.common.exception.TaskAbortedException) JobId(org.ow2.proactive.scheduler.common.job.JobId) TaskAbortedException(org.ow2.proactive.scheduler.common.exception.TaskAbortedException) TaskPreemptedException(org.ow2.proactive.scheduler.common.exception.TaskPreemptedException) TaskRestartedException(org.ow2.proactive.scheduler.common.exception.TaskRestartedException)

Example 2 with TaskRestartedException

use of org.ow2.proactive.scheduler.common.exception.TaskRestartedException in project scheduling by ow2-proactive.

In class TestPreemptRestartKillTaskSchema33, method TestPreemtRestartKillTask.

/**
 * Submits the job described by {@code jobDescriptorPath} and exercises preempt, restart and kill
 * on its tasks t1..t4, then checks the captured task infos, task results and final statuses.
 *
 * Sequence: wait until t1..t4 are running, preempt t1, restart t2 twice, wait until t1 and t2 are
 * running again, kill t3, kill t4, and wait for the job to finish. All values are captured while
 * the scenario runs and asserted together at the end of the method.
 *
 * @param jobDescriptorPath path of the job descriptor handed to {@code schedulerHelper.submitJob}
 * @throws Exception propagated from the scheduler helper / scheduler interface calls
 */
private void TestPreemtRestartKillTask(String jobDescriptorPath) throws Exception {
    log("Submitting job");
    // NOTE(review): presumably the extra nodes let t1..t4 run concurrently - confirm against the job descriptor
    schedulerHelper.addExtraNodes(3);
    JobId id = schedulerHelper.submitJob(jobDescriptorPath);
    log("Wait for event job submitted");
    schedulerHelper.waitForEventJobSubmitted(id);
    // all four tasks must be running before any of them is preempted, restarted or killed
    log("Wait for event t1 running");
    schedulerHelper.waitForEventTaskRunning(id, "t1");
    log("Wait for event t2 running");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Wait for event t3 running");
    schedulerHelper.waitForEventTaskRunning(id, "t3");
    log("Wait for event t4 running");
    schedulerHelper.waitForEventTaskRunning(id, "t4");
    log("Preempt t1");
    // NOTE(review): the third argument is presumably a restart delay - confirm against the scheduler API
    schedulerHelper.getSchedulerInterface().preemptTask(id, "t1", 1);
    log("Wait for event t1 waiting for restart");
    // capture t1's info at the waiting-for-restart event; it is asserted below to have 2 executions left
    TaskInfo ti1 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t1");
    // t1's result is fetched now and asserted below to carry a TaskPreemptedException
    TaskResult tr1 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t1");
    log("Restart t2");
    schedulerHelper.getSchedulerInterface().restartTask(id, "t2", 1);
    log("Wait for event t2 waiting for restart");
    // capture t2's info after the first restart; it is asserted below to have 3 executions left
    TaskInfo ti2 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t2");
    // t2's result is fetched now and asserted below to carry a TaskRestartedException
    TaskResult tr2 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t2");
    log("Wait for event t2 running");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Restart t2 again");
    schedulerHelper.getSchedulerInterface().restartTask(id, "t2", 1);
    log("Wait for event t2 waiting for restart again");
    // capture t2's info after the second restart; it is asserted below to have 2 executions left
    TaskInfo ti3 = schedulerHelper.waitForEventTaskWaitingForRestart(id, "t2");
    // t2's result after the second restart is asserted below to carry a TaskRestartedException
    TaskResult tr3 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t2");
    // wait until t1 and t2 are running again before killing t3 and t4
    schedulerHelper.waitForEventTaskRunning(id, "t1");
    schedulerHelper.waitForEventTaskRunning(id, "t2");
    log("Kill t3");
    schedulerHelper.getSchedulerInterface().killTask(id, "t3");
    log("Wait for event t3 finished");
    schedulerHelper.waitForEventTaskFinished(id, "t3");
    // t3's result is fetched now and asserted below to carry a TaskAbortedException
    TaskResult tr4 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t3");
    log("Kill t4");
    schedulerHelper.getSchedulerInterface().killTask(id, "t4");
    log("Wait for event job finished");
    // block until the job-finished event; the returned JobInfo is asserted below to be CANCELED
    JobInfo ji4 = schedulerHelper.waitForEventJobFinished(id);
    // final job state (used for the per-task status checks) and t4's result
    JobState j4 = schedulerHelper.getSchedulerInterface().getJobState(id);
    TaskResult tr5 = schedulerHelper.getSchedulerInterface().getTaskResult(id, "t4");
    // t1 after preemption
    assertEquals(2, ti1.getNumberOfExecutionLeft());
    assertTrue(tr1.getException() instanceof TaskPreemptedException);
    // t2 after the first restart
    assertEquals(3, ti2.getNumberOfExecutionLeft());
    assertTrue(tr2.getException() instanceof TaskRestartedException);
    // t2 after the second restart
    assertEquals(2, ti3.getNumberOfExecutionLeft());
    assertTrue(tr3.getException() instanceof TaskRestartedException);
    // t3 after being killed
    assertTrue(tr4.getException() instanceof TaskAbortedException);
    // final job status and per-task statuses
    assertEquals(JobStatus.CANCELED, ji4.getStatus());
    // t1 and t2 may end up either ABORTED or NOT_RESTARTED; both outcomes are accepted
    TaskStatus t1Status = getTask(j4, "t1").getStatus();
    assertTrue(t1Status.equals(TaskStatus.ABORTED) || t1Status.equals(TaskStatus.NOT_RESTARTED));
    TaskStatus t2Status = getTask(j4, "t2").getStatus();
    assertTrue(t2Status.equals(TaskStatus.ABORTED) || t2Status.equals(TaskStatus.NOT_RESTARTED));
    assertEquals(TaskStatus.FAULTY, getTask(j4, "t3").getStatus());
    assertEquals(TaskStatus.FAULTY, getTask(j4, "t4").getStatus());
    // t4 after being killed: only checks that the result carries some Exception
    // NOTE(review): this does not pin a specific exception type - confirm whether a stricter check is intended
    assertTrue(tr5.getException() instanceof Exception);
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) TaskPreemptedException(org.ow2.proactive.scheduler.common.exception.TaskPreemptedException) JobInfo(org.ow2.proactive.scheduler.common.job.JobInfo) TaskRestartedException(org.ow2.proactive.scheduler.common.exception.TaskRestartedException) TaskResult(org.ow2.proactive.scheduler.common.task.TaskResult) JobState(org.ow2.proactive.scheduler.common.job.JobState) TaskStatus(org.ow2.proactive.scheduler.common.task.TaskStatus) TaskAbortedException(org.ow2.proactive.scheduler.common.exception.TaskAbortedException) JobId(org.ow2.proactive.scheduler.common.job.JobId) TaskAbortedException(org.ow2.proactive.scheduler.common.exception.TaskAbortedException) TaskPreemptedException(org.ow2.proactive.scheduler.common.exception.TaskPreemptedException) TaskRestartedException(org.ow2.proactive.scheduler.common.exception.TaskRestartedException)

Example 3 with TaskRestartedException

use of org.ow2.proactive.scheduler.common.exception.TaskRestartedException in project scheduling by ow2-proactive.

In class LiveJobs, method restartTask.

/**
 * Aborts the current execution of task {@code taskName} in job {@code jobId}, recording a
 * {@link TaskRestartedException} as its result, and then decides what happens next based on the
 * task's remaining executions:
 * <ul>
 *   <li>executions left: the task is put back in {@code WAITING_ON_ERROR} to be restarted;</li>
 *   <li>none left and the on-error policy requires cancelling the job: the job is ended as
 *       {@code CANCELED};</li>
 *   <li>none left otherwise: the task is terminated.</li>
 * </ul>
 * The job is locked through {@code lockJob} and always unlocked in the {@code finally} block.
 *
 * @param jobId        id of the job that owns the task
 * @param taskName     name of the task to restart
 * @param restartDelay delay before the restart; multiplied by 1000 before being handed on as the
 *                     wait time (presumably seconds converted to milliseconds)
 * @return the termination data built for the aborted execution, or the value of
 *         {@code emptyResult} when the task is not alive
 * @throws UnknownJobException   if {@code lockJob} returns {@code null} for {@code jobId}
 * @throws UnknownTaskException  declared by this method; not thrown directly here (presumably
 *                               raised by the task lookup)
 * @throws IllegalStateException if the task is alive but has no entry in {@code runningTasksData}
 */
TerminationData restartTask(JobId jobId, String taskName, int restartDelay) throws UnknownJobException, UnknownTaskException {
    JobData lockedJob = lockJob(jobId);
    if (lockedJob == null) {
        throw new UnknownJobException(jobId);
    }
    try {
        InternalTask task = lockedJob.job.getTask(taskName);
        tlogger.info(task.getId(), "restarting task " + task.getId());

        // Nothing to abort when the task is not alive.
        if (!task.getStatus().isTaskAlive()) {
            tlogger.warn(task.getId(), "task isn't alive: " + task.getStatus());
            return emptyResult(task.getId());
        }

        // Take the task out of the running set; an alive task without an entry is an error.
        RunningTaskData runningData = runningTasksData.remove(TaskIdWrapper.wrap(task.getId()));
        if (runningData == null) {
            throw new IllegalStateException("Task " + task.getId() + " is not running.");
        }

        TaskResultImpl restartedResult = taskResultCreator.getTaskResult(dbManager,
                                                                          lockedJob.job,
                                                                          task,
                                                                          new TaskRestartedException("Aborted by user"),
                                                                          new SimpleTaskLogs("", "Aborted by user"));
        TerminationData termination = createAndFillTerminationData(restartedResult,
                                                                   runningData,
                                                                   lockedJob.job,
                                                                   TerminationData.TerminationStatus.ABORTED);

        // The restart consumes one execution before the outcome is chosen.
        task.decreaseNumberOfExecutionLeft();

        if (task.getNumberOfExecutionLeft() > 0) {
            restartTaskOnError(lockedJob,
                               task,
                               TaskStatus.WAITING_ON_ERROR,
                               restartedResult,
                               restartDelay * 1000L,
                               termination);
        } else if (onErrorPolicyInterpreter.requiresCancelJobOnError(task)) {
            endJob(lockedJob,
                   termination,
                   task,
                   restartedResult,
                   "An error occurred in your task and the maximum number of executions has been reached. " +
                   "You also ask to cancel the job in such a situation !",
                   JobStatus.CANCELED);
        } else {
            terminateTask(lockedJob, task, true, restartedResult, termination);
        }
        return termination;
    } finally {
        lockedJob.unlock();
    }
}
Also used : SimpleTaskLogs(org.ow2.proactive.scheduler.common.task.SimpleTaskLogs) TaskIdWrapper(org.ow2.proactive.utils.TaskIdWrapper) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) UnknownJobException(org.ow2.proactive.scheduler.common.exception.UnknownJobException) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskRestartedException(org.ow2.proactive.scheduler.common.exception.TaskRestartedException)

Aggregations

TaskRestartedException (org.ow2.proactive.scheduler.common.exception.TaskRestartedException)3 TaskAbortedException (org.ow2.proactive.scheduler.common.exception.TaskAbortedException)2 TaskPreemptedException (org.ow2.proactive.scheduler.common.exception.TaskPreemptedException)2 JobId (org.ow2.proactive.scheduler.common.job.JobId)2 JobInfo (org.ow2.proactive.scheduler.common.job.JobInfo)2 JobState (org.ow2.proactive.scheduler.common.job.JobState)2 TaskInfo (org.ow2.proactive.scheduler.common.task.TaskInfo)2 TaskResult (org.ow2.proactive.scheduler.common.task.TaskResult)2 TaskStatus (org.ow2.proactive.scheduler.common.task.TaskStatus)2 UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException)1 SimpleTaskLogs (org.ow2.proactive.scheduler.common.task.SimpleTaskLogs)1 TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl)1 InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask)1 TaskIdWrapper (org.ow2.proactive.utils.TaskIdWrapper)1