Search in sources :

Example 11 with RecoveredSchedulerState

use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.

the class SchedulerTasksStateRecoverIntegrationTest method testRecoverAfterRestart.

/**
 * Submits a single-task job, marks its task as started, and verifies the state
 * produced by {@link SchedulerStateRecoverHelper#recover}: the job is expected to
 * come back as running/STALLED with "task1" pending. The same expectation is
 * checked again after the task has been started and restarted on the recovered
 * job, and finally the task's executable container must be loadable.
 */
@Test
public void testRecoverAfterRestart() throws Exception {
    final String taskName = "task1";

    // Submit the job and record the task start both in memory and in the database.
    TaskFlowJob definition = new TaskFlowJob();
    definition.addTask(createDefaultTask(taskName));
    InternalJob currentJob = defaultSubmitJobAndLoadInternal(true, definition);
    InternalTask currentTask = currentJob.getTask(taskName);
    currentJob.start();
    startTask(currentJob, currentTask);
    dbManager.jobTaskStarted(currentJob, currentTask, true);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);
    JobStateMatcher expected = job(currentJob.getId(), JobStatus.STALLED)
            .withPending(task(taskName, TaskStatus.PENDING), true)
            .withEligible(taskName);

    // First recovery: only the expected job must be reported as running.
    RecoveredSchedulerState recovered = checkRecoveredState(helper.recover(-1), state().withRunning(expected));

    // Start the task again on the recovered job, then restart it.
    currentJob = recovered.getRunningJobs().get(0);
    currentTask = currentJob.getTask(taskName);
    startTask(currentJob, currentTask);
    dbManager.jobTaskStarted(currentJob, currentTask, true);
    currentJob.newWaitingTask();
    currentJob.reStartTask(currentTask);
    dbManager.taskRestarted(currentJob, currentTask, null);

    // Second recovery must match the very same expectation.
    recovered = checkRecoveredState(helper.recover(-1), state().withRunning(expected));

    // check it is possible to load ExecutableContainer for restored task
    currentJob = recovered.getRunningJobs().get(0);
    InternalTask restoredTask = currentJob.getTask(taskName);
    ExecutableContainer loaded = dbManager.loadExecutableContainer(restoredTask);
    Assert.assertNotNull(loaded);
}
Also used : RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) ExecutableContainer(org.ow2.proactive.scheduler.task.containers.ExecutableContainer) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) Test(org.junit.Test)

Example 12 with RecoveredSchedulerState

use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.

the class TestRestoreWorkflowJobs2 method test.

/**
 * Finishes task "A" of a workflow job with an IF flow action (target "B",
 * else-target "C"), persists the resulting changes, then recovers the scheduler
 * state. The test has no explicit assertion: it passes when recovery completes
 * and at least one running job can be read back.
 */
@Test
public void test() throws Exception {
    InternalJob submitted = defaultSubmitJobAndLoadInternal(true, createJob());
    submitted.start();

    // Start the entry task and record it in the database.
    InternalTask taskA = submitted.getTask("A");
    startTask(submitted, taskA);
    dbManager.jobTaskStarted(submitted, taskA, true);

    TaskResultImpl taskResult = new TaskResultImpl(taskA.getId(), "ok", null, 0);

    // IF action branching to "B", with "C" as the else branch.
    FlowAction ifAction = new FlowAction(FlowActionType.IF);
    ifAction.setDupNumber(1);
    ifAction.setTarget("B");
    ifAction.setTargetElse("C");

    ChangedTasksInfo changes = submitted.terminateTask(false, taskA.getId(), null, ifAction, taskResult);
    dbManager.updateAfterWorkflowTaskFinished(submitted, changes, taskResult);

    // get(0) throws if no running job was recovered, which is what fails the test.
    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);
    InternalJob recoveredJob = helper.recover(-1).getRunningJobs().get(0);
    System.out.println("OK");
}
Also used : RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) ChangedTasksInfo(org.ow2.proactive.scheduler.job.ChangedTasksInfo) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) FlowAction(org.ow2.proactive.scheduler.common.task.flow.FlowAction) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) Test(org.junit.Test)

Example 13 with RecoveredSchedulerState

use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.

the class SchedulingService method recover.

/**
 * Re-registers the jobs contained in a recovered scheduler state and, when at
 * least one removal delay is configured, schedules the removal of finished jobs.
 *
 * <p>Pending and running jobs are passed to {@code jobsRecovered}; task states are
 * then recovered for finished, running and finally pending jobs.
 *
 * <p>Removal delay selection for a finished job:
 * <ul>
 *   <li>job flagged {@code isToBeRemoved()}: if one of the two delays is zero the
 *       other one is used, otherwise the smaller of the two;</li>
 *   <li>otherwise: {@code SCHEDULER_AUTO_REMOVED_JOB_DELAY}.</li>
 * </ul>
 * No removal is scheduled when the selected delay is not strictly positive.
 *
 * @param recoveredState the state holding the pending, running and finished jobs
 */
private void recover(RecoveredSchedulerState recoveredState) {
    List<InternalJob> finishedJobs = recoveredState.getFinishedJobs();
    List<InternalJob> pendingJobs = recoveredState.getPendingJobs();
    List<InternalJob> runningJobs = recoveredState.getRunningJobs();
    jobsRecovered(pendingJobs);
    jobsRecovered(runningJobs);
    recoverTasksState(finishedJobs, false);
    recoverTasksState(runningJobs, true);
    // this log is important for performance tests
    logger.info(SCHEDULING_SERVICE_RECOVER_TASKS_STATE_FINISHED);
    recoverTasksState(pendingJobs, true);
    if (SCHEDULER_REMOVED_JOB_DELAY > 0 || SCHEDULER_AUTO_REMOVED_JOB_DELAY > 0) {
        logger.debug("Removing non-managed jobs");
        for (final InternalJob job : recoveredState.getFinishedJobs()) {
            // re-set job removed delay (if job result has been sent to user)
            final long toWait;
            if (job.isToBeRemoved()) {
                // Explicit zero checks instead of "a * b == 0": the product of two
                // large non-zero delays may overflow and wrap around to zero.
                boolean oneDelayDisabled = SCHEDULER_REMOVED_JOB_DELAY == 0 || SCHEDULER_AUTO_REMOVED_JOB_DELAY == 0;
                toWait = oneDelayDisabled ? SCHEDULER_REMOVED_JOB_DELAY + SCHEDULER_AUTO_REMOVED_JOB_DELAY
                                          : Math.min(SCHEDULER_REMOVED_JOB_DELAY, SCHEDULER_AUTO_REMOVED_JOB_DELAY);
            } else {
                toWait = SCHEDULER_AUTO_REMOVED_JOB_DELAY;
            }
            if (toWait > 0) {
                scheduleJobRemove(job.getId(), System.currentTimeMillis() + toWait);
                // Report the delay that was actually scheduled, not the raw removed-job delay.
                jlogger.debug(job.getId(), "will be removed in " + (toWait / 1000) + "sec");
            }
        }
    }
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob)

Example 14 with RecoveredSchedulerState

use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.

the class SchedulerStateRecoverHelper method recover.

/**
 * Rebuilds the scheduler state from the database.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the not-finished jobs and hand each of them to {@code recoverJob},
 *       together with a fixed-size thread pool and the pending/running
 *       collections to fill;</li>
 *   <li>shut the pool down and wait for its termination, up to the configured
 *       number of minutes; the outcome is passed to
 *       {@code failIfSchedulerStateRecoveryTimeout};</li>
 *   <li>apply job updates, then replay {@code recoverTask} for every task of every
 *       running job; a job whose status is PAUSED is set to STALLED, paused again,
 *       and has its running-task count folded into its pending-task count;</li>
 *   <li>a running job whose replay fails is set to CANCELED, moved to the finished
 *       jobs and persisted;</li>
 *   <li>finished jobs are loaded from the database and appended.</li>
 * </ol>
 *
 * @param loadJobPeriod forwarded to {@code dbManager.loadFinishedJobs}
 * @param rmProxy forwarded to {@code recoverJob}
 * @return the recovered pending, running and finished jobs
 * @throws SchedulerStateNotRecoveredException if the calling thread is interrupted
 *         while waiting for the thread pool to terminate
 */
public RecoveredSchedulerState recover(long loadJobPeriod, RMProxy rmProxy) {
    List<InternalJob> notFinishedJobs = dbManager.loadNotFinishedJobs(true);
    Vector<InternalJob> pendingJobs = new Vector<>();
    Vector<InternalJob> runningJobs = new Vector<>();
    ExecutorService recoverRunningTasksThreadPool = Executors.newFixedThreadPool(PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_NBTHREAD.getValueAsInt());
    try {
        for (InternalJob job : notFinishedJobs) {
            recoverJob(rmProxy, pendingJobs, runningJobs, job, recoverRunningTasksThreadPool);
        }
    } finally {
        // Always initiate shutdown, even when recoverJob throws: otherwise the
        // pool's worker threads would never be released.
        recoverRunningTasksThreadPool.shutdown();
    }
    boolean terminatedWithoutTimeout;
    try {
        terminatedWithoutTimeout = recoverRunningTasksThreadPool.awaitTermination(PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_TIMEOUT.getValueAsInt(), TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        logger.error("Interrupted while waiting for the Scheduler state to be recovered", e);
        // restore the interrupt flag before converting to the unchecked exception
        Thread.currentThread().interrupt();
        throw new SchedulerStateNotRecoveredException(e);
    }
    failIfSchedulerStateRecoveryTimeout(terminatedWithoutTimeout);
    applyJobUpdates(notFinishedJobs);
    Vector<InternalJob> finishedJobs = new Vector<>();
    // explicit iterator: failed jobs are removed from runningJobs while iterating
    for (Iterator<InternalJob> iterator = runningJobs.iterator(); iterator.hasNext(); ) {
        InternalJob job = iterator.next();
        try {
            List<InternalTask> tasksList = copyAndSort(job.getITasks());
            // simulate the running execution to recreate the tree.
            for (InternalTask task : tasksList) {
                job.recoverTask(task.getId());
            }
            if (job.getStatus() == JobStatus.PAUSED) {
                job.setStatus(JobStatus.STALLED);
                job.setPaused();
                // update the count of pending and running task.
                job.setNumberOfPendingTasks(job.getNumberOfPendingTasks() + job.getNumberOfRunningTasks());
                job.setNumberOfRunningTasks(0);
            }
        } catch (Exception e) {
            logger.error("Failed to recover job " + job.getId() + " " + job.getName() + " job might be in a inconsistent state", e);
            jobLogger.error(job.getId(), "Failed to recover job, job might be in an inconsistent state", e);
            // partially cancel job (not tasks) and move it to finished jobs to avoid running it
            iterator.remove();
            job.setStatus(JobStatus.CANCELED);
            finishedJobs.add(job);
            dbManager.updateJobAndTasksState(job);
        }
    }
    finishedJobs.addAll(dbManager.loadFinishedJobs(false, loadJobPeriod));
    logger.info("[Recovering counters] " + " Pending: " + pendingJobs.size() + " Running: " + runningJobs.size() + " Finished: " + finishedJobs.size());
    return new RecoveredSchedulerState(pendingJobs, runningJobs, finishedJobs);
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) ExecutorService(java.util.concurrent.ExecutorService) Vector(java.util.Vector)

Example 15 with RecoveredSchedulerState

use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.

the class SchedulerStateRecoverHelperTest method testRecoverWithKilledJobOnly.

/**
 * Recovers a state containing a single KILLED job whose tasks "Ta", "Tb" and "Tc"
 * are FINISHED, ABORTED and PENDING respectively, and checks that the job is
 * reported as finished with its KILLED status and the same task statuses.
 */
@Test
public void testRecoverWithKilledJobOnly() throws Exception {
    InternalJob killedJob = createJob(JobStatus.KILLED);
    changeTasksState(killedJob, TaskStatus.FINISHED);

    // Per-task statuses applied before recovery and expected afterwards.
    ImmutableMap<String, TaskStatus> expectedStatuses = ImmutableMap.<String, TaskStatus>builder()
            .put("Ta", TaskStatus.FINISHED)
            .put("Tb", TaskStatus.ABORTED)
            .put("Tc", TaskStatus.PENDING)
            .build();
    changeTasksState(killedJob, expectedStatuses);

    RecoveredSchedulerState recovered = new Scenario(killedJob).execute();

    InternalJob firstFinished = recovered.getFinishedJobs().get(0);
    assertThat(firstFinished.getStatus()).isEqualTo(JobStatus.KILLED);
    assertTasksStatus(recovered.getFinishedJobs(), expectedStatuses);
}
Also used : RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskStatus(org.ow2.proactive.scheduler.common.task.TaskStatus) Test(org.junit.Test)

Aggregations

RecoveredSchedulerState (org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState)20 InternalJob (org.ow2.proactive.scheduler.job.InternalJob)20 Test (org.junit.Test)19 SchedulerStateRecoverHelper (org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper)7 TaskStatus (org.ow2.proactive.scheduler.common.task.TaskStatus)6 InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask)6 TaskFlowJob (org.ow2.proactive.scheduler.common.job.TaskFlowJob)5 ExecutorService (java.util.concurrent.ExecutorService)2 TaskDescriptor (org.ow2.proactive.scheduler.common.TaskDescriptor)2 JavaTask (org.ow2.proactive.scheduler.common.task.JavaTask)2 TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl)2 ImmutableList (com.google.common.collect.ImmutableList)1 KeyException (java.security.KeyException)1 Vector (java.util.Vector)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)1 PAActiveObject (org.objectweb.proactive.api.PAActiveObject)1 ActiveObject (org.objectweb.proactive.extensions.annotation.ActiveObject)1 NamedThreadFactory (org.objectweb.proactive.utils.NamedThreadFactory)1 DatabaseManagerException (org.ow2.proactive.db.DatabaseManagerException)1