use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.
the class SchedulerTasksStateRecoverIntegrationTest method testRecoverAfterRestart.
/**
 * Submits a single-task job, starts the task, and recovers the scheduler state twice:
 * once right after the task was started, and once after the task was started again
 * and then restarted. Both recoveries are expected to yield one running job in
 * STALLED status whose "task1" is pending and eligible. Finally verifies that the
 * executable container of the recovered task can still be loaded.
 */
@Test
public void testRecoverAfterRestart() throws Exception {
    final String taskName = "task1";

    TaskFlowJob definition = new TaskFlowJob();
    definition.addTask(createDefaultTask(taskName));

    InternalJob currentJob = defaultSubmitJobAndLoadInternal(true, definition);
    InternalTask currentTask = currentJob.getTask(taskName);
    currentJob.start();
    startTask(currentJob, currentTask);
    dbManager.jobTaskStarted(currentJob, currentTask, true);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    // The same expectation is checked after each of the two recoveries below.
    JobStateMatcher stalledJobWithPendingTask = job(currentJob.getId(), JobStatus.STALLED)
            .withPending(task(taskName, TaskStatus.PENDING), true)
            .withEligible(taskName);

    // First recovery: the task had been started before the simulated restart.
    RecoveredSchedulerState recovered = checkRecoveredState(helper.recover(-1),
                                                            state().withRunning(stalledJobWithPendingTask));

    // Continue with the recovered instances: start the task again, then restart it.
    currentJob = recovered.getRunningJobs().get(0);
    currentTask = currentJob.getTask(taskName);
    startTask(currentJob, currentTask);
    dbManager.jobTaskStarted(currentJob, currentTask, true);
    currentJob.newWaitingTask();
    currentJob.reStartTask(currentTask);
    dbManager.taskRestarted(currentJob, currentTask, null);

    // Second recovery: same expected state as the first one.
    recovered = checkRecoveredState(helper.recover(-1), state().withRunning(stalledJobWithPendingTask));

    // The executable container must still be loadable for the restored task.
    currentJob = recovered.getRunningJobs().get(0);
    ExecutableContainer restoredContainer = dbManager.loadExecutableContainer(currentJob.getTask(taskName));
    Assert.assertNotNull(restoredContainer);
}
use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.
the class TestRestoreWorkflowJobs2 method test.
/**
 * Terminates task "A" of a workflow job with an IF flow action (target "B",
 * else-target "C"), persists the resulting changes, then recovers the scheduler
 * state and accesses the first running job. The test has no explicit assertion:
 * it passes as long as recovery and the lookup complete without throwing.
 */
@Test
public void test() throws Exception {
    InternalJob workflowJob = defaultSubmitJobAndLoadInternal(true, createJob());
    workflowJob.start();

    InternalTask taskA = workflowJob.getTask("A");
    startTask(workflowJob, taskA);
    dbManager.jobTaskStarted(workflowJob, taskA, true);

    TaskResultImpl taskAResult = new TaskResultImpl(taskA.getId(), "ok", null, 0);

    FlowAction ifAction = new FlowAction(FlowActionType.IF);
    ifAction.setDupNumber(1);
    ifAction.setTarget("B");
    ifAction.setTargetElse("C");

    ChangedTasksInfo changes = workflowJob.terminateTask(false, taskA.getId(), null, ifAction, taskAResult);
    dbManager.updateAfterWorkflowTaskFinished(workflowJob, changes, taskAResult);

    RecoveredSchedulerState recovered = new SchedulerStateRecoverHelper(dbManager).recover(-1);

    // get(0) throws if no running job was recovered, which is the only check performed here.
    workflowJob = recovered.getRunningJobs().get(0);
    System.out.println("OK");
}
use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.
the class SchedulingService method recover.
/**
 * Re-registers the jobs of a recovered scheduler state with this service.
 * <p>
 * Pending and running jobs are passed to {@code jobsRecovered}; task states are then
 * recovered for finished, running and finally pending jobs. When at least one of the
 * removal delays is positive, a removal is scheduled for each finished job.
 *
 * @param recoveredState the state whose finished, pending and running jobs are recovered
 */
private void recover(RecoveredSchedulerState recoveredState) {
    List<InternalJob> finishedJobs = recoveredState.getFinishedJobs();
    List<InternalJob> pendingJobs = recoveredState.getPendingJobs();
    List<InternalJob> runningJobs = recoveredState.getRunningJobs();

    jobsRecovered(pendingJobs);
    jobsRecovered(runningJobs);

    recoverTasksState(finishedJobs, false);
    recoverTasksState(runningJobs, true);
    // this log is important for performance tests
    logger.info(SCHEDULING_SERVICE_RECOVER_TASKS_STATE_FINISHED);
    recoverTasksState(pendingJobs, true);

    if (SCHEDULER_REMOVED_JOB_DELAY > 0 || SCHEDULER_AUTO_REMOVED_JOB_DELAY > 0) {
        logger.debug("Removing non-managed jobs");
        for (InternalJob job : recoveredState.getFinishedJobs()) {
            // re-set job removed delay (if job result has been sent to user)
            final long toWait;
            if (job.isToBeRemoved()) {
                if (SCHEDULER_REMOVED_JOB_DELAY == 0 || SCHEDULER_AUTO_REMOVED_JOB_DELAY == 0) {
                    // One delay is disabled: the sum is simply the other delay.
                    toWait = SCHEDULER_REMOVED_JOB_DELAY + SCHEDULER_AUTO_REMOVED_JOB_DELAY;
                } else {
                    // Both delays are set: the shorter one wins.
                    toWait = Math.min(SCHEDULER_REMOVED_JOB_DELAY, SCHEDULER_AUTO_REMOVED_JOB_DELAY);
                }
            } else {
                toWait = SCHEDULER_AUTO_REMOVED_JOB_DELAY;
            }
            if (toWait > 0) {
                scheduleJobRemove(job.getId(), System.currentTimeMillis() + toWait);
                // Report the delay that was actually scheduled, not always the "removed" delay.
                jlogger.debug(job.getId(), "will be removed in " + (toWait / 1000) + "sec");
            }
        }
    }
}
use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.
the class SchedulerStateRecoverHelper method recover.
/**
 * Rebuilds the scheduler state from the database.
 * <p>
 * Not-finished jobs are loaded and handed to {@code recoverJob} together with a
 * fixed-size thread pool; the method then waits for the pool to terminate, applies
 * job updates, replays the tasks of every running job and finally loads the
 * finished jobs. A running job whose replay throws is marked CANCELED, moved to the
 * finished jobs and persisted.
 *
 * @param loadJobPeriod period passed to {@code dbManager.loadFinishedJobs} when loading finished jobs
 * @param rmProxy       proxy passed through to {@code recoverJob} for every not-finished job
 * @return the recovered pending, running and finished jobs
 * @throws SchedulerStateNotRecoveredException if the thread is interrupted while waiting for
 *                                             the recovery pool to terminate
 */
public RecoveredSchedulerState recover(long loadJobPeriod, RMProxy rmProxy) {
    List<InternalJob> notFinishedJobs = dbManager.loadNotFinishedJobs(true);
    // Vector is kept on purpose: these collections are shared with recoverJob together with
    // the thread pool (presumably filled concurrently — verify against recoverJob).
    Vector<InternalJob> pendingJobs = new Vector<>();
    Vector<InternalJob> runningJobs = new Vector<>();

    ExecutorService recoverRunningTasksThreadPool = Executors.newFixedThreadPool(PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_NBTHREAD.getValueAsInt());
    try {
        for (InternalJob job : notFinishedJobs) {
            recoverJob(rmProxy, pendingJobs, runningJobs, job, recoverRunningTasksThreadPool);
        }
    } finally {
        // Always shut the pool down, even if recoverJob throws, so its worker threads
        // are not leaked. Already-submitted work is still allowed to complete.
        recoverRunningTasksThreadPool.shutdown();
    }

    boolean terminatedWithoutTimeout;
    try {
        terminatedWithoutTimeout = recoverRunningTasksThreadPool.awaitTermination(PASchedulerProperties.SCHEDULER_PARALLEL_SCHEDULER_STATE_RECOVER_TIMEOUT.getValueAsInt(), TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        logger.error("Interrupted while waiting for the Scheduler state to be recovered", e);
        // Restore the interrupt flag before converting to the recovery failure exception.
        Thread.currentThread().interrupt();
        throw new SchedulerStateNotRecoveredException(e);
    }
    failIfSchedulerStateRecoveryTimeout(terminatedWithoutTimeout);

    applyJobUpdates(notFinishedJobs);

    Vector<InternalJob> finishedJobs = new Vector<>();
    // An explicit iterator is required because failed jobs are removed while iterating.
    for (Iterator<InternalJob> iterator = runningJobs.iterator(); iterator.hasNext(); ) {
        InternalJob job = iterator.next();
        try {
            List<InternalTask> tasksList = copyAndSort(job.getITasks());
            // simulate the running execution to recreate the tree.
            for (InternalTask task : tasksList) {
                job.recoverTask(task.getId());
            }
            if (job.getStatus() == JobStatus.PAUSED) {
                job.setStatus(JobStatus.STALLED);
                job.setPaused();
                // update the count of pending and running task.
                job.setNumberOfPendingTasks(job.getNumberOfPendingTasks() + job.getNumberOfRunningTasks());
                job.setNumberOfRunningTasks(0);
            }
        } catch (Exception e) {
            logger.error("Failed to recover job " + job.getId() + " " + job.getName() + " job might be in a inconsistent state", e);
            jobLogger.error(job.getId(), "Failed to recover job, job might be in an inconsistent state", e);
            // partially cancel job (not tasks) and move it to finished jobs to avoid running it
            iterator.remove();
            job.setStatus(JobStatus.CANCELED);
            finishedJobs.add(job);
            dbManager.updateJobAndTasksState(job);
        }
    }

    finishedJobs.addAll(dbManager.loadFinishedJobs(false, loadJobPeriod));
    logger.info("[Recovering counters] " + " Pending: " + pendingJobs.size() + " Running: " + runningJobs.size() + " Finished: " + finishedJobs.size());
    return new RecoveredSchedulerState(pendingJobs, runningJobs, finishedJobs);
}
use of org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState in project scheduling by ow2-proactive.
the class SchedulerStateRecoverHelperTest method testRecoverWithKilledJobOnly.
@Test
public void testRecoverWithKilledJobOnly() throws Exception {
InternalJob job = createJob(JobStatus.KILLED);
changeTasksState(job, TaskStatus.FINISHED);
ImmutableMap<String, TaskStatus> tasksStatus = ImmutableMap.of("Ta", TaskStatus.FINISHED, "Tb", TaskStatus.ABORTED, "Tc", TaskStatus.PENDING);
changeTasksState(job, tasksStatus);
RecoveredSchedulerState recoveredState = new Scenario(job).execute();
assertThat(recoveredState.getFinishedJobs().get(0).getStatus()).isEqualTo(JobStatus.KILLED);
assertTasksStatus(recoveredState.getFinishedJobs(), tasksStatus);
}
Aggregations