Use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in the project "scheduling" by ow2-proactive.
From the class TestRestoreWorkflowJobs, method test.
/**
 * Finishes task "T" of a workflow job with a REPLICATE flow action (dup number 2),
 * persists the resulting task changes, and then verifies the state rebuilt by
 * {@link SchedulerStateRecoverHelper}: the job is expected to be recovered as a
 * running job in STALLED status, with "T" finished, the replicated tasks pending,
 * and "T1" / "T1*1" eligible.
 */
@Test
public void test() throws Exception {
    TaskFlowJob workflowDefinition = createJob();
    InternalJob workflowJob = defaultSubmitJobAndLoadInternal(true, workflowDefinition);
    workflowJob.start();

    // Start the task "T" and record the start in the database.
    InternalTask replicatingTask = workflowJob.getTask("T");
    startTask(workflowJob, replicatingTask);
    dbManager.jobTaskStarted(workflowJob, replicatingTask, true);

    // Terminate "T" with a REPLICATE action and persist the workflow changes.
    TaskResultImpl taskResult = new TaskResultImpl(replicatingTask.getId(), "ok", null, 0);
    FlowAction replicateAction = new FlowAction(FlowActionType.REPLICATE);
    replicateAction.setDupNumber(2);
    ChangedTasksInfo taskChanges = workflowJob.terminateTask(false,
                                                             replicatingTask.getId(),
                                                             null,
                                                             replicateAction,
                                                             taskResult);
    dbManager.updateAfterWorkflowTaskFinished(workflowJob, taskChanges, taskResult);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    // Expected recovered job: "T" finished, every other task still SUBMITTED.
    JobStateMatcher expectedWorkflowJob = job(workflowJob.getId(), JobStatus.STALLED)
            .withFinished(task("T", TaskStatus.FINISHED).checkFinished(), true)
            .withPending(task("T1", TaskStatus.SUBMITTED), true)
            .withPending(task("T1*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T2", TaskStatus.SUBMITTED), true)
            .withPending(task("T3", TaskStatus.SUBMITTED), true)
            .withPending(task("T2*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T3*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T4", TaskStatus.SUBMITTED), true)
            .withEligible("T1", "T1*1");

    checkRecoveredState(helper.recover(-1), state().withRunning(expectedWorkflowJob));
}
Use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in the project "scheduling" by ow2-proactive.
From the class SchedulerFrontend, method initActivity.
/**
 * Boots the scheduler front-end when the active object is initialized.
 * <p>
 * In order, this method: initializes the client security policy, creates the
 * authentication active object, starts the data space naming service, creates the
 * client / internal / task-pinger / scheduled thread pools, waits for and joins the
 * resource manager at {@code rmURL}, reads the optional "load job period" setting,
 * boots JMX, starts the synchronization service, recovers the scheduler state from
 * the database, builds the front-end state and the scheduling service, registers and
 * activates the authentication object, and optionally schedules the memory
 * monitoring runners.
 * <p>
 * Any exception raised during this sequence is logged as fatal, its stack trace is
 * printed, and the JVM is terminated with exit status 1.
 *
 * @param body the body passed by the active-object runtime; not read by this method
 * @see org.objectweb.proactive.InitActive#initActivity(org.objectweb.proactive.Body)
 */
@Override
public void initActivity(Body body) {
try {
// setting up the policy
logger.debug("Setting up scheduler security policy");
ClientsPolicy.init();
// creating the scheduler authentication interface.
// if this fails then it will not continue.
logger.debug("Creating scheduler authentication interface...");
authentication = PAActiveObject.newActive(SchedulerAuthentication.class, new Object[] { PAActiveObject.getStubOnThis() });
// creating scheduler core
DataSpaceServiceStarter dsServiceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
dsServiceStarter.startNamingService();
// thread pools handed to the scheduling infrastructure below; the maximum
// pool sizes come from the corresponding PASchedulerProperties entries
ExecutorService clientThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_CLIENT_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("ClientRequestsThreadPool", false, 3));
ExecutorService internalThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_INTERNAL_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("InternalOperationsThreadPool", false, 7));
ExecutorService taskPingerThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_TASK_PINGER_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("TaskPingerThreadPool", false, 2));
ScheduledExecutorService scheduledThreadPool = new ScheduledThreadPoolExecutor(PASchedulerProperties.SCHEDULER_SCHEDULED_POOL_NBTHREAD.getValueAsInt(), new NamedThreadFactory("SchedulingServiceTimerThread", false, 2));
// at this point we must wait the resource manager
RMConnection.waitAndJoin(rmURL.toString());
RMProxiesManager rmProxiesManager = RMProxiesManager.createRMProxiesManager(rmURL);
RMProxy rmProxy = rmProxiesManager.getRmProxy();
// -1 is kept when the period property is unset, empty or unparsable
// NOTE(review): presumably -1 means "no period restriction" for recover() - confirm
long loadJobPeriod = -1;
if (PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.isSet()) {
String periodStr = PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.getValueAsString();
if (periodStr != null && !periodStr.isEmpty()) {
try {
loadJobPeriod = Tools.parsePeriod(periodStr);
} catch (IllegalArgumentException e) {
// an invalid period is not fatal: warn and continue with the default
logger.warn("Invalid load job period string: " + periodStr + ", this setting is ignored", e);
}
}
}
logger.debug("Booting jmx...");
this.jmxHelper.boot(authentication);
publicStore = startSynchronizationService();
// rebuild the scheduler state from the database
RecoveredSchedulerState recoveredState = new SchedulerStateRecoverHelper(dbManager).recover(loadJobPeriod, rmProxy, initialStatus);
this.frontendState = new SchedulerFrontendState(recoveredState.getSchedulerState(), jmxHelper, dbManager);
SchedulingInfrastructure infrastructure = new SchedulingInfrastructureImpl(dbManager, rmProxiesManager, dsServiceStarter, clientThreadPool, internalThreadPool, taskPingerThreadPool, scheduledThreadPool);
this.spacesSupport = infrastructure.getSpacesSupport();
ServerJobAndTaskLogs.getInstance().setSpacesSupport(this.spacesSupport);
this.corePublicKey = Credentials.getPublicKey(PASchedulerProperties.getAbsolutePath(PASchedulerProperties.SCHEDULER_AUTH_PUBKEY_PATH.getValueAsString()));
this.schedulingService = new SchedulingService(infrastructure, frontendState, recoveredState, policyFullName, null, publicStore);
recoveredState.enableLiveLogsForRunningTasks(schedulingService);
releaseBusyNodesWithNoRunningTask(rmProxy, recoveredState);
// the authentication object is registered and activated only after the
// scheduling service has been created from the recovered state
logger.debug("Registering scheduler...");
PAActiveObject.registerByName(authentication, SchedulerConstants.SCHEDULER_DEFAULT_NAME);
authentication.setActivated(true);
Tools.logAvailableScriptEngines(logger);
// memory monitoring is optional: it only runs when a cron expression is configured
if (PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.isSet()) {
logger.debug("Starting the memory monitoring process...");
metricsMonitorScheduler = new it.sauronsoftware.cron4j.Scheduler();
String cronExpr = PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.getValueAsString();
// both runners share the same cron expression
metricsMonitorScheduler.schedule(cronExpr, new TableSizeMonitorRunner(dbManager.getTransactionHelper()));
metricsMonitorScheduler.schedule(cronExpr, new JobsMemoryMonitorRunner(dbManager.getSessionFactory().getStatistics(), recoveredState.getSchedulerState()));
metricsMonitorScheduler.start();
}
} catch (Exception e) {
// any startup failure is fatal: log it, print the trace, and stop the JVM
logger.fatal("Failed to start Scheduler", e);
e.printStackTrace();
System.exit(1);
}
}
Use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in the project "scheduling" by ow2-proactive.
From the class TestLoadSchedulerClientState, method testStateAfterJobEnd.
/**
 * Submits and removes a first job, then submits a two-task job, starts both tasks,
 * fails the job through "task2" with CANCELED status, persists the failure, and
 * verifies that the recovered state contains the job as a finished CANCELED job
 * with "task1" ABORTED and "task2" FAULTY.
 */
@Test
public void testStateAfterJobEnd() throws Exception {
    // First job: submitted, then removed from the database right away.
    TaskFlowJob removedJobDefinition = new TaskFlowJob();
    removedJobDefinition.addTask(createDefaultTask("task1"));
    InternalJob removedJob = defaultSubmitJobAndLoadInternal(false, removedJobDefinition);
    dbManager.removeJob(removedJob.getId(), System.currentTimeMillis(), true);

    // Second job: two independent tasks, both started.
    TaskFlowJob failingJobDefinition = new TaskFlowJob();
    failingJobDefinition.addTask(createDefaultTask("task1"));
    failingJobDefinition.addTask(createDefaultTask("task2"));
    InternalJob failingJob = defaultSubmitJobAndLoadInternal(true, failingJobDefinition);
    InternalTask firstTask = failingJob.getTask("task1");
    InternalTask secondTask = failingJob.getTask("task2");

    failingJob.start();
    startTask(failingJob, firstTask);
    dbManager.jobTaskStarted(failingJob, firstTask, true);
    startTask(failingJob, secondTask);
    dbManager.jobTaskStarted(failingJob, secondTask, false);

    // task 2 finished with error, stop job
    Set<TaskId> affectedTaskIds = failingJob.failed(secondTask.getId(), JobStatus.CANCELED);
    TaskResultImpl errorResult = new TaskResultImpl(null, new TestException("message", "data"), null, 0);
    dbManager.updateAfterJobFailed(failingJob, secondTask, errorResult, affectedTaskIds);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    JobStateMatcher expectedCanceledJob = job(failingJob.getId(), JobStatus.CANCELED)
            .withFinished(task("task1", TaskStatus.ABORTED).checkFinished(), false)
            .withFinished(task("task2", TaskStatus.FAULTY).checkFinished())
            .checkFinished();

    RecoveredSchedulerState recoveredState = helper.recover(-1);
    checkRecoveredState(recoveredState, state().withFinished(expectedCanceledJob));
}
Use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in the project "scheduling" by ow2-proactive.
From the class TestLoadSchedulerClientState, method testStateAfterTaskFinished.
/**
 * Walks a two-task job ("task2" depends on "task1") through three recovery rounds:
 * <ol>
 * <li>after "task1" finished, the job is expected as running/STALLED with "task2"
 * pending and eligible;</li>
 * <li>after "task2" was started (but not finished), the same STALLED state is
 * expected again;</li>
 * <li>after "task2" finished, the job is expected as finished with both tasks
 * FINISHED.</li>
 * </ol>
 */
@Test
public void testStateAfterTaskFinished() throws Exception {
    JavaTask firstTaskDef = createDefaultTask("task1");
    JavaTask secondTaskDef = createDefaultTask("task2");
    secondTaskDef.addDependence(firstTaskDef);
    TaskFlowJob definition = new TaskFlowJob();
    definition.addTask(firstTaskDef);
    definition.addTask(secondTaskDef);

    InternalJob submittedJob = defaultSubmitJobAndLoadInternal(true, definition);
    InternalTask firstTask = submittedJob.getTask("task1");
    submittedJob.start();
    startTask(submittedJob, firstTask);
    dbManager.jobTaskStarted(submittedJob, firstTask, true);

    // The same result object is reused for both task terminations.
    TaskResultImpl sharedResult = new TaskResultImpl(null, new TestResult(1, "res1"), null, 1000);
    terminateTask(submittedJob, firstTask, sharedResult);
    dbManager.updateAfterTaskFinished(submittedJob, firstTask, sharedResult);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    // Round 1: "task1" finished, "task2" not started yet.
    RecoveredSchedulerState firstRecovery = helper.recover(-1);
    JobStateMatcher expectedAfterFirstTask = job(submittedJob.getId(), JobStatus.STALLED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withPending(task("task2", TaskStatus.SUBMITTED), true)
            .withEligible("task2");
    checkRecoveredState(firstRecovery, state().withRunning(expectedAfterFirstTask));

    // Round 2: "task2" started on the recovered job, then state recovered again.
    InternalJob jobAfterFirstRecovery = firstRecovery.getRunningJobs().get(0);
    InternalTask startedSecondTask = jobAfterFirstRecovery.getTask("task2");
    startTask(jobAfterFirstRecovery, startedSecondTask);
    dbManager.jobTaskStarted(jobAfterFirstRecovery, startedSecondTask, false);

    JobStateMatcher expectedAfterSecondStart = job(jobAfterFirstRecovery.getId(), JobStatus.STALLED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withPending(task("task2", TaskStatus.SUBMITTED), true)
            .withEligible("task2");
    RecoveredSchedulerState secondRecovery = helper.recover(-1);
    checkRecoveredState(secondRecovery, state().withRunning(expectedAfterSecondStart));

    // Round 3: "task2" started again and finished on the newly recovered job.
    InternalJob jobAfterSecondRecovery = secondRecovery.getRunningJobs().get(0);
    InternalTask finishedSecondTask = jobAfterSecondRecovery.getTask("task2");
    startTask(jobAfterSecondRecovery, finishedSecondTask);
    dbManager.jobTaskStarted(jobAfterSecondRecovery, finishedSecondTask, false);
    terminateTask(jobAfterSecondRecovery, finishedSecondTask, sharedResult);
    dbManager.updateAfterTaskFinished(jobAfterSecondRecovery, finishedSecondTask, sharedResult);

    JobStateMatcher expectedFinishedJob = job(jobAfterSecondRecovery.getId(), JobStatus.FINISHED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withFinished(task("task2", TaskStatus.FINISHED).checkFinished());
    RecoveredSchedulerState finalRecovery = helper.recover(-1);
    checkRecoveredState(finalRecovery, state().withFinished(expectedFinishedJob));
}
Use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in the project "scheduling" by ow2-proactive.
From the class TestLoadSchedulerClientState, method testClientStateLoading.
/**
 * Submits two jobs (a three-task job with explicit attributes and dependencies,
 * and a single-task job), recovers the scheduler state, and verifies that both
 * jobs are listed as pending with the job and task data that was submitted.
 */
@Test
public void testClientStateLoading() throws Exception {
    // --- first job: job-level attributes ---
    TaskFlowJob detailedJob = new TaskFlowJob();
    detailedJob.setName(this.getClass().getSimpleName());
    detailedJob.setDescription("desc1");
    detailedJob.setProjectName("p1");
    detailedJob.setInputSpace("is1");
    detailedJob.setOutputSpace("os1");
    detailedJob.setMaxNumberOfExecution(22);
    detailedJob.setOnTaskError(OnTaskError.CONTINUE_JOB_EXECUTION);

    // --- first job: task definitions ---
    JavaTask firstTask = createDefaultTask("task1");
    firstTask.setDescription("d1");
    firstTask.setOnTaskError(OnTaskError.CANCEL_JOB);
    firstTask.setMaxNumberOfExecution(4);
    firstTask.setPreciousLogs(true);
    firstTask.setPreciousResult(true);
    firstTask.setRunAsMe(true);
    firstTask.setWallTime(440000);

    JavaTask secondTask = createDefaultTask("task2");
    secondTask.setDescription("d2");
    // With NONE at task level, the job-level on-error behavior overrides the
    // task-level one.
    secondTask.setOnTaskError(OnTaskError.NONE);
    secondTask.setMaxNumberOfExecution(3);
    secondTask.setPreciousLogs(false);
    secondTask.setPreciousResult(false);
    secondTask.setRunAsMe(false);
    secondTask.setWallTime(240000);

    JavaTask thirdTask = createDefaultTask("task3");

    // task1 depends on task2 and task3; task2 depends on task3.
    firstTask.addDependence(secondTask);
    firstTask.addDependence(thirdTask);
    secondTask.addDependence(thirdTask);

    detailedJob.addTask(firstTask);
    detailedJob.addTask(secondTask);
    detailedJob.addTask(thirdTask);
    detailedJob.setPriority(JobPriority.LOW);

    Map<String, String> detailedJobInfo = new HashMap<>();
    detailedJobInfo.put("p1", "v1");
    detailedJobInfo.put("p2", "v2");
    detailedJob.setGenericInformation(detailedJobInfo);

    InternalJob submittedDetailedJob = defaultSubmitJob(detailedJob);

    // --- second job: minimal single-task job ---
    TaskFlowJob minimalJob = new TaskFlowJob();
    minimalJob.setName(this.getClass().getSimpleName() + "_2");
    minimalJob.setGenericInformation(new HashMap<String, String>());
    minimalJob.addTask(createDefaultTask("task1"));
    minimalJob.setPriority(JobPriority.HIGH);

    InternalJob submittedMinimalJob = defaultSubmitJob(minimalJob);

    // --- recover and verify ---
    System.out.println("Load scheduler client state");
    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);
    SchedulerState recoveredState = helper.recover(-1).getSchedulerState();

    Assert.assertEquals("Unexpected jobs number", 2, recoveredState.getPendingJobs().size());

    JobState detailedJobState = checkJobData(recoveredState.getPendingJobs(),
                                             submittedDetailedJob.getId(),
                                             detailedJob,
                                             3);
    checkTaskData(firstTask, findTask(detailedJobState, "task1"), "task2", "task3");
    checkTaskData(secondTask, findTask(detailedJobState, "task2"), "task3");
    checkTaskData(thirdTask, findTask(detailedJobState, "task3"));

    checkJobData(recoveredState.getPendingJobs(), submittedMinimalJob.getId(), minimalJob, 1);
}
Aggregations