Search in sources :

Example 6 with SchedulerStateRecoverHelper

use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in project scheduling by ow2-proactive.

the class TestRestoreWorkflowJobs method test.

/**
 * Finishes task "T" of a workflow job with a REPLICATE flow action (dup number 2),
 * persists the change, and verifies the state recovered from the database:
 * a STALLED job with "T" finished and the remaining tasks pending.
 */
@Test
public void test() throws Exception {
    InternalJob submitted = defaultSubmitJobAndLoadInternal(true, createJob());
    submitted.start();

    // Run the task that triggers the replication.
    InternalTask replicator = submitted.getTask("T");
    startTask(submitted, replicator);
    dbManager.jobTaskStarted(submitted, replicator, true);

    // Terminate it with a REPLICATE action and store the resulting changes.
    TaskResultImpl taskResult = new TaskResultImpl(replicator.getId(), "ok", null, 0);
    FlowAction replicate = new FlowAction(FlowActionType.REPLICATE);
    replicate.setDupNumber(2);
    ChangedTasksInfo changes = submitted.terminateTask(false, replicator.getId(), null, replicate, taskResult);
    dbManager.updateAfterWorkflowTaskFinished(submitted, changes, taskResult);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    JobStateMatcher expected = job(submitted.getId(), JobStatus.STALLED)
            .withFinished(task("T", TaskStatus.FINISHED).checkFinished(), true)
            .withPending(task("T1", TaskStatus.SUBMITTED), true)
            .withPending(task("T1*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T2", TaskStatus.SUBMITTED), true)
            .withPending(task("T3", TaskStatus.SUBMITTED), true)
            .withPending(task("T2*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T3*1", TaskStatus.SUBMITTED), true)
            .withPending(task("T4", TaskStatus.SUBMITTED), true)
            .withEligible("T1", "T1*1");

    checkRecoveredState(helper.recover(-1), state().withRunning(expected));
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob) ChangedTasksInfo(org.ow2.proactive.scheduler.job.ChangedTasksInfo) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) FlowAction(org.ow2.proactive.scheduler.common.task.flow.FlowAction) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) Test(org.junit.Test)

Example 7 with SchedulerStateRecoverHelper

use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in project scheduling by ow2-proactive.

the class SchedulerFrontend method initActivity.

/**
 * Starts the scheduler front-end. In order, this method: initializes the client
 * security policy, creates the authentication active object, starts the data-space
 * naming service, creates the thread pools, waits for and joins the resource manager,
 * boots JMX, starts the synchronization service, recovers the scheduler state from
 * the database, builds the scheduling infrastructure and {@code SchedulingService},
 * registers and activates the authentication object, and optionally starts the
 * cron-driven memory monitoring.
 * <p>
 * Any exception raised during this sequence is logged as fatal, printed to stderr,
 * and the JVM is terminated with exit code 1.
 *
 * @param body the body passed by the active-object runtime; it is not read by this method
 * @see org.objectweb.proactive.InitActive#initActivity(org.objectweb.proactive.Body)
 */
@Override
public void initActivity(Body body) {
    try {
        // setting up the policy
        logger.debug("Setting up scheduler security policy");
        ClientsPolicy.init();
        // creating the scheduler authentication interface.
        // if this fails then it will not continue.
        logger.debug("Creating scheduler authentication interface...");
        authentication = PAActiveObject.newActive(SchedulerAuthentication.class, new Object[] { PAActiveObject.getStubOnThis() });
        // creating scheduler core
        DataSpaceServiceStarter dsServiceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
        dsServiceStarter.startNamingService();
        // Thread pools handed to SchedulingInfrastructureImpl further down; the three
        // cached pools are sized from PASchedulerProperties and use a 120 second timeout value.
        ExecutorService clientThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_CLIENT_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("ClientRequestsThreadPool", false, 3));
        ExecutorService internalThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_INTERNAL_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("InternalOperationsThreadPool", false, 7));
        ExecutorService taskPingerThreadPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_TASK_PINGER_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("TaskPingerThreadPool", false, 2));
        ScheduledExecutorService scheduledThreadPool = new ScheduledThreadPoolExecutor(PASchedulerProperties.SCHEDULER_SCHEDULED_POOL_NBTHREAD.getValueAsInt(), new NamedThreadFactory("SchedulingServiceTimerThread", false, 2));
        // at this point we must wait the resource manager
        RMConnection.waitAndJoin(rmURL.toString());
        RMProxiesManager rmProxiesManager = RMProxiesManager.createRMProxiesManager(rmURL);
        RMProxy rmProxy = rmProxiesManager.getRmProxy();
        // loadJobPeriod stays at -1 when the property is unset, empty, or cannot be parsed.
        // NOTE(review): -1 presumably means "no period restriction" for recover() - confirm
        // in SchedulerStateRecoverHelper.
        long loadJobPeriod = -1;
        if (PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.isSet()) {
            String periodStr = PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.getValueAsString();
            if (periodStr != null && !periodStr.isEmpty()) {
                try {
                    loadJobPeriod = Tools.parsePeriod(periodStr);
                } catch (IllegalArgumentException e) {
                    // An invalid period is not fatal: warn and keep the default of -1.
                    logger.warn("Invalid load job period string: " + periodStr + ", this setting is ignored", e);
                }
            }
        }
        logger.debug("Booting jmx...");
        this.jmxHelper.boot(authentication);
        publicStore = startSynchronizationService();
        // Recover the persisted scheduler state; the front-end state and the scheduling
        // service below are both built from this recovered state.
        RecoveredSchedulerState recoveredState = new SchedulerStateRecoverHelper(dbManager).recover(loadJobPeriod, rmProxy, initialStatus);
        this.frontendState = new SchedulerFrontendState(recoveredState.getSchedulerState(), jmxHelper, dbManager);
        SchedulingInfrastructure infrastructure = new SchedulingInfrastructureImpl(dbManager, rmProxiesManager, dsServiceStarter, clientThreadPool, internalThreadPool, taskPingerThreadPool, scheduledThreadPool);
        this.spacesSupport = infrastructure.getSpacesSupport();
        ServerJobAndTaskLogs.getInstance().setSpacesSupport(this.spacesSupport);
        this.corePublicKey = Credentials.getPublicKey(PASchedulerProperties.getAbsolutePath(PASchedulerProperties.SCHEDULER_AUTH_PUBKEY_PATH.getValueAsString()));
        // NOTE(review): the fifth constructor argument is passed as null; its meaning is
        // not visible from this method - check the SchedulingService constructor.
        this.schedulingService = new SchedulingService(infrastructure, frontendState, recoveredState, policyFullName, null, publicStore);
        recoveredState.enableLiveLogsForRunningTasks(schedulingService);
        releaseBusyNodesWithNoRunningTask(rmProxy, recoveredState);
        // The authentication object is registered and activated only after the
        // scheduling service has been created.
        logger.debug("Registering scheduler...");
        PAActiveObject.registerByName(authentication, SchedulerConstants.SCHEDULER_DEFAULT_NAME);
        authentication.setActivated(true);
        Tools.logAvailableScriptEngines(logger);
        // Optional monitoring: the property value is used as the cron expression for
        // both the table-size and the jobs-memory monitor runners.
        if (PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.isSet()) {
            logger.debug("Starting the memory monitoring process...");
            metricsMonitorScheduler = new it.sauronsoftware.cron4j.Scheduler();
            String cronExpr = PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.getValueAsString();
            metricsMonitorScheduler.schedule(cronExpr, new TableSizeMonitorRunner(dbManager.getTransactionHelper()));
            metricsMonitorScheduler.schedule(cronExpr, new JobsMemoryMonitorRunner(dbManager.getSessionFactory().getStatistics(), recoveredState.getSchedulerState()));
            metricsMonitorScheduler.start();
        }
    } catch (Exception e) {
        // Startup failure is unrecoverable: log it, also dump the trace to stderr,
        // then terminate the JVM.
        logger.fatal("Failed to start Scheduler", e);
        e.printStackTrace();
        System.exit(1);
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) NamedThreadFactory(org.objectweb.proactive.utils.NamedThreadFactory) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) TimeUnit(java.util.concurrent.TimeUnit) SchedulerAuthentication(org.ow2.proactive.scheduler.authentication.SchedulerAuthentication) FileSystemException(org.objectweb.proactive.extensions.dataspaces.exceptions.FileSystemException) KeyException(java.security.KeyException) UnknownJobException(org.ow2.proactive.scheduler.common.exception.UnknownJobException) TaskCouldNotRestartException(org.ow2.proactive.scheduler.common.exception.TaskCouldNotRestartException) InvalidChannelException(org.ow2.proactive.scheduler.synchronization.InvalidChannelException) JobCreationException(org.ow2.proactive.scheduler.common.exception.JobCreationException) PermissionException(org.ow2.proactive.scheduler.common.exception.PermissionException) NotConnectedException(org.ow2.proactive.scheduler.common.exception.NotConnectedException) AlreadyConnectedException(org.ow2.proactive.scheduler.common.exception.AlreadyConnectedException) UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) TaskCouldNotStartException(org.ow2.proactive.scheduler.common.exception.TaskCouldNotStartException) JobValidationException(org.ow2.proactive.scheduler.common.exception.JobValidationException) JobAlreadyFinishedException(org.ow2.proactive.scheduler.common.exception.JobAlreadyFinishedException) SubmissionClosedException(org.ow2.proactive.scheduler.common.exception.SubmissionClosedException) DatabaseManagerException(org.ow2.proactive.db.DatabaseManagerException) TaskSkippedException(org.ow2.proactive.scheduler.common.exception.TaskSkippedException) ProActiveException(org.objectweb.proactive.core.ProActiveException) SignalApiException(org.ow2.proactive.scheduler.signal.SignalApiException) 
RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) RMProxy(org.ow2.proactive.scheduler.core.rmproxies.RMProxy) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) TableSizeMonitorRunner(org.ow2.proactive.scheduler.core.helpers.TableSizeMonitorRunner) DataSpacesFileObject(org.objectweb.proactive.extensions.dataspaces.api.DataSpacesFileObject) ActiveObject(org.objectweb.proactive.extensions.annotation.ActiveObject) PAActiveObject(org.objectweb.proactive.api.PAActiveObject) JobsMemoryMonitorRunner(org.ow2.proactive.scheduler.core.helpers.JobsMemoryMonitorRunner) RMProxiesManager(org.ow2.proactive.scheduler.core.rmproxies.RMProxiesManager)

Example 8 with SchedulerStateRecoverHelper

use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in project scheduling by ow2-proactive.

the class TestLoadSchedulerClientState method testStateAfterJobEnd.

/**
 * Submits and removes a first job, then runs a two-task job in which "task2" fails
 * and the job is cancelled. Verifies that recovery reports the second job as a
 * finished CANCELED job with "task1" ABORTED and "task2" FAULTY.
 */
@Test
public void testStateAfterJobEnd() throws Exception {
    // First job: one task, submitted and then removed from the database.
    TaskFlowJob removedJobDef = new TaskFlowJob();
    removedJobDef.addTask(createDefaultTask("task1"));
    InternalJob removedJob = defaultSubmitJobAndLoadInternal(false, removedJobDef);
    dbManager.removeJob(removedJob.getId(), System.currentTimeMillis(), true);

    // Second job: two tasks, both started.
    TaskFlowJob cancelledJobDef = new TaskFlowJob();
    cancelledJobDef.addTask(createDefaultTask("task1"));
    cancelledJobDef.addTask(createDefaultTask("task2"));
    InternalJob cancelledJob = defaultSubmitJobAndLoadInternal(true, cancelledJobDef);
    InternalTask firstTask = cancelledJob.getTask("task1");
    InternalTask secondTask = cancelledJob.getTask("task2");
    cancelledJob.start();
    startTask(cancelledJob, firstTask);
    dbManager.jobTaskStarted(cancelledJob, firstTask, true);
    startTask(cancelledJob, secondTask);
    dbManager.jobTaskStarted(cancelledJob, secondTask, false);

    // task 2 finished with error, stop job
    Set<TaskId> affectedTaskIds = cancelledJob.failed(secondTask.getId(), JobStatus.CANCELED);
    TaskResultImpl failure = new TaskResultImpl(null, new TestException("message", "data"), null, 0);
    dbManager.updateAfterJobFailed(cancelledJob, secondTask, failure, affectedTaskIds);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    JobStateMatcher expected = job(cancelledJob.getId(), JobStatus.CANCELED)
            .withFinished(task("task1", TaskStatus.ABORTED).checkFinished(), false)
            .withFinished(task("task2", TaskStatus.FAULTY).checkFinished())
            .checkFinished();

    checkRecoveredState(helper.recover(-1), state().withFinished(expected));
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskId(org.ow2.proactive.scheduler.common.task.TaskId) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) Test(org.junit.Test)

Example 9 with SchedulerStateRecoverHelper

use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in project scheduling by ow2-proactive.

the class TestLoadSchedulerClientState method testStateAfterTaskFinished.

/**
 * Walks a two-task job ("task2" depends on "task1") through three recoveries:
 * after "task1" finished, after "task2" was started, and after "task2" finished.
 * The first two recoveries are expected to show a STALLED running job with
 * "task2" pending; the last one a FINISHED job.
 */
@Test
public void testStateAfterTaskFinished() throws Exception {
    TaskFlowJob definition = new TaskFlowJob();
    JavaTask firstDef = createDefaultTask("task1");
    JavaTask secondDef = createDefaultTask("task2");
    secondDef.addDependence(firstDef);
    definition.addTask(firstDef);
    definition.addTask(secondDef);

    // Run and finish "task1" on the freshly submitted job.
    InternalJob submittedJob = defaultSubmitJobAndLoadInternal(true, definition);
    InternalTask firstTask = submittedJob.getTask("task1");
    submittedJob.start();
    startTask(submittedJob, firstTask);
    dbManager.jobTaskStarted(submittedJob, firstTask, true);
    TaskResultImpl sharedResult = new TaskResultImpl(null, new TestResult(1, "res1"), null, 1000);
    terminateTask(submittedJob, firstTask, sharedResult);
    dbManager.updateAfterTaskFinished(submittedJob, firstTask, sharedResult);

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);

    // Recovery #1: "task1" finished, "task2" pending and eligible.
    RecoveredSchedulerState afterFirstTask = helper.recover(-1);
    JobStateMatcher expectedAfterFirstTask = job(submittedJob.getId(), JobStatus.STALLED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withPending(task("task2", TaskStatus.SUBMITTED), true)
            .withEligible("task2");
    checkRecoveredState(afterFirstTask, state().withRunning(expectedAfterFirstTask));

    // Recovery #2: "task2" has been started on the recovered job; the expectation is unchanged.
    InternalJob jobAfterFirstRecovery = afterFirstTask.getRunningJobs().get(0);
    InternalTask startedSecondTask = jobAfterFirstRecovery.getTask("task2");
    startTask(jobAfterFirstRecovery, startedSecondTask);
    dbManager.jobTaskStarted(jobAfterFirstRecovery, startedSecondTask, false);
    JobStateMatcher expectedAfterSecondStart = job(jobAfterFirstRecovery.getId(), JobStatus.STALLED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withPending(task("task2", TaskStatus.SUBMITTED), true)
            .withEligible("task2");
    RecoveredSchedulerState afterSecondStart = helper.recover(-1);
    checkRecoveredState(afterSecondStart, state().withRunning(expectedAfterSecondStart));

    // Recovery #3: "task2" started again and finished; the whole job is finished.
    InternalJob jobAfterSecondRecovery = afterSecondStart.getRunningJobs().get(0);
    InternalTask finishedSecondTask = jobAfterSecondRecovery.getTask("task2");
    startTask(jobAfterSecondRecovery, finishedSecondTask);
    dbManager.jobTaskStarted(jobAfterSecondRecovery, finishedSecondTask, false);
    terminateTask(jobAfterSecondRecovery, finishedSecondTask, sharedResult);
    dbManager.updateAfterTaskFinished(jobAfterSecondRecovery, finishedSecondTask, sharedResult);
    JobStateMatcher expectedFinished = job(jobAfterSecondRecovery.getId(), JobStatus.FINISHED)
            .withFinished(task("task1", TaskStatus.FINISHED).checkFinished())
            .withFinished(task("task2", TaskStatus.FINISHED).checkFinished());
    RecoveredSchedulerState afterJobEnd = helper.recover(-1);
    checkRecoveredState(afterJobEnd, state().withFinished(expectedFinished));
}
Also used : RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskResultImpl(org.ow2.proactive.scheduler.task.TaskResultImpl) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) JavaTask(org.ow2.proactive.scheduler.common.task.JavaTask) Test(org.junit.Test)

Example 10 with SchedulerStateRecoverHelper

use of org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper in project scheduling by ow2-proactive.

the class TestLoadSchedulerClientState method testClientStateLoading.

/**
 * Submits two jobs with a range of job- and task-level attributes, recovers the
 * scheduler state, and checks that both jobs are reported as pending with the
 * expected job and task data.
 */
@Test
public void testClientStateLoading() throws Exception {
    // First job: fully configured, three tasks with dependencies.
    TaskFlowJob firstJobDef = new TaskFlowJob();
    firstJobDef.setName(this.getClass().getSimpleName());
    firstJobDef.setDescription("desc1");
    firstJobDef.setProjectName("p1");
    firstJobDef.setInputSpace("is1");
    firstJobDef.setOutputSpace("os1");
    firstJobDef.setMaxNumberOfExecution(22);
    firstJobDef.setOnTaskError(OnTaskError.CONTINUE_JOB_EXECUTION);

    JavaTask taskDef1 = createDefaultTask("task1");
    taskDef1.setDescription("d1");
    taskDef1.setOnTaskError(OnTaskError.CANCEL_JOB);
    taskDef1.setMaxNumberOfExecution(4);
    taskDef1.setPreciousLogs(true);
    taskDef1.setPreciousResult(true);
    taskDef1.setRunAsMe(true);
    taskDef1.setWallTime(440000);

    JavaTask taskDef2 = createDefaultTask("task2");
    taskDef2.setDescription("d2");
    // If it is set to none, the job level behavior will overwrite the task level none behavior.
    taskDef2.setOnTaskError(OnTaskError.NONE);
    taskDef2.setMaxNumberOfExecution(3);
    taskDef2.setPreciousLogs(false);
    taskDef2.setPreciousResult(false);
    taskDef2.setRunAsMe(false);
    taskDef2.setWallTime(240000);

    JavaTask taskDef3 = createDefaultTask("task3");

    // task1 depends on task2 and task3; task2 depends on task3.
    taskDef1.addDependence(taskDef2);
    taskDef1.addDependence(taskDef3);
    taskDef2.addDependence(taskDef3);

    firstJobDef.addTask(taskDef1);
    firstJobDef.addTask(taskDef2);
    firstJobDef.addTask(taskDef3);
    firstJobDef.setPriority(JobPriority.LOW);

    Map<String, String> firstJobInfo = new HashMap<>();
    firstJobInfo.put("p1", "v1");
    firstJobInfo.put("p2", "v2");
    firstJobDef.setGenericInformation(firstJobInfo);

    InternalJob firstSubmitted = defaultSubmitJob(firstJobDef);

    // Second job: a single default task and empty generic information.
    TaskFlowJob secondJobDef = new TaskFlowJob();
    secondJobDef.setName(this.getClass().getSimpleName() + "_2");
    Map<String, String> emptyInfo = new HashMap<>();
    secondJobDef.setGenericInformation(emptyInfo);
    secondJobDef.addTask(createDefaultTask("task1"));
    secondJobDef.setPriority(JobPriority.HIGH);

    InternalJob secondSubmitted = defaultSubmitJob(secondJobDef);

    System.out.println("Load scheduler client state");

    SchedulerStateRecoverHelper helper = new SchedulerStateRecoverHelper(dbManager);
    SchedulerState recoveredState = helper.recover(-1).getSchedulerState();

    Assert.assertEquals("Unexpected jobs number", 2, recoveredState.getPendingJobs().size());

    JobState firstJobState = checkJobData(recoveredState.getPendingJobs(), firstSubmitted.getId(), firstJobDef, 3);
    checkTaskData(taskDef1, findTask(firstJobState, "task1"), "task2", "task3");
    checkTaskData(taskDef2, findTask(firstJobState, "task2"), "task3");
    checkTaskData(taskDef3, findTask(firstJobState, "task3"));

    checkJobData(recoveredState.getPendingJobs(), secondSubmitted.getId(), secondJobDef, 1);
}
Also used : InternalJob(org.ow2.proactive.scheduler.job.InternalJob) SchedulerState(org.ow2.proactive.scheduler.common.SchedulerState) RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) HashMap(java.util.HashMap) TaskFlowJob(org.ow2.proactive.scheduler.common.job.TaskFlowJob) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) JobState(org.ow2.proactive.scheduler.common.job.JobState) JavaTask(org.ow2.proactive.scheduler.common.task.JavaTask) Test(org.junit.Test)

Aggregations

SchedulerStateRecoverHelper (org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper): 14
InternalJob (org.ow2.proactive.scheduler.job.InternalJob): 12
Test (org.junit.Test): 11
TaskFlowJob (org.ow2.proactive.scheduler.common.job.TaskFlowJob): 9
RecoveredSchedulerState (org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState): 8
InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask): 8
TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl): 5
JavaTask (org.ow2.proactive.scheduler.common.task.JavaTask): 3
TaskDescriptor (org.ow2.proactive.scheduler.common.TaskDescriptor): 2
JobState (org.ow2.proactive.scheduler.common.job.JobState): 2
FlowAction (org.ow2.proactive.scheduler.common.task.flow.FlowAction): 2
SchedulerDBManager (org.ow2.proactive.scheduler.core.db.SchedulerDBManager): 2
ChangedTasksInfo (org.ow2.proactive.scheduler.job.ChangedTasksInfo): 2
ImmutableList (com.google.common.collect.ImmutableList): 1
KeyException (java.security.KeyException): 1
HashMap (java.util.HashMap): 1
ExecutorService (java.util.concurrent.ExecutorService): 1
ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService): 1
ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor): 1
TimeUnit (java.util.concurrent.TimeUnit): 1