Search in sources :

Example 1 with DataSpaceServiceStarter

use of org.ow2.proactive.scheduler.core.DataSpaceServiceStarter in project scheduling by ow2-proactive.

From the class SchedulingMethodImpl, method createExecution:

/**
 * Builds the launcher of a task and submits the action that starts it on the given node.
 * <p>
 * The job is locked at the beginning of the call and unlocked again on exit. Nothing is started when
 * fewer nodes are available than the task needs, when the task is paused, or when locking the job
 * yielded no job data.
 *
 * @param nodeSet the node set containing every available node that can be used for execution; the
 *                element at index 0 is removed from it, and it is emptied when the task is parallel
 * @param node the node on which to start the task
 * @param job the job that owns the task to be started
 * @param task the task to be started
 * @param taskDescriptor the descriptor of the task to be started
 * @return {@code true} once the start action has been submitted and the start has been finalized,
 *         {@code false} when one of the preconditions above is not met
 * @throws Exception if any step fails; a failure raised while creating the launcher or submitting the
 *                   start action is rethrown after a best-effort release of the involved nodes
 */
protected boolean createExecution(NodeSet nodeSet, Node node, InternalJob job, InternalTask task, TaskDescriptor taskDescriptor) throws Exception {
    LiveJobs.JobData jobData = null;
    try {
        schedulingMainLoopTimingLogger.start("jobLockAcquisition");
        jobData = schedulingService.lockJob(job.getId());
        schedulingMainLoopTimingLogger.end("jobLockAcquisition");

        // give up early: not enough nodes, paused task, or the job could not be locked
        if (nodeSet.size() < task.getNumberOfNodesNeeded() || task.getStatus() == TaskStatus.PAUSED || jobData == null) {
            return false;
        }

        // start dataspace app for this job
        schedulingMainLoopTimingLogger.start("startDataspaceApp");
        DataSpaceServiceStarter dataSpaceStarter = schedulingService.getInfrastructure().getDataSpaceServiceStarter();
        job.startDataSpaceApplication(dataSpaceStarter.getNamingService(), ImmutableList.of(task));
        job.setSynchronizationAPI(schedulingService.getSynchronizationAPI());
        schedulingMainLoopTimingLogger.end("startDataspaceApp");

        NodeSet taskNodes = new NodeSet();
        String sessionId = getRMProxiesManager().getUserRMProxy(job.getOwner(), job.getCredentials()).getSessionid();
        try {
            // create launcher
            schedulingMainLoopTimingLogger.start("createLauncher");
            TaskLauncher launcher = task.createLauncher(node, sessionId);
            schedulingMainLoopTimingLogger.end("createLauncher");

            activeObjectCreationRetryTimeNumber = ACTIVEOBJECT_CREATION_RETRY_TIME_NUMBER;
            nodeSet.remove(0);

            // we will need to update this code once topology will be allowed for single-node task
            if (task.isParallel()) {
                taskNodes = new NodeSet(nodeSet);
                task.getExecuterInformation().addNodes(taskNodes);
                nodeSet.clear();
            }

            // set nodes in the executable container
            task.getExecutableContainer().setNodes(taskNodes);
            tlogger.debug(task.getId(), "deploying");

            // above 500 parent tasks the timeout is scaled with the number of parents,
            // otherwise it is reset to its default value
            int parentCount = taskDescriptor.getParents().size();
            int defaultTimeout = PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt();
            dotaskActionTimeout = parentCount > 500 ? (int) (parentCount / 500.0 * defaultTimeout) : defaultTimeout;

            schedulingMainLoopTimingLogger.start("areNodesRecoverable");
            boolean taskRecoverable = getRMProxiesManager().getRmProxy().areNodesRecoverable(taskNodes);
            schedulingMainLoopTimingLogger.end("areNodesRecoverable");

            schedulingMainLoopTimingLogger.start("terminateNotificationNodeURL");
            String terminateNotificationNodeURL = PAActiveObject.getActiveObjectNode(terminateNotification).getNodeInformation().getURL();
            TaskRecoveryData taskRecoveryData = new TaskRecoveryData(terminateNotificationNodeURL, taskRecoverable);
            schedulingMainLoopTimingLogger.end("terminateNotificationNodeURL");

            schedulingMainLoopTimingLogger.start("submitWithTimeout");
            TimedDoTaskAction startAction = new TimedDoTaskAction(job, taskDescriptor, launcher, schedulingService, terminateNotification, corePrivateKey, taskRecoveryData, sessionId);
            threadPool.submitWithTimeout(startAction, dotaskActionTimeout, TimeUnit.MILLISECONDS);
            schedulingMainLoopTimingLogger.end("submitWithTimeout");

            // The task is advertised as started here although, strictly speaking, only the
            // action calling the launcher's "doTask" has been handed to the thread pool, so
            // for a short while the task is seen as started before it really is. The
            // notification cannot easily be moved because
            // 1) it would make the job lock acquisition less predictable (the
            // TimedDoTaskAction would have to compete with the SchedulingMethodImpl), and
            // more importantly 2) SchedulingMethodImpl#createExecution could be called a
            // second time for the task the TimedDoTaskAction is currently starting.
            schedulingMainLoopTimingLogger.start("finalizeStarting");
            finalizeStarting(job, task, node, launcher);
            schedulingMainLoopTimingLogger.end("finalizeStarting");
            return true;
        } catch (Exception failure) {
            try {
                // if there was a problem, free nodeSet for multi-nodes task
                taskNodes.add(node);
                releaseNodes(job, taskNodes);
            } catch (Throwable ignored) {
                // swallowed on purpose: the failure rethrown below is the one that is propagated
            }
            throw failure;
        }
    } finally {
        if (jobData != null) {
            jobData.unlock();
        }
    }
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) TaskLauncher(org.ow2.proactive.scheduler.task.TaskLauncher) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) TopologyDisabledException(org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException) InvalidScriptException(org.ow2.proactive.scripting.InvalidScriptException) RMProxyCreationException(org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException) IOException(java.io.IOException) TaskRecoveryData(org.ow2.proactive.scheduler.task.internal.TaskRecoveryData)

Example 2 with DataSpaceServiceStarter

use of org.ow2.proactive.scheduler.core.DataSpaceServiceStarter in project scheduling by ow2-proactive.

From the class SchedulerFrontend, method initActivity:

/**
 * Boots the scheduler front-end: security policy, authentication interface, data space naming
 * service, thread pools, resource manager connection, JMX, state recovery and scheduling service,
 * then registers and activates the authentication interface.
 * <p>
 * Any exception raised along the way is logged as fatal, printed, and ends the JVM with exit status 1.
 *
 * @param body the body of the active object; not used by this implementation
 * @see org.objectweb.proactive.InitActive#initActivity(org.objectweb.proactive.Body)
 */
@Override
public void initActivity(Body body) {
    try {
        // setting up the policy
        logger.debug("Setting up scheduler security policy");
        ClientsPolicy.init();

        // creating the scheduler authentication interface.
        // if this fails then it will not continue.
        logger.debug("Creating scheduler authentication interface...");
        authentication = PAActiveObject.newActive(SchedulerAuthentication.class, new Object[] { PAActiveObject.getStubOnThis() });

        // creating scheduler core
        DataSpaceServiceStarter dataSpaceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
        dataSpaceStarter.startNamingService();

        ExecutorService clientPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_CLIENT_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("ClientRequestsThreadPool", false, 3));
        ExecutorService internalPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_INTERNAL_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("InternalOperationsThreadPool", false, 7));
        ExecutorService taskPingerPool = PAExecutors.newCachedBoundedThreadPool(1, PASchedulerProperties.SCHEDULER_TASK_PINGER_POOL_NBTHREAD.getValueAsInt(), 120L, TimeUnit.SECONDS, new NamedThreadFactory("TaskPingerThreadPool", false, 2));
        ScheduledExecutorService scheduledPool = new ScheduledThreadPoolExecutor(PASchedulerProperties.SCHEDULER_SCHEDULED_POOL_NBTHREAD.getValueAsInt(), new NamedThreadFactory("SchedulingServiceTimerThread", false, 2));

        // at this point we must wait the resource manager
        RMConnection.waitAndJoin(rmURL.toString());
        RMProxiesManager proxiesManager = RMProxiesManager.createRMProxiesManager(rmURL);
        RMProxy rmProxy = proxiesManager.getRmProxy();

        long loadJobPeriod = readLoadJobPeriod();

        logger.debug("Booting jmx...");
        this.jmxHelper.boot(authentication);
        publicStore = startSynchronizationService();

        RecoveredSchedulerState recovered = new SchedulerStateRecoverHelper(dbManager).recover(loadJobPeriod, rmProxy, initialStatus);
        this.frontendState = new SchedulerFrontendState(recovered.getSchedulerState(), jmxHelper, dbManager);

        SchedulingInfrastructure schedulingInfrastructure = new SchedulingInfrastructureImpl(dbManager, proxiesManager, dataSpaceStarter, clientPool, internalPool, taskPingerPool, scheduledPool);
        this.spacesSupport = schedulingInfrastructure.getSpacesSupport();
        ServerJobAndTaskLogs.getInstance().setSpacesSupport(this.spacesSupport);

        String publicKeyPath = PASchedulerProperties.getAbsolutePath(PASchedulerProperties.SCHEDULER_AUTH_PUBKEY_PATH.getValueAsString());
        this.corePublicKey = Credentials.getPublicKey(publicKeyPath);

        this.schedulingService = new SchedulingService(schedulingInfrastructure, frontendState, recovered, policyFullName, null, publicStore);
        recovered.enableLiveLogsForRunningTasks(schedulingService);
        releaseBusyNodesWithNoRunningTask(rmProxy, recovered);

        logger.debug("Registering scheduler...");
        PAActiveObject.registerByName(authentication, SchedulerConstants.SCHEDULER_DEFAULT_NAME);
        authentication.setActivated(true);
        Tools.logAvailableScriptEngines(logger);

        if (PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.isSet()) {
            logger.debug("Starting the memory monitoring process...");
            metricsMonitorScheduler = new it.sauronsoftware.cron4j.Scheduler();
            String monitoringCron = PASchedulerProperties.SCHEDULER_MEM_MONITORING_FREQ.getValueAsString();
            // both monitors run on the same cron expression
            metricsMonitorScheduler.schedule(monitoringCron, new TableSizeMonitorRunner(dbManager.getTransactionHelper()));
            metricsMonitorScheduler.schedule(monitoringCron, new JobsMemoryMonitorRunner(dbManager.getSessionFactory().getStatistics(), recovered.getSchedulerState()));
            metricsMonitorScheduler.start();
        }
    } catch (Exception e) {
        logger.fatal("Failed to start Scheduler", e);
        e.printStackTrace();
        System.exit(1);
    }
}

/**
 * Reads the job loading period from the scheduler properties.
 *
 * @return the parsed period, or {@code -1} when the property is not set, is null or empty, or cannot
 *         be parsed (in which case a warning is logged and the setting is ignored)
 */
private long readLoadJobPeriod() {
    if (!PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.isSet()) {
        return -1;
    }
    String periodStr = PASchedulerProperties.SCHEDULER_DB_LOAD_JOB_PERIOD.getValueAsString();
    if (periodStr == null || periodStr.isEmpty()) {
        return -1;
    }
    try {
        return Tools.parsePeriod(periodStr);
    } catch (IllegalArgumentException e) {
        logger.warn("Invalid load job period string: " + periodStr + ", this setting is ignored", e);
        return -1;
    }
}
Also used : ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) NamedThreadFactory(org.objectweb.proactive.utils.NamedThreadFactory) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) TimeUnit(java.util.concurrent.TimeUnit) SchedulerAuthentication(org.ow2.proactive.scheduler.authentication.SchedulerAuthentication) FileSystemException(org.objectweb.proactive.extensions.dataspaces.exceptions.FileSystemException) KeyException(java.security.KeyException) UnknownJobException(org.ow2.proactive.scheduler.common.exception.UnknownJobException) TaskCouldNotRestartException(org.ow2.proactive.scheduler.common.exception.TaskCouldNotRestartException) InvalidChannelException(org.ow2.proactive.scheduler.synchronization.InvalidChannelException) JobCreationException(org.ow2.proactive.scheduler.common.exception.JobCreationException) PermissionException(org.ow2.proactive.scheduler.common.exception.PermissionException) NotConnectedException(org.ow2.proactive.scheduler.common.exception.NotConnectedException) AlreadyConnectedException(org.ow2.proactive.scheduler.common.exception.AlreadyConnectedException) UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) TaskCouldNotStartException(org.ow2.proactive.scheduler.common.exception.TaskCouldNotStartException) JobValidationException(org.ow2.proactive.scheduler.common.exception.JobValidationException) JobAlreadyFinishedException(org.ow2.proactive.scheduler.common.exception.JobAlreadyFinishedException) SubmissionClosedException(org.ow2.proactive.scheduler.common.exception.SubmissionClosedException) DatabaseManagerException(org.ow2.proactive.db.DatabaseManagerException) TaskSkippedException(org.ow2.proactive.scheduler.common.exception.TaskSkippedException) ProActiveException(org.objectweb.proactive.core.ProActiveException) SignalApiException(org.ow2.proactive.scheduler.signal.SignalApiException) 
RecoveredSchedulerState(org.ow2.proactive.scheduler.core.db.RecoveredSchedulerState) RMProxy(org.ow2.proactive.scheduler.core.rmproxies.RMProxy) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) SchedulerStateRecoverHelper(org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper) TableSizeMonitorRunner(org.ow2.proactive.scheduler.core.helpers.TableSizeMonitorRunner) DataSpacesFileObject(org.objectweb.proactive.extensions.dataspaces.api.DataSpacesFileObject) ActiveObject(org.objectweb.proactive.extensions.annotation.ActiveObject) PAActiveObject(org.objectweb.proactive.api.PAActiveObject) JobsMemoryMonitorRunner(org.ow2.proactive.scheduler.core.helpers.JobsMemoryMonitorRunner) RMProxiesManager(org.ow2.proactive.scheduler.core.rmproxies.RMProxiesManager)

Example 3 with DataSpaceServiceStarter

use of org.ow2.proactive.scheduler.core.DataSpaceServiceStarter in project scheduling by ow2-proactive.

From the class TestDataSpaceConfiguration, method runStarter:

/**
 * Points the default input, output, global and user data space properties at a locally deployed
 * file system server, starts a data space application for {@code username}, and checks the real
 * URIs that each space resolves to.
 *
 * @return {@code true} when every assertion passed
 * @throws Exception if the file system server, the naming service or the data space application
 *                   cannot be started, or if a space cannot be resolved
 */
public Boolean runStarter() throws Exception {
    File spFile = new File(IOSPACE);
    File spFileWithUserDir = new File(IOSPACE, username);
    spFile.mkdirs();
    spFileWithUserDir.mkdirs();
    // deleteOnExit removes files in the REVERSE order of registration: the parent directory must be
    // registered first so that the user directory is deleted before it and the parent is empty by then
    spFile.deleteOnExit();
    spFileWithUserDir.deleteOnExit();

    filesServerIn = new FileSystemServerDeployer("space", IOSPACE, true, true);
    String[] spaceurls = filesServerIn.getVFSRootURLs();
    String[] userdirUrls = DataSpaceServiceStarter.urlsWithUserDir(spaceurls, username);
    ArrayList<String> expected = new ArrayList<>(Arrays.asList(spaceurls));
    ArrayList<String> expectedWithUserDir = new ArrayList<>(Arrays.asList(userdirUrls));

    // the four default spaces all point at the same server, local path and host
    PASchedulerProperties.DATASPACE_DEFAULTINPUT_URL.updateProperty(DataSpaceServiceStarter.urlsToDSConfigProperty(spaceurls));
    PASchedulerProperties.DATASPACE_DEFAULTINPUT_LOCALPATH.updateProperty(IOSPACE);
    PASchedulerProperties.DATASPACE_DEFAULTINPUT_HOSTNAME.updateProperty(HOSTNAME);
    PASchedulerProperties.DATASPACE_DEFAULTOUTPUT_URL.updateProperty(DataSpaceServiceStarter.urlsToDSConfigProperty(spaceurls));
    PASchedulerProperties.DATASPACE_DEFAULTOUTPUT_LOCALPATH.updateProperty(IOSPACE);
    PASchedulerProperties.DATASPACE_DEFAULTOUTPUT_HOSTNAME.updateProperty(HOSTNAME);
    PASchedulerProperties.DATASPACE_DEFAULTGLOBAL_URL.updateProperty(DataSpaceServiceStarter.urlsToDSConfigProperty(spaceurls));
    PASchedulerProperties.DATASPACE_DEFAULTGLOBAL_LOCALPATH.updateProperty(IOSPACE);
    PASchedulerProperties.DATASPACE_DEFAULTGLOBAL_HOSTNAME.updateProperty(HOSTNAME);
    PASchedulerProperties.DATASPACE_DEFAULTUSER_URL.updateProperty(DataSpaceServiceStarter.urlsToDSConfigProperty(spaceurls));
    PASchedulerProperties.DATASPACE_DEFAULTUSER_LOCALPATH.updateProperty(IOSPACE);
    PASchedulerProperties.DATASPACE_DEFAULTUSER_HOSTNAME.updateProperty(HOSTNAME);

    DataSpaceServiceStarter dsServiceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
    dsServiceStarter.startNamingService();
    NamingService namingService = dsServiceStarter.getNamingService();

    TaskDataSpaceApplication jdsa = new TaskDataSpaceApplication(appid, namingService);
    jdsa.startDataSpaceApplication(null, null, null, null, username, null, null);
    try {
        DataSpacesNodes.configureApplication(PAActiveObject.getNode(), appid, dsServiceStarter.getNamingServiceURL());

        DataSpacesFileObject inputSpace = PADataSpaces.resolveDefaultInput();
        DataSpacesFileObject outputSpace = PADataSpaces.resolveDefaultOutput();
        DataSpacesFileObject globalSpace = PADataSpaces.resolveOutput(SchedulerConstants.GLOBALSPACE_NAME);
        DataSpacesFileObject userSpace = PADataSpaces.resolveOutput(SchedulerConstants.USERSPACE_NAME);

        // input, output and user spaces are expected under the user directory, the global space is not
        Assert.assertEquals(expectedWithUserDir, inputSpace.getAllRealURIs());
        Assert.assertEquals(expectedWithUserDir, outputSpace.getAllRealURIs());
        Assert.assertEquals(expected, globalSpace.getAllRealURIs());
        Assert.assertEquals(expectedWithUserDir, userSpace.getAllRealURIs());
    } finally {
        // terminate even when a resolution or an assertion fails, so the termination is never skipped
        jdsa.terminateDataSpaceApplication();
    }
    return true;
}
Also used : FileSystemServerDeployer(org.objectweb.proactive.extensions.vfsprovider.FileSystemServerDeployer) DataSpaceServiceStarter(org.ow2.proactive.scheduler.core.DataSpaceServiceStarter) SpaceInstanceInfo(org.objectweb.proactive.extensions.dataspaces.core.SpaceInstanceInfo) DataSpacesFileObject(org.objectweb.proactive.extensions.dataspaces.api.DataSpacesFileObject) ArrayList(java.util.ArrayList) TaskDataSpaceApplication(org.ow2.proactive.scheduler.job.TaskDataSpaceApplication) NamingService(org.objectweb.proactive.extensions.dataspaces.core.naming.NamingService) File(java.io.File) HashSet(java.util.HashSet)

Example 4 with DataSpaceServiceStarter

use of org.ow2.proactive.scheduler.core.DataSpaceServiceStarter in project scheduling by ow2-proactive.

From the class SchedulerDataspace, method init:

/**
 * Starts the data space naming service and a data space application created from the given task id.
 *
 * @param jobId id of the job, passed as the last argument when starting the data space application
 * @param taskId id of the task; its string form is the first argument used to build the
 *               {@code TaskDataSpaceApplication}
 * @param user user name passed on when starting the data space application
 * @throws Exception if the naming service or the data space application cannot be started
 */
public void init(JobId jobId, TaskId taskId, String user) throws Exception {
    this.dataSpaceServiceStarter = DataSpaceServiceStarter.getDataSpaceServiceStarter();
    this.dataSpaceServiceStarter.startNamingService();

    String applicationId = taskId.toString();
    this.taskDataSpaceApplication = new TaskDataSpaceApplication(applicationId, this.dataSpaceServiceStarter.getNamingService());
    this.taskDataSpaceApplication.startDataSpaceApplication(null, null, null, null, user, null, jobId);
}
Also used : TaskDataSpaceApplication(org.ow2.proactive.scheduler.job.TaskDataSpaceApplication)

Aggregations

DataSpacesFileObject (org.objectweb.proactive.extensions.dataspaces.api.DataSpacesFileObject)2 TaskDataSpaceApplication (org.ow2.proactive.scheduler.job.TaskDataSpaceApplication)2 File (java.io.File)1 IOException (java.io.IOException)1 KeyException (java.security.KeyException)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 ExecutorService (java.util.concurrent.ExecutorService)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 ScheduledThreadPoolExecutor (java.util.concurrent.ScheduledThreadPoolExecutor)1 TimeUnit (java.util.concurrent.TimeUnit)1 ActiveObjectCreationException (org.objectweb.proactive.ActiveObjectCreationException)1 PAActiveObject (org.objectweb.proactive.api.PAActiveObject)1 ProActiveException (org.objectweb.proactive.core.ProActiveException)1 ActiveObject (org.objectweb.proactive.extensions.annotation.ActiveObject)1 SpaceInstanceInfo (org.objectweb.proactive.extensions.dataspaces.core.SpaceInstanceInfo)1 NamingService (org.objectweb.proactive.extensions.dataspaces.core.naming.NamingService)1 FileSystemException (org.objectweb.proactive.extensions.dataspaces.exceptions.FileSystemException)1 FileSystemServerDeployer (org.objectweb.proactive.extensions.vfsprovider.FileSystemServerDeployer)1 NamedThreadFactory (org.objectweb.proactive.utils.NamedThreadFactory)1