Search in sources :

Example 1 with TaskRecoveryData

Use of org.ow2.proactive.scheduler.task.internal.TaskRecoveryData in the project "scheduling" by ow2-proactive.

The method createExecution of the class SchedulingMethodImpl.

/**
 * Create launcher and try to start the task.
 * <p>
 * The job is locked for the whole duration of the call and unlocked in a
 * {@code finally} block. When the task can be started, the actual call to the
 * launcher is submitted to the thread pool with a timeout, and the task is
 * then finalized as started.
 *
 * @param nodeSet the node set containing every available nodes that can be used for execution
 * @param node the node on which to start the task
 * @param job the job that owns the task to be started
 * @param task the task to be started
 * @param taskDescriptor the descriptor of the task to be started
 * @return {@code true} if the start action was submitted and the task start was finalized;
 *         {@code false} if there are fewer available nodes than the task needs, if the task
 *         is paused, or if locking the job returned {@code null}
 * @throws Exception any exception raised while creating the launcher or submitting the start
 *         action; before it is rethrown, a best-effort release of the nodes is attempted, and
 *         a failure of that release is attached to the rethrown exception as a suppressed
 *         exception
 */
protected boolean createExecution(NodeSet nodeSet, Node node, InternalJob job, InternalTask task, TaskDescriptor taskDescriptor) throws Exception {
    LiveJobs.JobData jobData = null;
    try {
        jobData = schedulingService.lockJob(job.getId());

        // the task can only be started if enough nodes are available, the task is not paused
        // and the job could be locked (conditions evaluated in the same order as before)
        boolean canStart = nodeSet.size() >= task.getNumberOfNodesNeeded() && task.getStatus() != TaskStatus.PAUSED && jobData != null;
        if (!canStart) {
            return false;
        }

        // start dataspace app for this job
        DataSpaceServiceStarter dsStarter = schedulingService.getInfrastructure().getDataSpaceServiceStarter();
        job.startDataSpaceApplication(dsStarter.getNamingService(), ImmutableList.of(task));

        // extra nodes handed to the task; stays empty for a non-parallel task
        NodeSet nodes = new NodeSet();
        try {
            // create launcher
            TaskLauncher launcher = task.createLauncher(node);
            activeObjectCreationRetryTimeNumber = ACTIVEOBJECT_CREATION_RETRY_TIME_NUMBER;
            // NOTE(review): presumably 'node' is the first element of nodeSet - verify against caller
            nodeSet.remove(0);

            // we will need to update this code once topology will be allowed for single-node task
            if (task.isParallel()) {
                // a parallel task takes every remaining node of the set
                nodes = new NodeSet(nodeSet);
                task.getExecuterInformation().addNodes(nodes);
                nodeSet.clear();
            }

            // set nodes in the executable container
            task.getExecutableContainer().setNodes(nodes);
            tlogger.debug(task.getId(), "deploying");

            // above 500 parent tasks, it is worth adjusting the timeout proportionally;
            // otherwise reset the dotaskActionTimeout to its default value.
            int parentCount = taskDescriptor.getParents().size();
            if (parentCount > 500) {
                dotaskActionTimeout = (int) (parentCount / 500.0 * PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt());
            } else {
                dotaskActionTimeout = PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt();
            }

            boolean taskRecoverable = getRMProxiesManager().getRmProxy().areNodesRecoverable(nodes);
            String terminateNotificationNodeURL = PAActiveObject.getActiveObjectNode(terminateNotification).getNodeInformation().getURL();
            TaskRecoveryData taskRecoveryData = new TaskRecoveryData(terminateNotificationNodeURL, taskRecoverable);

            threadPool.submitWithTimeout(new TimedDoTaskAction(job, taskDescriptor, launcher, schedulingService, terminateNotification, corePrivateKey, taskRecoveryData), dotaskActionTimeout, TimeUnit.MILLISECONDS);

            // we advertise here that the task is started, however
            // this is not entirely true: the only thing we are sure
            // about at this point is that we submitted to the thread
            // pool the action that will call the "doTask" of the task
            // launcher. There is thus a small gap here where the task
            // is seen as started whereas it is not yet started. We
            // cannot easily move the task started notification because
            // 1) it makes the job lock acquisition less predictable
            // (because the TimeDoTaskAction will have to compete with
            // the SchedulingMethodImpl)
            // and more importantly 2) the
            // SchedulingMethodImpl#createExecution may happen to be
            // called a second time for the task that is currently being
            // started by the TimedDoTaskAction.
            finalizeStarting(job, task, node, launcher);
            return true;
        } catch (Exception t) {
            try {
                // if there was a problem, free nodeSet for multi-nodes task
                nodes.add(node);
                releaseNodes(job, nodes);
            } catch (Throwable releaseFailure) {
                // releasing the nodes is best effort: the original failure stays the primary
                // exception, but the cleanup failure is recorded instead of being lost
                // (self-suppression is not permitted by Throwable#addSuppressed)
                if (releaseFailure != t) {
                    t.addSuppressed(releaseFailure);
                }
            }
            throw t;
        }
    } finally {
        if (jobData != null) {
            jobData.unlock();
        }
    }
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) TaskLauncher(org.ow2.proactive.scheduler.task.TaskLauncher) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) TopologyDisabledException(org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException) InvalidScriptException(org.ow2.proactive.scripting.InvalidScriptException) RMProxyCreationException(org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException) IOException(java.io.IOException) TaskRecoveryData(org.ow2.proactive.scheduler.task.internal.TaskRecoveryData)

Aggregations

IOException (java.io.IOException): 1, ActiveObjectCreationException (org.objectweb.proactive.ActiveObjectCreationException): 1, TopologyDisabledException (org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException): 1, RMProxyCreationException (org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException): 1, TaskLauncher (org.ow2.proactive.scheduler.task.TaskLauncher): 1, TaskRecoveryData (org.ow2.proactive.scheduler.task.internal.TaskRecoveryData): 1, InvalidScriptException (org.ow2.proactive.scripting.InvalidScriptException): 1, NodeSet (org.ow2.proactive.utils.NodeSet): 1