Search in sources :

Example 1 with TaskDescriptor

use of org.ow2.proactive.scheduler.common.TaskDescriptor in project scheduling by ow2-proactive.

Class SchedulingMethodImpl, method selectAndStartTasks.

/**
 * Walks through the tasks returned by the policy, batch by batch, asks for nodes for each
 * group of compatible tasks and tries to start those tasks on the nodes obtained.
 * <p>
 * {@code freeResources} is updated in place: the URLs of the nodes obtained through
 * {@code getRMNodes} are removed from it, and the URLs of the nodes handed back through
 * {@code releaseNodes} are added to it again.
 * <p>
 * The method stops as soon as there are no more tasks to examine, {@code freeResources}
 * is empty, or {@code activeObjectCreationRetryTimeNumber} has reached 0.
 *
 * @param currentPolicy policy asked, for each task, whether it can be executed on the obtained nodes
 * @param jobMap job descriptors indexed by job id
 * @param freeResources URLs of the nodes currently considered free (modified by this method)
 * @param fullListOfTaskRetrievedFromPolicy every eligible task returned by the policy
 * @return the number of tasks for which {@code createExecution} returned {@code true}
 */
private int selectAndStartTasks(Policy currentPolicy, Map<JobId, JobDescriptor> jobMap, Set<String> freeResources, LinkedList<EligibleTaskDescriptor> fullListOfTaskRetrievedFromPolicy) {
    int numberOfTaskStarted = 0;
    VariableBatchSizeIterator progressiveIterator = new VariableBatchSizeIterator(fullListOfTaskRetrievedFromPolicy);
    while (progressiveIterator.hasMoreElements() && !freeResources.isEmpty()) {
        // the size of the next batch is driven by the number of free resources
        LinkedList<EligibleTaskDescriptor> taskRetrievedFromPolicy = new LinkedList<>(progressiveIterator.getNextElements(freeResources.size()));
        if (logger.isDebugEnabled()) {
            loggingEligibleTasksDetails(fullListOfTaskRetrievedFromPolicy, taskRetrievedFromPolicy);
        }
        updateVariablesForTasksToSchedule(taskRetrievedFromPolicy);
        for (EligibleTaskDescriptor etd : taskRetrievedFromPolicy) {
            // load and Initialize the executable container
            loadAndInit(((EligibleTaskDescriptorImpl) etd).getInternal());
        }
        while (!taskRetrievedFromPolicy.isEmpty()) {
            if (freeResources.isEmpty()) {
                break;
            }
            // get the next compatible tasks from the whole returned policy tasks
            LinkedList<EligibleTaskDescriptor> tasksToSchedule = new LinkedList<>();
            int neededResourcesNumber = 0;
            while (!taskRetrievedFromPolicy.isEmpty() && neededResourcesNumber == 0) {
                // the loop will search for next compatible task until it find something
                neededResourcesNumber = getNextcompatibleTasks(jobMap, taskRetrievedFromPolicy, freeResources.size(), tasksToSchedule);
            }
            // both messages are only built when debug logging is actually enabled
            if (logger.isDebugEnabled()) {
                logger.debug("tasksToSchedule : " + tasksToSchedule);
                logger.debug("required number of nodes : " + neededResourcesNumber);
            }
            if (neededResourcesNumber == 0 || tasksToSchedule.isEmpty()) {
                break;
            }
            NodeSet nodeSet = getRMNodes(jobMap, neededResourcesNumber, tasksToSchedule, freeResources);
            if (nodeSet != null) {
                // the obtained nodes are no longer free
                freeResources.removeAll(nodeSet.getAllNodesUrls());
            }
            // start selected tasks
            Node node = null;
            InternalJob currentJob = null;
            try {
                while (nodeSet != null && !nodeSet.isEmpty()) {
                    EligibleTaskDescriptor taskDescriptor = tasksToSchedule.removeFirst();
                    currentJob = ((JobDescriptorImpl) jobMap.get(taskDescriptor.getJobId())).getInternal();
                    InternalTask internalTask = ((EligibleTaskDescriptorImpl) taskDescriptor).getInternal();
                    if (currentPolicy.isTaskExecutable(nodeSet, taskDescriptor)) {
                        // create launcher and try to start the task
                        node = nodeSet.get(0);
                        if (createExecution(nodeSet, node, currentJob, internalTask, taskDescriptor)) {
                            numberOfTaskStarted++;
                        }
                    }
                    // if every task that should be launched have been removed
                    if (tasksToSchedule.isEmpty()) {
                        // get back unused nodes to the RManager
                        if (!nodeSet.isEmpty()) {
                            releaseNodes(currentJob, nodeSet);
                            freeResources.addAll(nodeSet.getAllNodesUrls());
                        }
                        // and leave the loop
                        break;
                    }
                }
            } catch (ActiveObjectCreationException e1) {
                // Something goes wrong with the active object creation (createLauncher)
                logger.warn("An exception occured while creating the task launcher.", e1);
                // so try to get back every remaining nodes to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
                // give up on this batch once the retry counter is exhausted
                if (--activeObjectCreationRetryTimeNumber == 0) {
                    break;
                }
            } catch (Exception e1) {
                // if we are here, something happened while launching the current task.
                logger.warn("An exception occured while starting task.", e1);
                // so try to get back every remaining nodes to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
            }
        }
        if (freeResources.isEmpty()) {
            break;
        }
        // the retry counter was exhausted in the inner loop: stop examining further batches
        if (activeObjectCreationRetryTimeNumber == 0) {
            break;
        }
    }
    return numberOfTaskStarted;
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) EligibleTaskDescriptor(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptor) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) Node(org.objectweb.proactive.core.node.Node) EligibleTaskDescriptorImpl(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptorImpl) VariableBatchSizeIterator(org.ow2.proactive.scheduler.core.helpers.VariableBatchSizeIterator) LinkedList(java.util.LinkedList) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) TopologyDisabledException(org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException) InvalidScriptException(org.ow2.proactive.scripting.InvalidScriptException) RMProxyCreationException(org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException) IOException(java.io.IOException)

Example 2 with TaskDescriptor

use of org.ow2.proactive.scheduler.common.TaskDescriptor in project scheduling by ow2-proactive.

Class SchedulingMethodImpl, method updateVariablesForTasksToSchedule.

/**
 * Asks the internal task behind each of the given descriptors to update its variables,
 * passing it the scheduling service.
 *
 * @param tasksToSchedule descriptors of the tasks whose variables must be updated
 */
private void updateVariablesForTasksToSchedule(LinkedList<EligibleTaskDescriptor> tasksToSchedule) {
    for (EligibleTaskDescriptor descriptor : tasksToSchedule) {
        // every descriptor is expected to be an EligibleTaskDescriptorImpl wrapping its InternalTask
        ((EligibleTaskDescriptorImpl) descriptor).getInternal().updateVariables(schedulingService);
    }
}
Also used : EligibleTaskDescriptor(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptor) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) EligibleTaskDescriptorImpl(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptorImpl)

Example 3 with TaskDescriptor

use of org.ow2.proactive.scheduler.common.TaskDescriptor in project scheduling by ow2-proactive.

Class SchedulingMethodImpl, method createExecution.

/**
 * Create launcher and try to start the task.
 * <p>
 * The job is locked through the scheduling service for the duration of the attempt and
 * unlocked in every case before this method returns or throws.
 *
 * @param nodeSet the node set containing every available nodes that can be used for execution
 * @param node the node on which to start the task
 * @param job the job that owns the task to be started
 * @param task the task to be started
 * @param taskDescriptor the descriptor of the task to be started
 * @return {@code true} if the start action was submitted and the task was advertised as
 *         started; {@code false} if nothing was attempted because {@code nodeSet} holds
 *         fewer nodes than the task needs, the task is paused, or the job could not be locked
 * @throws Exception any exception raised while creating the launcher or submitting the start
 *         action; before it is rethrown, a best-effort release of {@code node} (and of the
 *         extra nodes of a parallel task) is attempted, and a failure of that release is
 *         attached to the rethrown exception as a suppressed exception
 */
protected boolean createExecution(NodeSet nodeSet, Node node, InternalJob job, InternalTask task, TaskDescriptor taskDescriptor) throws Exception {
    TaskLauncher launcher = null;
    LiveJobs.JobData jobData = null;
    try {
        jobData = schedulingService.lockJob(job.getId());
        // enough nodes are available, the task is not paused and the job lock was obtained
        if (nodeSet.size() >= task.getNumberOfNodesNeeded() && (task.getStatus() != TaskStatus.PAUSED) && (jobData != null)) {
            // start dataspace app for this job
            DataSpaceServiceStarter dsStarter = schedulingService.getInfrastructure().getDataSpaceServiceStarter();
            job.startDataSpaceApplication(dsStarter.getNamingService(), ImmutableList.of(task));
            NodeSet nodes = new NodeSet();
            try {
                // create launcher
                launcher = task.createLauncher(node);
                // the launcher was created: reset the retry counter to its initial value
                activeObjectCreationRetryTimeNumber = ACTIVEOBJECT_CREATION_RETRY_TIME_NUMBER;
                nodeSet.remove(0);
                // we will need to update this code once topology will be allowed for single-node task
                if (task.isParallel()) {
                    // a parallel task takes every remaining node of the set
                    nodes = new NodeSet(nodeSet);
                    task.getExecuterInformation().addNodes(nodes);
                    nodeSet.clear();
                }
                // set nodes in the executable container
                task.getExecutableContainer().setNodes(nodes);
                tlogger.debug(task.getId(), "deploying");
                // above 500 parent tasks, it is worth adjusting.
                if (taskDescriptor.getParents().size() > 500) {
                    dotaskActionTimeout = (int) (taskDescriptor.getParents().size() / 500.0 * PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt());
                } else {
                    // reset the dotaskActionTimeout to its default value otherwise.
                    dotaskActionTimeout = PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt();
                }
                boolean taskRecoverable = getRMProxiesManager().getRmProxy().areNodesRecoverable(nodes);
                String terminateNotificationNodeURL = PAActiveObject.getActiveObjectNode(terminateNotification).getNodeInformation().getURL();
                TaskRecoveryData taskRecoveryData = new TaskRecoveryData(terminateNotificationNodeURL, taskRecoverable);
                threadPool.submitWithTimeout(new TimedDoTaskAction(job, taskDescriptor, launcher, schedulingService, terminateNotification, corePrivateKey, taskRecoveryData), dotaskActionTimeout, TimeUnit.MILLISECONDS);
                // we advertise here that the task is started, however
                // this is not entirely true: the only thing we are sure
                // about at this point is that we submitted to the thread
                // pool the action that will call the "doTask" of the task
                // launcher. There is thus a small gap here where the task
                // is seen as started whereas it is not yet started. We
                // cannot easily move the task started notification because
                // 1) it makes the job lock acquisition less predictable
                // (because the TimeDoTaskAction will have to compete with
                // the SchedulingMethodImpl)
                // and more importantly 2) the
                // SchedulingMethodImpl#createExecution may happen to be
                // called a second time for the task that is currently being
                // started by the TimedDoTaskAction.
                finalizeStarting(job, task, node, launcher);
                return true;
            } catch (Exception t) {
                try {
                    // if there was a problem, free nodeSet for multi-nodes task
                    nodes.add(node);
                    releaseNodes(job, nodes);
                } catch (Throwable ni) {
                    // releasing is best effort and must never mask the original failure,
                    // but the release problem is kept on the exception that is rethrown
                    // instead of being silently dropped
                    if (ni != t) {
                        t.addSuppressed(ni);
                    }
                }
                throw t;
            }
        } else {
            return false;
        }
    } finally {
        if (jobData != null) {
            jobData.unlock();
        }
    }
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) TaskLauncher(org.ow2.proactive.scheduler.task.TaskLauncher) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) TopologyDisabledException(org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException) InvalidScriptException(org.ow2.proactive.scripting.InvalidScriptException) RMProxyCreationException(org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException) IOException(java.io.IOException) TaskRecoveryData(org.ow2.proactive.scheduler.task.internal.TaskRecoveryData)

Example 4 with TaskDescriptor

use of org.ow2.proactive.scheduler.common.TaskDescriptor in project scheduling by ow2-proactive.

Class JobDescriptorImpl, method terminate.

/**
 * Removes the given task from the running tasks (or from the paused tasks when
 * {@code inErrorTask} is {@code true}) and, for a {@code TASKSFLOW} job, updates the
 * dependency bookkeeping of its children and parents.
 * <p>
 * Each child whose count is 0 once its parent count has been decreased is moved to the
 * paused, running or eligible tasks depending on the job status and on the status of its
 * internal task. Children found in {@code SKIPPED} status are put in the running tasks
 * and then terminated in turn once the given task has been removed.
 *
 * @param taskId the task to remove
 * @param inErrorTask {@code true} to look the task up in, and remove it from, the paused
 *        tasks instead of the running tasks
 */
public void terminate(TaskId taskId, boolean inErrorTask) {
    Map<TaskId, ? extends TaskDescriptor> sourceTasks = inErrorTask ? pausedTasks : runningTasks;
    List<TaskId> skippedChildren = new ArrayList<>();

    // dependencies are only handled for task flow jobs
    TaskDescriptor terminated = null;
    if (getInternal().getType() == JobType.TASKSFLOW) {
        terminated = sourceTasks.get(taskId);
    }

    if (terminated != null) {
        for (TaskDescriptor child : terminated.getChildren()) {
            decreaseParentCount(child);
            EligibleTaskDescriptorImpl childImpl = (EligibleTaskDescriptorImpl) child;
            if (childImpl.getCount() != 0) {
                // this child is still waiting for other parents
                continue;
            }
            boolean parkAsPaused = internalJob.getStatus() == JobStatus.PAUSED ||
                                   (internalJob.getStatus() == JobStatus.IN_ERROR &&
                                    childImpl.getInternal().getStatus() == TaskStatus.PAUSED);
            if (parkAsPaused) {
                pausedTasks.put(child.getTaskId(), (EligibleTaskDescriptor) child);
            } else if (childImpl.getInternal().getStatus() == TaskStatus.SKIPPED) {
                // a skipped child is registered as running, then terminated below
                runningTasks.put(child.getTaskId(), (EligibleTaskDescriptor) child);
                skippedChildren.add(child.getTaskId());
            } else {
                eligibleTasks.put(child.getTaskId(), (EligibleTaskDescriptor) child);
            }
        }
        decreaseChildrenCountForAllParents(terminated);
    }

    sourceTasks.remove(taskId);

    for (TaskId skippedChild : skippedChildren) {
        terminate(skippedChild);
    }
}
Also used : TaskDescriptor(org.ow2.proactive.scheduler.common.TaskDescriptor) TaskId(org.ow2.proactive.scheduler.common.task.TaskId) ArrayList(java.util.ArrayList)

Example 5 with TaskDescriptor

use of org.ow2.proactive.scheduler.common.TaskDescriptor in project scheduling by ow2-proactive.

Class JobDescriptorImpl, method doLoop.

/**
 * Complete LOOP action on JobDescriptor side.
 * <p>
 * A new descriptor is created for every task of {@code tree}. The children of the
 * initiator's descriptor are re-attached to the descriptor of {@code newInit}, the
 * dependencies between the new descriptors are rebuilt, the descriptor of {@code target}
 * is handed to {@code setNewLoopTaskToPausedIfJobIsPaused} and
 * {@code putNewLoopTaskIntoPausedOrEligableList}, and the initiator is finally removed
 * from the running tasks.
 *
 * @param initiator Task initiating the LOOP action
 * @param tree InternalTask tree of replicated tasks
 * @param target Target task of the LOOP action
 * @param newInit task of {@code tree} whose descriptor takes over the children of the initiator
 */
public void doLoop(TaskId initiator, Map<TaskId, InternalTask> tree, InternalTask target, InternalTask newInit) {
    // one fresh descriptor per replicated task, indexed by the id of that task
    Map<TaskId, EligibleTaskDescriptorImpl> descriptors = new HashMap<>();
    for (InternalTask replicated : tree.values()) {
        descriptors.put(replicated.getId(), new EligibleTaskDescriptorImpl(replicated));
    }

    EligibleTaskDescriptorImpl oldEnd = (EligibleTaskDescriptorImpl) runningTasks.get(initiator);
    EligibleTaskDescriptorImpl newStart = descriptors.get(target.getId());
    EligibleTaskDescriptorImpl newEnd = descriptors.get(newInit.getId());

    // move the children of the initiator's descriptor under the descriptor of newInit
    for (TaskDescriptor formerChild : oldEnd.getChildren()) {
        newEnd.addChild(formerChild);
        formerChild.getParents().remove(oldEnd);
        formerChild.getParents().add(newEnd);
    }
    oldEnd.clearChildren();

    // recreate the dependencies
    for (InternalTask replicated : tree.values()) {
        TaskId replicatedId = replicated.getTaskInfo().getTaskId();
        EligibleTaskDescriptorImpl down = descriptors.get(replicatedId);

        List<InternalTask> parents = new ArrayList<>();
        // becomes true as soon as an if-branch or joined branches are present; in that
        // case every collected parent is handled as a 'weak' dependency
        boolean weakLink = false;
        if (replicated.hasDependences()) {
            parents.addAll(replicated.getIDependences());
        }
        if (replicated.getIfBranch() != null) {
            weakLink = true;
            parents.add(replicated.getIfBranch());
        }
        if (replicated.getJoinedBranches() != null) {
            weakLink = true;
            parents.addAll(replicated.getJoinedBranches());
        }

        // NOTE(review): 'target' is an InternalTask while 'replicatedId' is a TaskId, so
        // this equals() call compares objects of different types - confirm whether
        // target.getId() was intended before changing anything here
        if (parents.isEmpty() || target.equals(replicatedId)) {
            continue;
        }

        for (InternalTask parent : parents) {
            if (parent == null) {
                continue;
            }
            EligibleTaskDescriptorImpl up = descriptors.get(parent.getTaskInfo().getTaskId());
            if (weakLink) {
                // 'weak' dependencies from FlowAction#IF are not
                // represented in TaskDescriptor
                branchTasks.put(down.getTaskId(), down);
            } else {
                // a dependency on the initiator is bound to its running descriptor
                if (parent.getId().equals(initiator)) {
                    up = (EligibleTaskDescriptorImpl) runningTasks.get(initiator);
                }
                up.addChild(down);
                down.addParent(up);
            }
        }
    }

    setNewLoopTaskToPausedIfJobIsPaused(newStart);
    putNewLoopTaskIntoPausedOrEligableList(target.getId(), newStart);
    runningTasks.remove(initiator);
}
Also used : TaskDescriptor(org.ow2.proactive.scheduler.common.TaskDescriptor) TaskId(org.ow2.proactive.scheduler.common.task.TaskId) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) ArrayList(java.util.ArrayList)

Aggregations

TaskDescriptor (org.ow2.proactive.scheduler.common.TaskDescriptor)21 EligibleTaskDescriptor (org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptor)15 JobDescriptor (org.ow2.proactive.scheduler.common.JobDescriptor)12 JobId (org.ow2.proactive.scheduler.common.job.JobId)10 InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask)10 Test (org.junit.Test)8 TaskId (org.ow2.proactive.scheduler.common.task.TaskId)7 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4 HashMap (java.util.HashMap)3 EligibleTaskDescriptorImpl (org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptorImpl)3 InternalJob (org.ow2.proactive.scheduler.job.InternalJob)3 TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl)3 IOException (java.io.IOException)2 Collection (java.util.Collection)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 ActiveObjectCreationException (org.objectweb.proactive.ActiveObjectCreationException)2 TopologyDisabledException (org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException)2 UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException)2 UnknownTaskException (org.ow2.proactive.scheduler.common.exception.UnknownTaskException)2