Search in sources :

Example 6 with InternalTask

use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.

the class LiveJobs method restartTaskOnNodeFailure.

/**
 * Reacts to a node failure that hit the given task.
 * <p>
 * The task progress is reset and one "execution on failure" attempt is consumed.
 * If at least one attempt remains, the task is put in
 * {@code WAITING_ON_FAILURE}, listeners are notified, and the restart is
 * registered on the job and in the database. Otherwise the job's failed-task
 * counter is incremented and the job is ended with status {@code FAILED}.
 *
 * @param task the task that was running on the failed node
 * @param jobData holder giving access to the job owning the task
 * @param terminationData termination data forwarded to {@code endJob} when no retry is left
 */
private void restartTaskOnNodeFailure(InternalTask task, JobData jobData, TerminationData terminationData) {
    task.setProgress(0);
    task.decreaseNumberOfExecutionOnFailureLeft();
    tlogger.info(task.getId(), "number of retry on failure left " + task.getNumberOfExecutionOnFailureLeft());

    InternalJob owningJob = jobData.job;

    if (task.getNumberOfExecutionOnFailureLeft() <= 0) {
        // No retry left: count the task as failed and terminate the whole job.
        final String noRetryLeftMessage = "An error has occurred due to a node failure and the maximum amount of retries property has been reached.";
        owningJob.incrementNumberOfFailedTasksBy(1);
        endJob(jobData, terminationData, task, null, noRetryLeftMessage, JobStatus.FAILED);
        return;
    }

    // At least one retry left: park the task until it is restarted.
    task.setStatus(TaskStatus.WAITING_ON_FAILURE);
    owningJob.newWaitingTask();
    listener.taskStateUpdated(
        owningJob.getOwner(),
        new NotificationData<TaskInfo>(
            SchedulerEvent.TASK_WAITING_FOR_RESTART,
            // notify with a copy of the task info rather than the live instance
            new TaskInfoImpl((TaskInfoImpl) task.getTaskInfo())));
    owningJob.reStartTask(task);
    dbManager.taskRestarted(owningJob, task, null);
    tlogger.info(task.getId(), " is waiting for restart");
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) TaskInfoImpl(org.ow2.proactive.scheduler.task.TaskInfoImpl)

Example 7 with InternalTask

use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.

the class LiveJobs method jobRecovered.

/**
 * Registers a recovered job and re-creates the running-task bookkeeping for
 * every one of its tasks whose status is {@code RUNNING}.
 *
 * @param job the job that has been recovered
 */
void jobRecovered(InternalJob job) {
    jobs.put(job.getId(), new JobData(job));

    for (InternalTask recoveredTask : job.getITasks()) {
        if (recoveredTask.getStatus() != TaskStatus.RUNNING) {
            // only running tasks need an entry in runningTasksData
            continue;
        }

        logger.info("Recover task " + recoveredTask.getId() + " (" + recoveredTask.getName() + ") of job " +
                    job.getId() + " (" + job.getName() + ")");

        runningTasksData.put(
            TaskIdWrapper.wrap(recoveredTask.getId()),
            new RunningTaskData(recoveredTask,
                                job.getOwner(),
                                job.getCredentials(),
                                recoveredTask.getExecuterInformation().getLauncher()));
    }
}
Also used : InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask)

Example 8 with InternalTask

use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.

the class LiveJobs method restartTaskOnNodeFailure.

/**
 * Entry point used when the node executing {@code task} has failed.
 * <p>
 * Locks the job owning the task, removes the task from the running-task
 * registry, records a {@code NODEFAILED} termination for it and delegates the
 * retry-or-fail decision to the three-argument overload. The job lock is
 * always released before returning.
 *
 * @param task the task whose node failed
 * @return the termination data describing the task to terminate, or the value
 *         of {@code emptyResult} when the job cannot be locked or the task is
 *         not in {@code RUNNING} status
 * @throws IllegalStateException if the task has status {@code RUNNING} but has
 *         no entry in the running-task registry
 */
TerminationData restartTaskOnNodeFailure(InternalTask task) {
    JobData lockedJob = lockJob(task.getJobId());
    if (lockedJob == null) {
        return emptyResult(task.getId());
    }

    try {
        TaskId failedTaskId = task.getId();

        boolean stillRunning = task.getStatus() == TaskStatus.RUNNING;
        if (!stillRunning) {
            return emptyResult(failedTaskId);
        }

        RunningTaskData runningEntry = runningTasksData.remove(TaskIdWrapper.wrap(failedTaskId));
        if (runningEntry != null) {
            TerminationData termination = TerminationData.newTerminationData();
            termination.addTaskData(lockedJob.job,
                                    runningEntry,
                                    TerminationData.TerminationStatus.NODEFAILED,
                                    null);
            restartTaskOnNodeFailure(task, lockedJob, termination);
            return termination;
        }

        throw new IllegalStateException("Task " + task.getId() + " is not running.");
    } finally {
        lockedJob.unlock();
    }
}
Also used : TaskId(org.ow2.proactive.scheduler.common.task.TaskId)

Example 9 with InternalTask

use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.

the class LiveJobs method updateTasksInSchedulerState.

/**
 * Notifies the listener, for each given task id, that the task went from
 * running to finished.
 * <p>
 * A copy of the task info is sent with each notification. Task ids that the
 * job does not know are logged and skipped so that the remaining tasks are
 * still notified.
 *
 * @param job the job owning the tasks
 * @param tasksToUpdate ids of the tasks for which a notification must be sent
 */
private void updateTasksInSchedulerState(InternalJob job, Set<TaskId> tasksToUpdate) {
    for (TaskId tid : tasksToUpdate) {
        try {
            InternalTask t = job.getTask(tid);
            TaskInfo ti = new TaskInfoImpl((TaskInfoImpl) t.getTaskInfo());
            listener.taskStateUpdated(job.getOwner(), new NotificationData<>(SchedulerEvent.TASK_RUNNING_TO_FINISHED, ti));
        } catch (UnknownTaskException e) {
            // Pass the exception as the throwable argument (not as the message) so the
            // stack trace is logged, and say which task/job the failure relates to.
            logger.error("Unable to notify state update of task " + tid + " of job " + job.getId(), e);
        }
    }
}
Also used : TaskInfo(org.ow2.proactive.scheduler.common.task.TaskInfo) UnknownTaskException(org.ow2.proactive.scheduler.common.exception.UnknownTaskException) TaskId(org.ow2.proactive.scheduler.common.task.TaskId) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) TaskInfoImpl(org.ow2.proactive.scheduler.task.TaskInfoImpl)

Example 10 with InternalTask

use of org.ow2.proactive.scheduler.task.internal.InternalTask in project scheduling by ow2-proactive.

the class SchedulingMethodImpl method selectAndStartTasks.

/**
 * Walks through the tasks returned by the policy, batch by batch, acquires nodes
 * for groups of compatible tasks and tries to start them.
 * <p>
 * {@code freeResources} is updated in place: URLs of acquired nodes are removed
 * from it and URLs of nodes that are released are added back. The method stops
 * early when {@code freeResources} becomes empty or when the
 * {@code activeObjectCreationRetryTimeNumber} counter (declared outside this
 * method) reaches zero.
 *
 * @param currentPolicy policy asked whether a task is executable on the acquired nodes
 * @param jobMap job descriptors indexed by job id, used to resolve the job of each task
 * @param freeResources URLs of the nodes currently considered free; modified by this method
 * @param fullListOfTaskRetrievedFromPolicy all eligible tasks returned by the policy
 * @return the number of tasks for which {@code createExecution} returned {@code true}
 */
private int selectAndStartTasks(Policy currentPolicy, Map<JobId, JobDescriptor> jobMap, Set<String> freeResources, LinkedList<EligibleTaskDescriptor> fullListOfTaskRetrievedFromPolicy) {
    int numberOfTaskStarted = 0;
    VariableBatchSizeIterator progressiveIterator = new VariableBatchSizeIterator(fullListOfTaskRetrievedFromPolicy);
    // outer loop: one iteration per batch of tasks, as long as free resources remain
    while (progressiveIterator.hasMoreElements() && !freeResources.isEmpty()) {
        // the batch size requested from the iterator is the current number of free resources
        LinkedList<EligibleTaskDescriptor> taskRetrievedFromPolicy = new LinkedList<>(progressiveIterator.getNextElements(freeResources.size()));
        if (logger.isDebugEnabled()) {
            loggingEligibleTasksDetails(fullListOfTaskRetrievedFromPolicy, taskRetrievedFromPolicy);
        }
        updateVariablesForTasksToSchedule(taskRetrievedFromPolicy);
        for (EligibleTaskDescriptor etd : taskRetrievedFromPolicy) {
            // load and initialize the executable container
            loadAndInit(((EligibleTaskDescriptorImpl) etd).getInternal());
        }
        // middle loop: consume the current batch, one group of compatible tasks at a time
        while (!taskRetrievedFromPolicy.isEmpty()) {
            if (freeResources.isEmpty()) {
                break;
            }
            // get the next compatible tasks from the whole returned policy tasks
            LinkedList<EligibleTaskDescriptor> tasksToSchedule = new LinkedList<>();
            int neededResourcesNumber = 0;
            while (!taskRetrievedFromPolicy.isEmpty() && neededResourcesNumber == 0) {
                // the loop will search for the next compatible tasks until it finds something
                neededResourcesNumber = getNextcompatibleTasks(jobMap, taskRetrievedFromPolicy, freeResources.size(), tasksToSchedule);
            }
            if (logger.isDebugEnabled()) {
                logger.debug("tasksToSchedule : " + tasksToSchedule);
            }
            logger.debug("required number of nodes : " + neededResourcesNumber);
            // nothing schedulable was found in what remained of this batch
            if (neededResourcesNumber == 0 || tasksToSchedule.isEmpty()) {
                break;
            }
            NodeSet nodeSet = getRMNodes(jobMap, neededResourcesNumber, tasksToSchedule, freeResources);
            if (nodeSet != null) {
                // acquired nodes are no longer free
                freeResources.removeAll(nodeSet.getAllNodesUrls());
            }
            // start selected tasks
            Node node = null;
            InternalJob currentJob = null;
            try {
                while (nodeSet != null && !nodeSet.isEmpty()) {
                    EligibleTaskDescriptor taskDescriptor = tasksToSchedule.removeFirst();
                    currentJob = ((JobDescriptorImpl) jobMap.get(taskDescriptor.getJobId())).getInternal();
                    InternalTask internalTask = ((EligibleTaskDescriptorImpl) taskDescriptor).getInternal();
                    if (currentPolicy.isTaskExecutable(nodeSet, taskDescriptor)) {
                        // create launcher and try to start the task
                        node = nodeSet.get(0);
                        if (createExecution(nodeSet, node, currentJob, internalTask, taskDescriptor)) {
                            numberOfTaskStarted++;
                        }
                    }
                    // if every task that should be launched has been removed
                    if (tasksToSchedule.isEmpty()) {
                        // give unused nodes back to the resource manager
                        if (!nodeSet.isEmpty()) {
                            releaseNodes(currentJob, nodeSet);
                            freeResources.addAll(nodeSet.getAllNodesUrls());
                        }
                        // and leave the loop
                        break;
                    }
                }
            } catch (ActiveObjectCreationException e1) {
                // Something went wrong with the active object creation (createLauncher)
                logger.warn("An exception occured while creating the task launcher.", e1);
                // so try to give every remaining node back to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
                // each launcher creation failure consumes one retry; stop scheduling once none is left
                if (--activeObjectCreationRetryTimeNumber == 0) {
                    break;
                }
            } catch (Exception e1) {
                // if we are here, something happened while launching the current task.
                logger.warn("An exception occured while starting task.", e1);
                // so try to give every remaining node back to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
            }
        }
        // no free resource left: fetching another batch would be pointless
        if (freeResources.isEmpty()) {
            break;
        }
        // retry counter exhausted in the inner loop: propagate the stop to the outer loop
        if (activeObjectCreationRetryTimeNumber == 0) {
            break;
        }
    }
    return numberOfTaskStarted;
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) InternalJob(org.ow2.proactive.scheduler.job.InternalJob) EligibleTaskDescriptor(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptor) InternalTask(org.ow2.proactive.scheduler.task.internal.InternalTask) Node(org.objectweb.proactive.core.node.Node) EligibleTaskDescriptorImpl(org.ow2.proactive.scheduler.descriptor.EligibleTaskDescriptorImpl) VariableBatchSizeIterator(org.ow2.proactive.scheduler.core.helpers.VariableBatchSizeIterator) LinkedList(java.util.LinkedList) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) ActiveObjectCreationException(org.objectweb.proactive.ActiveObjectCreationException) TopologyDisabledException(org.ow2.proactive.resourcemanager.frontend.topology.TopologyDisabledException) InvalidScriptException(org.ow2.proactive.scripting.InvalidScriptException) RMProxyCreationException(org.ow2.proactive.scheduler.core.rmproxies.RMProxyCreationException) IOException(java.io.IOException)

Aggregations

InternalTask (org.ow2.proactive.scheduler.task.internal.InternalTask)142 InternalJob (org.ow2.proactive.scheduler.job.InternalJob)74 Test (org.junit.Test)72 InternalScriptTask (org.ow2.proactive.scheduler.task.internal.InternalScriptTask)39 TaskId (org.ow2.proactive.scheduler.common.task.TaskId)37 TaskResultImpl (org.ow2.proactive.scheduler.task.TaskResultImpl)32 InternalTaskFlowJob (org.ow2.proactive.scheduler.job.InternalTaskFlowJob)31 ArrayList (java.util.ArrayList)30 JobIdImpl (org.ow2.proactive.scheduler.job.JobIdImpl)25 JobId (org.ow2.proactive.scheduler.common.job.JobId)22 TaskFlowJob (org.ow2.proactive.scheduler.common.job.TaskFlowJob)18 ExecuterInformation (org.ow2.proactive.scheduler.task.internal.ExecuterInformation)16 TaskInfoImpl (org.ow2.proactive.scheduler.task.TaskInfoImpl)13 UnknownTaskException (org.ow2.proactive.scheduler.common.exception.UnknownTaskException)12 HashMap (java.util.HashMap)10 UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException)10 ProActiveTest (org.ow2.tests.ProActiveTest)10 TaskInfo (org.ow2.proactive.scheduler.common.task.TaskInfo)9 HashSet (java.util.HashSet)8 SchedulerStateRecoverHelper (org.ow2.proactive.scheduler.core.db.SchedulerStateRecoverHelper)8