Use of org.ow2.proactive.scheduler.common.task.Task in the project scheduling by ow2-proactive.
Class SchedulingMethodImpl, method selectAndStartTasks.
/**
 * Goes through the eligible tasks returned by the policy, batch by batch, and tries to start
 * as many of them as the free resources allow.
 * <p>
 * For every batch the executable containers are loaded, then the batch is consumed by groups
 * of compatible tasks (see {@code getNextcompatibleTasks}). Nodes are requested for each group
 * through {@code getRMNodes} and the tasks of the group are started one after the other with
 * {@code createExecution}. Nodes that were obtained but not used, or that are still held when a
 * start fails, are released and their URLs are put back into {@code freeResources}.
 *
 * @param currentPolicy the policy asked whether a task can be executed on the obtained nodes
 * @param jobMap the job descriptors, indexed by job id, that own the eligible tasks
 * @param freeResources URLs of the nodes currently considered free; this set is updated in place
 *                      (URLs are removed when nodes are obtained and added back when released)
 * @param fullListOfTaskRetrievedFromPolicy every eligible task returned by the policy
 * @return the number of tasks for which {@code createExecution} returned {@code true}
 */
private int selectAndStartTasks(Policy currentPolicy, Map<JobId, JobDescriptor> jobMap, Set<String> freeResources, LinkedList<EligibleTaskDescriptor> fullListOfTaskRetrievedFromPolicy) {
    int numberOfTaskStarted = 0;
    VariableBatchSizeIterator progressiveIterator = new VariableBatchSizeIterator(fullListOfTaskRetrievedFromPolicy);
    while (progressiveIterator.hasMoreElements() && !freeResources.isEmpty()) {
        // the size of the next batch is driven by the number of free resources
        LinkedList<EligibleTaskDescriptor> taskRetrievedFromPolicy = new LinkedList<>(progressiveIterator.getNextElements(freeResources.size()));
        if (logger.isDebugEnabled()) {
            loggingEligibleTasksDetails(fullListOfTaskRetrievedFromPolicy, taskRetrievedFromPolicy);
        }
        updateVariablesForTasksToSchedule(taskRetrievedFromPolicy);
        for (EligibleTaskDescriptor etd : taskRetrievedFromPolicy) {
            // load and initialize the executable container
            loadAndInit(((EligibleTaskDescriptorImpl) etd).getInternal());
        }
        while (!taskRetrievedFromPolicy.isEmpty()) {
            if (freeResources.isEmpty()) {
                break;
            }
            // get the next compatible tasks from the whole returned policy tasks
            LinkedList<EligibleTaskDescriptor> tasksToSchedule = new LinkedList<>();
            int neededResourcesNumber = 0;
            while (!taskRetrievedFromPolicy.isEmpty() && neededResourcesNumber == 0) {
                // the loop will search for the next compatible tasks until it finds something
                neededResourcesNumber = getNextcompatibleTasks(jobMap, taskRetrievedFromPolicy, freeResources.size(), tasksToSchedule);
            }
            // both messages are built only when debug is enabled, so that no string is
            // concatenated for nothing on every scheduling loop
            if (logger.isDebugEnabled()) {
                logger.debug("tasksToSchedule : " + tasksToSchedule);
                logger.debug("required number of nodes : " + neededResourcesNumber);
            }
            if (neededResourcesNumber == 0 || tasksToSchedule.isEmpty()) {
                break;
            }
            NodeSet nodeSet = getRMNodes(jobMap, neededResourcesNumber, tasksToSchedule, freeResources);
            if (nodeSet != null) {
                // the obtained nodes are no longer free
                freeResources.removeAll(nodeSet.getAllNodesUrls());
            }
            // start selected tasks
            Node node = null;
            InternalJob currentJob = null;
            try {
                while (nodeSet != null && !nodeSet.isEmpty()) {
                    EligibleTaskDescriptor taskDescriptor = tasksToSchedule.removeFirst();
                    currentJob = ((JobDescriptorImpl) jobMap.get(taskDescriptor.getJobId())).getInternal();
                    InternalTask internalTask = ((EligibleTaskDescriptorImpl) taskDescriptor).getInternal();
                    if (currentPolicy.isTaskExecutable(nodeSet, taskDescriptor)) {
                        // create launcher and try to start the task
                        node = nodeSet.get(0);
                        if (createExecution(nodeSet, node, currentJob, internalTask, taskDescriptor)) {
                            numberOfTaskStarted++;
                        }
                    }
                    // if every task that should be launched has been removed
                    if (tasksToSchedule.isEmpty()) {
                        // get back unused nodes to the RManager
                        if (!nodeSet.isEmpty()) {
                            releaseNodes(currentJob, nodeSet);
                            freeResources.addAll(nodeSet.getAllNodesUrls());
                        }
                        // and leave the loop
                        break;
                    }
                }
            } catch (ActiveObjectCreationException e1) {
                // Something went wrong with the active object creation (createLauncher)
                logger.warn("An exception occured while creating the task launcher.", e1);
                // so try to get back every remaining node to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
                // give up on this scheduling loop once the retry budget is exhausted
                // (createExecution resets the counter after a successful launcher creation)
                if (--activeObjectCreationRetryTimeNumber == 0) {
                    break;
                }
            } catch (Exception e1) {
                // if we are here, something happened while launching the current task.
                logger.warn("An exception occured while starting task.", e1);
                // so try to get back every remaining node to the resource manager
                try {
                    releaseNodes(currentJob, nodeSet);
                    freeResources.addAll(nodeSet.getAllNodesUrls());
                } catch (Exception e2) {
                    logger.info("Unable to get back the nodeSet to the RM", e2);
                }
            }
        }
        if (freeResources.isEmpty()) {
            break;
        }
        // the inner loop was left because launcher creation failed too many times
        if (activeObjectCreationRetryTimeNumber == 0) {
            break;
        }
    }
    return numberOfTaskStarted;
}
Use of org.ow2.proactive.scheduler.common.task.Task in the project scheduling by ow2-proactive.
Class SchedulingMethodImpl, method loadAndInit.
/**
 * Makes sure the given task carries a usable executable container.
 * <p>
 * When the task has no container, or when its container has no script, the container is
 * loaded through the DB manager and set on the task. Otherwise the task is left untouched.
 *
 * @param task the task to be initialized
 */
protected void loadAndInit(InternalTask task) {
    boolean containerAlreadyLoaded = (task.getExecutableContainer() != null) &&
                                     ((ScriptExecutableContainer) task.getExecutableContainer()).getScript() != null;
    if (containerAlreadyLoaded) {
        return;
    }
    tlogger.debug(task.getId(), "initializing the executable container");
    ExecutableContainer loadedContainer = getDBManager().loadExecutableContainer(task);
    task.setExecutableContainer(loadedContainer);
}
Use of org.ow2.proactive.scheduler.common.task.Task in the project scheduling by ow2-proactive.
Class SchedulingMethodImpl, method createExecution.
/**
 * Create launcher and try to start the task.
 * <p>
 * The job is locked for the duration of the call. The task is started only if the node set
 * holds at least as many nodes as the task needs, the task is not paused and the job lock
 * could be obtained. The first node is used for the launcher; for a parallel task the
 * remaining nodes of the node set are handed over to the task as well.
 *
 * @param nodeSet the node set containing every available nodes that can be used for execution;
 *                the nodes used by the task are removed from it
 * @param node the node on which to start the task
 * @param job the job that owns the task to be started
 * @param task the task to be started
 * @param taskDescriptor the descriptor of the task to be started
 * @return {@code true} if the start action was submitted and the task was marked as starting,
 *         {@code false} if the start preconditions were not met (not enough nodes, task paused,
 *         or job lock not obtained)
 * @throws Exception if the launcher creation or the submission of the start action fails; the
 *         nodes taken for the task are released on a best-effort basis before rethrowing, and a
 *         failure of that release is attached to the thrown exception as a suppressed exception
 */
protected boolean createExecution(NodeSet nodeSet, Node node, InternalJob job, InternalTask task, TaskDescriptor taskDescriptor) throws Exception {
    TaskLauncher launcher = null;
    LiveJobs.JobData jobData = null;
    try {
        jobData = schedulingService.lockJob(job.getId());
        // enough nodes, task is not paused, and the job lock was obtained
        if (nodeSet.size() >= task.getNumberOfNodesNeeded() && (task.getStatus() != TaskStatus.PAUSED) && (jobData != null)) {
            // start dataspace app for this job
            DataSpaceServiceStarter dsStarter = schedulingService.getInfrastructure().getDataSpaceServiceStarter();
            job.startDataSpaceApplication(dsStarter.getNamingService(), ImmutableList.of(task));
            NodeSet nodes = new NodeSet();
            try {
                // create launcher
                launcher = task.createLauncher(node);
                // the launcher was created: reset the retry budget used by the scheduling loop
                activeObjectCreationRetryTimeNumber = ACTIVEOBJECT_CREATION_RETRY_TIME_NUMBER;
                nodeSet.remove(0);
                // we will need to update this code once topology will be allowed for single-node task
                if (task.isParallel()) {
                    nodes = new NodeSet(nodeSet);
                    task.getExecuterInformation().addNodes(nodes);
                    nodeSet.clear();
                }
                // set nodes in the executable container
                task.getExecutableContainer().setNodes(nodes);
                tlogger.debug(task.getId(), "deploying");
                // above 500 parent tasks, it is worth adjusting.
                if (taskDescriptor.getParents().size() > 500) {
                    dotaskActionTimeout = (int) (taskDescriptor.getParents().size() / 500.0 * PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt());
                } else {
                    // reset the dotaskActionTimeout to its default value otherwise.
                    dotaskActionTimeout = PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt();
                }
                boolean taskRecoverable = getRMProxiesManager().getRmProxy().areNodesRecoverable(nodes);
                String terminateNotificationNodeURL = PAActiveObject.getActiveObjectNode(terminateNotification).getNodeInformation().getURL();
                TaskRecoveryData taskRecoveryData = new TaskRecoveryData(terminateNotificationNodeURL, taskRecoverable);
                threadPool.submitWithTimeout(new TimedDoTaskAction(job, taskDescriptor, launcher, schedulingService, terminateNotification, corePrivateKey, taskRecoveryData), dotaskActionTimeout, TimeUnit.MILLISECONDS);
                // we advertise here that the task is started, however
                // this is not entirely true: the only thing we are sure
                // about at this point is that we submitted to the thread
                // pool the action that will call the "doTask" of the task
                // launcher. There is thus a small gap here where the task
                // is seen as started whereas it is not yet started. We
                // cannot easily move the task started notification because
                // 1) it makes the job lock acquisition less predictable
                // (because the TimeDoTaskAction will have to compete with
                // the SchedulingMethodImpl)
                // and more importantly 2) the
                // SchedulingMethodImpl#createExecution may happen to be
                // called a second time for the task that is currently being
                // started by the TimedDoTaskAction.
                finalizeStarting(job, task, node, launcher);
                return true;
            } catch (Exception t) {
                try {
                    // if there was a problem, free nodeSet for multi-nodes task
                    nodes.add(node);
                    releaseNodes(job, nodes);
                } catch (Throwable ni) {
                    // the release is best-effort and must not hide the original failure,
                    // but its own failure is kept with the rethrown exception instead of
                    // being dropped silently (self-suppression is not permitted)
                    if (ni != t) {
                        t.addSuppressed(ni);
                    }
                }
                throw t;
            }
        } else {
            return false;
        }
    } finally {
        if (jobData != null) {
            jobData.unlock();
        }
    }
}
Use of org.ow2.proactive.scheduler.common.task.Task in the project scheduling by ow2-proactive.
Class SchedulingMethodImpl, method getNextcompatibleTasks.
/**
 * Moves the first run of mutually compatible tasks from {@code bagOfTasks} to {@code toFill},
 * stopping as soon as the {@code maxResource} budget is used up.<br>
 * Two tasks are compatible if and only if they have the same list of selection script and
 * the same list of node exclusion.
 * Compatibility is currently decided by the {@link SchedulingTaskComparator} class: every
 * candidate is compared with the first task taken from the bag.<br>
 * When the scheduler REST URL property is not set, tasks reported as containing an API binding
 * are first dropped from the bag.<br>
 * Side effects: every examined task is removed from {@code bagOfTasks} and gets one more
 * attempt recorded; selected tasks are appended to {@code toFill}; the first incompatible task
 * is pushed back at the head of the bag; a task needing more nodes than the remaining budget is
 * neither selected nor pushed back.
 *
 * @param jobsMap the job descriptors, indexed by job id, used to find the job of each task
 * @param bagOfTasks the list of tasks from which to extract tasks
 * @param maxResource the limit number of resources that the extraction should not exceed
 * @param toFill the list that will contain the tasks to schedule at the end. This list must not be null but must be empty.<br>
 *               It is filled with the first compatible tasks, as long as the number of resources
 *               they need does not exceed the given max resource number.
 * @return the number of nodes needed to start every task present in the 'toFill' argument at the end of the method.
 * @throws IllegalArgumentException if {@code bagOfTasks} or {@code toFill} is null
 */
protected int getNextcompatibleTasks(Map<JobId, JobDescriptor> jobsMap, LinkedList<EligibleTaskDescriptor> bagOfTasks, int maxResource, LinkedList<EligibleTaskDescriptor> toFill) {
    if (toFill == null || bagOfTasks == null) {
        throw new IllegalArgumentException("The two given lists must not be null !");
    }

    // without a REST URL, tasks that contain an API binding are skipped
    if (!PASchedulerProperties.SCHEDULER_REST_URL.isSet()) {
        for (Iterator<EligibleTaskDescriptor> cursor = bagOfTasks.iterator(); cursor.hasNext();) {
            if (checkEligibleTaskDescriptorScript.isTaskContainsAPIBinding(cursor.next())) {
                cursor.remove();
            }
        }
    }

    int reservedNodes = 0;
    if (maxResource <= 0 || bagOfTasks.isEmpty()) {
        return reservedNodes;
    }

    // built from the first task taken out of the bag; every candidate is compared to it
    SchedulingTaskComparator reference = null;
    do {
        EligibleTaskDescriptor candidate = bagOfTasks.removeFirst();
        ((EligibleTaskDescriptorImpl) candidate).addAttempt();
        InternalJob owningJob = ((JobDescriptorImpl) jobsMap.get(candidate.getJobId())).getInternal();
        InternalTask candidateTask = owningJob.getIHMTasks().get(candidate.getTaskId());
        int nodesForCandidate = candidateTask.getNumberOfNodesNeeded();
        if (reference == null) {
            reference = new SchedulingTaskComparator(candidateTask, owningJob);
        }

        // A task that needs more nodes than what is left is deliberately ignored here:
        // it does not stop the search and will be retried on a next step, so that the
        // maximum number of tasks is started in a single scheduling loop. Single node
        // tasks are thus favoured when many resources are busy
        // (multi-nodes starvation may occur).
        if (nodesForCandidate <= maxResource) {
            // check if the task is compatible with the reference one
            if (!reference.equals(new SchedulingTaskComparator(candidateTask, owningJob))) {
                bagOfTasks.addFirst(candidate);
                break;
            }
            tlogger.debug(candidateTask.getId(), "scheduling");
            reservedNodes += nodesForCandidate;
            maxResource -= nodesForCandidate;
            toFill.add(candidate);
        }
    } while (maxResource > 0 && !bagOfTasks.isEmpty());

    return reservedNodes;
}
Use of org.ow2.proactive.scheduler.common.task.Task in the project scheduling by ow2-proactive.
Class SchedulingService, method kill.
/**
 * Switches the scheduler to the KILLED status and tears it down.
 * <p>
 * The pinger and the scheduling thread are interrupted, the launcher of every running task is
 * killed and its nodes are released, then the job log support and the infrastructure are shut
 * down and the listener is told about the KILLED event. A failure to kill one launcher or to
 * release one set of nodes is logged and does not stop the sequence.
 *
 * @return {@code false} if the status already reports killed (nothing is done in that case),
 *         {@code true} otherwise
 */
public boolean kill() {
    if (status.isKilled()) {
        return false;
    }
    status = SchedulerStatus.KILLED;

    pinger.interrupt();
    schedulingThread.interrupt();

    logger.info("Killing all running task processes...");
    for (RunningTaskData running : jobs.getRunningTasks()) {
        // read the nodes before touching the launcher, they are released below
        NodeSet nodesInUse = running.getTask().getExecuterInformation().getNodes();

        try {
            running.getLauncher().kill();
        } catch (Throwable launcherFailure) {
            logger.error("Failed to terminate launcher", launcherFailure);
        }

        // the nodes are released even when the launcher could not be killed
        try {
            infrastructure.getRMProxiesManager()
                          .getUserRMProxy(running.getUser(), running.getCredentials())
                          .releaseNodes(nodesInUse,
                                        running.getTask().getCleaningScript(),
                                        addThirdPartyCredentials(running.getCredentials()));
        } catch (Throwable releaseFailure) {
            logger.error("Failed to release nodes", releaseFailure);
        }
    }

    listenJobLogsSupport.shutdown();
    infrastructure.shutdown();
    listener.schedulerStateUpdated(SchedulerEvent.KILLED);
    return true;
}
Aggregations