Use of org.ow2.proactive.scheduler.task.internal.TaskRecoveryData in the project scheduling by ow2-proactive.
Class SchedulingMethodImpl, method createExecution.
/**
 * Create launcher and try to start the task.
 * <p>
 * The job is locked for the duration of the call and unlocked in all cases
 * (provided the lock was obtained). The task is only started when enough nodes
 * are available, the task is not paused and the job lock could be acquired.
 *
 * @param nodeSet the node set containing every available nodes that can be used for execution
 * @param node the node on which to start the task
 * @param job the job that owns the task to be started
 * @param task the task to be started
 * @param taskDescriptor the descriptor of the task to be started
 * @return {@code true} if the start action was submitted and the task was marked as started,
 *         {@code false} if the preconditions above were not met and nothing was started
 * @throws Exception any exception raised while creating the launcher or submitting the start
 *         action; it is rethrown after an attempt to release the nodes. A failure of that
 *         release attempt is attached to the rethrown exception as a suppressed exception.
 */
protected boolean createExecution(NodeSet nodeSet, Node node, InternalJob job, InternalTask task, TaskDescriptor taskDescriptor) throws Exception {
    TaskLauncher launcher = null;
    LiveJobs.JobData jobData = null;
    try {
        jobData = schedulingService.lockJob(job.getId());
        // not enough nodes, task paused, or job lock not obtained: nothing to start
        if (nodeSet.size() < task.getNumberOfNodesNeeded() || task.getStatus() == TaskStatus.PAUSED || jobData == null) {
            return false;
        }
        // start dataspace app for this job
        DataSpaceServiceStarter dsStarter = schedulingService.getInfrastructure().getDataSpaceServiceStarter();
        job.startDataSpaceApplication(dsStarter.getNamingService(), ImmutableList.of(task));
        NodeSet nodes = new NodeSet();
        try {
            // create launcher
            launcher = task.createLauncher(node);
            activeObjectCreationRetryTimeNumber = ACTIVEOBJECT_CREATION_RETRY_TIME_NUMBER;
            nodeSet.remove(0);
            // we will need to update this code once topology will be allowed for single-node task
            if (task.isParallel()) {
                nodes = new NodeSet(nodeSet);
                task.getExecuterInformation().addNodes(nodes);
                nodeSet.clear();
            }
            // set nodes in the executable container
            task.getExecutableContainer().setNodes(nodes);
            tlogger.debug(task.getId(), "deploying");
            // above 500 parent tasks, it is worth adjusting.
            int parentCount = taskDescriptor.getParents().size();
            if (parentCount > 500) {
                dotaskActionTimeout = (int) (parentCount / 500.0 * PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt());
            } else {
                // reset the dotaskActionTimeout to its default value otherwise.
                dotaskActionTimeout = PASchedulerProperties.SCHEDULER_STARTTASK_TIMEOUT.getValueAsInt();
            }
            boolean taskRecoverable = getRMProxiesManager().getRmProxy().areNodesRecoverable(nodes);
            String terminateNotificationNodeURL = PAActiveObject.getActiveObjectNode(terminateNotification).getNodeInformation().getURL();
            TaskRecoveryData taskRecoveryData = new TaskRecoveryData(terminateNotificationNodeURL, taskRecoverable);
            threadPool.submitWithTimeout(new TimedDoTaskAction(job, taskDescriptor, launcher, schedulingService, terminateNotification, corePrivateKey, taskRecoveryData), dotaskActionTimeout, TimeUnit.MILLISECONDS);
            // we advertise here that the task is started, however
            // this is not entirely true: the only thing we are sure
            // about at this point is that we submitted to the thread
            // pool the action that will call the "doTask" of the task
            // launcher. There is thus a small gap here where the task
            // is seen as started whereas it is not yet started. We
            // cannot easily move the task started notification because
            // 1) it makes the job lock acquisition less predictable
            // (because the TimeDoTaskAction will have to compete with
            // the SchedulingMethodImpl)
            // and more importantly 2) the
            // SchedulingMethodImpl#createExecution may happen to be
            // called a second time for the task that is currently being
            // started by the TimedDoTaskAction.
            finalizeStarting(job, task, node, launcher);
            return true;
        } catch (Exception t) {
            try {
                // if there was a problem, free nodeSet for multi-nodes task
                nodes.add(node);
                releaseNodes(job, nodes);
            } catch (Throwable releaseFailure) {
                // Releasing the nodes is best effort and must never mask the
                // original failure; keep the cleanup error attached to it so
                // it is not silently lost.
                if (releaseFailure != t) {
                    t.addSuppressed(releaseFailure);
                }
            }
            throw t;
        }
    } finally {
        if (jobData != null) {
            jobData.unlock();
        }
    }
}
Aggregations