use of org.ow2.proactive.scheduler.descriptor.JobDescriptorImpl in project scheduling by ow2-proactive.
the class SchedulingServiceTest9 method testRestartOnNodeFailureAndJobCancel.
/**
 * Submits the test job, starts tasks, then reports a node failure for task "javaTask1" twice,
 * checking the emitted scheduler events and the infrastructure request count after each report.
 */
@Test
public void testRestartOnNodeFailureAndJobCancel() throws Exception {
    service.submitJob(createJob(createTestJob()));
    listener.assertEvents(SchedulerEvent.JOB_SUBMITTED);

    // startTask(2) presumably starts two tasks (two TASK_PENDING_TO_RUNNING events are expected).
    JobDescriptor runningJob = startTask(2);
    listener.assertEvents(SchedulerEvent.JOB_PENDING_TO_RUNNING,
                          SchedulerEvent.JOB_UPDATED,
                          SchedulerEvent.TASK_PENDING_TO_RUNNING,
                          SchedulerEvent.TASK_PENDING_TO_RUNNING);

    InternalTask failingTask = ((JobDescriptorImpl) runningJob).getInternal().getTask("javaTask1");

    // First node failure: the task is expected to go back to waiting for a restart.
    service.restartTaskOnNodeFailure(failingTask);
    listener.assertEvents(SchedulerEvent.TASK_WAITING_FOR_RESTART);
    infrastructure.assertRequests(1);

    // Start a task again, then report a second node failure for the same task.
    startTask(1);
    service.restartTaskOnNodeFailure(failingTask);

    // After the second failure both tasks and the job itself are expected to reach a finished state.
    listener.assertEvents(SchedulerEvent.TASK_PENDING_TO_RUNNING,
                          SchedulerEvent.TASK_RUNNING_TO_FINISHED,
                          SchedulerEvent.TASK_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_UPDATED);
    infrastructure.assertRequests(2);
}
use of org.ow2.proactive.scheduler.descriptor.JobDescriptorImpl in project scheduling by ow2-proactive.
the class SchedulingMethodImpl method getRMNodes.
/**
 * Asks the Resource Manager for the given number of node resources.<br>
 * The selection criteria (topology, selection scripts, node exclusion, bindings, node access
 * token) are taken from the first task of {@code tasksToSchedule} and from its job.
 * When the request cannot be built or served (topology disabled, previous task variables
 * cannot be deserialized, user RM proxy cannot be created), the job start is simulated and
 * the job is cancelled through the scheduling service.
 *
 * @param jobMap the job descriptors indexed by job id; it must contain the job of the first
 *        task of {@code tasksToSchedule}
 * @param neededResourcesNumber the number of resources to ask for (must be > 0).
 * @param tasksToSchedule the tasks to be scheduled; the first one defines the selection criteria
 * @param freeResources the node URLs set on the criteria as acceptable nodes
 * @return A nodeSet that contains at most 'neededResourcesNumber' available compatible resources.
 *         An empty nodeSet if no nodes could be found.
 *         null if there was a handled failure when asking for the nodes (see above).
 * @throws IllegalArgumentException if {@code neededResourcesNumber} is not strictly positive
 */
protected NodeSet getRMNodes(Map<JobId, JobDescriptor> jobMap, int neededResourcesNumber, LinkedList<EligibleTaskDescriptor> tasksToSchedule, Set<String> freeResources) {
    if (neededResourcesNumber <= 0) {
        throw new IllegalArgumentException("'neededResourcesNumber' must be greater than 0");
    }

    // The first eligible task is the reference used to build the whole request.
    EligibleTaskDescriptor firstEligible = tasksToSchedule.getFirst();
    JobDescriptorImpl jobDescriptor = (JobDescriptorImpl) jobMap.get(firstEligible.getJobId());
    InternalJob currentJob = jobDescriptor.getInternal();
    InternalTask referenceTask = currentJob.getIHMTasks().get(firstEligible.getTaskId());

    try {
        // Only parallel tasks carry a topology; they also disable best-effort allocation.
        boolean parallel = referenceTask.isParallel();
        TopologyDescriptor topology = parallel ? referenceTask.getParallelEnvironment().getTopologyDescriptor()
                                               : null;
        boolean bestEffort = !parallel;
        if (parallel && topology == null) {
            logger.debug("Topology is not defined for the task " + referenceTask.getName());
        }
        if (topology == null) {
            // no topology available: fall back to the default one
            topology = TopologyDescriptor.ARBITRARY;
        }

        NodeSet nodeSet;
        try {
            Criteria request = new Criteria(neededResourcesNumber);
            request.setTopology(topology);
            // script variables (if any) are resolved in the selection scripts
            // before the scripts are used as selection criteria
            request.setScripts(resolveScriptVariables(referenceTask.getSelectionScripts(),
                                                      referenceTask.getRuntimeVariables()));
            request.setBlackList(referenceTask.getNodeExclusion());
            request.setBestEffort(bestEffort);
            request.setAcceptableNodesUrls(freeResources);
            request.setBindings(createBindingsForSelectionScripts(currentJob, referenceTask));
            if (referenceTask.getRuntimeGenericInformation().containsKey(SchedulerConstants.NODE_ACCESS_TOKEN)) {
                request.setNodeAccessToken(referenceTask.getRuntimeGenericInformation()
                                                        .get(SchedulerConstants.NODE_ACCESS_TOKEN));
            }

            // one computation descriptor (relative task log path) per task to schedule
            Collection<String> taskLogPaths = new ArrayList<>(tasksToSchedule.size());
            for (EligibleTaskDescriptor eligible : tasksToSchedule) {
                taskLogPaths.add(TaskLogger.getTaskLogRelativePath(eligible.getTaskId()));
            }
            request.setComputationDescriptors(taskLogPaths);

            nodeSet = getRMProxiesManager().getUserRMProxy(currentJob.getOwner(), currentJob.getCredentials())
                                           .getNodes(request);
        } catch (TopologyDisabledException tde) {
            jlogger.warn(currentJob.getId(), "will be canceled as the topology is disabled");
            schedulingService.simulateJobStartAndCancelIt(tasksToSchedule, "Topology is disabled");
            return null;
        }

        // Unwraps the future: be careful when moving or removing this call.
        // It may also throw a ScriptException, which is a RuntimeException.
        PAFuture.waitFor(nodeSet, true);
        logger.debug("provided nodes " + nodeSet.size());
        return nodeSet;
    } catch (IOException | ClassNotFoundException e) {
        String reason = "Failed to deserialize previous task variables before selection for task " +
                        referenceTask.getId().toString();
        logger.warn(reason, e);
        schedulingService.simulateJobStartAndCancelIt(tasksToSchedule, reason);
        return null;
    } catch (RMProxyCreationException e) {
        logger.warn("Failed to create User RM Proxy", e);
        // simulate the job start and cancel it, then leave the method on failure
        schedulingService.simulateJobStartAndCancelIt(tasksToSchedule,
                                                      "Failed to create User RM Proxy : Authentication Failed to Resource Manager for user '" +
                                                                       currentJob.getOwner() + "'");
        return null;
    }
}
use of org.ow2.proactive.scheduler.descriptor.JobDescriptorImpl in project scheduling by ow2-proactive.
the class BaseServiceTest method taskStarted.
/**
 * Marks the given eligible task as started on the service, using a mocked
 * {@code TaskLauncher} and the default node as its executer information.
 *
 * @param jobDesc descriptor of the job owning the task; must be a {@code JobDescriptorImpl}
 * @param taskDesc descriptor of the task to start; must be an {@code EligibleTaskDescriptorImpl}
 */
void taskStarted(JobDescriptor jobDesc, EligibleTaskDescriptor taskDesc) throws Exception {
    InternalTask startedTask = ((EligibleTaskDescriptorImpl) taskDesc).getInternal();

    // No real launcher is needed: a Mockito mock stands in for it.
    TaskLauncher mockedLauncher = Mockito.mock(TaskLauncher.class);
    ExecuterInformation executer = new ExecuterInformation(mockedLauncher, NodeFactory.getDefaultNode());
    startedTask.setExecuterInformation(executer);

    service.taskStarted(((JobDescriptorImpl) jobDesc).getInternal(), startedTask, mockedLauncher);
}
use of org.ow2.proactive.scheduler.descriptor.JobDescriptorImpl in project scheduling by ow2-proactive.
the class SchedulingServiceTest1 method testSimpleJob.
/**
 * Runs a single-task job from submission to termination and checks, at each step,
 * the jobs returned by the service, the eligible tasks, the emitted events and the
 * infrastructure request count.
 */
@Test
public void testSimpleJob() throws Exception {
    service.submitJob(createJob(createTestJob()));
    listener.assertEvents(SchedulerEvent.JOB_SUBMITTED);

    // First lock: one job with one eligible task, which is then started.
    Map<JobId, JobDescriptor> lockedJobs = service.lockJobsToSchedule();
    Assert.assertEquals(1, lockedJobs.size());
    JobDescriptor job = lockedJobs.values().iterator().next();
    Assert.assertEquals(1, job.getEligibleTasks().size());
    taskStarted(job, (EligibleTaskDescriptor) job.getEligibleTasks().iterator().next());
    service.unlockJobsToSchedule(lockedJobs.values());

    // Second lock: the job is still returned but has no eligible task left.
    lockedJobs = service.lockJobsToSchedule();
    Assert.assertEquals(1, lockedJobs.size());
    job = lockedJobs.values().iterator().next();
    Assert.assertEquals(0, job.getEligibleTasks().size());
    service.unlockJobsToSchedule(lockedJobs.values());

    // Terminate the only task with a result.
    TaskId finishedTaskId = ((JobDescriptorImpl) job).getInternal().getTask("task1").getId();
    service.taskTerminatedWithResult(finishedTaskId, new TaskResultImpl(finishedTaskId, "Result", null, 0));

    lockedJobs = service.lockJobsToSchedule();
    // a finished job is not removed from the hibernate context unless
    // the housekeeping mechanism is enabled
    Assert.assertEquals(1, lockedJobs.size());

    listener.assertEvents(SchedulerEvent.JOB_PENDING_TO_RUNNING,
                          SchedulerEvent.JOB_UPDATED,
                          SchedulerEvent.TASK_PENDING_TO_RUNNING,
                          SchedulerEvent.TASK_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_UPDATED);
    infrastructure.assertRequests(1);
}
use of org.ow2.proactive.scheduler.descriptor.JobDescriptorImpl in project scheduling by ow2-proactive.
the class SchedulingServiceTest4 method testTaskRestart1.
/**
 * Checks that restarting a task is rejected for an unknown task name or an unknown job id,
 * and that a valid restart request moves the task to waiting-for-restart, after which the
 * task can be started again and terminated normally.
 */
@Test
public void testTaskRestart1() throws Exception {
    service.submitJob(createJob(createTestJob()));
    listener.assertEvents(SchedulerEvent.JOB_SUBMITTED);

    JobDescriptor runningJob = startTask();

    // An unknown task name must be rejected.
    try {
        service.restartTask(runningJob.getJobId(), "invalid task name", 100);
        Assert.fail();
    } catch (UnknownTaskException expected) {
        // expected: the job exists but has no task with that name
    }

    // An unknown job id must be rejected.
    try {
        service.restartTask(JobIdImpl.makeJobId("1234567"), "javaTask", 100);
        Assert.fail();
    } catch (UnknownJobException expected) {
        // expected: no job was submitted with that id
    }

    // A valid restart request.
    service.restartTask(runningJob.getJobId(), "javaTask", 100);
    listener.assertEvents(SchedulerEvent.JOB_PENDING_TO_RUNNING,
                          SchedulerEvent.JOB_UPDATED,
                          SchedulerEvent.TASK_PENDING_TO_RUNNING,
                          SchedulerEvent.TASK_WAITING_FOR_RESTART);
    infrastructure.assertRequests(1);

    // Start the task again and let it terminate with a result.
    startTask();
    TaskId restartedTaskId = ((JobDescriptorImpl) runningJob).getInternal().getTask("javaTask").getId();
    service.taskTerminatedWithResult(restartedTaskId, new TaskResultImpl(restartedTaskId, "OK", null, 0));
    listener.assertEvents(SchedulerEvent.TASK_PENDING_TO_RUNNING,
                          SchedulerEvent.TASK_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_RUNNING_TO_FINISHED,
                          SchedulerEvent.JOB_UPDATED);
    infrastructure.assertRequests(1);
}
Aggregations