use of org.ow2.proactive.scheduler.task.utils.task.termination.TaskKiller in project scheduling by ow2-proactive.
the class TaskLauncher method doTask.
/**
 * Runs one task from start to finish: marks it started, prepares the data spaces and the
 * {@code TaskContext}, copies input data, executes the task under a wall timer, copies
 * output data and logs, and sends the result to the scheduler. No result is sent when the
 * task killer reports {@code KILLED_MANUALLY}.
 *
 * @param executableContainer          passed to the {@code TaskContext}; its run-as-user flag and credentials are read here
 * @param previousTasksResults         passed to the {@code TaskContext}
 * @param terminateNotification        notification target used to send the result; re-checked through the rebinder after execution
 * @param terminateNotificationNodeURL passed to the {@code TaskLauncherRebinder}
 * @param taskRecoverable              passed to the {@code TaskLauncherRebinder}
 */
public void doTask(ExecutableContainer executableContainer, TaskResult[] previousTasksResults, TaskTerminateNotification terminateNotification, String terminateNotificationNodeURL, boolean taskRecoverable) {
TaskResultImpl taskResult;
// These are declared outside the try block so the catch and finally blocks can
// inspect them; each stays null until the corresponding setup step has run.
WallTimer wallTimer = null;
TaskContext context = null;
Stopwatch taskStopwatchForFailures = null;
TaskDataspaces dataspaces = null;
try {
taskStarted.set(true);
logger.info("Task started " + taskId.getJobId().getReadableName() + " : " + taskId.getReadableName());
// The task killer is reassigned based on the run-as-user flag.
// NOTE(review): the helper name suggests a doubled cleanup timeout for run-as-me tasks — confirm in the helper.
this.taskKiller = this.replaceTaskKillerWithDoubleTimeoutValueIfRunAsMe(executableContainer.isRunAsUser());
wallTimer = new WallTimer(initializer.getWalltime(), taskKiller);
// Created unstarted: it is only started right before the executor runs, so
// setup and input copying are not counted in the elapsed time.
taskStopwatchForFailures = Stopwatch.createUnstarted();
taskLauncherRebinder = new TaskLauncherRebinder(taskId, terminateNotificationNodeURL, taskRecoverable);
addShutdownHook();
// lock the cache space cleaning mechanism
DataSpaceNodeConfigurationAgent.lockCacheSpaceCleaning();
dataspaces = factory.createTaskDataspaces(taskId, initializer.getNamingService(), executableContainer.isRunAsUser());
File taskLogFile = taskLogger.createFileAppender(dataspaces.getScratchFolder());
progressFileReader.start(dataspaces.getScratchFolder(), taskId);
context = new TaskContext(executableContainer, initializer, previousTasksResults, new NodeDataSpacesURIs(dataspaces.getScratchURI(), dataspaces.getCacheURI(), dataspaces.getInputURI(), dataspaces.getOutputURI(), dataspaces.getUserURI(), dataspaces.getGlobalURI()), progressFileReader.getProgressFile().toString(), getHostname(), decrypter);
File workingDir = getTaskWorkingDir(context, dataspaces);
logger.info("Task working dir: " + workingDir);
logger.info("Cache space: " + context.getNodeDataSpaceURIs().getCacheURI());
logger.info("Input space: " + context.getNodeDataSpaceURIs().getInputURI());
logger.info("Output space: " + context.getNodeDataSpaceURIs().getOutputURI());
logger.info("User space: " + context.getNodeDataSpaceURIs().getUserURI());
logger.info("Global space: " + context.getNodeDataSpaceURIs().getGlobalURI());
logger.info("Scheduler rest url: " + context.getSchedulerRestUrl());
// The wall timer is started before input data is copied, so the copy below
// happens while the wall timer is running.
wallTimer.start();
// should handle interrupt
dataspaces.copyInputDataToScratch(initializer.getFilteredInputFiles(fileSelectorsFilters(context)));
if (decrypter != null) {
decrypter.setCredentials(executableContainer.getCredentials());
}
TaskExecutor taskExecutor = factory.createTaskExecutor(workingDir);
taskStopwatchForFailures.start();
taskResult = taskExecutor.execute(context, taskLogger.getOutputSink(), taskLogger.getErrorSink());
taskStopwatchForFailures.stop();
// by the time the task finishes, the scheduler might have had a
// transient failure, so we need to make sure that the placeholder
// for the task's result still exists, or get the new place for
// the result if it does not exist anymore.
TaskTerminateNotification rebindedTerminateNotification = taskLauncherRebinder.makeSureSchedulerIsConnected(terminateNotification);
// The executor's result is only used when the task killer reports neither a
// walltime nor a manual kill; both of those cases return early below.
// NOTE(review): these early returns skip wallTimer.stop() on this path.
switch(taskKiller.getStatus()) {
case WALLTIME_REACHED:
taskResult = getWalltimedTaskResult(context, taskStopwatchForFailures);
sendResultToScheduler(rebindedTerminateNotification, taskResult);
return;
case KILLED_MANUALLY:
// killed by Scheduler, no need to send results back
return;
}
dataspaces.copyScratchDataToOutput(initializer.getFilteredOutputFiles(fileSelectorsFilters(context, taskResult)));
wallTimer.stop();
copyTaskLogsToUserSpace(taskLogFile, dataspaces);
taskResult.setLogs(taskLogger.getLogs());
sendResultToScheduler(rebindedTerminateNotification, taskResult);
} catch (Throwable taskFailure) {
// Any failure from setup, execution or result delivery ends up here.
// wallTimer is null if the failure happened before it was created.
if (wallTimer != null) {
wallTimer.stop();
}
// Results in this block are sent through the original terminateNotification;
// the rebound notification is local to the try block and not visible here.
switch(taskKiller.getStatus()) {
case WALLTIME_REACHED:
taskResult = getWalltimedTaskResult(context, taskStopwatchForFailures);
sendResultToScheduler(terminateNotification, taskResult);
break;
case KILLED_MANUALLY:
// killed by Scheduler, no need to send results back
return;
default:
logger.info("Failed to execute task", taskFailure);
// Elapsed time stays 0 when the failure happened before the stopwatch was created.
long elapsedTime = 0;
if (taskStopwatchForFailures != null) {
elapsedTime = taskStopwatchForFailures.elapsed(TimeUnit.MILLISECONDS);
}
// The stack trace is written to the task's error sink before the logs are
// read for the failure result below.
taskFailure.printStackTrace(taskLogger.getErrorSink());
Map<String, byte[]> serializedVariables = extractVariablesFromContext(context);
taskResult = new TaskResultImpl(taskId, taskFailure, taskLogger.getLogs(), elapsedTime);
taskResult.setPropagatedVariables(serializedVariables);
sendResultToScheduler(terminateNotification, taskResult);
}
} finally {
// Cleanup runs on every exit path, including the early returns above.
// terminate() sits in a nested finally so it is still called when one of
// the cleanup steps throws.
try {
progressFileReader.stop();
taskLogger.close();
if (dataspaces != null) {
dataspaces.close();
}
// unlocks the cache space cleaning thread
DataSpaceNodeConfigurationAgent.unlockCacheSpaceCleaning();
removeShutdownHook();
} finally {
terminate();
}
}
}
use of org.ow2.proactive.scheduler.task.utils.task.termination.TaskKiller in project scheduling by ow2-proactive.
the class TaskLauncher method initActivity.
/**
 * Initializes the launcher's per-task collaborators: task id, task logger, progress file
 * reader, task killer and the node shutdown hook thread.
 *
 * @param body not read by this method
 */
@Override
public void initActivity(Body body) {
    taskId = initializer.getTaskId();
    taskLogger = new TaskLogger(taskId, getHostname());
    progressFileReader = new ProgressFileReader();

    // The killer is bound to the thread that runs this initialization.
    final Thread launcherThread = Thread.currentThread();
    taskKiller = new TaskKiller(launcherThread, new CleanupTimeoutGetter());

    // The hook thread is only created here, not started; when run it calls kill().
    final Runnable killOnShutdown = new Runnable() {
        @Override
        public void run() {
            kill();
        }
    };
    nodeShutdownHook = new Thread(killOnShutdown);
}
use of org.ow2.proactive.scheduler.task.utils.task.termination.TaskKiller in project scheduling by ow2-proactive.
the class TaskKillerTest method testThatTaskKillerInterruptsThreadImmediatelyWhenSetToZero.
/**
 * Verifies that with a cleanup timeout of 0 seconds the task killer has interrupted the
 * target thread more than once shortly after kill() is triggered, i.e. it does not wait
 * between the first and the second interrupt.
 */
@Test
public void testThatTaskKillerInterruptsThreadImmediatelyWhenSetToZero() {
    KilledThread testThreadToBeInterrupted = new KilledThread();
    testThreadToBeInterrupted.start();
    // The cleanup timeout comes from the mock: 0 seconds.
    CleanupTimeoutGetter cleanupTimeoutGetterMock = mock(CleanupTimeoutGetter.class);
    doReturn(0L).when(cleanupTimeoutGetterMock).getCleanupTimeSeconds();
    TaskKiller taskKiller = new TaskKiller(testThreadToBeInterrupted, cleanupTimeoutGetterMock);
    assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedOnce, is(false));
    assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(false));
    startKilling(taskKiller);
    // Wait 100 milliseconds for killing thread to start
    waitOrFailTest(100);
    // The messages describe the zero-second timeout actually configured above.
    assertThat("Task Killer must interrupt once if kill() is called, also when the cleanup timeout is set to 0 seconds.", testThreadToBeInterrupted.isInterruptedOnce, is(true));
    assertThat("Task Killer must interrupt again immediately (without waiting) after kill() is called when the cleanup timeout is set to 0 seconds.", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(true));
    // Cleanup - remove system property
    // NOTE(review): this test does not set the property itself; presumably kept so state from other tests cannot leak — confirm.
    System.clearProperty(this.taskKillerCleanupTimePropertyName);
}
use of org.ow2.proactive.scheduler.task.utils.task.termination.TaskKiller in project scheduling by ow2-proactive.
the class TaskKillerTest method testThatTaskKillerInterruptsThreadThenWaitsUntilInterruptingAgainWithoutSystemPropertySet.
@Test
public void testThatTaskKillerInterruptsThreadThenWaitsUntilInterruptingAgainWithoutSystemPropertySet() {
KilledThread testThreadToBeInterrupted = new KilledThread();
testThreadToBeInterrupted.start();
CleanupTimeoutGetter cleanupTimeoutGetterMock = mock(CleanupTimeoutGetter.class);
doReturn(10L).when(cleanupTimeoutGetterMock).getCleanupTimeSeconds();
TaskKiller taskKiller = new TaskKiller(testThreadToBeInterrupted, cleanupTimeoutGetterMock);
assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedOnce, is(false));
assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(false));
startKilling(taskKiller);
// Wait a second for killing thread to start
waitOrFailTest(1000);
assertThat("Task Killer must interrupt once if kill() is called and then wait for the timeout which is set to 10 seconds.", testThreadToBeInterrupted.isInterruptedOnce, is(true));
assertThat("Task Killer must only interrupt once (not twice) after kill() is called and then wait for the timeout which is set to 10 seconds.", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(false));
// Wait 10 seconds for killing timeout to be exceeded
waitOrFailTest(10000);
assertThat("Task Killer must have interrupted at least twice after timeout has passed", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(true));
}
use of org.ow2.proactive.scheduler.task.utils.task.termination.TaskKiller in project scheduling by ow2-proactive.
the class TaskKillerTest method testThatTaskKillerInterruptsThreadThenWaitsUntilInterruptingAgain.
@Test
public void testThatTaskKillerInterruptsThreadThenWaitsUntilInterruptingAgain() {
KilledThread testThreadToBeInterrupted = new KilledThread();
testThreadToBeInterrupted.start();
CleanupTimeoutGetter cleanupTimeoutGetterMock = mock(CleanupTimeoutGetter.class);
doReturn(5L).when(cleanupTimeoutGetterMock).getCleanupTimeSeconds();
TaskKiller taskKiller = new TaskKiller(testThreadToBeInterrupted, cleanupTimeoutGetterMock);
assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedOnce, is(false));
assertThat("Task Killer must not interrupt thread before kill() is called", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(false));
startKilling(taskKiller);
// Wait a second for killing thread to start
waitOrFailTest(1000);
assertThat("Task Killer must interrupt once if kill() is called and then wait for the timeout which is set to 10 seconds.", testThreadToBeInterrupted.isInterruptedOnce, is(true));
assertThat("Task Killer must only interrupt once (not twice) after kill() is called and then wait for the timeout which is set to 10 seconds.", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(false));
// Wait 5 seconds for killing timeout to be exceeded
waitOrFailTest(5000);
assertThat("Task Killer must have interrupted at least twice after timeout has passed", testThreadToBeInterrupted.isInterruptedMoreThanOnce, is(true));
// Cleanup - remove system property
System.clearProperty(this.taskKillerCleanupTimePropertyName);
}
Aggregations