use of cz.metacentrum.perun.taskslib.model.SendTask in project perun by CESNET.
the class BlockingSendExecutorCompletionService method blockingSubmit.
/**
 * Submits the given SEND worker to the completion service.
 * <p>
 * A permit is acquired from the semaphore before anything else happens. The
 * SendTask of the worker gets its start time and SENDING status set before
 * submission, and the resulting future is registered in
 * {@code executingSendTasks}. If anything in that sequence throws, the permit
 * is handed back and the exception is propagated unchanged.
 *
 * @param taskWorker worker to run; it is cast to {@code SendWorker}
 * @return future of the submitted worker
 * @throws InterruptedException when interrupted while acquiring the permit
 */
@Override
public Future<SendTask> blockingSubmit(EngineWorker<SendTask> taskWorker) throws InterruptedException {
    semaphore.acquire();
    try {
        SendWorker worker = (SendWorker) taskWorker;
        SendTask submittedTask = worker.getSendTask();
        // start time and status must be in place before the worker is handed over
        submittedTask.setStartTime(new Date());
        submittedTask.setStatus(SENDING);
        Future<SendTask> submitted = completionService.submit(worker);
        executingSendTasks.put(submitted, submittedTask);
        return submitted;
    } catch (Exception ex) {
        // nothing is going to be collected for this worker, so give the permit back
        semaphore.release();
        throw ex;
    }
}
use of cz.metacentrum.perun.taskslib.model.SendTask in project perun by CESNET.
the class PropagationMaintainerImpl method endStuckTasks.
// ----- methods ------------------------------
/**
 * Ends processing of Tasks and SendTasks which look stuck, i.e. which have not
 * progressed for at least {@code rescheduleTime} minutes.
 * <p>
 * The method makes three passes:
 * <ol>
 * <li>Running GEN futures: a stuck future is cancelled; if it was already
 * cancelled on a previous run and is still present, it is removed and the Task
 * is aborted as GENERROR.</li>
 * <li>Running SEND futures: same two-step approach; on the second step the
 * result is reported through {@code jmsQueueManager} and the SendTask counter
 * of the Task is decreased.</li>
 * <li>All Tasks known to the SchedulingPool are checked against the state their
 * status implies.</li>
 * </ol>
 */
public void endStuckTasks() {

    // handle stuck GEN tasks
    for (Map.Entry<Future<Task>, Task> generatingTask : generatingTasks.getRunningTasks().entrySet()) {

        Task task = generatingTask.getValue();
        Future<Task> future = generatingTask.getKey();
        LocalDateTime startTime = task.getGenStartTime();

        if (startTime == null) {
            // by implementation can't happen, we set time before adding to the generatingTasksMap
            log.error("[{}] Task in generatingTasks has no start time. Shouldn't happen by implementation.", task.getId());
            continue;
        }

        long howManyMinutesAgo = ChronoUnit.MINUTES.between(startTime, LocalDateTime.now());
        if (howManyMinutesAgo < rescheduleTime) {
            // still within the allowed time
            continue;
        }

        if (!future.isCancelled()) {
            // Cancel running GEN Task - we expect that it will be picked by GenCollector
            // and removed from the Engine.
            log.debug("[{}] Cancelling stuck generating Future<Task>.", task.getId());
            future.cancel(true);
        } else {
            // We cancelled Task in previous run, but it wasn't picked by GenCollector
            // GenCollector probably doesn't run -> abort task manually
            log.debug("[{}] Cancelled stuck generating Future<Task> was not picked by GenCollector, forcefully removing from Engine.", task.getId());
            // to release semaphore
            generatingTasks.removeStuckTask(future);
            abortTask(task, TaskStatus.GENERROR);
        }

    }

    // handle stuck SEND tasks
    for (Map.Entry<Future<SendTask>, SendTask> sendingSendTask : sendingSendTasks.getRunningTasks().entrySet()) {

        SendTask sendTask = sendingSendTask.getValue();
        Future<SendTask> future = sendingSendTask.getKey();
        Task task = sendTask.getTask();
        Date startTime = sendTask.getStartTime();

        if (startTime == null) {
            // by implementation can't happen, we set time before adding to the sending tasks map
            log.error("[{}] SendTask in sendingSendTask has no start time for Destination {}. Shouldn't happen by implementation.", task.getId(), sendTask.getDestination());
            continue;
        }

        // Keep the whole computation in long. Casting the millisecond difference to int
        // before dividing overflows after ~24.8 days and would hide long-stuck SendTasks.
        long howManyMinutesAgo = (System.currentTimeMillis() - startTime.getTime()) / 1000 / 60;
        if (howManyMinutesAgo < rescheduleTime) {
            // still within the allowed time
            continue;
        }

        sendTask.setStatus(SendTaskStatus.ERROR);

        if (!future.isCancelled()) {
            // Cancel running Send Task - we expect that it will be picked by SendCollector
            // and removed from the Engine if all SendTasks are done
            log.debug("[{}] Cancelling stuck sending Future<SendTask> for Destination: {}.", task.getId(), sendTask.getDestination());
            future.cancel(true);
        } else {
            log.debug("[{}] Cancelled stuck sending Future<SendTask> for Destination: {} was not picked by SendCollector, forcefully removing from Engine.", task.getId(), sendTask.getDestination());
            // We cancelled Task in previous run, but it wasn't picked by SendCollector
            // SendCollector probably doesn't run
            // to release semaphore
            sendingSendTasks.removeStuckTask(future);
            // make sure Task is switched to SENDERROR
            task.setSendEndTime(LocalDateTime.now());
            task.setStatus(TaskStatus.SENDERROR);
            // report result
            TaskResult taskResult = null;
            try {
                taskResult = schedulingPool.createTaskResult(task.getId(), sendTask.getDestination().getId(), sendTask.getStderr(), sendTask.getStdout(), sendTask.getReturnCode(), task.getService());
                jmsQueueManager.reportTaskResult(taskResult);
            } catch (JMSException | InterruptedException e) {
                // NOTE(review): the interrupt flag is not restored here - confirm this best-effort behaviour is intended
                log.error("[{}] Error trying to reportTaskResult {} of {} to Dispatcher: {}", task.getId(), taskResult, task, e);
            }
            // lower counter for stuck SendTask if count <= 1 remove from Engine
            try {
                schedulingPool.decreaseSendTaskCount(task, 1);
            } catch (TaskStoreException e) {
                log.error("[{}] Task {} could not be removed from SchedulingPool: {}", task.getId(), task, e);
            }
        }

    }

    // check all known Tasks
    Collection<Task> allTasks = schedulingPool.getAllTasks();
    if (allTasks == null) {
        return;
    }

    for (Task task : allTasks) {

        switch (task.getStatus()) {

            case WAITING: {
                /*
                Such Tasks should never be in Engine, (only in Dispatcher) since when they are sent to Engine,
                status is set to PLANNED in both components. If they are already present in SchedulingPool
                (Engine), then adding of new (same) Task is skipped and previous processing is finished first.
                => just remove such nonsense from SchedulingPool and don't spam Dispatcher
                 */
                try {
                    // TODO - can such Task be in any structure like generating/sending/newTasks/generatedTasks ?
                    schedulingPool.removeTask(task.getId());
                    log.warn("[{}] Task in WAITING state shouldn't be in Engine at all, silently removing from SchedulingPool.", task.getId());
                } catch (TaskStoreException ex) {
                    log.error("[{}] Failed during removal of WAITING Task from SchedulingPool. Such Task shouldn't be in Engine at all: {}", task.getId(), ex);
                }
                // must not fall through to PLANNED, which would re-add the Task we have just removed
                break;
            }

            case PLANNED: {
                /*
                Check tasks, that should be put to scheduling pool by EventProcessorImpl and taken by GenPlanner.
                Tasks might be like that, because adding to BlockingDeque has limit on Integer#MAX_SIZE
                (while EventProcessorImpl adds Task to the scheduling pool).
                Also if GenPlanner implementation fails it might take Task from the BlockingDeque but doesn't change
                its status or doesn't put it between generatingTasks.
                 */
                BlockingDeque<Task> newTasks = schedulingPool.getNewTasksQueue();
                if (!newTasks.contains(task)) {
                    try {
                        log.debug("[{}] Re-adding PLANNED Task back to pool and newTasks queue. Probably GenPlanner failed.", task.getId());
                        schedulingPool.addTask(task);
                    } catch (TaskStoreException e) {
                        log.error("Could not save Task {} into Engine SchedulingPool because of {}, setting to ERROR", task, e);
                        abortTask(task, TaskStatus.ERROR);
                    }
                }
                break;
            }

            case GENERATING: {
                /*
                This is basically the same check as for the running GEN futures above,
                but now for Tasks missing in "generatingTasks".
                !! We can't abort GENERATING Tasks with startTime=NULL here,
                because they are waiting to be started at genCompletionService#blockingSubmit() !!
                 */
                LocalDateTime genStartTime = task.getGenStartTime();
                long minutesSinceGenStart = 0;
                if (genStartTime != null) {
                    minutesSinceGenStart = ChronoUnit.MINUTES.between(genStartTime, LocalDateTime.now());
                }
                // somebody probably wrongly manipulated the structure
                if (minutesSinceGenStart >= rescheduleTime && !generatingTasks.getRunningTasks().values().contains(task)) {
                    // probably GenCollector failed to pick task -> abort
                    abortTask(task, TaskStatus.GENERROR);
                }
                break;
            }

            case GENERROR:
            case GENERATED: {
                /*
                Check Tasks, which should be processed by GenCollector and taken by SendPlanner or reported as GENERROR to Dispatcher.
                Task must have endTime set by GenWorker, otherwise it failed completely and should be reported as error.
                If either of GenCollector and SendPlanner fails to process generated tasks, it's missing in generatedTasksQueue.
                 */
                LocalDateTime genEndTime = task.getGenEndTime();
                long minutesSinceGenEnd = 0;
                if (genEndTime != null) {
                    minutesSinceGenEnd = ChronoUnit.MINUTES.between(genEndTime, LocalDateTime.now());
                }
                // If too much time has passed for Task and its not present in generatedTasksQueue, something is broken
                if ((genEndTime == null || minutesSinceGenEnd >= rescheduleTime) && !schedulingPool.getGeneratedTasksQueue().contains(task)) {
                    abortTask(task, TaskStatus.GENERROR);
                }
                break;
            }

            case SENDING:
                // TODO since Task is switched to SENDING before blockingSubmit() of any SendWorker.
                break;

            case WARNING:
            case SENDERROR: {
                LocalDateTime sendEndTime = task.getSendEndTime();
                long minutesSinceSendEnd = 0;
                if (sendEndTime != null) {
                    minutesSinceSendEnd = ChronoUnit.MINUTES.between(sendEndTime, LocalDateTime.now());
                }
                // If too much time has passed something is broken
                if (sendEndTime == null || minutesSinceSendEnd >= rescheduleTime) {
                    abortTask(task, TaskStatus.SENDERROR);
                }
                break;
            }

            case ERROR:
                break;

            case DONE:
                // DONE is deliberately handled the same way as an unknown state below
            default:
                // unknown state
                log.debug("[{}] Failing to default, status was: {}", task.getId(), task.getStatus());
                abortTask(task, TaskStatus.ERROR);
                break;
        }

    }

}
use of cz.metacentrum.perun.taskslib.model.SendTask in project perun by CESNET.
the class AbstractEngineTest method setup.
/**
 * Prepares the fixture shared by Engine tests: a session, one facility, two
 * services (one whose script is {@code /bin/true}, one {@code /bin/false}),
 * four destinations, two PLANNED Tasks stored through {@code tasksManagerImpl}
 * and five SendTasks in SENDING state.
 *
 * @throws Exception when any of the core objects can't be created
 */
@Before
public void setup() throws Exception {

    // create session
    sess = perun.getPerunSession(new PerunPrincipal("perunTests", ExtSourcesManager.EXTSOURCE_NAME_INTERNAL, ExtSourcesManager.EXTSOURCE_INTERNAL), new PerunClient());

    // create expected core objects
    facility = perun.getFacilitiesManagerBl().createFacility(sess, new Facility(0, "EngineTestFacility"));

    Service srv = new Service(0, "test_service", null);
    srv.setEnabled(true);
    srv.setDelay(1);
    srv.setRecurrence(2);
    // this command always returns true
    srv.setScript("/bin/true");
    service = perun.getServicesManagerBl().createService(sess, srv);

    Service srv2 = new Service(0, "test_service2", null);
    srv2.setEnabled(true);
    srv2.setDelay(1);
    srv2.setRecurrence(2);
    // this command always returns false
    srv2.setScript("/bin/false");
    service2 = perun.getServicesManagerBl().createService(sess, srv2);

    destination1 = perun.getServicesManagerBl().addDestination(sess, service, facility, new Destination(0, "par.dest1", "host", "PARALLEL"));
    destination2 = perun.getServicesManagerBl().addDestination(sess, service, facility, new Destination(0, "par.dest2", "host", "PARALLEL"));
    destination3 = perun.getServicesManagerBl().addDestination(sess, service, facility, new Destination(0, "one.dest1", "host", "ONE"));
    destination4 = perun.getServicesManagerBl().addDestination(sess, service, facility, new Destination(0, "one.dest2", "host", "ONE"));

    // plain list instead of double-brace initialization, which would create an anonymous
    // ArrayList subclass holding a reference to this test instance
    List<Destination> destinations = new ArrayList<>();
    destinations.add(destination1);
    destinations.add(destination2);
    destinations.add(destination3);
    destinations.add(destination4);

    // create Tasks in shared perun-core DB (as if action was initiated by dispatcher).
    task1 = new Task();
    task1.setDestinations(destinations);
    task1.setFacility(facility);
    task1.setService(service);
    task1.setSchedule(LocalDateTime.now());
    task1.setStatus(Task.TaskStatus.PLANNED);
    task1.setId(tasksManagerImpl.insertTask(task1));

    task2 = new Task();
    task2.setDestinations(destinations);
    task2.setFacility(facility);
    task2.setService(service2);
    task2.setSchedule(LocalDateTime.now());
    task2.setStatus(Task.TaskStatus.PLANNED);
    task2.setId(tasksManagerImpl.insertTask(task2));

    // SendTasks of task1 for every destination, all with return code 0
    sendTask1 = createSendingSendTask(task1, destination1, 0);
    sendTask2 = createSendingSendTask(task1, destination2, 0);
    sendTask3 = createSendingSendTask(task1, destination3, 0);
    sendTask4 = createSendingSendTask(task1, destination4, 0);

    // SendTask of task2 (the "/bin/false" service) with return code 1
    sendTaskFalse = createSendingSendTask(task2, destination1, 1);

}

/**
 * Creates a SendTask in SENDING state, started right now, with the given return code.
 *
 * @param task Task the SendTask belongs to
 * @param destination Destination the SendTask is meant for
 * @param returnCode return code to pre-set on the SendTask
 * @return newly created SendTask
 */
private static SendTask createSendingSendTask(Task task, Destination destination, int returnCode) {
    SendTask sendTask = new SendTask(task, destination);
    sendTask.setStartTime(new Date());
    sendTask.setStatus(SendTask.SendTaskStatus.SENDING);
    sendTask.setReturnCode(returnCode);
    return sendTask;
}
use of cz.metacentrum.perun.taskslib.model.SendTask in project perun by CESNET.
the class SendCollector method run.
/**
 * Collects finished SendTasks until {@code shouldStop()} returns true.
 * <p>
 * For every collected SendTask (or failure delivered as
 * {@code TaskExecutionException}) the send end time and status of the owning
 * Task are updated, the result for the Destination is reported through
 * {@code jmsQueueManager} and the SendTask counter of the Task is decreased in
 * the SchedulingPool.
 * <p>
 * When the thread is interrupted while waiting for a SendTask, the interrupt
 * flag is restored and a {@code RuntimeException} is thrown, which ends the loop.
 */
@Override
public void run() {
    while (!shouldStop()) {

        Task task;
        Service service;
        Destination destination;
        String stderr;
        String stdout;
        int returnCode;

        // FIXME - doesn't provide nice output and clog the log
        log.debug(schedulingPool.getReport());

        try {

            SendTask sendTask = sendCompletionService.blockingTake();
            task = sendTask.getTask();
            /*
             Set Task "sendEndTime" immediately for each done SendTask, so it's not considered as stuck
             by PropagationMaintainer#endStuckTasks().
             Like this we can maximally propagate for "rescheduleTime" for each Destination and not
             all Destinations (whole Task). Default rescheduleTime is 3 hours * no.of destinations.
             */
            task.setSendEndTime(LocalDateTime.now());

            // XXX: why is this necessary? Rewriting status with every completed destination?
            if (!Objects.equals(task.getStatus(), Task.TaskStatus.SENDERROR) && !Objects.equals(task.getStatus(), Task.TaskStatus.WARNING) && !Objects.equals(sendTask.getStatus(), SendTaskStatus.WARNING)) {
                // keep SENDING status only if task previously hasn't failed
                task.setStatus(Task.TaskStatus.SENDING);
            } else if (!Objects.equals(task.getStatus(), Task.TaskStatus.SENDERROR) && sendTask.getStatus() == SendTaskStatus.WARNING) {
                task.setStatus(Task.TaskStatus.WARNING);
            }

            destination = sendTask.getDestination();
            stderr = sendTask.getStderr();
            stdout = sendTask.getStdout();
            returnCode = sendTask.getReturnCode();
            service = sendTask.getTask().getService();

        } catch (InterruptedException e) {

            String errorStr = "Thread collecting sent SendTasks was interrupted.";
            log.error("{}: {}", errorStr, e);
            // Catching InterruptedException clears the interrupt flag; restore it so that
            // whoever handles the RuntimeException still sees the thread as interrupted.
            Thread.currentThread().interrupt();
            throw new RuntimeException(errorStr, e);

        } catch (TaskExecutionException e) {

            task = e.getTask();
            /*
             Set Task "sendEndTime" immediately for each done SendTask, so it's not considered as stuck
             by PropagationMaintainer#endStuckTasks().
             Like this we can maximally propagate for "rescheduleTime" for each Destination and not
             all Destinations (whole Task). Default rescheduleTime is 3 hours * no.of destinations.
             */
            task.setSendEndTime(LocalDateTime.now());

            // set SENDERROR status immediately as first SendTask (Destination) fails
            task.setStatus(Task.TaskStatus.SENDERROR);

            destination = e.getDestination();
            stderr = e.getStderr();
            stdout = e.getStdout();
            returnCode = e.getReturnCode();
            service = task.getService();

            log.error("[{}] Error occurred while sending Task to destination {}", task.getId(), e.getDestination());

        } catch (Throwable ex) {
            // nothing to report without Task/Destination, go for the next SendTask
            log.error("Unexpected exception in SendCollector thread. Stuck Tasks will be cleaned by PropagationMaintainer#endStuckTasks() later.", ex);
            continue;
        }

        // this is just interesting cross-check
        if (schedulingPool.getTask(task.getId()) == null) {
            log.warn("[{}] Task retrieved from SendTask is no longer in SchedulingPool. Probably cleaning thread removed it before completion. " + "This might create possibility of running GEN and SEND of same Task together!", task.getId());
        }

        try {
            // report TaskResult to Dispatcher for this SendTask (Destination)
            jmsQueueManager.reportTaskResult(schedulingPool.createTaskResult(task.getId(), destination.getId(), stderr, stdout, returnCode, service));
        } catch (JMSException | InterruptedException e1) {
            // NOTE(review): the interrupt flag is not restored here, processing of this SendTask
            // is finished first - confirm this best-effort behaviour is intended
            log.error("[{}] Error trying to reportTaskResult for Destination: {} to Dispatcher: {}", task.getId(), destination, e1);
        }

        try {
            // Decrease SendTasks count for Task
            // Consequently, if count is <=1, Task is reported to Dispatcher
            // as DONE/SENDERROR and removed from SchedulingPool (Engine).
            schedulingPool.decreaseSendTaskCount(task, 1);
        } catch (TaskStoreException e) {
            log.error("[{}] Task {} could not be removed from SchedulingPool: {}", task.getId(), task, e);
        }

    }
}
use of cz.metacentrum.perun.taskslib.model.SendTask in project perun by CESNET.
the class BlockingSendExecutorCompletionService method blockingTake.
/**
 * Takes the next finished future from the completion service and returns the
 * SendTask it produced.
 * <p>
 * In every handled outcome the future is passed to {@code removeTaskFuture}.
 * Failures are translated as follows:
 * <ul>
 * <li>a {@code TaskExecutionException} thrown by the worker is rethrown as is,</li>
 * <li>any other worker failure, and a cancellation, is wrapped into a new
 * {@code TaskExecutionException} built from the SendTask stored in
 * {@code executingSendTasks},</li>
 * <li>if no SendTask is stored for the future, a {@code RuntimeException}
 * (worker failure) or the original {@code CancellationException} is thrown.</li>
 * </ul>
 *
 * @return SendTask returned by the finished worker
 * @throws InterruptedException when interrupted while waiting
 * @throws TaskExecutionException when processing of the SendTask failed or was cancelled
 */
@Override
public SendTask blockingTake() throws InterruptedException, TaskExecutionException {

    Future<SendTask> finished = completionService.take();

    try {

        // .get() throws CancellationException if Task processing was cancelled from outside
        SendTask result = finished.get();
        removeTaskFuture(finished);
        return result;

    } catch (ExecutionException e) {

        // look the SendTask up before the future is removed
        SendTask failed = executingSendTasks.get(finished);
        removeTaskFuture(finished);

        Throwable cause = e.getCause();
        if (cause instanceof TaskExecutionException) {
            // SEND Task failed and related Task and results are part of this exception
            throw (TaskExecutionException) cause;
        }

        // Unexpected exception during processing, pass stored SendTask if possible
        if (failed != null) {
            throw new TaskExecutionException(failed.getTask(), failed.getDestination(), "Unexpected exception during SEND Task processing.", e);
        }
        log.error("We couldn't get SendTask for failed Future<SendTask>: {}", e);
        throw new RuntimeException("We couldn't get SendTask for failed Future<Task>", e);

    } catch (CancellationException ex) {

        // processing was cancelled
        SendTask cancelled = executingSendTasks.get(finished);
        removeTaskFuture(finished);

        if (cancelled != null) {
            // make sure SendCollector always get related Task
            throw new TaskExecutionException(cancelled.getTask(), cancelled.getDestination(), "Processing of Task was cancelled before completion.");
        }
        log.error("Somebody manually removed Future<SendTask> from executingSendTasks or SendTask was null: {}", ex);
        // we can't do anything about it
        throw ex;

    }

}
Aggregations