use of cz.metacentrum.perun.taskslib.model.Task in project perun by CESNET.
the class PropagationMaintainerImpl method endStuckTasks.
/**
 * Walks through all tasks the scheduling pool reports as PROCESSING or PLANNED and
 * switches to ERROR those that have stayed in that state for at least
 * {@code rescheduleTime} minutes.
 *
 * <p>For PLANNED tasks the age is measured from the schedule time, for all other
 * tasks from the start time. A task without the corresponding timestamp gets the
 * current time assigned. Tasks that appear to have already ended (end time set, or
 * status DONE/ERROR) are reconciled against {@code taskStatusManager} first.
 */
private void endStuckTasks() {
	// list all tasks in processing and planned and check if any have been
	// running for too long.
	List<Task> suspiciousTasks = schedulingPool.getProcessingTasks();
	log.debug("There are {} PROCESSING tasks", suspiciousTasks.size());
	suspiciousTasks.addAll(schedulingPool.getPlannedTasks());
	log.debug("There are {} tasks that are PLANNED or PROCESSING", suspiciousTasks.size());
	for (Task task : suspiciousTasks) {
		log.debug("checking task {} for staying around too long...", task);
		// count how many minutes the task stays in one state - if the state
		// is PLANNED count it from when it was scheduled ; if it is
		// PROCESSING count it from when it started
		Date checkDate = task.getStatus().equals(TaskStatus.PLANNED) ? task.getSchedule() : task.getStartTime();
		if (checkDate == null) {
			log.error("ERROR: task in state {} has no corresponding timestamp", task.getStatus());
			// recover by starting the clock now, so the task gets a full time window
			checkDate = new Date(System.currentTimeMillis());
			if (task.getStatus().equals(TaskStatus.PLANNED)) {
				task.setSchedule(checkDate);
			} else {
				task.setStartTime(checkDate);
			}
		}
		Date ended = task.getEndTime();
		TaskStatus status = task.getStatus();
		if (ended != null || status.equals(TaskStatus.DONE) || status.equals(TaskStatus.ERROR)) {
			log.error("ERROR: Task presumably in PLANNED or PROCESSING state, but appears to have ended.");
			cz.metacentrum.perun.engine.scheduling.TaskStatus taskStatus = taskStatusManager.getTaskStatus(task);
			if (taskStatus.isTaskFinished()) {
				schedulingPool.setTaskStatus(task, taskStatus.getTaskStatus());
				log.debug("TASK " + task.getId() + " status set to DONE");
			} else {
				// there is something deeply wrong...
				log.error("ERROR: Task is weird. Switching it to ERROR. {}", task);
				task.setEndTime(new Date(System.currentTimeMillis()));
				schedulingPool.setTaskStatus(task, TaskStatus.ERROR);
			}
			// NOTE(review): a task handled by this branch still falls through to the
			// age check below - confirm that this is intended.
		}
		// Do the whole computation in long arithmetic. Casting the millisecond
		// difference to int before dividing (as the previous code did) wraps around
		// after roughly 24.8 days and can even yield a negative number of minutes,
		// which let very old tasks slip through the check below.
		long howManyMinutesAgo = (System.currentTimeMillis() - checkDate.getTime()) / 1000 / 60;
		// If too much time has passed something is broken
		if (howManyMinutesAgo >= rescheduleTime) {
			log.error("ERROR: Task is stuck in PLANNED or PROCESSING state. Switching it to ERROR. {}", task);
			task.setEndTime(new Date(System.currentTimeMillis()));
			schedulingPool.setTaskStatus(task, TaskStatus.ERROR);
		}
	}
}
use of cz.metacentrum.perun.taskslib.model.Task in project perun by CESNET.
the class PropagationMaintainerImpl method rescheduleOldDoneTasks.
/**
 * Reschedules non-GENERATE tasks in DONE state whose end time is more than two days
 * in the past by handing them to {@code taskScheduler.propagateService}.
 *
 * <p>Tasks without an end time are skipped with a warning instead of failing the
 * whole run. A failure to reschedule one task is logged and does not stop the
 * processing of the remaining tasks.
 */
private void rescheduleOldDoneTasks() {
	// Reschedule SEND tasks in DONE that haven't been running for quite a
	// while
	List<Task> donetasks = schedulingPool.getDoneTasks();
	log.debug("There are {} completed tasks", donetasks.size());
	// 2 days * 24 h * 60 min * 60 s * 1000 ms, computed as long. The previous
	// expression (1000 * 60 * 24 * 2) lacked one "* 60" and was therefore only
	// 48 minutes, contradicting the "2 days" promised by the log messages.
	final long twoDaysInMillis = 2L * 24 * 60 * 60 * 1000;
	Date twoDaysAgo = new Date(System.currentTimeMillis() - twoDaysInMillis);
	for (Task task : donetasks) {
		// skip GEN tasks
		if (task.getExecService().getExecServiceType().equals(ExecService.ExecServiceType.GENERATE)) {
			continue;
		}
		Date endTime = task.getEndTime();
		if (endTime == null) {
			// the age of the task cannot be determined without an end time
			log.warn("DONE task has no end time, skipping rescheduling check. {}", task);
			continue;
		}
		if (endTime.before(twoDaysAgo)) {
			// reschedule the task
			try {
				taskScheduler.propagateService(task, new Date(System.currentTimeMillis()));
				log.info("TASK [" + task + "] wasn't propagated for more then 2 days. Going to schedule it for propagation now.");
			} catch (InternalErrorException e) {
				log.error("Rescheduling of task which wasn't propagated for more than 2 days failed. {}, Exception: {}", task, e);
			}
		}
	}
}
use of cz.metacentrum.perun.taskslib.model.Task in project perun by CESNET.
the class PropagationMaintainerImpl method checkFinishedTasks.
/*
* private void checkProcessingTasks() {
* log.info("Gonna list tasks in PROCESSING...");
*
* for(Task task: schedulingPool.getProcessingTasks()) {
* if(task.getExecService
* ().getExecServiceType().equals(ExecService.ExecServiceType.GENERATE))
* continue; log.info("Gonna check results for Task ID:" + task.getId());
*
* }
*
* for (Task task : taskManager.listAllTasksInState(TaskStatus.PROCESSING,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id")))) {
* //skip GEN tasks
* if(task.getExecService().getExecServiceType().equals(ExecService
* .ExecServiceType.GENERATE)) continue;
* log.info("Gonna check results for Task ID:" + task.getId());
*
* List<TaskResult> taskResults =
* taskResultDao.getTaskResultsByTask(task.getId());
*
* List<Destination> destinations = null; try { destinations =
* Rpc.ServicesManager.getDestinations(engineManager.getRpcCaller(),
* task.getExecService().getService(), task.getFacility()); }
* catch(InternalErrorException ex) {
* log.error("Can't get destinations. Switching task to ERROR. Cause: {}",
* ex); task.setStatus(TaskStatus.ERROR); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); }
* catch(PrivilegeException ex) {
* log.error("Can't get destinations. Switching task to ERROR. Cause: {}",
* ex); task.setStatus(TaskStatus.ERROR); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); }
* catch(ServiceNotExistsException ex) {
* log.error("Service for the task no longer exists. Removing task", ex);
* taskManager.removeTask(task.getId(),
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); }
* catch(FacilityNotExistsException ex) {
* log.error("Facility for the task no longer exists. Removing task", ex);
* taskManager.removeTask(task.getId(),
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); }
*
* switch(task.getType()) {
*
* case SERIAL: collectSerialTaskResults(task, taskResults, destinations);
* break;
*
* case PARALLEL: collectParallelTaskResults(task, taskResults,
* destinations); break;
*
* default: log.error("Unknown task type. Assuming parallel.");
* collectParallelTaskResults(task, taskResults, destinations); break; } } }
*
*
* private void collectSerialTaskResults(Task task, List<TaskResult>
* taskResults, List<Destination> destinations) { if (taskResults.size() <=
* destinations.size()) { // Let's check whether they are all DONE or not...
* int amountDone = 0; int amountDenied = 0; int amountError = 0; int
* amountFatalError = 0; for (TaskResult taskResult : taskResults) { switch
* (taskResult.getStatus()) { case DONE: amountDone++; break; case DENIED:
* amountDenied++; break; case ERROR: amountError++; break; case
* FATAL_ERROR: amountFatalError++; break; default: throw new
* IllegalArgumentException("WTF?! " + taskResult.getStatus().toString()); }
* }
*
* if (amountDone > 0) { // Super, at least one task is DONE.
* log.info("Task ID " + task.getId() +
* " has one Tasks_result DONE, so we set it as DONE.");
* task.setStatus(TaskStatus.DONE); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id")));
*
* //Set its GENERATE dependencies as dirty //TODO: Hmm...what to do in case
* of exceptions?
*
* try { log.info("I am going to set all ExecService " +
* task.getExecServiceId() + " dependencies (the GENERATE ones) to NONE.");
* setAllGenerateDependenciesToNone
* (dependenciesResolver.listDependencies(task.getExecServiceId()),
* task.getFacilityId()); } catch (ServiceNotExistsException e) {
* log.error(e.toString(), e); } catch (InternalErrorException e) {
* log.error(e.toString(), e); } catch (PrivilegeException e) {
* log.error(e.toString(), e); } } else { //TODO Now FATAL_ERROR and ERROR
* are being treated exactly the same. Is FATAL_ERROR really necessary? //
* Not DONE yet, are there any destinations left? if (taskResults.size() ==
* destinations.size()) { // Well, we ended in ERROR... log.info(
* "There has been no DONE state Tasks_results, so I am going to set the Task ID"
* + task.getId() + " to ERROR."); task.setStatus(TaskStatus.ERROR);
* task.setEndTime(new Date(System.currentTimeMillis()));
* taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions?
*
* try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } } else { // There
* are some destinations left to try, schedule it back
* task.setStatus(TaskStatus.PLANNED); task.setSchedule(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); } } }
* else if (taskResults.size() > destinations.size()) { log.error(
* "There are more Task_results then destinations. so I am going to set the Task ID"
* + task.getId() + " to ERROR."); task.setStatus(TaskStatus.ERROR);
* task.setEndTime(new Date(System.currentTimeMillis()));
* taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions? try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } }
*
* if(false) { final long THREE_HOUR = 1000 * 60 * 60 * 3; long
* timeDifference = System.currentTimeMillis() -
* task.getStartTime().getTime(); if(timeDifference > THREE_HOUR) { // //
* WARNING!! // // This can be dangerous. We are not sure if there isn't any
* slave script running for this task. // log.error("There are only " +
* taskResults.size() + " Task_results for Task ID" + task.getId() +
* ", but task is in processing too long, so switch task to ERROR");
* task.setStatus(TaskStatus.ERROR); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions? try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } }
*
* log.info("There are only " + taskResults.size() +
* " Task_results for Task ID" + task.getId() +
* ", so we ain't gonna do anything."); // Well, we ain't gonna do anything
* bro... // TODO: Time out... } }
*
* private void collectParallelTaskResults(Task task, List<TaskResult>
* taskResults, List<Destination> destinations) { // Do we have the same
* number of Destinations as we have TaskResults? if (taskResults.size() ==
* destinations.size()) { // Let's check whether they are all DONE or not...
* int amountDone = 0; int amountDenied = 0; int amountError = 0; int
* amountFatalError = 0; for (TaskResult taskResult : taskResults) { switch
* (taskResult.getStatus()) { case DONE: amountDone++; break; case DENIED:
* amountDenied++; break; case ERROR: amountError++; break; case
* FATAL_ERROR: amountFatalError++; break; default: throw new
* IllegalArgumentException("WTF?! " + taskResult.getStatus().toString()); }
* }
*
* if (amountDone + amountDenied == taskResults.size()) { // Super, all is
* DONE or we don't care (DENIED) :-) log.info("Task ID " + task.getId() +
* " has all Tasks_results either DONE or DENIED, so we set it as DONE.");
* task.setStatus(TaskStatus.DONE); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id")));
*
* //Set its GENERATE dependencies as dirty //TODO: Hmm...what to do in case
* of exceptions? try { log.info("I am going to set all ExecService " +
* task.getExecServiceId() + " dependencies (the GENERATE ones) to NONE.");
*
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies(task
* .getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } } else { final
* long TWO_HOUR = 1000 * 60 * 60 * 2; long timeDifference =
* System.currentTimeMillis() - task.getStartTime().getTime();
* if(timeDifference > TWO_HOUR) { // // WARNING!! // // This can be
* dangerous. We are not sure if there isn't any slave script running for
* this task. // log.error("There are only " + taskResults.size() +
* " Task_results for Task ID" + task.getId() +
* ", but task is in processing too long, so switch task to ERROR");
* task.setStatus(TaskStatus.ERROR); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions? try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } } } } else if
* (taskResults.size() > destinations.size()) { log.error(
* "There are more Task_results then destinations. so I am going to set the Task ID"
* + task.getId() + " to ERROR."); task.setStatus(TaskStatus.ERROR);
* task.setEndTime(new Date(System.currentTimeMillis()));
* taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions? try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } } else { final
* long THREE_HOUR = 1000 * 60 * 60 * 3; long timeDifference =
* System.currentTimeMillis() - task.getStartTime().getTime();
* if(timeDifference > THREE_HOUR) { // // WARNING!! // // This can be
* dangerous. We are not sure if there isn't any slave script running for
* this task. // log.error("There are only " + taskResults.size() +
* " Task_results for Task ID" + task.getId() +
* ", but task is in processing too long, so switch task to ERROR");
* task.setStatus(TaskStatus.ERROR); task.setEndTime(new
* Date(System.currentTimeMillis())); taskManager.updateTask(task,
* Integer.parseInt(propertiesBean.getProperty("engine.unique.id"))); //Set
* its GENERATE dependencies as dirty //TODO: Hmm...what to do in case of
* exceptions? try {
* setAllGenerateDependenciesToNone(dependenciesResolver.listDependencies
* (task.getExecServiceId()), task.getFacilityId()); } catch
* (ServiceNotExistsException e) { log.error(e.toString(), e); } catch
* (InternalErrorException e) { log.error(e.toString(), e); } catch
* (PrivilegeException e) { log.error(e.toString(), e); } }
*
* log.info("There are only " + taskResults.size() +
* " Task_results for Task ID" + task.getId() +
* ", so we ain't gonna do anything."); // Well, we ain't gonna do anything
* bro... // TODO: Time out... } }
*/
/**
 * Reports tasks in DONE and ERROR state through {@code jmsQueueManager} and removes
 * each successfully reported task from the scheduling pool.
 *
 * <p>DONE tasks are reported with an empty destination list. ERROR tasks are reported
 * with the serialized destinations that are not among the successful destinations
 * returned by {@code taskStatusManager}. A missing end time is repaired before
 * reporting. When reporting fails with a {@link JMSException} the task stays in the
 * pool.
 */
private void checkFinishedTasks() {
	// report finished tasks back to scheduler
	// clear all tasks we are done with (ie. DONE, ERROR with no recurrence
	// left)
	List<Task> tasklist = schedulingPool.getDoneTasks();
	log.debug("There are {} DONE tasks", tasklist.size());
	for (Task task : tasklist) {
		if (task.getEndTime() == null) {
			log.error("RECOVERY FROM INCONSISTENT STATE: DONE task does not have end_time! Setting end_time to now.");
			task.setEndTime(new Date(System.currentTimeMillis()));
		}
		log.debug("TASK " + task.toString() + " finished");
		log.debug("TASK reported as finished at " + System.currentTimeMillis());
		reportFinishedTaskAndRemove(task, "Destinations []");
	}
	tasklist = schedulingPool.getErrorTasks();
	log.debug("There are {} ERROR tasks", tasklist.size());
	for (Task task : tasklist) {
		if (task.getEndTime() == null) {
			log.error("RECOVERY FROM INCONSISTENT STATE: ERROR task does not have end_time! Setting end_time to task.getDelay + 1.");
			// getDelay is in minutes, therefore we multiply it with 60*1000
			// (as long, so that a large delay cannot overflow int arithmetic)
			Date endTime = new Date(System.currentTimeMillis() - ((task.getDelay() + 1) * 60000L));
			task.setEndTime(endTime);
		}
		String failedDestinations = serializeFailedDestinations(task);
		log.debug("TASK " + task.toString() + " finished in error, remaining destinations: " + failedDestinations);
		reportFinishedTaskAndRemove(task, failedDestinations);
	}
}

/**
 * Builds the {@code "Destinations [a,b,...]"} string of the task's destinations that
 * are not listed as successful by {@code taskStatusManager}.
 *
 * <p>The list returned by {@code task.getDestinations()} is only read here. The
 * previous implementation called {@code removeAll} and {@code remove(0)} on it, so
 * when reporting failed and the task was retried later, destinations were missing
 * from the next report.
 */
private String serializeFailedDestinations(Task task) {
	List<Destination> successfulDestinations = taskStatusManager.getTaskStatus(task).getSuccessfulDestinations();
	StringBuilder serialized = new StringBuilder("Destinations [");
	boolean first = true;
	for (Destination destination : task.getDestinations()) {
		if (successfulDestinations.contains(destination)) {
			continue;
		}
		if (!first) {
			serialized.append(",");
		}
		serialized.append(destination.serializeToString());
		first = false;
	}
	serialized.append("]");
	return serialized.toString();
}

/**
 * Reports the task as finished and, only if the report went through, removes it from
 * the scheduling pool. A {@link JMSException} is logged and the task is kept.
 */
private void reportFinishedTaskAndRemove(Task task, String destinations) {
	try {
		jmsQueueManager.reportFinishedTask(task, destinations);
		schedulingPool.removeTask(task);
		log.debug("TASK {} removed from database.", task.getId());
	} catch (JMSException e) {
		log.error("Failed to report finished task " + task.toString() + ": " + e.getMessage(), e);
	}
}
use of cz.metacentrum.perun.taskslib.model.Task in project perun by CESNET.
the class SchedulingPoolImpl method checkTasksDb.
/**
 * Cross-checks every task returned by {@code taskManager.listAllTasksAndClients()}
 * against the local structures ({@code tasksById}, {@code tasksByServiceAndFacility}
 * and the per-status lists in {@code pool}).
 *
 * <p>A task unknown to all local structures is added through {@code addToPool}. For a
 * task that is known locally, missing index entries are filled in and the task is
 * moved to the pool list matching its current status.
 */
@Override
public void checkTasksDb() {
	log.debug("Going to cross-check tasks in database...");
	for (Pair<Task, Integer> pair : taskManager.listAllTasksAndClients()) {
		Task task = pair.getLeft();
		DispatcherQueue taskQueue = dispatcherQueuePool.getDispatcherQueueByClient(pair.getRight());
		if (task.getStatus() == null) {
			task.setStatus(TaskStatus.NONE);
		}
		Task localTask = null;
		TaskStatus localStatus = null;
		final int taskId = task.getId();
		log.debug("  checking task " + task.toString());
		if (taskQueue == null) {
			log.warn("  there is no task queue for client " + pair.getRight());
			// continue;
		}
		synchronized (tasksById) {
			Pair<Task, DispatcherQueue> localPair = tasksById.get(task.getId());
			if (localPair != null) {
				localTask = localPair.getLeft();
			}
			if (localTask == null) {
				localTask = tasksByServiceAndFacility.get(new Pair<Integer, Integer>(task.getExecServiceId(), task.getFacilityId()));
			}
			if (localTask == null) {
				// Last resort: scan the per-status lists for a task with the same id.
				// The previous code called List.get(task.getId()), which treats the id
				// as a list index and either throws IndexOutOfBoundsException or
				// returns an unrelated task.
				for (TaskStatus sts : TaskStatus.values()) {
					List<Task> tasklist = pool.get(sts);
					if (tasklist == null) {
						continue;
					}
					for (Task candidate : tasklist) {
						if (candidate.getId() == taskId) {
							localTask = candidate;
							break;
						}
					}
					if (localTask != null) {
						localStatus = sts;
						break;
					}
				}
			}
		}
		if (localTask == null) {
			try {
				log.debug("  task not found in any of local structures, adding fresh");
				addToPool(task, taskQueue);
			} catch (InternalErrorException e) {
				log.error("Error adding task to the local structures: " + e.getMessage());
			}
		} else {
			synchronized (tasksById) {
				if (!tasksById.containsKey(localTask.getId())) {
					log.debug("  task not known by id, adding");
					tasksById.put(localTask.getId(), new Pair<Task, DispatcherQueue>(localTask, taskQueue));
				}
				Pair<Integer, Integer> serviceAndFacility = new Pair<Integer, Integer>(localTask.getExecServiceId(), localTask.getFacilityId());
				if (!tasksByServiceAndFacility.containsKey(serviceAndFacility)) {
					log.debug("  task not known by ExecService and Facility, adding");
					// register the same instance the other local structures hold
					// (the previous code stored the database copy here instead)
					tasksByServiceAndFacility.put(serviceAndFacility, localTask);
				}
				if (localStatus != null && localStatus != localTask.getStatus()) {
					log.debug("  task listed with wrong status, removing");
					List<Task> wrongList = pool.get(localStatus);
					if (wrongList != null) {
						// remove the task itself; remove(int) would remove by list index
						wrongList.remove(localTask);
					} else {
						log.error("  no task list for status " + localStatus);
					}
				}
				List<Task> properList = pool.get(localTask.getStatus());
				if (properList != null && !properList.contains(localTask)) {
					log.debug("  task not listed with its status, adding");
					properList.add(localTask);
				}
			}
		}
	}
}
use of cz.metacentrum.perun.taskslib.model.Task in project perun by CESNET.
the class TaskSchedulerImpl method scheduleItAndWait.
/**
 * Creates a new task for the given dependency on the given facility, registers it in
 * the scheduling pool together with the dispatcher queue and schedules it.
 *
 * @param dependency exec service the new task is created for
 * @param facility facility the new task is bound to
 * @param execService not used by this method
 * @param dispatcherQueue queue the new task is added to the pool with
 * @param time schedule time set on the new task
 * @return the result of {@code scheduleTask} for the new task, or {@code false} when
 *         an {@code InternalErrorException} was thrown while adding or scheduling it
 */
private Boolean scheduleItAndWait(ExecService dependency, Facility facility, ExecService execService, DispatcherQueue dispatcherQueue, Date time) {
	// build the task describing the dependency which has to be scheduled
	final Task dependencyTask = new Task();
	dependencyTask.setExecService(dependency);
	dependencyTask.setFacility(facility);
	dependencyTask.setSchedule(time);

	Boolean scheduled;
	try {
		schedulingPool.addToPool(dependencyTask, dispatcherQueue);
		scheduled = scheduleTask(dependencyTask);
	} catch (InternalErrorException ex) {
		log.error("Could not schedule new task: " + ex.getMessage());
		scheduled = Boolean.FALSE;
	}
	// schedulingPool.setTaskStatus(task, TaskStatus.NONE);
	return scheduled;
}
Aggregations