Use of com.webank.wedatasphere.qualitis.bean.JobChecker in the project Qualitis by WeBankFinTech.
Class CheckerRunnable, method run.
/**
 * Runs one monitoring pass over the applications that have not ended.
 *
 * <p>The pass calls {@code abstractServiceCoordinator.coordinate()}, loads the
 * unfinished jobs through {@link #getJobs()}, splits them into batches of at most
 * {@code updateJobSize} elements, submits one {@code UpdaterRunnable} per batch to
 * {@code POOL}, and then blocks until the shared latch reaches zero. Whatever
 * happens, {@code abstractServiceCoordinator.release()} is invoked in the
 * {@code finally} clause.
 *
 * <p>Failures are logged and never propagated to the caller. If the thread is
 * interrupted while waiting, its interrupt flag is restored before returning.
 */
@Override
public void run() {
    try {
        LOGGER.info("Start to monitor application.");
        abstractServiceCoordinator.coordinate();
        // Get task that is not finished
        List<JobChecker> jobs;
        try {
            jobs = getJobs();
            LOGGER.info("Succeed to find applications that are not end. Application: {}", jobs);
        } catch (Exception e) {
            LOGGER.error("Failed to find applications that are not end.", e);
            return;
        }
        int total = jobs.size();
        if (total > 0) {
            // Ceiling division: the latch count must equal the number of batches
            // actually submitted. The former "total / updateJobSize + 1" was one
            // too high whenever total was an exact multiple of updateJobSize,
            // which caused the wait below to be skipped entirely.
            int batchCount = (total + updateJobSize - 1) / updateJobSize;
            CountDownLatch latch = new CountDownLatch(batchCount);
            for (int start = 0; start < total; start += updateJobSize) {
                int end = Math.min(start + updateJobSize, total);
                POOL.execute(new UpdaterRunnable(iChecker, jobs.subList(start, end), latch));
            }
            // NOTE(review): assumes every UpdaterRunnable counts the latch down
            // exactly once, even on failure - confirm in UpdaterRunnable.
            latch.await();
        }
        LOGGER.info("Finish to monitor application.");
    } catch (InterruptedException e) {
        // Keep the interruption visible to whoever owns this thread.
        Thread.currentThread().interrupt();
        LOGGER.error("Failed to monitor application, caused by: {}", e.getMessage(), e);
    } catch (Exception e) {
        LOGGER.error("Failed to monitor application, caused by: {}", e.getMessage(), e);
    } finally {
        abstractServiceCoordinator.release();
    }
}
Use of com.webank.wedatasphere.qualitis.bean.JobChecker in the project Qualitis by WeBankFinTech.
Class CheckerRunnable, method getJobs.
/**
 * Builds the list of jobs that still need to be checked.
 *
 * <p>For every application whose status is not in {@code END_APPLICATION_STATUS_LIST},
 * the tasks matching {@code NOT_END_TASK_STATUS_LIST} (with a remote id) are looked up
 * and each one is wrapped in a {@code JobChecker}. An application that has no such
 * task is treated as abnormal and handed to
 * {@link #recoverAbnormalApplication(Application)} instead.
 *
 * @return one {@code JobChecker} per unfinished task; empty when there is none
 */
private List<JobChecker> getJobs() {
    List<JobChecker> result = new ArrayList<>();
    List<Application> pendingApplications = applicationDao.findByStatusNotIn(END_APPLICATION_STATUS_LIST);
    for (Application application : pendingApplications) {
        // Find not end task
        List<Task> runningTasks =
            taskDao.findByApplicationAndStatusInAndTaskRemoteIdNotNull(application, NOT_END_TASK_STATUS_LIST);
        if (runningTasks.isEmpty()) {
            recoverAbnormalApplication(application);
            continue;
        }
        for (Task running : runningTasks) {
            // The task's proxy user is preferred; the application's execute user is the fallback.
            result.add(new JobChecker(
                application.getId(),
                TaskStatusEnum.getTaskStateByCode(running.getStatus()),
                running.getProgress(),
                StringUtils.isNotBlank(running.getTaskProxyUser())
                    ? running.getTaskProxyUser()
                    : application.getExecuteUser(),
                running.getSubmitAddress(),
                running.getClusterName(),
                running));
        }
    }
    return result;
}

/**
 * Handles an application that is not ended although none of its tasks is still running:
 * resets and saves the application, then replays every task's status through
 * {@code iChecker.checkIfLastJob}. Errors raised while replaying are logged and swallowed.
 */
private void recoverAbnormalApplication(Application application) {
    LOGGER.info("Find abnormal application, which tasks is all end, but application is not end.");
    List<Task> everyTask = taskDao.findByApplication(application);
    application.resetTask();
    applicationDao.saveApplication(application);
    LOGGER.info("Finish to reset application status num.");
    LOGGER.info("Start to recover application status.");
    try {
        for (Task finished : everyTask) {
            // The flag combination passed to checkIfLastJob depends on the task status;
            // the first matching branch wins and unmatched statuses are ignored.
            if (hasStatus(finished, TaskStatusEnum.FAILED)) {
                iChecker.checkIfLastJob(application, false, false, false);
            } else if (Boolean.FALSE.equals(finished.getAbortOnFailure())
                && hasStatus(finished, TaskStatusEnum.FAIL_CHECKOUT)) {
                iChecker.checkIfLastJob(application, true, false, false);
            } else if (hasStatus(finished, TaskStatusEnum.PASS_CHECKOUT)) {
                iChecker.checkIfLastJob(application, true, true, false);
            } else if (hasStatus(finished, TaskStatusEnum.TASK_NOT_EXIST)) {
                iChecker.checkIfLastJob(application, false, false, true);
            } else if (hasStatus(finished, TaskStatusEnum.CANCELLED)) {
                application.setApplicationComment(ApplicationCommentEnum.TIMEOUT_KILL.getCode());
                iChecker.checkIfLastJob(application, false, false, false);
            }
        }
        LOGGER.info("Succeed to recover application status.");
    } catch (Exception e) {
        LOGGER.error("Failed to recover applications that are not end.");
        LOGGER.error(e.getMessage(), e);
    }
}

/** Tells whether the task's status equals the code of the given enum constant. */
private boolean hasStatus(Task task, TaskStatusEnum expected) {
    return task.getStatus().equals(expected.getCode());
}
Aggregations