Search in sources :

Example 1 with JobChecker

use of com.webank.wedatasphere.qualitis.bean.JobChecker in project Qualitis by WeBankFinTech.

The method run of the class CheckerRunnable.

/**
 * Runs one monitoring pass: acquires the service coordinator, collects the jobs that are
 * not finished yet via {@code getJobs()}, splits them into batches of at most
 * {@code updateJobSize} jobs, hands every batch to {@code POOL} as an {@code UpdaterRunnable},
 * and waits on a latch sized to the number of submitted batches.
 *
 * <p>The coordinator is released in the {@code finally} block whether or not the pass succeeds.
 * Failures are logged and not rethrown.
 */
@Override
public void run() {
    try {
        LOGGER.info("Start to monitor application.");
        abstractServiceCoordinator.coordinate();
        // Get task that is not finished
        List<JobChecker> jobs;
        try {
            jobs = getJobs();
            LOGGER.info("Succeed to find applications that are not end. Application: {}", jobs);
        } catch (Exception e) {
            LOGGER.error("Failed to find applications that are not end.", e);
            return;
        }
        int total = jobs.size();
        if (total > 0) {
            // Ceiling division: the latch count must equal the number of batches actually
            // submitted. "total / updateJobSize + 1" is one too many when total is an exact
            // multiple of updateJobSize, which previously caused the wait to be skipped.
            int batchCount = (total + updateJobSize - 1) / updateJobSize;
            CountDownLatch latch = new CountDownLatch(batchCount);
            for (int from = 0; from < total; from += updateJobSize) {
                int to = Math.min(from + updateJobSize, total);
                POOL.execute(new UpdaterRunnable(iChecker, jobs.subList(from, to), latch));
            }
            // NOTE(review): assumes every UpdaterRunnable counts the latch down exactly once,
            // even when it fails - confirm against UpdaterRunnable.
            latch.await();
        }
        LOGGER.info("Finish to monitor application.");
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the owning thread/executor can observe the interruption.
        Thread.currentThread().interrupt();
        LOGGER.error("Failed to monitor application, caused by: {}", e.getMessage(), e);
    } catch (Exception e) {
        LOGGER.error("Failed to monitor application, caused by: {}", e.getMessage(), e);
    } finally {
        abstractServiceCoordinator.release();
    }
}
Also used : JobChecker(com.webank.wedatasphere.qualitis.bean.JobChecker) CountDownLatch(java.util.concurrent.CountDownLatch)

Example 2 with JobChecker

use of com.webank.wedatasphere.qualitis.bean.JobChecker in project Qualitis by WeBankFinTech.

The method getJobs of the class CheckerRunnable.

/**
 * Builds the list of job checkers for every task that has a remote id and whose status is
 * in {@code NOT_END_TASK_STATUS_LIST}, across all applications whose status is not in
 * {@code END_APPLICATION_STATUS_LIST}.
 *
 * <p>An application that has no such task is treated as abnormal: its task counters are
 * reset and saved, and {@code iChecker.checkIfLastJob} is replayed for each of its tasks
 * according to the task status. Errors during that recovery are logged and do not abort
 * the scan.
 *
 * @return one {@code JobChecker} per unfinished task; empty when there is none
 */
private List<JobChecker> getJobs() {
    List<JobChecker> pendingCheckers = new ArrayList<>();
    List<Application> openApplications = applicationDao.findByStatusNotIn(END_APPLICATION_STATUS_LIST);
    for (Application app : openApplications) {
        // Find not end task
        List<Task> runningTasks = taskDao.findByApplicationAndStatusInAndTaskRemoteIdNotNull(app, NOT_END_TASK_STATUS_LIST);
        if (!runningTasks.isEmpty()) {
            for (Task running : runningTasks) {
                // The task's proxy user wins; otherwise use the application's execute user.
                String proxyUser = running.getTaskProxyUser();
                String jobUser;
                if (StringUtils.isNotBlank(proxyUser)) {
                    jobUser = proxyUser;
                } else {
                    jobUser = app.getExecuteUser();
                }
                pendingCheckers.add(new JobChecker(
                        app.getId(),
                        TaskStatusEnum.getTaskStateByCode(running.getStatus()),
                        running.getProgress(),
                        jobUser,
                        running.getSubmitAddress(),
                        running.getClusterName(),
                        running));
            }
            continue;
        }

        // No unfinished task left although the application itself is not ended.
        LOGGER.info("Find abnormal application, which tasks is all end, but application is not end.");
        List<Task> everyTask = taskDao.findByApplication(app);
        app.resetTask();
        applicationDao.saveApplication(app);
        LOGGER.info("Finish to reset application status num.");
        LOGGER.info("Start to recover application status.");
        try {
            for (Task finished : everyTask) {
                if (finished.getStatus().equals(TaskStatusEnum.FAILED.getCode())) {
                    iChecker.checkIfLastJob(app, false, false, false);
                } else if (finished.getAbortOnFailure() != null && !finished.getAbortOnFailure()
                        && finished.getStatus().equals(TaskStatusEnum.FAIL_CHECKOUT.getCode())) {
                    iChecker.checkIfLastJob(app, true, false, false);
                } else if (finished.getStatus().equals(TaskStatusEnum.PASS_CHECKOUT.getCode())) {
                    iChecker.checkIfLastJob(app, true, true, false);
                } else if (finished.getStatus().equals(TaskStatusEnum.TASK_NOT_EXIST.getCode())) {
                    iChecker.checkIfLastJob(app, false, false, true);
                } else if (finished.getStatus().equals(TaskStatusEnum.CANCELLED.getCode())) {
                    // Cancelled tasks are recorded on the application with the TIMEOUT_KILL comment.
                    app.setApplicationComment(ApplicationCommentEnum.TIMEOUT_KILL.getCode());
                    iChecker.checkIfLastJob(app, false, false, false);
                }
            }
            LOGGER.info("Succeed to recover application status.");
        } catch (Exception e) {
            LOGGER.error("Failed to recover applications that are not end.");
            LOGGER.error(e.getMessage(), e);
        }
    }
    return pendingCheckers;
}
Also used : Task(com.webank.wedatasphere.qualitis.entity.Task) ArrayList(java.util.ArrayList) JobChecker(com.webank.wedatasphere.qualitis.bean.JobChecker) Application(com.webank.wedatasphere.qualitis.entity.Application)

Aggregations

JobChecker (com.webank.wedatasphere.qualitis.bean.JobChecker): 2, Application (com.webank.wedatasphere.qualitis.entity.Application): 1, Task (com.webank.wedatasphere.qualitis.entity.Task): 1, ArrayList (java.util.ArrayList): 1, CountDownLatch (java.util.concurrent.CountDownLatch): 1