Search in sources :

Example 1 with GeneralAlgorithmEditor

Use of edu.cmu.tetradapp.editor.GeneralAlgorithmEditor in project tetrad by cmu-phil.

The run method of the class HpcJobsScheduledTask.

/**
 * Polls the job status from the HPC nodes.
 *
 * <p>For every HPC account that has locally tracked submitted jobs, this method:
 * <ol>
 *   <li>drops tracked jobs that have no pid;</li>
 *   <li>fetches the remote active jobs and synchronises the local status
 *       (0 = Submitted; 1 = Running; 2 = Kill Request) with the remote one;</li>
 *   <li>treats every tracked job that is no longer remotely active as finished
 *       (3 = Finished, or 4 = Killed when a kill had been requested), and, when a
 *       {@code GeneralAlgorithmEditor} is registered for the job, tries to download its
 *       result (5 = Result downloaded; 6 = Error result downloaded; 7 = Result not found);</li>
 *   <li>removes the finished job from the job manager.</li>
 * </ol>
 *
 * <p>A failure while processing one account is logged and does not prevent the remaining
 * accounts from being processed. If the thread is interrupted while waiting to retry a
 * download, the interrupt flag is restored and the method returns.
 */
@Override
public void run() {
    TetradDesktop desktop = (TetradDesktop) DesktopController.getInstance();
    if (desktop == null) {
        return;
    }
    final HpcAccountManager hpcAccountManager = desktop.getHpcAccountManager();
    // No Hpc Account in the first place, no need to proceed!
    List<HpcAccount> hpcAccounts = hpcAccountManager.getHpcAccounts();
    if (hpcAccounts == null || hpcAccounts.isEmpty()) {
        return;
    }
    final HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    // Load active jobs: Status (0 = Submitted; 1 = Running; 2 = Kill Request)
    Map<HpcAccount, Set<HpcJobInfo>> submittedHpcJobInfos = hpcJobManager.getSubmittedHpcJobInfoMap();
    for (Map.Entry<HpcAccount, Set<HpcJobInfo>> submittedEntry : submittedHpcJobInfos.entrySet()) {
        HpcAccount hpcAccount = submittedEntry.getKey();
        Set<HpcJobInfo> hpcJobInfos = submittedEntry.getValue();
        LOGGER.debug("HpcJobsScheduledTask: " + hpcAccount.getConnectionName());

        // Pid-HpcJobInfo map
        Map<Long, HpcJobInfo> hpcJobInfoMap = new HashMap<>();
        // Jobs without a pid are dropped through the iterator: removing them from the set
        // directly while a for-each loop walks it can throw ConcurrentModificationException.
        java.util.Iterator<HpcJobInfo> jobIterator = hpcJobInfos.iterator();
        while (jobIterator.hasNext()) {
            HpcJobInfo hpcJobInfo = jobIterator.next();
            if (hpcJobInfo.getPid() != null) {
                long pid = hpcJobInfo.getPid().longValue();
                hpcJobInfoMap.put(pid, hpcJobInfo);
                LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": pid: " + pid + " : " + hpcJobInfo.getResultFileName());
            } else {
                LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": no pid! : " + hpcJobInfo.getResultFileName());
                jobIterator.remove();
            }
        }

        // Finished job map: starts with every tracked job (all of which now have a pid);
        // each job that is still remotely active is taken out below.
        Map<Long, HpcJobInfo> finishedJobMap = new HashMap<>(hpcJobInfoMap);
        try {
            List<JobInfo> jobInfos = hpcJobManager.getRemoteActiveJobs(hpcAccountManager, hpcAccount);
            for (JobInfo jobInfo : jobInfos) {
                LOGGER.debug("Remote pid: " + jobInfo.getId() + " : " + jobInfo.getAlgoId() + " : " + jobInfo.getResultFileName());
                long pid = jobInfo.getId();
                // Still active remotely, hence not finished.
                finishedJobMap.remove(pid);

                // Local job
                HpcJobInfo hpcJobInfo = hpcJobInfoMap.get(pid);
                if (hpcJobInfo == null) {
                    // Remote job that is not tracked locally: nothing to synchronise, and the
                    // job log must not be looked up with a null job.
                    continue;
                }
                HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                int remoteStatus = jobInfo.getStatus();
                if (hpcJobInfo.getStatus() != remoteStatus) {
                    // Update status
                    String recentStatusText = (remoteStatus == 0 ? "Submitted" : (remoteStatus == 1 ? "Running" : "Kill Request"));
                    hpcJobInfo.setStatus(remoteStatus);
                    hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                    hpcJobLog.setLastUpdatedTime(new Date());
                    String log = "Job status changed to " + recentStatusText;
                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : pid : " + pid);
                    LOGGER.debug(log);
                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, remoteStatus, log);
                }
            }

            // Download finished jobs' results
            if (!finishedJobMap.isEmpty()) {
                Set<ResultFile> resultFiles = hpcJobManager.listRemoteAlgorithmResultFiles(hpcAccountManager, hpcAccount);
                Set<String> resultFileNames = new HashSet<>();
                for (ResultFile resultFile : resultFiles) {
                    resultFileNames.add(resultFile.getName());
                }
                for (HpcJobInfo hpcJobInfo : finishedJobMap.values()) {
                    // Job is done or killed or time-out
                    HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                    String recentStatusText = "Job finished";
                    // Finished
                    int recentStatus = 3;
                    if (hpcJobInfo.getStatus() == 2) {
                        recentStatusText = "Job killed";
                        // Killed
                        recentStatus = 4;
                    }
                    hpcJobInfo.setStatus(recentStatus);
                    hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, recentStatusText);
                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + recentStatusText);

                    GeneralAlgorithmEditor editor = hpcJobManager.getGeneralAlgorithmEditor(hpcJobInfo);
                    if (editor != null) {
                        LOGGER.debug("GeneralAlgorithmEditor is not null");
                        String resultJsonFileName = hpcJobInfo.getResultJsonFileName();
                        String errorResultFileName = hpcJobInfo.getErrorResultFileName();
                        if (resultFileNames.contains(resultJsonFileName)) {
                            // Result Downloaded
                            recentStatus = 5;
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                            }
                            String log = "Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else if (resultFileNames.contains(errorResultFileName)) {
                            // Error Result Downloaded
                            recentStatus = 6;
                            String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                            if (!error.toLowerCase().contains("not found")) {
                                editor.setAlgorithmErrorResult(error);
                            }
                            String log = "Error Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else {
                            // Neither file is listed yet: wait, then try the downloads anyway.
                            Thread.sleep(5000);
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                                // Result Downloaded
                                recentStatus = 5;
                                String log = "Result downloaded";
                                hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                            } else {
                                String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                                if (!error.toLowerCase().contains("not found")) {
                                    editor.setAlgorithmErrorResult(error);
                                    // Error Result Downloaded
                                    recentStatus = 6;
                                    String log = "Error Result downloaded";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                } else {
                                    // Result Not Found
                                    recentStatus = 7;
                                    String log = resultJsonFileName + " not found";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                }
                            }
                        }
                    }
                    hpcJobManager.removeFinishedHpcJob(hpcJobInfo);
                }
            } else {
                LOGGER.debug("No finished job yet.");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the owning scheduler can observe it, and stop polling.
            Thread.currentThread().interrupt();
            LOGGER.debug("HpcJobsScheduledTask: interrupted while waiting to retry a result download");
            return;
        } catch (Exception e) {
            // Best effort: report the failure for this account and carry on with the next one.
            LOGGER.error("HpcJobsScheduledTask: failed to poll jobs for " + hpcAccount.getConnectionName(), e);
        }
    }
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) HpcAccount(edu.pitt.dbmi.tetrad.db.entity.HpcAccount) GeneralAlgorithmEditor(edu.cmu.tetradapp.editor.GeneralAlgorithmEditor) HpcJobManager(edu.cmu.tetradapp.app.hpc.manager.HpcJobManager) JobInfo(edu.pitt.dbmi.ccd.rest.client.dto.algo.JobInfo) HpcJobInfo(edu.pitt.dbmi.tetrad.db.entity.HpcJobInfo) HpcAccountManager(edu.cmu.tetradapp.app.hpc.manager.HpcAccountManager) TetradDesktop(edu.cmu.tetradapp.app.TetradDesktop) HashSet(java.util.HashSet) ResultFile(edu.pitt.dbmi.ccd.rest.client.dto.algo.ResultFile) Date(java.util.Date) ClientProtocolException(org.apache.http.client.ClientProtocolException) URISyntaxException(java.net.URISyntaxException) IOException(java.io.IOException) HpcJobInfo(edu.pitt.dbmi.tetrad.db.entity.HpcJobInfo) HpcJobLog(edu.pitt.dbmi.tetrad.db.entity.HpcJobLog)

Aggregations

TetradDesktop (edu.cmu.tetradapp.app.TetradDesktop)1 HpcAccountManager (edu.cmu.tetradapp.app.hpc.manager.HpcAccountManager)1 HpcJobManager (edu.cmu.tetradapp.app.hpc.manager.HpcJobManager)1 GeneralAlgorithmEditor (edu.cmu.tetradapp.editor.GeneralAlgorithmEditor)1 JobInfo (edu.pitt.dbmi.ccd.rest.client.dto.algo.JobInfo)1 ResultFile (edu.pitt.dbmi.ccd.rest.client.dto.algo.ResultFile)1 HpcAccount (edu.pitt.dbmi.tetrad.db.entity.HpcAccount)1 HpcJobInfo (edu.pitt.dbmi.tetrad.db.entity.HpcJobInfo)1 HpcJobLog (edu.pitt.dbmi.tetrad.db.entity.HpcJobLog)1 IOException (java.io.IOException)1 URISyntaxException (java.net.URISyntaxException)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 ClientProtocolException (org.apache.http.client.ClientProtocolException)1