Usage of edu.pitt.dbmi.tetrad.db.entity.HpcJobLog in the project tetrad by cmu-phil:
the class HpcJobsScheduledTask, method run().
// Polling job status from HPC nodes
/**
 * Scheduled poll: reconciles locally tracked submitted jobs against the jobs
 * each HPC node still reports as active, updates local status/log records on
 * changes, and downloads result (or error-result) files for jobs that have
 * left the remote queue.
 */
@Override
public void run() {
    TetradDesktop desktop = (TetradDesktop) DesktopController.getInstance();
    if (desktop == null) {
        return;
    }
    final HpcAccountManager hpcAccountManager = desktop.getHpcAccountManager();
    // No HPC account in the first place, no need to proceed!
    List<HpcAccount> hpcAccounts = hpcAccountManager.getHpcAccounts();
    if (hpcAccounts == null || hpcAccounts.isEmpty()) {
        return;
    }
    final HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    // Load active jobs: Status (0 = Submitted; 1 = Running; 2 = Kill Request)
    Map<HpcAccount, Set<HpcJobInfo>> submittedHpcJobInfos = hpcJobManager.getSubmittedHpcJobInfoMap();
    for (HpcAccount hpcAccount : submittedHpcJobInfos.keySet()) {
        LOGGER.debug("HpcJobsScheduledTask: " + hpcAccount.getConnectionName());
        Set<HpcJobInfo> hpcJobInfos = submittedHpcJobInfos.get(hpcAccount);

        // Pid-to-HpcJobInfo map. Jobs that never received a pid cannot be
        // matched against the remote queue, so they are dropped from the set.
        // NOTE: the original code removed elements from hpcJobInfos inside a
        // for-each loop over that same set, which throws
        // ConcurrentModificationException; removeIf performs the removal safely.
        final Map<Long, HpcJobInfo> hpcJobInfoMap = new HashMap<>();
        hpcJobInfos.removeIf(hpcJobInfo -> {
            if (hpcJobInfo.getPid() != null) {
                long pid = hpcJobInfo.getPid().longValue();
                hpcJobInfoMap.put(pid, hpcJobInfo);
                LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": pid: " + pid + " : " + hpcJobInfo.getResultFileName());
                return false;
            }
            LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": no pid! : " + hpcJobInfo.getResultFileName());
            return true;
        });

        // Start by assuming every known job is finished; jobs the remote node
        // still reports are removed below, leaving only the truly finished ones.
        HashMap<Long, HpcJobInfo> finishedJobMap = new HashMap<>();
        for (HpcJobInfo job : hpcJobInfos) {
            finishedJobMap.put(job.getPid(), job);
        }
        try {
            List<JobInfo> jobInfos = hpcJobManager.getRemoteActiveJobs(hpcAccountManager, hpcAccount);
            for (JobInfo jobInfo : jobInfos) {
                LOGGER.debug("Remote pid: " + jobInfo.getId() + " : " + jobInfo.getAlgoId() + " : " + jobInfo.getResultFileName());
                long pid = jobInfo.getId();
                // Still active remotely, so not finished.
                finishedJobMap.remove(pid);

                int remoteStatus = jobInfo.getStatus();
                String recentStatusText = (remoteStatus == 0 ? "Submitted" : (remoteStatus == 1 ? "Running" : "Kill Request"));
                // Local counterpart of the remote job (may be absent for
                // unknown pids).
                HpcJobInfo hpcJobInfo = hpcJobInfoMap.get(pid);
                if (hpcJobInfo != null) {
                    int status = hpcJobInfo.getStatus();
                    if (status != remoteStatus) {
                        // Remote status differs: persist the change and log it.
                        // NOTE: the original fetched the HpcJobLog before the
                        // null check above, risking an NPE for unknown pids.
                        HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                        hpcJobInfo.setStatus(remoteStatus);
                        hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                        hpcJobLog.setLastUpdatedTime(new Date(System.currentTimeMillis()));
                        String log = "Job status changed to " + recentStatusText;
                        LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : pid : " + pid);
                        LOGGER.debug(log);
                        hpcJobManager.logHpcJobLogDetail(hpcJobLog, remoteStatus, log);
                    }
                }
            }
            // Download finished jobs' results
            if (finishedJobMap.size() > 0) {
                Set<ResultFile> resultFiles = hpcJobManager.listRemoteAlgorithmResultFiles(hpcAccountManager, hpcAccount);
                Set<String> resultFileNames = new HashSet<>();
                for (ResultFile resultFile : resultFiles) {
                    resultFileNames.add(resultFile.getName());
                }
                for (HpcJobInfo hpcJobInfo : finishedJobMap.values()) {
                    // Job is done, killed, or timed out.
                    HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                    String recentStatusText = "Job finished";
                    int recentStatus = 3; // 3 = Finished
                    if (hpcJobInfo.getStatus() == 2) {
                        recentStatusText = "Job killed";
                        recentStatus = 4; // 4 = Killed
                    }
                    hpcJobInfo.setStatus(recentStatus);
                    hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, recentStatusText);
                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + recentStatusText);

                    GeneralAlgorithmEditor editor = hpcJobManager.getGeneralAlgorithmEditor(hpcJobInfo);
                    if (editor != null) {
                        LOGGER.debug("GeneralAlgorithmEditor is not null");
                        String resultJsonFileName = hpcJobInfo.getResultJsonFileName();
                        String errorResultFileName = hpcJobInfo.getErrorResultFileName();
                        if (resultFileNames.contains(resultJsonFileName)) {
                            recentStatus = 5; // 5 = Result Downloaded
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                            }
                            String log = "Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else if (resultFileNames.contains(errorResultFileName)) {
                            recentStatus = 6; // 6 = Error Result Downloaded
                            String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                            if (!error.toLowerCase().contains("not found")) {
                                editor.setAlgorithmErrorResult(error);
                            }
                            String log = "Error Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else {
                            // Neither file is listed yet; give the remote file
                            // system a moment to catch up, then try once more.
                            Thread.sleep(5000);
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                                recentStatus = 5; // Result Downloaded
                                String log = "Result downloaded";
                                hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                            } else {
                                String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                                if (!error.toLowerCase().contains("not found")) {
                                    editor.setAlgorithmErrorResult(error);
                                    recentStatus = 6; // Error Result Downloaded
                                    String log = "Error Result downloaded";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                } else {
                                    recentStatus = 7; // 7 = Result Not Found
                                    String log = resultJsonFileName + " not found";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                }
                            }
                        }
                    }
                    hpcJobManager.removeFinishedHpcJob(hpcJobInfo);
                }
            } else {
                LOGGER.debug("No finished job yet.");
            }
        } catch (Exception e) {
            // Restore the interrupt flag if the retry sleep was interrupted.
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            // The original printed the stack trace to stderr; route it through
            // the logger instead so scheduled-task failures are visible.
            LOGGER.error("HpcJobsScheduledTask failed for " + hpcAccount.getConnectionName(), e);
        }
    }
}
Usage of edu.pitt.dbmi.tetrad.db.entity.HpcJobLog in the project tetrad by cmu-phil:
the class HpcJobActivityEditor, method getActiveRowData().
/**
 * Builds the table rows for jobs that are still active (pending, submitted,
 * running, or under a kill request), newest local job id first.
 *
 * Side effect: refreshes {@code pendingDisplayHpcJobInfoSet} and
 * {@code submittedDisplayHpcJobInfoSet}, which are read elsewhere for
 * monitoring purposes.
 *
 * @param desktop              the desktop that owns the HpcJobManager
 * @param exclusiveHpcAccounts when non-null, only jobs on these accounts are
 *                             included
 * @return one row (Vector of cell strings) per active job
 */
private Vector<Vector<String>> getActiveRowData(final TetradDesktop desktop, final List<HpcAccount> exclusiveHpcAccounts) throws Exception {
    final Vector<Vector<String>> activeRowData = new Vector<>();
    final HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    final Map<Long, HpcJobInfo> activeHpcJobInfoMap = new HashMap<>();

    // Pending jobs.
    pendingDisplayHpcJobInfoSet.clear();
    collectActiveJobs(hpcJobManager.getPendingHpcJobInfoMap(), exclusiveHpcAccounts, pendingDisplayHpcJobInfoSet, activeHpcJobInfoMap);

    // Submitted jobs.
    submittedDisplayHpcJobInfoSet.clear();
    collectActiveJobs(hpcJobManager.getSubmittedHpcJobInfoMap(), exclusiveHpcAccounts, submittedDisplayHpcJobInfoSet, activeHpcJobInfoMap);

    // Newest (highest) local job id first.
    List<Long> activeJobIds = new ArrayList<>(activeHpcJobInfoMap.keySet());
    Collections.sort(activeJobIds);
    Collections.reverse(activeJobIds);
    for (Long jobId : activeJobIds) {
        final HpcJobInfo hpcJobInfo = activeHpcJobInfoMap.get(jobId);
        Vector<String> rowData = new Vector<>();
        HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
        // Local job id
        rowData.add(hpcJobInfo.getId().toString());
        // Status column. A default is required: the original switch added no
        // cell for unexpected status values, silently shifting every later
        // column in the row.
        int status = hpcJobInfo.getStatus();
        switch (status) {
            case -1:
                rowData.add("Pending");
                break;
            case 0:
                rowData.add("Submitted");
                break;
            case 1:
                rowData.add("Running");
                break;
            case 2:
                rowData.add("Kill Request");
                break;
            default:
                rowData.add("Unknown");
                break;
        }
        // Locally added time
        rowData.add(FilePrint.fileTimestamp(hpcJobLog.getAddedTime().getTime()));
        // HPC node name
        HpcAccount hpcAccount = hpcJobInfo.getHpcAccount();
        rowData.add(hpcAccount.getConnectionName());
        // Algorithm
        rowData.add(hpcJobInfo.getAlgoId());
        // Dataset uploading progress
        AlgorithmParamRequest algorParamReq = hpcJobInfo.getAlgorithmParamRequest();
        String datasetPath = algorParamReq.getDatasetPath();
        int progress = hpcJobManager.getUploadFileProgress(datasetPath);
        if (progress > -1 && progress < 100) {
            rowData.add("" + progress + "%");
        } else {
            rowData.add("Done");
        }
        // Prior Knowledge uploading progress
        String priorKnowledgePath = algorParamReq.getPriorKnowledgePath();
        if (priorKnowledgePath != null) {
            progress = hpcJobManager.getUploadFileProgress(priorKnowledgePath);
            if (progress > -1 && progress < 100) {
                rowData.add("" + progress + "%");
            } else {
                rowData.add("Done");
            }
        } else {
            rowData.add("Skipped");
        }
        if (status > -1) {
            // Submitted time
            rowData.add(FilePrint.fileTimestamp(hpcJobInfo.getSubmittedTime().getTime()));
            // HPC job id
            rowData.add(hpcJobInfo.getPid() != null ? "" + hpcJobInfo.getPid() : "");
        } else {
            // Not yet submitted: no submitted time or remote pid.
            rowData.add("");
            rowData.add("");
        }
        // Last update time
        rowData.add(FilePrint.fileTimestamp(hpcJobLog.getLastUpdatedTime().getTime()));
        // Cancel job
        rowData.add("Cancel");
        activeRowData.add(rowData);
    }
    return activeRowData;
}

/**
 * Copies every job from {@code jobInfoMap} (restricted to
 * {@code exclusiveHpcAccounts} when given) into both the display set used for
 * monitoring and the id-keyed accumulator. Shared by the pending and submitted
 * passes of getActiveRowData, which originally duplicated this loop.
 */
private static void collectActiveJobs(final Map<HpcAccount, Set<HpcJobInfo>> jobInfoMap, final List<HpcAccount> exclusiveHpcAccounts, final Set<HpcJobInfo> displaySet, final Map<Long, HpcJobInfo> activeHpcJobInfoMap) {
    for (HpcAccount hpcAccount : jobInfoMap.keySet()) {
        if (exclusiveHpcAccounts != null && !exclusiveHpcAccounts.contains(hpcAccount)) {
            continue;
        }
        for (HpcJobInfo hpcJobInfo : jobInfoMap.get(hpcAccount)) {
            displaySet.add(hpcJobInfo);
            activeHpcJobInfoMap.put(hpcJobInfo.getId(), hpcJobInfo);
        }
    }
}
Usage of edu.pitt.dbmi.tetrad.db.entity.HpcJobLog in the project tetrad by cmu-phil:
the class HpcJobManager, method submitNewHpcJobToQueue().
/**
 * Persists a newly created HPC job, writes its initial "Pending" log records,
 * associates it with the editor that will display its result, adds it to the
 * account's pending set, and queues a pre-processing task for execution.
 *
 * @param hpcJobInfo             the job to submit (persisted by this call)
 * @param generalAlgorithmEditor the editor that should receive the result
 */
public synchronized void submitNewHpcJobToQueue(final HpcJobInfo hpcJobInfo, final GeneralAlgorithmEditor generalAlgorithmEditor) {
    hpcJobInfoService.add(hpcJobInfo);
    LOGGER.debug("hpcJobInfo: id: " + hpcJobInfo.getId());

    HpcJobLog hpcJobLog = new HpcJobLog();
    // new Date() is equivalent to new Date(System.currentTimeMillis());
    // use the same form as the detail record below for consistency.
    hpcJobLog.setAddedTime(new Date());
    hpcJobLog.setHpcJobInfo(hpcJobInfo);
    hpcJobLogService.update(hpcJobLog);
    LOGGER.debug("HpcJobLog: id: " + hpcJobLog.getId());

    HpcJobLogDetail hpcJobLogDetail = new HpcJobLogDetail();
    hpcJobLogDetail.setAddedTime(new Date());
    hpcJobLogDetail.setHpcJobLog(hpcJobLog);
    hpcJobLogDetail.setJobState(-1); // -1 = Pending
    hpcJobLogDetail.setProgress("Pending");
    hpcJobLogDetailService.add(hpcJobLogDetail);
    LOGGER.debug("HpcJobLogDetail: id: " + hpcJobLogDetail.getId());

    // Remember which editor should receive the algorithm result graph.
    hpcGraphResultMap.put(hpcJobInfo, generalAlgorithmEditor);

    // Add the job to the account's pending set; computeIfAbsent replaces the
    // original get / null-check / put sequence.
    final HpcAccount hpcAccount = hpcJobInfo.getHpcAccount();
    pendingHpcJobInfoMap.computeIfAbsent(hpcAccount, k -> new LinkedHashSet<>()).add(hpcJobInfo);

    // Put a new pre-process task into the HPC job queue.
    executorService.execute(new HpcJobPreProcessTask(hpcJobInfo));
}
Usage of edu.pitt.dbmi.tetrad.db.entity.HpcJobLog in the project tetrad by cmu-phil:
the class HpcJobActivityEditor, method getFinishedRowData().
/**
 * Builds the table rows for finished jobs (finished, canceled, result
 * downloaded, or errored), newest local job id first.
 *
 * @param desktop              the desktop that owns the HpcJobManager
 * @param exclusiveHpcAccounts when non-null, only jobs on these accounts are
 *                             included
 * @return one row (Vector of cell strings) per finished job
 */
private Vector<Vector<String>> getFinishedRowData(final TetradDesktop desktop, final List<HpcAccount> exclusiveHpcAccounts) throws Exception {
    final Vector<Vector<String>> finishedRowData = new Vector<>();
    HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    final Map<Long, HpcJobInfo> finishedHpcJobIdMap = new HashMap<>();

    // Collect finished jobs, filtered by account when requested.
    Map<HpcAccount, Set<HpcJobInfo>> finishedHpcJobInfoMap = hpcJobManager.getFinishedHpcJobInfoMap();
    for (HpcAccount hpcAccount : finishedHpcJobInfoMap.keySet()) {
        if (exclusiveHpcAccounts != null && !exclusiveHpcAccounts.contains(hpcAccount)) {
            continue;
        }
        for (HpcJobInfo hpcJobInfo : finishedHpcJobInfoMap.get(hpcAccount)) {
            finishedHpcJobIdMap.put(hpcJobInfo.getId(), hpcJobInfo);
        }
    }

    // Newest (highest) local job id first.
    List<Long> finishedJobIds = new ArrayList<>(finishedHpcJobIdMap.keySet());
    Collections.sort(finishedJobIds);
    Collections.reverse(finishedJobIds);
    for (Long jobId : finishedJobIds) {
        final HpcJobInfo hpcJobInfo = finishedHpcJobIdMap.get(jobId);
        Vector<String> rowData = new Vector<>();
        HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
        // Local job id
        rowData.add(hpcJobInfo.getId().toString());
        // Status column. Cases 3 and 5 both read "Finished" (original had them
        // duplicated); a default is required because the original switch added
        // no cell for unexpected status values, silently shifting every later
        // column in the row.
        int status = hpcJobInfo.getStatus();
        switch (status) {
            case 3: // Finished
            case 5: // Result downloaded
                rowData.add("Finished");
                break;
            case 4:
                rowData.add("Canceled");
                break;
            case 6:
                rowData.add("Error");
                break;
            default:
                rowData.add("Unknown");
                break;
        }
        // Locally added time
        rowData.add(FilePrint.fileTimestamp(hpcJobLog.getAddedTime().getTime()));
        // HPC node name
        HpcAccount hpcAccount = hpcJobInfo.getHpcAccount();
        rowData.add(hpcAccount.getConnectionName());
        // Algorithm
        rowData.add(hpcJobInfo.getAlgoId());
        // Submitted time
        rowData.add(hpcJobInfo.getSubmittedTime() != null ? FilePrint.fileTimestamp(hpcJobInfo.getSubmittedTime().getTime()) : "");
        // HPC job id
        rowData.add("" + hpcJobInfo.getPid());
        // Result name column; empty for canceled or unknown statuses.
        switch (status) {
            case 3:
            case 5:
                rowData.add(hpcJobInfo.getResultFileName());
                break;
            case 6:
                rowData.add(hpcJobInfo.getErrorResultFileName());
                break;
            default:
                rowData.add("");
                break;
        }
        // Finished time; guarded against a null ended time for consistency
        // with the submitted/canceled timestamp guards above and below.
        if (status != 4) {
            rowData.add(hpcJobLog.getEndedTime() != null ? FilePrint.fileTimestamp(hpcJobLog.getEndedTime().getTime()) : "");
        } else {
            rowData.add("");
        }
        // Canceled time
        if (status == 4) {
            rowData.add(hpcJobLog.getCanceledTime() != null ? FilePrint.fileTimestamp(hpcJobLog.getCanceledTime().getTime()) : "");
        } else {
            rowData.add("");
        }
        // Last update time
        rowData.add(FilePrint.fileTimestamp(hpcJobLog.getLastUpdatedTime().getTime()));
        // Delete job from db
        rowData.add("Delete");
        finishedRowData.add(rowData);
    }
    return finishedRowData;
}
Usage of edu.pitt.dbmi.tetrad.db.entity.HpcJobLog in the project tetrad by cmu-phil:
the class HpcJobManager, method removeHpcJobInfoTransaction().
/**
 * Removes the persisted log records for a job: every HpcJobLogDetail row, then
 * the owning HpcJobLog itself.
 *
 * @param hpcJobInfo the job whose log records should be deleted
 */
public synchronized void removeHpcJobInfoTransaction(final HpcJobInfo hpcJobInfo) {
    HpcJobLog hpcJobLog = hpcJobLogService.findByHpcJobInfo(hpcJobInfo);
    if (hpcJobLog == null) {
        // Nothing was persisted for this job; the original would have thrown
        // an NPE on the lookup below.
        return;
    }
    List<HpcJobLogDetail> logDetailList = hpcJobLogDetailService.findByHpcJobLog(hpcJobLog);
    if (logDetailList != null) {
        for (HpcJobLogDetail logDetail : logDetailList) {
            hpcJobLogDetailService.remove(logDetail);
        }
    }
    hpcJobLogService.remove(hpcJobLog);
}
Aggregations