Use of edu.pitt.dbmi.ccd.rest.client.dto.algo.JobInfo in the project tetrad by cmu-phil:
the class HpcJobsScheduledTask, method run.
// Polling job status from HPC nodes
/**
 * Scheduled poll of all HPC accounts: synchronizes the local status of every
 * submitted job with the remote queue, downloads results (or error results)
 * for jobs that have left the remote active queue, and retires finished jobs.
 */
@Override
public void run() {
    TetradDesktop desktop = (TetradDesktop) DesktopController.getInstance();
    if (desktop == null) {
        return;
    }
    final HpcAccountManager hpcAccountManager = desktop.getHpcAccountManager();
    // No HPC account in the first place, no need to proceed!
    List<HpcAccount> hpcAccounts = hpcAccountManager.getHpcAccounts();
    if (hpcAccounts == null || hpcAccounts.isEmpty()) {
        return;
    }
    final HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    // Load active jobs: Status (0 = Submitted; 1 = Running; 2 = Kill Request)
    Map<HpcAccount, Set<HpcJobInfo>> submittedHpcJobInfos = hpcJobManager.getSubmittedHpcJobInfoMap();
    for (HpcAccount hpcAccount : submittedHpcJobInfos.keySet()) {
        LOGGER.debug("HpcJobsScheduledTask: " + hpcAccount.getConnectionName());
        Set<HpcJobInfo> hpcJobInfos = submittedHpcJobInfos.get(hpcAccount);
        // Pid-HpcJobInfo map. Jobs without a pid are collected and removed
        // AFTER the loop: the original removed them inside the for-each,
        // which throws ConcurrentModificationException.
        Map<Long, HpcJobInfo> hpcJobInfoMap = new HashMap<>();
        Set<HpcJobInfo> jobsWithoutPid = new HashSet<>();
        for (HpcJobInfo hpcJobInfo : hpcJobInfos) {
            if (hpcJobInfo.getPid() != null) {
                long pid = hpcJobInfo.getPid().longValue();
                hpcJobInfoMap.put(pid, hpcJobInfo);
                LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": pid: " + pid + " : " + hpcJobInfo.getResultFileName());
            } else {
                LOGGER.debug("id: " + hpcJobInfo.getId() + " : " + hpcJobInfo.getAlgoId() + ": no pid! : " + hpcJobInfo.getResultFileName());
                jobsWithoutPid.add(hpcJobInfo);
            }
        }
        hpcJobInfos.removeAll(jobsWithoutPid);
        // Candidate finished jobs: start with every tracked pid; each pid
        // still present in the remote active queue is removed below, so
        // whatever remains has finished (or was killed / timed out).
        HashMap<Long, HpcJobInfo> finishedJobMap = new HashMap<>(hpcJobInfoMap);
        try {
            List<JobInfo> jobInfos = hpcJobManager.getRemoteActiveJobs(hpcAccountManager, hpcAccount);
            for (JobInfo jobInfo : jobInfos) {
                LOGGER.debug("Remote pid: " + jobInfo.getId() + " : " + jobInfo.getAlgoId() + " : " + jobInfo.getResultFileName());
                long pid = jobInfo.getId();
                finishedJobMap.remove(pid);
                int remoteStatus = jobInfo.getStatus();
                String recentStatusText = (remoteStatus == 0 ? "Submitted" : (remoteStatus == 1 ? "Running" : "Kill Request"));
                // Local job for this remote pid (may be absent).
                HpcJobInfo hpcJobInfo = hpcJobInfoMap.get(pid);
                if (hpcJobInfo != null) {
                    int status = hpcJobInfo.getStatus();
                    if (status != remoteStatus) {
                        // Fetch the log only for a tracked job; the original
                        // called getHpcJobLog before the null check above.
                        HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                        // Update status
                        hpcJobInfo.setStatus(remoteStatus);
                        hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                        hpcJobLog.setLastUpdatedTime(new Date(System.currentTimeMillis()));
                        String log = "Job status changed to " + recentStatusText;
                        LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : pid : " + pid);
                        LOGGER.debug(log);
                        hpcJobManager.logHpcJobLogDetail(hpcJobLog, remoteStatus, log);
                    }
                }
            }
            // Download finished jobs' results
            if (finishedJobMap.size() > 0) {
                Set<ResultFile> resultFiles = hpcJobManager.listRemoteAlgorithmResultFiles(hpcAccountManager, hpcAccount);
                Set<String> resultFileNames = new HashSet<>();
                for (ResultFile resultFile : resultFiles) {
                    resultFileNames.add(resultFile.getName());
                }
                for (HpcJobInfo hpcJobInfo : finishedJobMap.values()) {
                    // Job is done, killed, or timed out.
                    HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
                    String recentStatusText = "Job finished";
                    int recentStatus = 3; // Finished
                    if (hpcJobInfo.getStatus() == 2) {
                        recentStatusText = "Job killed";
                        recentStatus = 4; // Killed
                    }
                    hpcJobInfo.setStatus(recentStatus);
                    hpcJobManager.updateHpcJobInfo(hpcJobInfo);
                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, recentStatusText);
                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + recentStatusText);
                    GeneralAlgorithmEditor editor = hpcJobManager.getGeneralAlgorithmEditor(hpcJobInfo);
                    if (editor != null) {
                        LOGGER.debug("GeneralAlgorithmEditor is not null");
                        String resultJsonFileName = hpcJobInfo.getResultJsonFileName();
                        String errorResultFileName = hpcJobInfo.getErrorResultFileName();
                        if (resultFileNames.contains(resultJsonFileName)) {
                            // Result Downloaded
                            recentStatus = 5;
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                            }
                            String log = "Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else if (resultFileNames.contains(errorResultFileName)) {
                            // Error Result Downloaded
                            recentStatus = 6;
                            String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                            if (!error.toLowerCase().contains("not found")) {
                                editor.setAlgorithmErrorResult(error);
                            }
                            String log = "Error Result downloaded";
                            hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                            LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                        } else {
                            // Neither file listed yet: the remote node may
                            // still be flushing output. Wait, then try the
                            // result file and fall back to the error file.
                            Thread.sleep(5000);
                            String json = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, resultJsonFileName, editor);
                            if (!json.toLowerCase().contains("not found")) {
                                editor.setAlgorithmResult(json);
                                // Result Downloaded
                                recentStatus = 5;
                                String log = "Result downloaded";
                                hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                            } else {
                                String error = downloadAlgorithmResultFile(hpcAccountManager, hpcJobManager, hpcAccount, errorResultFileName, editor);
                                if (!error.toLowerCase().contains("not found")) {
                                    editor.setAlgorithmErrorResult(error);
                                    // Error Result Downloaded
                                    recentStatus = 6;
                                    String log = "Error Result downloaded";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                } else {
                                    // Result Not Found
                                    recentStatus = 7;
                                    String log = resultJsonFileName + " not found";
                                    hpcJobManager.logHpcJobLogDetail(hpcJobLog, recentStatus, log);
                                    LOGGER.debug(hpcJobInfo.getAlgoId() + " : id : " + hpcJobInfo.getId() + " : " + log);
                                }
                            }
                        }
                    }
                    hpcJobManager.removeFinishedHpcJob(hpcJobInfo);
                }
            } else {
                LOGGER.debug("No finished job yet.");
            }
        } catch (Exception e) {
            // Log and continue with the next account; one bad account must
            // not silently kill the whole scheduled poll (the original only
            // called printStackTrace).
            LOGGER.error("HpcJobsScheduledTask failed for " + hpcAccount.getConnectionName(), e);
        }
    }
}
Use of edu.pitt.dbmi.ccd.rest.client.dto.algo.JobInfo in the project tetrad by cmu-phil:
the class HpcJobManager, method requestHpcJobKilled.
/**
 * Asks the remote HPC node to kill the job behind the given {@code hpcJobInfo},
 * then refreshes the job's status from the remote queue.
 *
 * @param hpcJobInfo the locally tracked job whose remote process should be killed
 * @return the same {@code hpcJobInfo} with its status refreshed from the queue,
 *         or {@code null} if the queue reports no status for the job's pid
 * @throws Exception if the account service or the remote queue calls fail
 */
public HpcJobInfo requestHpcJobKilled(final HpcJobInfo hpcJobInfo) throws Exception {
    final HpcAccount account = hpcJobInfo.getHpcAccount();
    final JobQueueService queueService = getHpcAccountService(account).getJobQueueService();
    final TetradDesktop desktop = (TetradDesktop) DesktopController.getInstance();
    final JsonWebTokenManager tokenManager = desktop.getHpcAccountManager().getJsonWebTokenManager();
    // Issue the kill request, then immediately ask the queue for the
    // job's (possibly updated) status using the same account token.
    queueService.requestJobKilled(hpcJobInfo.getPid(), tokenManager.getJsonWebToken(account));
    final JobInfo remoteJob = queueService.getJobStatus(hpcJobInfo.getPid(), tokenManager.getJsonWebToken(account));
    if (remoteJob == null) {
        return null;
    }
    hpcJobInfo.setStatus(remoteJob.getStatus());
    return hpcJobInfo;
}
Use of edu.pitt.dbmi.ccd.rest.client.dto.algo.JobInfo in the project tetrad by cmu-phil:
the class HpcJobPreProcessTask, method run.
/**
 * Prepares and submits one HPC job: waits for the desktop, uploads the
 * dataset (and optional prior-knowledge file) unless an identical file
 * (matched by MD5) already exists remotely, builds the REST parameter
 * request, enqueues the job, and records the returned pid and file names.
 */
@Override
public void run() {
    TetradDesktop desktop = (TetradDesktop) DesktopController.getInstance();
    // The desktop may not be up yet; poll for it. Re-fetch inside the loop —
    // the original never reassigned desktop, so a null first read spun here
    // forever.
    while (desktop == null) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag and give up instead of looping.
            Thread.currentThread().interrupt();
            return;
        }
        desktop = (TetradDesktop) DesktopController.getInstance();
    }
    final HpcAccountManager hpcAccountManager = desktop.getHpcAccountManager();
    final HpcJobManager hpcJobManager = desktop.getHpcJobManager();
    HpcAccount hpcAccount = hpcJobInfo.getHpcAccount();
    AlgorithmParamRequest algorParamReq = hpcJobInfo.getAlgorithmParamRequest();
    String datasetPath = algorParamReq.getDatasetPath();
    String priorKnowledgePath = algorParamReq.getPriorKnowledgePath();
    try {
        HpcAccountService hpcAccountService = hpcJobManager.getHpcAccountService(hpcAccount);
        HpcJobLog hpcJobLog = hpcJobManager.getHpcJobLog(hpcJobInfo);
        String log = "Initiated connection to " + hpcAccount.getConnectionName();
        LOGGER.debug(log);
        hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
        log = "datasetPath: " + datasetPath;
        // Keep all task output in the logger (was System.out.println).
        LOGGER.debug(log);
        Path file = Paths.get(datasetPath);
        // Get file's MD5 hash and use it as its identifier
        String md5 = algorParamReq.getDatasetMd5();
        // Initiate data uploading progress
        hpcJobManager.updateUploadFileProgress(datasetPath, 0);
        Path prior = null;
        if (priorKnowledgePath != null) {
            log = "priorKnowledgePath: " + priorKnowledgePath;
            LOGGER.debug(log);
            prior = Paths.get(priorKnowledgePath);
            // Initiate prior knowledge uploading progress
            hpcJobManager.updateUploadFileProgress(priorKnowledgePath, 0);
        }
        // Check if this dataset already exists with this md5 hash
        RemoteDataFileService remoteDataService = hpcAccountService.getRemoteDataService();
        DataFile dataFile = HpcAccountUtils.getRemoteDataFile(hpcAccountManager, remoteDataService, hpcAccount, md5);
        DataUploadService dataUploadService = hpcAccountService.getDataUploadService();
        // If not, upload the file
        if (dataFile == null) {
            log = "Started uploading " + file.getFileName().toString();
            LOGGER.debug(log);
            dataUploadService.startUpload(file, HpcAccountUtils.getJsonWebToken(hpcAccountManager, hpcAccount));
            hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
            int progress;
            // Poll the upload until it reports 100% complete.
            while ((progress = dataUploadService.getUploadJobStatus(file.toAbsolutePath().toString())) < 100) {
                hpcJobManager.updateUploadFileProgress(datasetPath, progress);
                Thread.sleep(10);
            }
            hpcJobManager.updateUploadFileProgress(datasetPath, progress);
            log = "Finished uploading " + file.getFileName().toString();
            LOGGER.debug(log);
            hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
            // Get remote datafile
            dataFile = HpcAccountUtils.getRemoteDataFile(hpcAccountManager, remoteDataService, hpcAccount, md5);
            HpcAccountUtils.summarizeDataset(remoteDataService, algorParamReq, dataFile.getId(), HpcAccountUtils.getJsonWebToken(hpcAccountManager, hpcAccount));
            log = "Summarized " + file.getFileName().toString();
            LOGGER.debug(log);
            hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
        } else {
            log = "Skipped uploading " + file.getFileName().toString();
            LOGGER.debug(log);
            hpcJobManager.updateUploadFileProgress(datasetPath, -1);
            hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
            // Summarize only if the remote copy was never summarized.
            if (dataFile.getFileSummary().getVariableType() == null) {
                HpcAccountUtils.summarizeDataset(remoteDataService, algorParamReq, dataFile.getId(), HpcAccountUtils.getJsonWebToken(hpcAccountManager, hpcAccount));
                log = "Summarized " + file.getFileName().toString();
                LOGGER.debug(log);
                hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
            }
        }
        DataFile priorKnowledgeFile = null;
        // Prior Knowledge File
        if (prior != null) {
            // Get prior knowledge file Id
            md5 = algorParamReq.getPriorKnowledgeMd5();
            priorKnowledgeFile = HpcAccountUtils.getRemotePriorKnowledgeFile(hpcAccountManager, remoteDataService, hpcAccount, md5);
            if (priorKnowledgeFile == null) {
                // Upload prior knowledge file
                dataUploadService.startUpload(prior, HpcAccountUtils.getJsonWebToken(hpcAccountManager, hpcAccount));
                log = "Started uploading Prior Knowledge File";
                LOGGER.debug(log);
                hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
                int progress;
                while ((progress = dataUploadService.getUploadJobStatus(prior.toAbsolutePath().toString())) < 100) {
                    hpcJobManager.updateUploadFileProgress(priorKnowledgePath, progress);
                    Thread.sleep(10);
                }
                hpcJobManager.updateUploadFileProgress(priorKnowledgePath, progress);
                priorKnowledgeFile = HpcAccountUtils.getRemotePriorKnowledgeFile(hpcAccountManager, remoteDataService, hpcAccount, md5);
                log = "Finished uploading Prior Knowledge File";
                LOGGER.debug(log);
                hpcJobManager.logHpcJobLogDetail(hpcJobLog, -1, log);
            }
        }
        // Algorithm Job Preparation
        edu.pitt.dbmi.ccd.rest.client.dto.algo.AlgorithmParamRequest paramRequest = new edu.pitt.dbmi.ccd.rest.client.dto.algo.AlgorithmParamRequest();
        String algoId = hpcJobInfo.getAlgoId();
        paramRequest.setAlgoId(algoId);
        paramRequest.setDatasetFileId(dataFile.getId());
        // Test
        if (algorParamReq.getTestId() != null) {
            paramRequest.setTestId(algorParamReq.getTestId());
        }
        // Score
        if (algorParamReq.getScoreId() != null) {
            paramRequest.setScoreId(algorParamReq.getScoreId());
        }
        Set<AlgoParameter> algorithmParameters = new HashSet<>();
        for (AlgorithmParameter param : algorParamReq.getAlgorithmParameters()) {
            algorithmParameters.add(new AlgoParameter(param.getParameter(), param.getValue()));
            LOGGER.debug("AlgorithmParameter: " + param.getParameter() + " : " + param.getValue());
        }
        if (priorKnowledgeFile != null) {
            paramRequest.setPriorKnowledgeFileId(priorKnowledgeFile.getId());
            LOGGER.debug("priorKnowledgeFileId: " + priorKnowledgeFile.getId());
        }
        paramRequest.setAlgoParameters(algorithmParameters);
        if (algorParamReq.getJvmOptions() != null) {
            JvmOptions jvmOptions = new JvmOptions();
            jvmOptions.setMaxHeapSize(algorParamReq.getJvmOptions().getMaxHeapSize());
            paramRequest.setJvmOptions(jvmOptions);
        }
        Set<HpcParameter> hpcParameters = algorParamReq.getHpcParameters();
        if (hpcParameters != null) {
            Set<edu.pitt.dbmi.ccd.rest.client.dto.algo.HpcParameter> hpcParams = new HashSet<>();
            for (HpcParameter param : hpcParameters) {
                edu.pitt.dbmi.ccd.rest.client.dto.algo.HpcParameter hpcParam = new edu.pitt.dbmi.ccd.rest.client.dto.algo.HpcParameter();
                hpcParam.setKey(param.getKey());
                hpcParam.setValue(param.getValue());
                hpcParams.add(hpcParam);
                LOGGER.debug("HpcParameter: " + hpcParam.getKey() + " : " + hpcParam.getValue());
            }
            paramRequest.setHpcParameters(hpcParams);
        }
        // Submit a job
        JobQueueService jobQueueService = hpcAccountService.getJobQueueService();
        JobInfo jobInfo = jobQueueService.addToRemoteQueue(paramRequest, HpcAccountUtils.getJsonWebToken(hpcAccountManager, hpcAccount));
        // Log the job submission
        hpcJobInfo.setSubmittedTime(new Date(System.currentTimeMillis()));
        // Submitted
        hpcJobInfo.setStatus(0);
        hpcJobInfo.setPid(jobInfo.getId());
        hpcJobInfo.setResultFileName(jobInfo.getResultFileName());
        hpcJobInfo.setResultJsonFileName(jobInfo.getResultJsonFileName());
        hpcJobInfo.setErrorResultFileName(jobInfo.getErrorResultFileName());
        hpcJobManager.updateHpcJobInfo(hpcJobInfo);
        log = "Submitted job to " + hpcAccount.getConnectionName();
        LOGGER.debug(log);
        hpcJobManager.logHpcJobLogDetail(hpcJobLog, 0, log);
        // FIXED: the original concatenated the whole message before the
        // "== null" comparison ran (string + binds tighter than ==), so the
        // null guards on testId/scoreId never worked. Parenthesized ternaries
        // restore the intended "append id only if present" behavior.
        LOGGER.debug("HpcJobPreProcessTask: HpcJobInfo: id : " + hpcJobInfo.getId()
                + " : pid : " + hpcJobInfo.getPid()
                + " : " + hpcJobInfo.getAlgoId()
                + (algorParamReq.getTestId() == null ? "" : " : " + algorParamReq.getTestId())
                + (algorParamReq.getScoreId() == null ? "" : " : " + algorParamReq.getScoreId())
                + " : " + hpcJobInfo.getResultFileName());
        hpcJobManager.addNewSubmittedHpcJob(hpcJobInfo);
    } catch (Exception e) {
        // Log with context instead of printStackTrace; this task runs on a
        // background thread where a raw stack trace is easily lost.
        LOGGER.error("HpcJobPreProcessTask failed for " + hpcAccount.getConnectionName(), e);
    }
}
Aggregations