use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.
the class BESJobSubmissionTask method copyOutputFilesToStorage.
private void copyOutputFilesToStorage(TaskContext taskContext, List<OutputDataObjectType> copyOutput) throws GFacException {
ProcessContext pc = taskContext.getParentProcessContext();
String remoteFilePath = null, fileName = null, localFilePath = null;
try {
authenticationInfo = Factory.getStorageSSHKeyAuthentication(pc);
ServerInfo serverInfo = pc.getComputeResourceServerInfo();
Session sshSession = Factory.getSSHSession(authenticationInfo, serverInfo);
for (OutputDataObjectType output : copyOutput) {
switch(output.getType()) {
case STDERR:
case STDOUT:
case STRING:
case URI:
localFilePath = output.getValue();
if (localFilePath.contains("://")) {
localFilePath = localFilePath.substring(localFilePath.indexOf("://") + 2, localFilePath.length());
}
fileName = localFilePath.substring(localFilePath.lastIndexOf("/") + 1);
URI destinationURI = TaskUtils.getDestinationURI(taskContext, hostName, inputPath, fileName);
remoteFilePath = destinationURI.getPath();
log.info("SCP local file :{} -> from remote :{}", localFilePath, remoteFilePath);
SSHUtils.scpTo(localFilePath, remoteFilePath, sshSession);
output.setValue(destinationURI.toString());
break;
default:
break;
}
}
} catch (IOException | JSchException | SSHApiException | URISyntaxException | CredentialStoreException e) {
log.error("Error while coping local file " + localFilePath + " to remote " + remoteFilePath, e);
throw new GFacException("Error while scp output files to remote storage file location", e);
}
}
use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.
the class DefaultJobSubmissionTask method recover.
@Override
public TaskStatus recover(TaskContext taskContext) {
ProcessContext processContext = taskContext.getParentProcessContext();
JobModel jobModel = processContext.getJobModel();
// original job failed before submitting
if (jobModel == null || jobModel.getJobId() == null) {
return execute(taskContext);
} else {
// job is already submitted and monitor should handle the recovery
return new TaskStatus(TaskState.COMPLETED);
}
}
use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.
the class DefaultJobSubmissionTask method execute.
@Override
public TaskStatus execute(TaskContext taskContext) {
// set to completed.
TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
try {
ProcessContext processContext = taskContext.getParentProcessContext();
JobModel jobModel = processContext.getJobModel();
jobModel.setTaskId(taskContext.getTaskId());
RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
groovyMap.getStringValue(Script.JOB_NAME).ifPresent(jobName -> jobModel.setJobName(jobName));
ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
JobManagerConfiguration jConfig = null;
if (resourceJobManager != null) {
jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
}
JobStatus jobStatus = new JobStatus();
File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
if (jobFile != null && jobFile.exists()) {
jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
int exitCode = jobSubmissionOutput.getExitCode();
jobModel.setExitCode(exitCode);
jobModel.setStdErr(jobSubmissionOutput.getStdErr());
jobModel.setStdOut(jobSubmissionOutput.getStdOut());
String jobId = jobSubmissionOutput.getJobId();
String experimentId = taskContext.getExperimentId();
if (exitCode != 0 || jobSubmissionOutput.isJobSubmissionFailed()) {
jobModel.setJobId(DEFAULT_JOB_ID);
if (jobSubmissionOutput.isJobSubmissionFailed()) {
List<JobStatus> statusList = new ArrayList<>();
statusList.add(new JobStatus(JobState.FAILED));
statusList.get(0).setReason(jobSubmissionOutput.getFailureReason());
jobModel.setJobStatuses(statusList);
GFacUtils.saveJobModel(processContext, jobModel);
log.error("expId: {}, processid: {}, taskId: {} :- Job submission failed for job name {}", experimentId, taskContext.getProcessId(), taskContext.getTaskId(), jobModel.getJobName());
ErrorModel errorModel = new ErrorModel();
errorModel.setUserFriendlyMessage(jobSubmissionOutput.getFailureReason());
errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason("Job submission command didn't return a jobId");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
taskContext.setTaskStatus(taskStatus);
} else {
String msg;
GFacUtils.saveJobModel(processContext, jobModel);
ErrorModel errorModel = new ErrorModel();
if (exitCode != Integer.MIN_VALUE) {
msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " return non zero exit code:" + exitCode + " for JobName:" + jobModel.getJobName() + ", with failure reason : " + jobSubmissionOutput.getFailureReason() + " Hence changing job state to Failed.";
errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
} else {
msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " doesn't return valid job submission exit code for JobName:" + jobModel.getJobName() + ", with failure reason : stdout ->" + jobSubmissionOutput.getStdOut() + " stderr -> " + jobSubmissionOutput.getStdErr() + " Hence changing job state to Failed.";
errorModel.setActualErrorMessage(msg);
}
log.error(msg);
errorModel.setUserFriendlyMessage(msg);
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
taskContext.setTaskStatus(taskStatus);
}
try {
GFacUtils.saveAndPublishTaskStatus(taskContext);
} catch (GFacException e) {
log.error("Error while saving task status", e);
}
return taskStatus;
} else if (jobId != null && !jobId.isEmpty()) {
jobModel.setJobId(jobId);
GFacUtils.saveJobModel(processContext, jobModel);
jobStatus.setJobState(JobState.SUBMITTED);
ComputeResourceDescription computeResourceDescription = taskContext.getParentProcessContext().getComputeResourceDescription();
jobStatus.setReason("Successfully Submitted to " + computeResourceDescription.getHostName());
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
if (verifyJobSubmissionByJobId(remoteCluster, jobId)) {
jobStatus.setJobState(JobState.QUEUED);
jobStatus.setReason("Verification step succeeded");
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
}
// doing gateway reporting
if (computeResourceDescription.isGatewayUsageReporting()) {
String loadCommand = computeResourceDescription.getGatewayUsageModuleLoadCommand();
String usageExecutable = computeResourceDescription.getGatewayUsageExecutable();
ExperimentModel experiment = (ExperimentModel) taskContext.getParentProcessContext().getExperimentCatalog().get(ExperimentCatalogModelType.EXPERIMENT, experimentId);
String username = experiment.getUserName() + "@" + taskContext.getParentProcessContext().getUsageReportingGatewayId();
RawCommandInfo rawCommandInfo = new RawCommandInfo(loadCommand + " && " + usageExecutable + " -gateway_user " + username + " -submit_time \"`date '+%F %T %:z'`\" -jobid " + jobId);
remoteCluster.execute(rawCommandInfo);
}
taskStatus = new TaskStatus(TaskState.COMPLETED);
taskStatus.setReason("Submitted job to compute resource");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
} else {
int verificationTryCount = 0;
while (verificationTryCount++ < 3) {
String verifyJobId = verifyJobSubmission(remoteCluster, jobModel);
if (verifyJobId != null && !verifyJobId.isEmpty()) {
// JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
jobId = verifyJobId;
jobModel.setJobId(jobId);
GFacUtils.saveJobModel(processContext, jobModel);
jobStatus.setJobState(JobState.QUEUED);
jobStatus.setReason("Verification step succeeded");
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
taskStatus.setState(TaskState.COMPLETED);
taskStatus.setReason("Submitted job to compute resource");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
break;
}
log.info("Verify step return invalid jobId, retry verification step in {} secs", verificationTryCount * 10);
Thread.sleep(verificationTryCount * 10000);
}
}
if (jobId == null || jobId.isEmpty()) {
jobModel.setJobId(DEFAULT_JOB_ID);
GFacUtils.saveJobModel(processContext, jobModel);
String msg = "expId:" + processContext.getProcessModel().getExperimentId() + " Couldn't find " + "remote jobId for JobName:" + jobModel.getJobName() + ", both submit and verify steps " + "doesn't return a valid JobId. " + "Hence changing experiment state to Failed";
log.error(msg);
ErrorModel errorModel = new ErrorModel();
errorModel.setUserFriendlyMessage(msg);
errorModel.setActualErrorMessage(msg);
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason("Couldn't find job id in both submitted and verified steps");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
} else {
GFacUtils.saveJobModel(processContext, jobModel);
}
} else {
taskStatus.setState(TaskState.FAILED);
if (jobFile == null) {
taskStatus.setReason("JobFile is null");
} else {
taskStatus.setReason("Job file doesn't exist");
}
}
} catch (AppCatalogException e) {
String msg = "Error while instantiating app catalog";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (ApplicationSettingsException e) {
String msg = "Error occurred while creating job descriptor";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (GFacException e) {
String msg = "Error occurred while submitting the job";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (IOException e) {
String msg = "Error while reading the content of the job file";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (InterruptedException e) {
String msg = "Error occurred while verifying the job submission";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (Throwable e) {
String msg = "JobSubmission failed";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
taskContext.setTaskStatus(taskStatus);
try {
GFacUtils.saveAndPublishTaskStatus(taskContext);
} catch (GFacException e) {
log.error("Error while saving task status", e);
}
return taskStatus;
}
use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.
the class GFacUtils method saveAndPublishTaskStatus.
public static void saveAndPublishTaskStatus(TaskContext taskContext) throws GFacException {
try {
TaskState state = taskContext.getTaskState();
// first we save job jobModel to the registry for sa and then save the job status.
ProcessContext processContext = taskContext.getParentProcessContext();
ExperimentCatalog experimentCatalog = processContext.getExperimentCatalog();
TaskStatus status = taskContext.getTaskStatus();
if (status.getTimeOfStateChange() == 0 || status.getTimeOfStateChange() > 0) {
status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
} else {
status.setTimeOfStateChange(status.getTimeOfStateChange());
}
experimentCatalog.add(ExpCatChildDataType.TASK_STATUS, status, taskContext.getTaskId());
TaskIdentifier identifier = new TaskIdentifier(taskContext.getTaskId(), processContext.getProcessId(), processContext.getProcessModel().getExperimentId(), processContext.getGatewayId());
TaskStatusChangeEvent taskStatusChangeEvent = new TaskStatusChangeEvent(state, identifier);
MessageContext msgCtx = new MessageContext(taskStatusChangeEvent, MessageType.TASK, AiravataUtils.getId(MessageType.TASK.name()), taskContext.getParentProcessContext().getGatewayId());
msgCtx.setUpdatedTime(AiravataUtils.getCurrentTimestamp());
processContext.getStatusPublisher().publish(msgCtx);
} catch (Exception e) {
throw new GFacException("Error persisting task status" + e.getLocalizedMessage(), e);
}
}
Aggregations