use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.
the class LocalJobSubmissionTask method execute.
@Override
public TaskStatus execute(TaskContext taskContext) {
TaskStatus taskStatus = new TaskStatus(TaskState.CREATED);
try {
ProcessContext processContext = taskContext.getParentProcessContext();
JobModel jobModel = processContext.getJobModel();
jobModel.setTaskId(taskContext.getTaskId());
RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
String jobId = AiravataUtils.getId("JOB_ID_");
jobModel.setJobName(groovyMap.get(Script.JOB_NAME).toString());
jobModel.setJobId(jobId);
ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
JobManagerConfiguration jConfig = null;
if (resourceJobManager != null) {
jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
}
JobStatus jobStatus = new JobStatus();
File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
if (jobFile != null && jobFile.exists()) {
jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
GFacUtils.saveJobModel(processContext, jobModel);
JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
jobStatus.setJobState(JobState.SUBMITTED);
jobStatus.setReason("Successfully Submitted to " + taskContext.getParentProcessContext().getComputeResourceDescription().getHostName());
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
// log job submit status
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
// for local, job gets completed synchronously
// so changing job status to complete
jobModel.setExitCode(jobSubmissionOutput.getExitCode());
jobModel.setStdErr(jobSubmissionOutput.getStdErr());
jobModel.setStdOut(jobSubmissionOutput.getStdOut());
jobModel.setJobId(jobId);
jobStatus.setJobState(JobState.COMPLETE);
jobStatus.setReason("Successfully Completed " + taskContext.getParentProcessContext().getComputeResourceDescription().getHostName());
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
// log job complete status
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
taskStatus = new TaskStatus(TaskState.COMPLETED);
taskStatus.setReason("Submitted job to compute resource");
} else {
taskStatus.setState(TaskState.FAILED);
if (jobFile == null) {
taskStatus.setReason("JobFile is null");
} else {
taskStatus.setReason("Job file doesn't exist");
}
}
} catch (GFacException | IOException | AppCatalogException | ApplicationSettingsException e) {
String msg = "Error occurred while submitting a local job";
log.error(msg, e);
taskStatus.setReason(msg);
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
taskStatus.setState(TaskState.FAILED);
}
return taskStatus;
}
use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.
the class ArchiveTask method execute.
@Override
public TaskStatus execute(TaskContext taskContext) {
// implement archive logic with jscp
TaskStatus status = new TaskStatus(TaskState.EXECUTING);
ProcessContext processContext = taskContext.getParentProcessContext();
RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
AuthenticationInfo authenticationInfo = null;
DataStagingTaskModel subTaskModel = null;
try {
subTaskModel = (DataStagingTaskModel) ThriftUtils.getSubTaskModel(taskContext.getTaskModel());
} catch (TException e) {
String msg = "Error! Deserialization issue with SubTask Model";
log.error(msg, e);
status.setState(TaskState.FAILED);
status.setReason(msg);
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
return status;
}
try {
StorageResourceDescription storageResource = taskContext.getParentProcessContext().getStorageResource();
if (storageResource != null) {
hostName = storageResource.getHostName();
} else {
throw new GFacException("Storage Resource is null");
}
userName = processContext.getStorageResourceLoginUserName();
inputPath = processContext.getStorageFileSystemRootLocation();
inputPath = (inputPath.endsWith(File.separator) ? inputPath : inputPath + File.separator);
status = new TaskStatus(TaskState.COMPLETED);
Session srcSession = Factory.getSSHSession(Factory.getComputerResourceSSHKeyAuthentication(processContext), processContext.getComputeResourceServerInfo());
Session destSession = Factory.getSSHSession(Factory.getStorageSSHKeyAuthentication(processContext), processContext.getStorageResourceServerInfo());
URI sourceURI = new URI(subTaskModel.getSource());
URI destinationURI = null;
String workingDirName = null, path = null;
if (sourceURI.getPath().endsWith("/")) {
path = sourceURI.getPath().substring(0, sourceURI.getPath().length() - 1);
} else {
path = sourceURI.getPath();
}
workingDirName = path.substring(path.lastIndexOf(File.separator) + 1, path.length());
// tar working dir
// cd /Users/syodage/Desktop/temp/.. && tar -cvf path/workingDir.tar temp
String archiveTar = "archive.tar";
String resourceAbsTarFilePath = path + "/" + archiveTar;
CommandInfo commandInfo = new RawCommandInfo("cd " + path + " && tar -cvf " + resourceAbsTarFilePath + " ./* ");
// move tar to storage resource
remoteCluster.execute(commandInfo);
destinationURI = TaskUtils.getDestinationURI(taskContext, hostName, inputPath, archiveTar);
remoteCluster.scpThirdParty(resourceAbsTarFilePath, srcSession, destinationURI.getPath(), destSession, RemoteCluster.DIRECTION.FROM, true);
// delete tar in remote computer resource
commandInfo = new RawCommandInfo("rm " + resourceAbsTarFilePath);
remoteCluster.execute(commandInfo);
// untar file and delete tar in storage resource
String destPath = destinationURI.getPath();
String destParent = destPath.substring(0, destPath.lastIndexOf("/"));
String storageArchiveDir = "ARCHIVE";
commandInfo = new RawCommandInfo("cd " + destParent + " && mkdir " + storageArchiveDir + " && tar -xvf " + archiveTar + " -C " + storageArchiveDir + " && rm " + archiveTar + " && chmod 755 -R " + storageArchiveDir + "/*");
executeCommand(destSession, commandInfo, new StandardOutReader());
} catch (CredentialStoreException e) {
String msg = "Storage authentication issue, make sure you are passing valid credential token";
log.error(msg, e);
status.setState(TaskState.FAILED);
status.setReason(msg);
status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (URISyntaxException | GFacException e) {
String msg = "Error! Archive task failed";
log.error(msg, e);
status.setState(TaskState.FAILED);
status.setReason(msg);
status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
return status;
}
use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.
the class DefaultJobSubmissionTask method execute.
@Override
public TaskStatus execute(TaskContext taskContext) {
// set to completed.
TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
try {
ProcessContext processContext = taskContext.getParentProcessContext();
JobModel jobModel = processContext.getJobModel();
jobModel.setTaskId(taskContext.getTaskId());
RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
groovyMap.getStringValue(Script.JOB_NAME).ifPresent(jobName -> jobModel.setJobName(jobName));
ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
JobManagerConfiguration jConfig = null;
if (resourceJobManager != null) {
jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
}
JobStatus jobStatus = new JobStatus();
File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
if (jobFile != null && jobFile.exists()) {
jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
int exitCode = jobSubmissionOutput.getExitCode();
jobModel.setExitCode(exitCode);
jobModel.setStdErr(jobSubmissionOutput.getStdErr());
jobModel.setStdOut(jobSubmissionOutput.getStdOut());
String jobId = jobSubmissionOutput.getJobId();
String experimentId = taskContext.getExperimentId();
if (exitCode != 0 || jobSubmissionOutput.isJobSubmissionFailed()) {
jobModel.setJobId(DEFAULT_JOB_ID);
if (jobSubmissionOutput.isJobSubmissionFailed()) {
List<JobStatus> statusList = new ArrayList<>();
statusList.add(new JobStatus(JobState.FAILED));
statusList.get(0).setReason(jobSubmissionOutput.getFailureReason());
jobModel.setJobStatuses(statusList);
GFacUtils.saveJobModel(processContext, jobModel);
log.error("expId: {}, processid: {}, taskId: {} :- Job submission failed for job name {}", experimentId, taskContext.getProcessId(), taskContext.getTaskId(), jobModel.getJobName());
ErrorModel errorModel = new ErrorModel();
errorModel.setUserFriendlyMessage(jobSubmissionOutput.getFailureReason());
errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason("Job submission command didn't return a jobId");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
taskContext.setTaskStatus(taskStatus);
} else {
String msg;
GFacUtils.saveJobModel(processContext, jobModel);
ErrorModel errorModel = new ErrorModel();
if (exitCode != Integer.MIN_VALUE) {
msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " return non zero exit code:" + exitCode + " for JobName:" + jobModel.getJobName() + ", with failure reason : " + jobSubmissionOutput.getFailureReason() + " Hence changing job state to Failed.";
errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
} else {
msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " doesn't return valid job submission exit code for JobName:" + jobModel.getJobName() + ", with failure reason : stdout ->" + jobSubmissionOutput.getStdOut() + " stderr -> " + jobSubmissionOutput.getStdErr() + " Hence changing job state to Failed.";
errorModel.setActualErrorMessage(msg);
}
log.error(msg);
errorModel.setUserFriendlyMessage(msg);
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
taskContext.setTaskStatus(taskStatus);
}
try {
GFacUtils.saveAndPublishTaskStatus(taskContext);
} catch (GFacException e) {
log.error("Error while saving task status", e);
}
return taskStatus;
} else if (jobId != null && !jobId.isEmpty()) {
jobModel.setJobId(jobId);
GFacUtils.saveJobModel(processContext, jobModel);
jobStatus.setJobState(JobState.SUBMITTED);
ComputeResourceDescription computeResourceDescription = taskContext.getParentProcessContext().getComputeResourceDescription();
jobStatus.setReason("Successfully Submitted to " + computeResourceDescription.getHostName());
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
if (verifyJobSubmissionByJobId(remoteCluster, jobId)) {
jobStatus.setJobState(JobState.QUEUED);
jobStatus.setReason("Verification step succeeded");
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
}
// doing gateway reporting
if (computeResourceDescription.isGatewayUsageReporting()) {
String loadCommand = computeResourceDescription.getGatewayUsageModuleLoadCommand();
String usageExecutable = computeResourceDescription.getGatewayUsageExecutable();
ExperimentModel experiment = (ExperimentModel) taskContext.getParentProcessContext().getExperimentCatalog().get(ExperimentCatalogModelType.EXPERIMENT, experimentId);
String username = experiment.getUserName() + "@" + taskContext.getParentProcessContext().getUsageReportingGatewayId();
RawCommandInfo rawCommandInfo = new RawCommandInfo(loadCommand + " && " + usageExecutable + " -gateway_user " + username + " -submit_time \"`date '+%F %T %:z'`\" -jobid " + jobId);
remoteCluster.execute(rawCommandInfo);
}
taskStatus = new TaskStatus(TaskState.COMPLETED);
taskStatus.setReason("Submitted job to compute resource");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
} else {
int verificationTryCount = 0;
while (verificationTryCount++ < 3) {
String verifyJobId = verifyJobSubmission(remoteCluster, jobModel);
if (verifyJobId != null && !verifyJobId.isEmpty()) {
// JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
jobId = verifyJobId;
jobModel.setJobId(jobId);
GFacUtils.saveJobModel(processContext, jobModel);
jobStatus.setJobState(JobState.QUEUED);
jobStatus.setReason("Verification step succeeded");
jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
jobModel.setJobStatuses(Arrays.asList(jobStatus));
GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
taskStatus.setState(TaskState.COMPLETED);
taskStatus.setReason("Submitted job to compute resource");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
break;
}
log.info("Verify step return invalid jobId, retry verification step in {} secs", verificationTryCount * 10);
Thread.sleep(verificationTryCount * 10000);
}
}
if (jobId == null || jobId.isEmpty()) {
jobModel.setJobId(DEFAULT_JOB_ID);
GFacUtils.saveJobModel(processContext, jobModel);
String msg = "expId:" + processContext.getProcessModel().getExperimentId() + " Couldn't find " + "remote jobId for JobName:" + jobModel.getJobName() + ", both submit and verify steps " + "doesn't return a valid JobId. " + "Hence changing experiment state to Failed";
log.error(msg);
ErrorModel errorModel = new ErrorModel();
errorModel.setUserFriendlyMessage(msg);
errorModel.setActualErrorMessage(msg);
GFacUtils.saveExperimentError(processContext, errorModel);
GFacUtils.saveProcessError(processContext, errorModel);
GFacUtils.saveTaskError(taskContext, errorModel);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason("Couldn't find job id in both submitted and verified steps");
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
} else {
GFacUtils.saveJobModel(processContext, jobModel);
}
} else {
taskStatus.setState(TaskState.FAILED);
if (jobFile == null) {
taskStatus.setReason("JobFile is null");
} else {
taskStatus.setReason("Job file doesn't exist");
}
}
} catch (AppCatalogException e) {
String msg = "Error while instantiating app catalog";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (ApplicationSettingsException e) {
String msg = "Error occurred while creating job descriptor";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (GFacException e) {
String msg = "Error occurred while submitting the job";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (IOException e) {
String msg = "Error while reading the content of the job file";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (InterruptedException e) {
String msg = "Error occurred while verifying the job submission";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
} catch (Throwable e) {
String msg = "JobSubmission failed";
log.error(msg, e);
taskStatus.setState(TaskState.FAILED);
taskStatus.setReason(msg);
taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
ErrorModel errorModel = new ErrorModel();
errorModel.setActualErrorMessage(e.getMessage());
errorModel.setUserFriendlyMessage(msg);
taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
taskContext.setTaskStatus(taskStatus);
try {
GFacUtils.saveAndPublishTaskStatus(taskContext);
} catch (GFacException e) {
log.error("Error while saving task status", e);
}
return taskStatus;
}
Aggregations