Search in sources :

Example 21 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

the class BESJobSubmissionTask method copyOutputFilesToStorage.

/**
 * Copies the given outputs to storage over SCP and rewrites each copied output's value
 * to the destination URI.
 *
 * <p>Only outputs of type {@code STDERR}, {@code STDOUT}, {@code STRING} or {@code URI}
 * are processed; every other type is skipped. For each processed output, anything up to
 * and including the first slash of a {@code "://"} separator is stripped from the value,
 * the last path segment is used as the file name, and the destination is resolved via
 * {@code TaskUtils.getDestinationURI}.
 *
 * @param taskContext task whose parent process context supplies credentials and server info
 * @param copyOutput  outputs to copy; the value of each copied output is updated in place
 * @throws GFacException if authentication, session creation, destination resolution or the
 *                       copy itself fails with one of the checked exceptions handled below
 */
private void copyOutputFilesToStorage(TaskContext taskContext, List<OutputDataObjectType> copyOutput) throws GFacException {
    ProcessContext processContext = taskContext.getParentProcessContext();
    // Kept outside the try block so the catch clause can report the paths that were in flight.
    String sourcePath = null;
    String targetPath = null;
    try {
        authenticationInfo = Factory.getStorageSSHKeyAuthentication(processContext);
        Session session = Factory.getSSHSession(authenticationInfo, processContext.getComputeResourceServerInfo());
        for (OutputDataObjectType output : copyOutput) {
            // Guard: only these four data types are copied, anything else is ignored.
            switch (output.getType()) {
                case STDERR:
                case STDOUT:
                case STRING:
                case URI:
                    break;
                default:
                    continue;
            }

            sourcePath = output.getValue();
            int schemeSeparator = sourcePath.indexOf("://");
            if (schemeSeparator >= 0) {
                // Drops the scheme, the ':' and the first '/'; the second '/' is kept.
                sourcePath = sourcePath.substring(schemeSeparator + 2);
            }
            String baseName = sourcePath.substring(sourcePath.lastIndexOf('/') + 1);

            URI destinationURI = TaskUtils.getDestinationURI(taskContext, hostName, inputPath, baseName);
            targetPath = destinationURI.getPath();
            log.info("SCP local file :{} -> from remote :{}", sourcePath, targetPath);
            SSHUtils.scpTo(sourcePath, targetPath, session);
            output.setValue(destinationURI.toString());
        }
    } catch (IOException | JSchException | SSHApiException | URISyntaxException | CredentialStoreException e) {
        log.error("Error while coping local file " + sourcePath + " to remote " + targetPath, e);
        throw new GFacException("Error while scp output files to remote storage file location", e);
    }
}
Also used : JSchException(com.jcraft.jsch.JSchException) ServerInfo(org.apache.airavata.gfac.core.cluster.ServerInfo) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) CredentialStoreException(org.apache.airavata.credential.store.store.CredentialStoreException) URI(java.net.URI) SSHApiException(org.apache.airavata.gfac.core.SSHApiException) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) OutputDataObjectType(org.apache.airavata.model.application.io.OutputDataObjectType) GFacException(org.apache.airavata.gfac.core.GFacException) Session(com.jcraft.jsch.Session)

Example 22 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

the class DefaultJobSubmissionTask method recover.

/**
 * Recovers this task after an interruption.
 *
 * <p>If the parent process context has no job model, or the job model carries no job id,
 * the task is simply executed again. Otherwise a {@code COMPLETED} status is returned
 * without resubmitting.
 *
 * @param taskContext context of the task being recovered
 * @return the status produced by {@link #execute(TaskContext)}, or a new {@code COMPLETED} status
 */
@Override
public TaskStatus recover(TaskContext taskContext) {
    JobModel previousJob = taskContext.getParentProcessContext().getJobModel();
    boolean alreadySubmitted = previousJob != null && previousJob.getJobId() != null;
    if (alreadySubmitted) {
        // job is already submitted and monitor should handle the recovery
        return new TaskStatus(TaskState.COMPLETED);
    }
    // original job failed before submitting
    return execute(taskContext);
}
Also used : JobModel(org.apache.airavata.model.job.JobModel) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 23 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

the class DefaultJobSubmissionTask method execute.

/**
 * Creates the job file for this task, submits it as a batch job on the process's remote
 * cluster and records the outcome on the job model and task status.
 *
 * <p>Outcomes handled here:
 * <ul>
 *   <li>non-zero exit code or a reported submission failure: the job id is set to
 *       {@code DEFAULT_JOB_ID}, errors are saved, the task status is saved/published as
 *       {@code FAILED} and returned immediately;</li>
 *   <li>a job id was returned: the job is saved as {@code SUBMITTED}, optionally moved to
 *       {@code QUEUED} after verification, and gateway usage is reported when enabled;</li>
 *   <li>no job id was returned: verification is attempted up to three times with an
 *       increasing pause between attempts; if no id is found the task is marked
 *       {@code FAILED};</li>
 *   <li>the job file is missing: the task is marked {@code FAILED}.</li>
 * </ul>
 *
 * <p>Any exception is logged and converted to a {@code FAILED} status. If the thread was
 * interrupted, the interrupt flag is restored just before returning so callers can see it.
 *
 * @param taskContext context of the task to execute
 * @return the final task status, which has also been set on {@code taskContext}
 */
@Override
public TaskStatus execute(TaskContext taskContext) {
    // set to completed.
    TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
    // Remembers an interrupt so the flag can be restored after the status has been persisted.
    boolean interrupted = false;
    try {
        ProcessContext processContext = taskContext.getParentProcessContext();
        JobModel jobModel = processContext.getJobModel();
        jobModel.setTaskId(taskContext.getTaskId());
        RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
        GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
        groovyMap.getStringValue(Script.JOB_NAME).ifPresent(jobName -> jobModel.setJobName(jobName));
        ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
        JobManagerConfiguration jConfig = null;
        if (resourceJobManager != null) {
            jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
        }
        JobStatus jobStatus = new JobStatus();
        File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
        if (jobFile != null && jobFile.exists()) {
            jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
            JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
            int exitCode = jobSubmissionOutput.getExitCode();
            jobModel.setExitCode(exitCode);
            jobModel.setStdErr(jobSubmissionOutput.getStdErr());
            jobModel.setStdOut(jobSubmissionOutput.getStdOut());
            String jobId = jobSubmissionOutput.getJobId();
            String experimentId = taskContext.getExperimentId();
            if (exitCode != 0 || jobSubmissionOutput.isJobSubmissionFailed()) {
                // Submission failed: persist the failure and return right away.
                jobModel.setJobId(DEFAULT_JOB_ID);
                if (jobSubmissionOutput.isJobSubmissionFailed()) {
                    List<JobStatus> statusList = new ArrayList<>();
                    statusList.add(new JobStatus(JobState.FAILED));
                    statusList.get(0).setReason(jobSubmissionOutput.getFailureReason());
                    jobModel.setJobStatuses(statusList);
                    GFacUtils.saveJobModel(processContext, jobModel);
                    log.error("expId: {}, processid: {}, taskId: {} :- Job submission failed for job name {}", experimentId, taskContext.getProcessId(), taskContext.getTaskId(), jobModel.getJobName());
                    ErrorModel errorModel = new ErrorModel();
                    errorModel.setUserFriendlyMessage(jobSubmissionOutput.getFailureReason());
                    errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
                    GFacUtils.saveExperimentError(processContext, errorModel);
                    GFacUtils.saveProcessError(processContext, errorModel);
                    GFacUtils.saveTaskError(taskContext, errorModel);
                    taskStatus.setState(TaskState.FAILED);
                    taskStatus.setReason("Job submission command didn't return a jobId");
                    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    taskContext.setTaskStatus(taskStatus);
                } else {
                    String msg;
                    GFacUtils.saveJobModel(processContext, jobModel);
                    ErrorModel errorModel = new ErrorModel();
                    // Integer.MIN_VALUE is treated below as "no valid exit code was returned".
                    if (exitCode != Integer.MIN_VALUE) {
                        msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " return non zero exit code:" + exitCode + "  for JobName:" + jobModel.getJobName() + ", with failure reason : " + jobSubmissionOutput.getFailureReason() + " Hence changing job state to Failed.";
                        errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
                    } else {
                        msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " doesn't  return valid job submission exit code for JobName:" + jobModel.getJobName() + ", with failure reason : stdout ->" + jobSubmissionOutput.getStdOut() + " stderr -> " + jobSubmissionOutput.getStdErr() + " Hence changing job state to Failed.";
                        errorModel.setActualErrorMessage(msg);
                    }
                    log.error(msg);
                    errorModel.setUserFriendlyMessage(msg);
                    GFacUtils.saveExperimentError(processContext, errorModel);
                    GFacUtils.saveProcessError(processContext, errorModel);
                    GFacUtils.saveTaskError(taskContext, errorModel);
                    taskStatus.setState(TaskState.FAILED);
                    taskStatus.setReason(msg);
                    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    taskContext.setTaskStatus(taskStatus);
                }
                try {
                    GFacUtils.saveAndPublishTaskStatus(taskContext);
                } catch (GFacException e) {
                    log.error("Error while saving task status", e);
                }
                return taskStatus;
            } else if (jobId != null && !jobId.isEmpty()) {
                // Submission returned a job id: record SUBMITTED, then QUEUED if verification succeeds.
                jobModel.setJobId(jobId);
                GFacUtils.saveJobModel(processContext, jobModel);
                jobStatus.setJobState(JobState.SUBMITTED);
                ComputeResourceDescription computeResourceDescription = taskContext.getParentProcessContext().getComputeResourceDescription();
                jobStatus.setReason("Successfully Submitted to " + computeResourceDescription.getHostName());
                jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                jobModel.setJobStatuses(Arrays.asList(jobStatus));
                GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                if (verifyJobSubmissionByJobId(remoteCluster, jobId)) {
                    jobStatus.setJobState(JobState.QUEUED);
                    jobStatus.setReason("Verification step succeeded");
                    jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    jobModel.setJobStatuses(Arrays.asList(jobStatus));
                    GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                }
                // doing gateway reporting
                if (computeResourceDescription.isGatewayUsageReporting()) {
                    String loadCommand = computeResourceDescription.getGatewayUsageModuleLoadCommand();
                    String usageExecutable = computeResourceDescription.getGatewayUsageExecutable();
                    ExperimentModel experiment = (ExperimentModel) taskContext.getParentProcessContext().getExperimentCatalog().get(ExperimentCatalogModelType.EXPERIMENT, experimentId);
                    String username = experiment.getUserName() + "@" + taskContext.getParentProcessContext().getUsageReportingGatewayId();
                    // NOTE(review): the command line is built by string concatenation; confirm that
                    // username and jobId cannot contain shell metacharacters.
                    RawCommandInfo rawCommandInfo = new RawCommandInfo(loadCommand + " && " + usageExecutable + " -gateway_user " + username + " -submit_time \"`date '+%F %T %:z'`\"  -jobid " + jobId);
                    remoteCluster.execute(rawCommandInfo);
                }
                taskStatus = new TaskStatus(TaskState.COMPLETED);
                taskStatus.setReason("Submitted job to compute resource");
                taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            } else {
                // No job id came back: try to discover it through the verification step.
                int verificationTryCount = 0;
                while (verificationTryCount++ < 3) {
                    String verifyJobId = verifyJobSubmission(remoteCluster, jobModel);
                    if (verifyJobId != null && !verifyJobId.isEmpty()) {
                        // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
                        jobId = verifyJobId;
                        jobModel.setJobId(jobId);
                        GFacUtils.saveJobModel(processContext, jobModel);
                        jobStatus.setJobState(JobState.QUEUED);
                        jobStatus.setReason("Verification step succeeded");
                        jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                        jobModel.setJobStatuses(Arrays.asList(jobStatus));
                        GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                        taskStatus.setState(TaskState.COMPLETED);
                        taskStatus.setReason("Submitted job to compute resource");
                        taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                        break;
                    }
                    log.info("Verify step return invalid jobId, retry verification step in {} secs", verificationTryCount * 10);
                    // Pause grows with each attempt: 10s, 20s, 30s.
                    Thread.sleep(verificationTryCount * 10000);
                }
            }
            if (jobId == null || jobId.isEmpty()) {
                jobModel.setJobId(DEFAULT_JOB_ID);
                GFacUtils.saveJobModel(processContext, jobModel);
                String msg = "expId:" + processContext.getProcessModel().getExperimentId() + " Couldn't find " + "remote jobId for JobName:" + jobModel.getJobName() + ", both submit and verify steps " + "doesn't return a valid JobId. " + "Hence changing experiment state to Failed";
                log.error(msg);
                ErrorModel errorModel = new ErrorModel();
                errorModel.setUserFriendlyMessage(msg);
                errorModel.setActualErrorMessage(msg);
                GFacUtils.saveExperimentError(processContext, errorModel);
                GFacUtils.saveProcessError(processContext, errorModel);
                GFacUtils.saveTaskError(taskContext, errorModel);
                taskStatus.setState(TaskState.FAILED);
                taskStatus.setReason("Couldn't find job id in both submitted and verified steps");
                taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            } else {
                GFacUtils.saveJobModel(processContext, jobModel);
            }
        } else {
            taskStatus.setState(TaskState.FAILED);
            if (jobFile == null) {
                taskStatus.setReason("JobFile is null");
            } else {
                taskStatus.setReason("Job file doesn't exist");
            }
        }
    } catch (AppCatalogException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while instantiating app catalog", e);
    } catch (ApplicationSettingsException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while creating job descriptor", e);
    } catch (GFacException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while submitting the job", e);
    } catch (IOException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while reading the content of the job file", e);
    } catch (InterruptedException e) {
        // Do not swallow the interrupt; it is re-asserted once the status has been persisted.
        interrupted = true;
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while verifying the job submission", e);
    } catch (Throwable e) {
        recordSubmissionFailure(taskContext, taskStatus, "JobSubmission failed", e);
    }
    taskContext.setTaskStatus(taskStatus);
    try {
        GFacUtils.saveAndPublishTaskStatus(taskContext);
    } catch (GFacException e) {
        log.error("Error while saving task status", e);
    }
    if (interrupted) {
        // Restored only now so the save/publish above is not disturbed by a pending interrupt.
        Thread.currentThread().interrupt();
    }
    return taskStatus;
}

/**
 * Logs a submission failure, marks the task status as FAILED and attaches an error model to the task model.
 */
private void recordSubmissionFailure(TaskContext taskContext, TaskStatus taskStatus, String msg, Throwable cause) {
    log.error(msg, cause);
    taskStatus.setState(TaskState.FAILED);
    taskStatus.setReason(msg);
    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
    ErrorModel errorModel = new ErrorModel();
    errorModel.setActualErrorMessage(cause.getMessage());
    errorModel.setUserFriendlyMessage(msg);
    taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
Also used : ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException) ComputeResourceDescription(org.apache.airavata.model.appcatalog.computeresource.ComputeResourceDescription) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) ArrayList(java.util.ArrayList) ExperimentModel(org.apache.airavata.model.experiment.ExperimentModel) IOException(java.io.IOException) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) JobSubmissionOutput(org.apache.airavata.gfac.core.cluster.JobSubmissionOutput) AppCatalogException(org.apache.airavata.registry.cpi.AppCatalogException) ResourceJobManager(org.apache.airavata.model.appcatalog.computeresource.ResourceJobManager) ErrorModel(org.apache.airavata.model.commons.ErrorModel) JobModel(org.apache.airavata.model.job.JobModel) File(java.io.File) RawCommandInfo(org.apache.airavata.gfac.core.cluster.RawCommandInfo)

Example 24 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

the class GFacUtils method saveAndPublishTaskStatus.

/**
 * Saves the task's current status to the experiment catalog and publishes a
 * task-status-change event through the process's status publisher.
 *
 * <p>If the status carries no state-change time (value {@code <= 0}) it is stamped with the
 * current time before saving; a time already set by the caller is left untouched.
 *
 * @param taskContext task whose status is saved and published
 * @throws GFacException if saving or publishing fails for any reason; the original
 *                       exception is attached as the cause
 */
public static void saveAndPublishTaskStatus(TaskContext taskContext) throws GFacException {
    try {
        TaskState state = taskContext.getTaskState();
        // The status is saved to the experiment catalog first and only then published.
        ProcessContext processContext = taskContext.getParentProcessContext();
        ExperimentCatalog experimentCatalog = processContext.getExperimentCatalog();
        TaskStatus status = taskContext.getTaskStatus();
        // Only fill in a missing timestamp. The previous condition (== 0 || > 0) matched every
        // non-negative value and therefore always overwrote the time supplied by the caller.
        if (status.getTimeOfStateChange() <= 0) {
            status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        }
        experimentCatalog.add(ExpCatChildDataType.TASK_STATUS, status, taskContext.getTaskId());
        TaskIdentifier identifier = new TaskIdentifier(taskContext.getTaskId(), processContext.getProcessId(), processContext.getProcessModel().getExperimentId(), processContext.getGatewayId());
        TaskStatusChangeEvent taskStatusChangeEvent = new TaskStatusChangeEvent(state, identifier);
        MessageContext msgCtx = new MessageContext(taskStatusChangeEvent, MessageType.TASK, AiravataUtils.getId(MessageType.TASK.name()), taskContext.getParentProcessContext().getGatewayId());
        msgCtx.setUpdatedTime(AiravataUtils.getCurrentTimestamp());
        processContext.getStatusPublisher().publish(msgCtx);
    } catch (Exception e) {
        throw new GFacException("Error persisting task status" + e.getLocalizedMessage(), e);
    }
}
Also used : MessageContext(org.apache.airavata.messaging.core.MessageContext) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) URISyntaxException(java.net.URISyntaxException) TException(org.apache.thrift.TException) UnknownHostException(java.net.UnknownHostException) ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException)

Aggregations

ProcessContext (org.apache.airavata.gfac.core.context.ProcessContext)24 GFacException (org.apache.airavata.gfac.core.GFacException)15 JobModel (org.apache.airavata.model.job.JobModel)11 TaskStatus (org.apache.airavata.model.status.TaskStatus)11 ErrorModel (org.apache.airavata.model.commons.ErrorModel)10 IOException (java.io.IOException)7 URISyntaxException (java.net.URISyntaxException)7 ApplicationSettingsException (org.apache.airavata.common.exception.ApplicationSettingsException)7 TException (org.apache.thrift.TException)7 AiravataException (org.apache.airavata.common.exception.AiravataException)5 CredentialStoreException (org.apache.airavata.credential.store.store.CredentialStoreException)5 RemoteCluster (org.apache.airavata.gfac.core.cluster.RemoteCluster)5 OutputDataObjectType (org.apache.airavata.model.application.io.OutputDataObjectType)5 JobStatus (org.apache.airavata.model.status.JobStatus)5 AppCatalogException (org.apache.airavata.registry.cpi.AppCatalogException)5 JSchException (com.jcraft.jsch.JSchException)4 Session (com.jcraft.jsch.Session)4 File (java.io.File)4 URI (java.net.URI)4 JobSubmissionTask (org.apache.airavata.gfac.core.task.JobSubmissionTask)4