Search in sources :

Example 1 with RemoteCluster

use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.

the class Factory method getDataMovementRemoteCluster.

/**
 * Looks up, or lazily creates, the {@link RemoteCluster} used for data movement by the given process.
 * <p>
 * Clusters are cached in {@code remoteClusterMap} under a key made of the compute resource login user name, the
 * data movement protocol name, the storage resource id and the storage resource credential token. When a cached
 * cluster carries an {@link SSHKeyAuthentication} whose user name differs from the storage resource login user
 * name and the protocol is SCP, a fresh cluster is built and returned for this call.
 * <p>
 * NOTE(review): the cache key uses the <em>compute</em> login user name while the cache-hit check compares the
 * <em>storage</em> login user name, and the replacement cluster is not written back to the map - confirm both
 * are intended.
 *
 * @param processContext context of the process that needs the cluster
 * @return the cached cluster, or a newly created one
 * @throws GFacException if no cluster implementation is mapped to the data movement protocol
 * @throws AiravataException propagated as-is from the helpers invoked here
 * @throws CredentialStoreException propagated as-is from the helpers invoked here
 */
public static RemoteCluster getDataMovementRemoteCluster(ProcessContext processContext) throws GFacException, AiravataException, CredentialStoreException {
    final String storageId = processContext.getStorageResourceId();
    final DataMovementProtocol protocol = processContext.getDataMovementProtocol();

    // Key layout: <compute login user>:<protocol>:<storage resource id>:<storage credential token>
    final StringBuilder keyBuilder = new StringBuilder(processContext.getComputeResourceLoginUserName());
    keyBuilder.append(':').append(protocol.name());
    keyBuilder.append(':').append(storageId);
    keyBuilder.append(":").append(processContext.getStorageResourceCredentialToken());
    final String cacheKey = keyBuilder.toString();

    final RemoteCluster cached = remoteClusterMap.get(cacheKey);
    if (cached == null) {
        // Cache miss: build the implementation matching the protocol and remember it.
        final JobManagerConfiguration managerConfig = getJobManagerConfiguration(processContext.getResourceJobManager());
        final RemoteCluster created;
        if (protocol == DataMovementProtocol.LOCAL) {
            created = new LocalRemoteCluster(processContext.getStorageResourceServerInfo(), managerConfig, null);
        } else if (protocol == DataMovementProtocol.SCP) {
            created = new HPCRemoteCluster(processContext.getStorageResourceServerInfo(), managerConfig, Factory.getStorageSSHKeyAuthentication(processContext));
        } else {
            throw new GFacException("No remote cluster implementation map to job data movement protocol " + protocol.name());
        }
        remoteClusterMap.put(cacheKey, created);
        return created;
    }

    // Cache hit: only SSH-key authenticated clusters can be stale with respect to the login user.
    final AuthenticationInfo cachedAuth = cached.getAuthentication();
    if (!(cachedAuth instanceof SSHKeyAuthentication)) {
        return cached;
    }
    final String cachedUser = ((SSHKeyAuthentication) cachedAuth).getUserName();
    if (cachedUser.equals(processContext.getStorageResourceLoginUserName())) {
        return cached;
    }

    // User name mismatch: for SCP hand out a cluster authenticated for the current process instead.
    final JobManagerConfiguration managerConfig = getJobManagerConfiguration(processContext.getResourceJobManager());
    if (processContext.getDataMovementProtocol() != DataMovementProtocol.SCP) {
        return cached;
    }
    return new HPCRemoteCluster(processContext.getStorageResourceServerInfo(), managerConfig, Factory.getStorageSSHKeyAuthentication(processContext));
}
Also used : GFacException(org.apache.airavata.gfac.core.GFacException) JobManagerConfiguration(org.apache.airavata.gfac.core.JobManagerConfiguration) DataMovementProtocol(org.apache.airavata.model.data.movement.DataMovementProtocol) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) SSHKeyAuthentication(org.apache.airavata.gfac.core.authentication.SSHKeyAuthentication) AuthenticationInfo(org.apache.airavata.gfac.core.authentication.AuthenticationInfo)

Example 2 with RemoteCluster

use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.

the class Factory method getJobSubmissionRemoteCluster.

/**
 * Returns the {@link RemoteCluster} used to submit jobs for the given process.
 * <p>
 * The factory keeps a map of remote clusters so that cluster instances are reused across calls; this is meant to
 * avoid the "too many connections / sessions" problems seen with cluster communication. The map key is made of
 * the compute resource login user name, the job submission protocol name, the compute resource id and the
 * compute resource credential token.
 * <p>
 * If the cached cluster carries an {@link SSHKeyAuthentication} whose user name differs from the compute resource
 * login user name, a new cluster is created for every protocol that is served by {@code HPCRemoteCluster}
 * (SSH, SSH_FORK and CLOUD) and returned for this call.
 * <p>
 * NOTE(review): the replacement cluster is not written back to the map - confirm that is intended.
 *
 * @param processContext context of the process that needs the cluster
 * @return the cached cluster, or a newly created one
 * @throws GFacException if no cluster implementation is mapped to the job submission protocol
 * @throws AppCatalogException propagated as-is from the helpers invoked here
 * @throws AiravataException propagated as-is from the helpers invoked here
 * @throws CredentialStoreException propagated as-is from the helpers invoked here
 */
public static RemoteCluster getJobSubmissionRemoteCluster(ProcessContext processContext) throws GFacException, AppCatalogException, AiravataException, CredentialStoreException {
    String computeResourceId = processContext.getComputeResourceId();
    JobSubmissionProtocol jobSubmissionProtocol = processContext.getJobSubmissionProtocol();
    String key = new StringBuilder(processContext.getComputeResourceLoginUserName()).append(':').append(jobSubmissionProtocol.name()).append(':').append(computeResourceId).append(':').append(processContext.getComputeResourceCredentialToken()).toString();

    // Single source of truth for "which protocols are served by HPCRemoteCluster", shared by the
    // cache-miss path and the stale-user-name path so the two cannot drift apart again.
    boolean sshBased = jobSubmissionProtocol == JobSubmissionProtocol.SSH
            || jobSubmissionProtocol == JobSubmissionProtocol.SSH_FORK
            || jobSubmissionProtocol == JobSubmissionProtocol.CLOUD;

    RemoteCluster remoteCluster = remoteClusterMap.get(key);
    if (remoteCluster == null) {
        JobManagerConfiguration jobManagerConfiguration = getJobManagerConfiguration(processContext.getResourceJobManager());
        if (jobSubmissionProtocol == JobSubmissionProtocol.LOCAL || jobSubmissionProtocol == JobSubmissionProtocol.LOCAL_FORK) {
            remoteCluster = new LocalRemoteCluster(processContext.getComputeResourceServerInfo(), jobManagerConfiguration, null);
        } else if (sshBased) {
            remoteCluster = new HPCRemoteCluster(processContext.getComputeResourceServerInfo(), jobManagerConfiguration, Factory.getComputerResourceSSHKeyAuthentication(processContext));
        } else {
            throw new GFacException("No remote cluster implementation map to job submission protocol " + jobSubmissionProtocol.name());
        }
        remoteClusterMap.put(key, remoteCluster);
    } else {
        AuthenticationInfo authentication = remoteCluster.getAuthentication();
        if (authentication instanceof SSHKeyAuthentication) {
            SSHKeyAuthentication sshKeyAuthentication = (SSHKeyAuthentication) authentication;
            if (!sshKeyAuthentication.getUserName().equals(processContext.getComputeResourceLoginUserName())) {
                JobManagerConfiguration jobManagerConfiguration = getJobManagerConfiguration(processContext.getResourceJobManager());
                // Previously CLOUD was missing here, so a CLOUD cluster authenticated for another
                // user name was handed back unchanged.
                if (sshBased) {
                    remoteCluster = new HPCRemoteCluster(processContext.getComputeResourceServerInfo(), jobManagerConfiguration, Factory.getComputerResourceSSHKeyAuthentication(processContext));
                }
            }
        }
    }
    return remoteCluster;
}
Also used : JobSubmissionProtocol(org.apache.airavata.model.appcatalog.computeresource.JobSubmissionProtocol) GFacException(org.apache.airavata.gfac.core.GFacException) JobManagerConfiguration(org.apache.airavata.gfac.core.JobManagerConfiguration) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) SSHKeyAuthentication(org.apache.airavata.gfac.core.authentication.SSHKeyAuthentication) AuthenticationInfo(org.apache.airavata.gfac.core.authentication.AuthenticationInfo)

Example 3 with RemoteCluster

use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.

the class DefaultJobSubmissionTask method cancel.

/**
 * Cancels the job that belongs to the parent process of the given task.
 * <p>
 * While the process is in the EXECUTING state the method first waits until the job model has a job id. It then
 * queries the job status on the remote cluster, retrying with a linearly growing pause while the status is
 * {@code null}, and finally asks the cluster to cancel the job.
 * <p>
 * If the calling thread is interrupted while waiting, the interrupt flag is restored and the operation is
 * aborted with a {@link TaskException}.
 *
 * @param taskcontext context of the task whose job should be cancelled
 * @return the job status returned by the cluster's cancel call
 * @throws TaskException if the process context has no job model, the job status cannot be found on the
 *                       resource, the cluster call fails, or the thread is interrupted while waiting
 */
@Override
public JobStatus cancel(TaskContext taskcontext) throws TaskException {
    ProcessContext processContext = taskcontext.getParentProcessContext();
    RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
    JobModel jobModel = processContext.getJobModel();
    if (jobModel == null) {
        throw new TaskException("Couldn't complete cancel operation, JobModel is null in ProcessContext.");
    }

    if (processContext.getProcessState() == ProcessState.EXECUTING) {
        // The job id is filled in by the submission side; wait until it shows up.
        while (jobModel.getJobId() == null) {
            log.info("Cancellation pause {} secs until process get jobId", pauseTimeInSec);
            try {
                Thread.sleep(waitForProcessIdmillis);
            } catch (InterruptedException e) {
                // Do not swallow the interrupt: restore the flag and stop waiting so the
                // thread can actually be shut down.
                Thread.currentThread().interrupt();
                throw new TaskException("Interrupted while waiting for the process to get a jobId, cancel operation aborted", e);
            }
        }
    }

    try {
        int retryCount = 0;
        JobStatus oldJobStatus = remoteCluster.getJobStatus(jobModel.getJobId());
        // Back off 1s, 2s, ... between status look-ups while the resource does not know the job yet.
        while (oldJobStatus == null && retryCount <= 5) {
            retryCount++;
            Thread.sleep(retryCount * 1000);
            oldJobStatus = remoteCluster.getJobStatus(jobModel.getJobId());
        }
        if (oldJobStatus != null) {
            oldJobStatus = remoteCluster.cancelJob(jobModel.getJobId());
            return oldJobStatus;
        } else {
            throw new TaskException("Cancel operation failed, Job status couldn't find in resource, JobId " + jobModel.getJobId());
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt status for callers further up the stack.
        Thread.currentThread().interrupt();
        throw new TaskException("Error while cancelling job " + jobModel.getJobId(), e);
    } catch (GFacException e) {
        throw new TaskException("Error while cancelling job " + jobModel.getJobId(), e);
    }
}
Also used : TaskException(org.apache.airavata.gfac.core.task.TaskException) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) JobModel(org.apache.airavata.model.job.JobModel) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 4 with RemoteCluster

use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.

the class EnvironmentSetupTask method execute.

/**
 * Prepares the execution environment by creating the process working directory on the job submission cluster.
 *
 * @param taskContext context of the task being executed
 * @return a COMPLETED status when the directory was created; a FAILED status, with an error model attached to
 *         the task model, when a {@link GFacException} was raised
 */
@Override
public TaskStatus execute(TaskContext taskContext) {
    // Optimistic default; downgraded to FAILED in the catch block.
    final TaskStatus result = new TaskStatus(TaskState.COMPLETED);
    try {
        final RemoteCluster cluster = taskContext.getParentProcessContext().getJobSubmissionRemoteCluster();
        cluster.makeDirectory(taskContext.getParentProcessContext().getWorkingDir());
        result.setReason("Successfully created environment");
    } catch (GFacException e) {
        final String failureReason = "Error while environment setup";
        log.error(failureReason, e);

        result.setState(TaskState.FAILED);
        result.setReason(failureReason);

        // Attach both the raw exception message and the user-facing text to the task.
        final ErrorModel error = new ErrorModel();
        error.setActualErrorMessage(e.getMessage());
        error.setUserFriendlyMessage(failureReason);
        taskContext.getTaskModel().setTaskErrors(Arrays.asList(error));
    }
    return result;
}
Also used : GFacException(org.apache.airavata.gfac.core.GFacException) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) ErrorModel(org.apache.airavata.model.commons.ErrorModel) TaskStatus(org.apache.airavata.model.status.TaskStatus)

Example 5 with RemoteCluster

use of org.apache.airavata.gfac.core.cluster.RemoteCluster in project airavata by apache.

the class ForkJobSubmissionTask method execute.

/**
 * Submits the job of the parent process as a forked/batch job on the job submission cluster.
 * <p>
 * The method generates the job file from the groovy map, submits it through the remote cluster, copies exit
 * code / stdout / stderr onto the job model and, when a job id was returned, persists the job model and a
 * SUBMITTED job status. A missing job id, a missing job file, a missing job model or any of the handled
 * exceptions turn the task status into FAILED.
 *
 * @param taskContext context of the task being executed
 * @return COMPLETED when the job was submitted and a job id was obtained, otherwise FAILED with a reason
 */
@Override
public TaskStatus execute(TaskContext taskContext) {
    TaskStatus taskStatus = new TaskStatus(TaskState.CREATED);
    try {
        ProcessContext processContext = taskContext.getParentProcessContext();
        JobModel jobModel = processContext.getJobModel();
        if (jobModel == null) {
            // Fail the task explicitly instead of letting an uncaught NullPointerException escape.
            taskStatus.setState(TaskState.FAILED);
            taskStatus.setReason("JobModel is null in ProcessContext");
            return taskStatus;
        }
        jobModel.setTaskId(taskContext.getTaskId());
        RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
        GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
        jobModel.setJobName(groovyMap.get(Script.JOB_NAME).toString());
        ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
        JobManagerConfiguration jConfig = null;
        if (resourceJobManager != null) {
            jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
        }

        File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
        if (jobFile == null || !jobFile.exists()) {
            taskStatus.setState(TaskState.FAILED);
            taskStatus.setReason(jobFile == null ? "JobFile is null" : "Job file doesn't exist");
            return taskStatus;
        }

        jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
        JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
        jobModel.setExitCode(jobSubmissionOutput.getExitCode());
        jobModel.setStdErr(jobSubmissionOutput.getStdErr());
        jobModel.setStdOut(jobSubmissionOutput.getStdOut());

        String jobId = jobSubmissionOutput.getJobId();
        if (jobId != null && !jobId.isEmpty()) {
            jobModel.setJobId(jobId);
            GFacUtils.saveJobModel(processContext, jobModel);
            JobStatus jobStatus = new JobStatus();
            jobStatus.setJobState(JobState.SUBMITTED);
            jobStatus.setReason("Successfully Submitted to " + processContext.getComputeResourceDescription().getHostName());
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            jobModel.setJobStatuses(Arrays.asList(jobStatus));
            GFacUtils.saveJobStatus(processContext, jobModel);
            taskStatus = new TaskStatus(TaskState.COMPLETED);
            taskStatus.setReason("Submitted job to compute resource");
            // Second save kept on purpose: it persists the job model after the statuses were attached.
            GFacUtils.saveJobModel(processContext, jobModel);
        } else {
            String msg = "expId:" + processContext.getProcessModel().getExperimentId()
                    + " Couldn't find remote jobId for JobName:" + jobModel.getJobName()
                    + ", both submit and verify steps doesn't return a valid JobId. "
                    + "Hence changing experiment state to Failed";
            log.error(msg);
            ErrorModel errorModel = new ErrorModel();
            errorModel.setActualErrorMessage(msg);
            errorModel.setCreationTime(AiravataUtils.getCurrentTimestamp().getTime());
            GFacUtils.saveExperimentError(processContext, errorModel);
            GFacUtils.saveProcessError(processContext, errorModel);
            GFacUtils.saveTaskError(taskContext, errorModel);
            taskStatus.setState(TaskState.FAILED);
            taskStatus.setReason("Couldn't find job id in both submitted and verified steps");
        }
    } catch (ApplicationSettingsException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while creating job descriptor", e);
    } catch (AppCatalogException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while instantiating app catalog", e);
    } catch (GFacException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while submitting the job", e);
    } catch (IOException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while reading the content of the job file", e);
    }
    return taskStatus;
}

/**
 * Logs the failure, marks the task status as FAILED with the given reason and attaches an error model that
 * carries both the exception message and the user-friendly message to the task model.
 */
private void recordSubmissionFailure(TaskContext taskContext, TaskStatus taskStatus, String msg, Exception cause) {
    log.error(msg, cause);
    taskStatus.setState(TaskState.FAILED);
    taskStatus.setReason(msg);
    ErrorModel errorModel = new ErrorModel();
    errorModel.setActualErrorMessage(cause.getMessage());
    errorModel.setUserFriendlyMessage(msg);
    taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
Also used : ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException) RemoteCluster(org.apache.airavata.gfac.core.cluster.RemoteCluster) IOException(java.io.IOException) TaskStatus(org.apache.airavata.model.status.TaskStatus) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) JobStatus(org.apache.airavata.model.status.JobStatus) JobSubmissionOutput(org.apache.airavata.gfac.core.cluster.JobSubmissionOutput) AppCatalogException(org.apache.airavata.registry.cpi.AppCatalogException) ResourceJobManager(org.apache.airavata.model.appcatalog.computeresource.ResourceJobManager) ErrorModel(org.apache.airavata.model.commons.ErrorModel) JobModel(org.apache.airavata.model.job.JobModel) File(java.io.File)

Aggregations

RemoteCluster (org.apache.airavata.gfac.core.cluster.RemoteCluster)8 ProcessContext (org.apache.airavata.gfac.core.context.ProcessContext)5 ErrorModel (org.apache.airavata.model.commons.ErrorModel)5 GFacException (org.apache.airavata.gfac.core.GFacException)4 JobModel (org.apache.airavata.model.job.JobModel)4 TaskStatus (org.apache.airavata.model.status.TaskStatus)4 File (java.io.File)3 IOException (java.io.IOException)3 ApplicationSettingsException (org.apache.airavata.common.exception.ApplicationSettingsException)3 AuthenticationInfo (org.apache.airavata.gfac.core.authentication.AuthenticationInfo)3 JobSubmissionOutput (org.apache.airavata.gfac.core.cluster.JobSubmissionOutput)3 ResourceJobManager (org.apache.airavata.model.appcatalog.computeresource.ResourceJobManager)3 AppCatalogException (org.apache.airavata.registry.cpi.AppCatalogException)3 JobManagerConfiguration (org.apache.airavata.gfac.core.JobManagerConfiguration)2 SSHKeyAuthentication (org.apache.airavata.gfac.core.authentication.SSHKeyAuthentication)2 RawCommandInfo (org.apache.airavata.gfac.core.cluster.RawCommandInfo)2 JobStatus (org.apache.airavata.model.status.JobStatus)2 Session (com.jcraft.jsch.Session)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1