Search in sources :

Example 16 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

The method process of the class RedeliveryRequestWatcherImpl.

/**
 * Reacts to a ZooKeeper watch event fired for a redelivery path.
 *
 * <ul>
 *   <li>{@code NodeDataChanged}: reads the node data as a server name. If it equals this
 *       server's (trimmed) GFac server name, the watcher is re-registered on the path; otherwise
 *       the process context for {@code processId}, if one is found, is flagged for hand-over.</li>
 *   <li>{@code NodeDeleted}: only logged; no watcher is re-registered.</li>
 *   <li>Every other event type: the watcher is re-registered when the event carries a path.</li>
 * </ul>
 *
 * @param watchedEvent the event delivered by ZooKeeper
 * @throws Exception if reading the node data or registering the watcher fails
 */
@Override
public void process(WatchedEvent watchedEvent) throws Exception {
    String path = watchedEvent.getPath();
    Watcher.Event.EventType eventType = watchedEvent.getType();
    log.info("Redelivery request came for zk path {} event type {} ", path, eventType.name());
    CuratorFramework curatorClient = Factory.getCuratorClient();
    switch (eventType) {
        case NodeDataChanged:
            byte[] bytes = curatorClient.getData().forPath(path);
            // Decode with an explicit charset so the result does not depend on the JVM's platform default.
            String serverName = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);
            if (ServerSettings.getGFacServerName().trim().equals(serverName)) {
                // Data still names this server: keep watching the node.
                curatorClient.getData().usingWatcher(this).forPath(path);
                log.info("processId: {},event type {}, change data with same server name : {}", processId, eventType, serverName);
            } else {
                ProcessContext processContext = Factory.getGfacContext().getProcess(processId);
                if (processContext != null) {
                    processContext.setHandOver(true);
                    log.info("processId : {}, event type {}, handing over to new server instance : {}", processId, eventType, serverName);
                } else {
                    log.info("Redelivery request came for processId {}, with event type {}, but couldn't find " + "process context", processId, eventType.name());
                }
            }
            break;
        case NodeDeleted:
            // end of experiment execution, ignore this event
            log.info("Redelivery watcher trigger for process {} with event type {}", processId, eventType.name());
            break;
        default:
            // NodeCreated, NodeChildrenChanged, None and any other type are not yet implemented:
            // just re-register the watcher so later events are still delivered.
            if (path != null) {
                curatorClient.getData().usingWatcher(this).forPath(path);
                log.info("Redelivery watcher trigger for process {} with event type {}", processId, eventType.name());
            }
            break;
    }
}
Also used : CuratorFramework(org.apache.curator.framework.CuratorFramework) WatchedEvent(org.apache.zookeeper.WatchedEvent) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 17 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

The method process of the class EmailBasedMonitor.

/**
 * Applies a job status result to the job being monitored.
 *
 * <p>The job's current state is taken from the most recent entry (by time of state change) in the
 * job model's status list. Depending on the reported state, a new {@link JobStatus} is prepared,
 * the job may be removed from {@code jobMonitorMap}, and the out-flow tasks may be scheduled.
 * Once the job is already recorded as COMPLETE, later QUEUED/ACTIVE/FAILED/CANCELED results are
 * ignored.
 *
 * @param jobStatusResult the job status result (job id, job name, state, authoritative flag)
 * @param taskContext     the task context whose parent process owns the job
 */
private void process(JobStatusResult jobStatusResult, TaskContext taskContext) {
    canceledJobs.remove(jobStatusResult.getJobId());
    JobState resultState = jobStatusResult.getState();
    // TODO : update job state on process context
    boolean runOutflowTasks = false;
    JobStatus jobStatus = new JobStatus();
    ProcessContext parentProcessContext = taskContext.getParentProcessContext();
    JobModel jobModel = parentProcessContext.getJobModel();
    String jobDetails = "JobName : " + jobStatusResult.getJobName() + ", JobId : " + jobStatusResult.getJobId();

    // Determine the current state from the latest status entry (ties resolved in favour of later entries).
    JobState currentState = null;
    List<JobStatus> jobStatusList = jobModel.getJobStatuses();
    if (jobStatusList != null && !jobStatusList.isEmpty()) {
        JobStatus lastStatus = jobStatusList.get(0);
        for (JobStatus temp : jobStatusList) {
            if (temp.getTimeOfStateChange() >= lastStatus.getTimeOfStateChange()) {
                lastStatus = temp;
            }
        }
        currentState = lastStatus.getJobState();
    }

    // FIXME - What if non-authoritative email comes later (getting accumulated in the email account)
    if (resultState == JobState.COMPLETE) {
        if (jobStatusResult.isAuthoritative()) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            if (currentState == JobState.COMPLETE) {
                // Completion was already recorded earlier; do not publish or run out-flow again.
                log.info("[EJM]: Authoritative job Complete email received after early Airavata custom complete email," + " removed job from job monitoring. " + jobDetails);
            } else {
                runOutflowTasks = true;
                jobStatus.setJobState(JobState.COMPLETE);
                jobStatus.setReason("Complete email received");
                jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                log.info("[EJM]: Authoritative job Complete email received , removed job from job monitoring. " + jobDetails);
            }
        } else {
            // Non-authoritative completion: the job stays in jobMonitorMap.
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.COMPLETE);
            jobStatus.setReason("Complete email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Non Authoritative Job Complete email received. " + jobDetails);
        }
    } else if (resultState == JobState.QUEUED) {
        if (currentState != JobState.COMPLETE) {
            // Nothing special to do; the status change is published at the end of this method.
            jobStatus.setJobState(JobState.QUEUED);
            jobStatus.setReason("Queue email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Queued email received, " + jobDetails);
        }
    } else if (resultState == JobState.ACTIVE) {
        if (currentState != JobState.COMPLETE) {
            // Nothing special to do; the status change is published at the end of this method.
            jobStatus.setJobState(JobState.ACTIVE);
            jobStatus.setReason("Active email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Active email received, " + jobDetails);
        }
    } else if (resultState == JobState.FAILED) {
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.FAILED);
            jobStatus.setReason("Failed email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job failed email received , removed job from job monitoring. " + jobDetails);
        }
    } else if (resultState == JobState.CANCELED) {
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            jobStatus.setJobState(JobState.CANCELED);
            jobStatus.setReason("Canceled email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job canceled mail received, removed job from job monitoring. " + jobDetails);
            // we run out flow and this will move process to cancel state.
            runOutflowTasks = true;
        }
    }

    // A job state is only set above when there is a status change to persist.
    if (jobStatus.getJobState() != null) {
        try {
            jobModel.setJobStatuses(Arrays.asList(jobStatus));
            log.info("[EJM]: Publishing status changes to amqp. " + jobDetails);
            GFacUtils.saveJobStatus(parentProcessContext, jobModel);
        } catch (GFacException e) {
            // Pass the exception as the trailing argument so its stack trace is logged.
            log.error("expId: {}, processId: {}, taskId: {}, jobId: {} :- Error while save and publishing Job " + "status {}", taskContext.getExperimentId(), taskContext.getProcessId(), jobModel.getTaskId(), jobModel.getJobId(), jobStatus.getJobState(), e);
        }
    }

    if (runOutflowTasks) {
        log.info("[EJM]: Calling Out Handler chain of " + jobDetails);
        try {
            TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
            taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            taskStatus.setReason("Job monitoring completed with final state: " + TaskState.COMPLETED.name());
            taskContext.setTaskStatus(taskStatus);
            GFacUtils.saveAndPublishTaskStatus(taskContext);
            if (parentProcessContext.isCancel()) {
                ProcessStatus processStatus = new ProcessStatus(ProcessState.CANCELLING);
                processStatus.setReason("Process has been cancelled");
                parentProcessContext.setProcessStatus(processStatus);
                GFacUtils.saveAndPublishProcessStatus(parentProcessContext);
            }
            // Hand the process to a worker on the shared thread pool.
            GFacThreadPoolExecutor.getCachedThreadPool().execute(new GFacWorker(parentProcessContext));
        } catch (GFacException e) {
            // This is a failure, so report it at error level rather than info.
            log.error("[EJM]: Error while running output tasks", e);
        }
    }
}
Also used : GFacException(org.apache.airavata.gfac.core.GFacException) GFacWorker(org.apache.airavata.gfac.impl.GFacWorker) JobModel(org.apache.airavata.model.job.JobModel) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 18 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

The method configureWorkspace of the class GFacEngineImpl.

/**
 * Runs the environment setup task for the given task context.
 *
 * <p>The task status is published as EXECUTING before the task runs and the resulting status is
 * published afterwards. If the task ends in FAILED, the error is logged, an {@link ErrorModel} is
 * saved against the task and a {@link GFacException} is thrown.
 *
 * @param taskContext context of the environment setup task; its sub task model must be an
 *                    {@link EnvironmentSetupTaskModel}
 * @param recover     passed through to {@code executeTask}
 * @return always {@code false} when the method completes normally
 * @throws GFacException if the security protocol is neither SSH_KEYS nor LOCAL, if the task
 *                       fails, or if the sub task model cannot be obtained
 */
private boolean configureWorkspace(TaskContext taskContext, boolean recover) throws GFacException {
    try {
        EnvironmentSetupTaskModel subTaskModel = (EnvironmentSetupTaskModel) taskContext.getSubTaskModel();
        if (subTaskModel.getProtocol() != SecurityProtocol.SSH_KEYS && subTaskModel.getProtocol() != SecurityProtocol.LOCAL) {
            throw new GFacException("Unsupported security protocol, Airavata doesn't support " + subTaskModel.getProtocol().name() + " protocol yet.");
        }
        Task envSetupTask = new EnvironmentSetupTask();

        // Publish EXECUTING before running the task.
        TaskStatus status = new TaskStatus(TaskState.EXECUTING);
        status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        taskContext.setTaskStatus(status);
        GFacUtils.saveAndPublishTaskStatus(taskContext);

        // Run the task and publish whatever status it ended with.
        TaskStatus taskStatus = executeTask(taskContext, envSetupTask, recover);
        taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        taskContext.setTaskStatus(taskStatus);
        GFacUtils.saveAndPublishTaskStatus(taskContext);

        if (taskStatus.getState() == TaskState.FAILED) {
            ProcessContext processContext = taskContext.getParentProcessContext();
            // The log previously said "Input staging failed"; this path is environment setup.
            log.error("expId: {}, processId: {}, taskId: {} type: {},:- Environment setup failed, reason: {}", processContext.getExperimentId(), processContext.getProcessId(), taskContext.getTaskId(), envSetupTask.getType().name(), taskStatus.getReason());
            String errorMsg = new StringBuilder("expId: ").append(processContext.getExperimentId()).append(", processId: ").append(processContext.getProcessId()).append(", taskId: ").append(taskContext.getTaskId()).append(", type: ").append(taskContext.getTaskType().name()).append(" :- Environment Setup failed. Reason: ").append(taskStatus.getReason()).toString();
            ErrorModel errorModel = new ErrorModel();
            errorModel.setUserFriendlyMessage("Error while environment setup");
            errorModel.setActualErrorMessage(errorMsg);
            GFacUtils.saveTaskError(taskContext, errorModel);
            throw new GFacException("Error while environment setup");
        }
    } catch (TException e) {
        throw new GFacException("Couldn't get environment setup task model", e);
    }
    return false;
}
Also used : TException(org.apache.thrift.TException) EnvironmentSetupTaskModel(org.apache.airavata.model.task.EnvironmentSetupTaskModel) DataStreamingTask(org.apache.airavata.gfac.impl.task.DataStreamingTask) Task(org.apache.airavata.gfac.core.task.Task) JobSubmissionTask(org.apache.airavata.gfac.core.task.JobSubmissionTask) EnvironmentSetupTask(org.apache.airavata.gfac.impl.task.EnvironmentSetupTask) GFacException(org.apache.airavata.gfac.core.GFacException) ErrorModel(org.apache.airavata.model.commons.ErrorModel) TaskStatus(org.apache.airavata.model.status.TaskStatus) EnvironmentSetupTask(org.apache.airavata.gfac.impl.task.EnvironmentSetupTask) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 19 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

The method executeJobSubmission of the class GFacEngineImpl.

/**
 * Runs the job submission task for the given task context.
 *
 * <p>The task status is published as EXECUTING, the job submission task matching the sub task
 * model's protocol is executed, the resulting status is published, and {@code checkFailures}
 * is invoked on the outcome.
 *
 * @param taskContext context of the job submission task; its sub task model must be a
 *                    {@link JobSubmissionTaskModel}
 * @param recovery    passed through to {@code executeTask}
 * @return always {@code false} when the method completes normally
 * @throws GFacException if a {@link TException} is raised inside the try block (wrapped), or if
 *                       one of the called helpers throws it
 */
private boolean executeJobSubmission(TaskContext taskContext, boolean recovery) throws GFacException {
    // Publish EXECUTING before the task is resolved and run.
    TaskStatus taskStatus = new TaskStatus(TaskState.EXECUTING);
    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
    taskContext.setTaskStatus(taskStatus);
    GFacUtils.saveAndPublishTaskStatus(taskContext);
    try {
        JobSubmissionTaskModel jobSubmissionTaskModel = (JobSubmissionTaskModel) taskContext.getSubTaskModel();
        JobSubmissionTask jobSubmissionTask = Factory.getJobSubmissionTask(jobSubmissionTaskModel.getJobSubmissionProtocol());
        // Run the task and publish whatever status it ended with.
        taskStatus = executeTask(taskContext, jobSubmissionTask, recovery);
        taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        taskContext.setTaskStatus(taskStatus);
        GFacUtils.saveAndPublishTaskStatus(taskContext);
        checkFailures(taskContext, taskStatus, jobSubmissionTask);
        return false;
    } catch (TException e) {
        throw new GFacException(e);
    }
}
Also used : TException(org.apache.thrift.TException) GFacException(org.apache.airavata.gfac.core.GFacException) JobSubmissionTask(org.apache.airavata.gfac.core.task.JobSubmissionTask) TaskStatus(org.apache.airavata.model.status.TaskStatus) JobSubmissionTaskModel(org.apache.airavata.model.task.JobSubmissionTaskModel) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 20 with ProcessContext

use of org.apache.airavata.gfac.core.context.ProcessContext in project airavata by apache.

The method populateProcessContext of the class GFacEngineImpl.

/**
 * Builds and populates the {@link ProcessContext} for a process.
 *
 * <p>The process model is loaded from the experiment catalog and combined with the gateway
 * profile and preferences from the app catalog. The context is then filled with the storage
 * resource, compute resource, application deployment and interface descriptions, stdout/stderr
 * locations, monitoring/cluster settings, the local working directory and, if one exists, the
 * first job model registered for the process.
 *
 * @param processId id of the process to load
 * @param gatewayId id of the gateway whose profile and preferences are used
 * @param tokenId   token id handed to the process context builder
 * @return the populated process context
 * @throws GFacException if the storage resource cannot be found, or wrapping any
 *                       {@link AppCatalogException}, {@link RegistryException} or
 *                       {@link AiravataException} raised while populating the context
 * @throws CredentialStoreException declared by the signature; presumably raised by one of the
 *                       called helpers — TODO confirm
 */
@Override
public ProcessContext populateProcessContext(String processId, String gatewayId, String tokenId) throws GFacException, CredentialStoreException {
    // NOTE: the process context gives precedence to data that comes with the process's compute resources.
    ProcessContext processContext = null;
    ProcessContext.ProcessContextBuilder builder = new ProcessContext.ProcessContextBuilder(processId, gatewayId, tokenId);
    try {
        AppCatalog appCatalog = Factory.getDefaultAppCatalog();
        ExperimentCatalog expCatalog = Factory.getDefaultExpCatalog();
        ProcessModel processModel = (ProcessModel) expCatalog.get(ExperimentCatalogModelType.PROCESS, processId);
        // Wire catalogs, curator client, status publisher, process model and gateway-level profile/preferences.
        builder.setAppCatalog(appCatalog).setExperimentCatalog(expCatalog).setCuratorClient(Factory.getCuratorClient()).setStatusPublisher(Factory.getStatusPublisher()).setProcessModel(processModel).setGatewayResourceProfile(appCatalog.getGatewayProfile().getGatewayProfile(gatewayId)).setGatewayComputeResourcePreference(appCatalog.getGatewayProfile().getComputeResourcePreference(gatewayId, processModel.getComputeResourceId())).setGatewayStorageResourcePreference(appCatalog.getGatewayProfile().getStoragePreference(gatewayId, processModel.getStorageResourceId()));
        processContext = builder.build();
        /* check point */
        checkpoint(processContext);
        // User-level resource profile and compute preference are applied only when the process asks for them.
        if (processModel.isUseUserCRPref()) {
            setUserResourceProfile(gatewayId, processContext);
            setUserComputeResourcePreference(gatewayId, processContext);
        }
        // Working directory is <scratchLocation>/<processId>/ using the local File.separator.
        String scratchLocation = processContext.getScratchLocation();
        String workingDirectory = scratchLocation + File.separator + processId + File.separator;
        StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
        if (storageResource != null) {
            processContext.setStorageResource(storageResource);
        } else {
            // we need to fail the process which will fail the experiment
            processContext.setProcessStatus(new ProcessStatus(ProcessState.FAILED));
            GFacUtils.saveAndPublishProcessStatus(processContext);
            throw new GFacException("expId: " + processModel.getExperimentId() + ", processId: " + processId + ":- Couldn't find storage resource for storage resource id :" + processModel.getStorageResourceId());
        }
        /*            StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
            if (storageResource != null){
                processContext.setStorageResource(storageResource);
            }*/
        processContext.setComputeResourceDescription(appCatalog.getComputeResource().getComputeResource(processContext.getComputeResourceId()));
        processContext.setApplicationDeploymentDescription(appCatalog.getApplicationDeployment().getApplicationDeployement(processModel.getApplicationDeploymentId()));
        ApplicationInterfaceDescription applicationInterface = appCatalog.getApplicationInterface().getApplicationInterface(processModel.getApplicationInterfaceId());
        processContext.setApplicationInterfaceDescription(applicationInterface);
        // For STDOUT/STDERR outputs: keep an explicit value if present, otherwise default to
        // <workingDirectory><applicationName>.stdout / .stderr and write that default back to the output.
        List<OutputDataObjectType> applicationOutputs = applicationInterface.getApplicationOutputs();
        if (applicationOutputs != null && !applicationOutputs.isEmpty()) {
            for (OutputDataObjectType outputDataObjectType : applicationOutputs) {
                if (outputDataObjectType.getType().equals(DataType.STDOUT)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        outputDataObjectType.setValue(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                        processContext.setStdoutLocation(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                    } else {
                        processContext.setStdoutLocation(outputDataObjectType.getValue());
                    }
                }
                if (outputDataObjectType.getType().equals(DataType.STDERR)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        String stderrLocation = workingDirectory + applicationInterface.getApplicationName() + ".stderr";
                        outputDataObjectType.setValue(stderrLocation);
                        processContext.setStderrLocation(stderrLocation);
                    } else {
                        processContext.setStderrLocation(outputDataObjectType.getValue());
                    }
                }
            }
        }
        // NOTE(review): the catalog update runs before the outputs are set on the model, so this
        // update call does not see the new process outputs — confirm that this order is intended.
        expCatalog.update(ExperimentCatalogModelType.PROCESS, processModel, processId);
        processModel.setProcessOutputs(applicationOutputs);
        if (processContext.getJobSubmissionProtocol() == JobSubmissionProtocol.UNICORE) {
            // process monitor mode set in getResourceJobManager method, but unicore doesn't have resource job manager.
            // hence we set process monitor mode here.
            processContext.setMonitorMode(MonitorMode.FORK);
        } else {
            processContext.setResourceJobManager(getResourceJobManager(processContext));
            processContext.setJobSubmissionRemoteCluster(Factory.getJobSubmissionRemoteCluster(processContext));
            processContext.setDataMovementRemoteCluster(Factory.getDataMovementRemoteCluster(processContext));
        }
        // Local working directory is <localDataLocation>/<processId>; left unset when no location is configured.
        String inputPath = ServerSettings.getLocalDataLocation();
        if (inputPath != null) {
            processContext.setLocalWorkingDir((inputPath.endsWith("/") ? inputPath : inputPath + "/") + processContext.getProcessId());
        }
        // Attach the first job model registered for this process, if any.
        List<Object> jobModels = expCatalog.get(ExperimentCatalogModelType.JOB, "processId", processId);
        if (jobModels != null && !jobModels.isEmpty()) {
            if (jobModels.size() > 1) {
                log.warn("Process has more than one job model, take first one");
            }
            processContext.setJobModel(((JobModel) jobModels.get(0)));
        }
        return processContext;
    // NOTE(review): in the handlers below processContext is still null if the failure happened
    // before builder.build() returned — verify that saveErrorModel/updateProcessFailure accept null.
    } catch (AppCatalogException e) {
        String msg = "App catalog access exception ";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (RegistryException e) {
        String msg = "Registry access exception";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (AiravataException e) {
        String msg = "Remote cluster initialization error";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    }
}
Also used : ProcessModel(org.apache.airavata.model.process.ProcessModel) ExperimentCatalog(org.apache.airavata.registry.cpi.ExperimentCatalog) ProcessStatus(org.apache.airavata.model.status.ProcessStatus) AppCatalog(org.apache.airavata.registry.cpi.AppCatalog) RegistryException(org.apache.airavata.registry.cpi.RegistryException) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) AppCatalogException(org.apache.airavata.registry.cpi.AppCatalogException) StorageResourceDescription(org.apache.airavata.model.appcatalog.storageresource.StorageResourceDescription) GFacException(org.apache.airavata.gfac.core.GFacException) OutputDataObjectType(org.apache.airavata.model.application.io.OutputDataObjectType) ApplicationInterfaceDescription(org.apache.airavata.model.appcatalog.appinterface.ApplicationInterfaceDescription) JobModel(org.apache.airavata.model.job.JobModel) AiravataException(org.apache.airavata.common.exception.AiravataException)

Aggregations

ProcessContext (org.apache.airavata.gfac.core.context.ProcessContext)24 GFacException (org.apache.airavata.gfac.core.GFacException)15 JobModel (org.apache.airavata.model.job.JobModel)11 TaskStatus (org.apache.airavata.model.status.TaskStatus)11 ErrorModel (org.apache.airavata.model.commons.ErrorModel)10 IOException (java.io.IOException)7 URISyntaxException (java.net.URISyntaxException)7 ApplicationSettingsException (org.apache.airavata.common.exception.ApplicationSettingsException)7 TException (org.apache.thrift.TException)7 AiravataException (org.apache.airavata.common.exception.AiravataException)5 CredentialStoreException (org.apache.airavata.credential.store.store.CredentialStoreException)5 RemoteCluster (org.apache.airavata.gfac.core.cluster.RemoteCluster)5 OutputDataObjectType (org.apache.airavata.model.application.io.OutputDataObjectType)5 JobStatus (org.apache.airavata.model.status.JobStatus)5 AppCatalogException (org.apache.airavata.registry.cpi.AppCatalogException)5 JSchException (com.jcraft.jsch.JSchException)4 Session (com.jcraft.jsch.Session)4 File (java.io.File)4 URI (java.net.URI)4 JobSubmissionTask (org.apache.airavata.gfac.core.task.JobSubmissionTask)4