Search in sources :

Example 11 with JobModel

use of org.apache.airavata.model.job.JobModel in project airavata by apache.

the class EmailBasedMonitor method process.

/**
 * Applies a job status update parsed from a monitoring email to the job owned by the given task.
 *
 * <p>The method first determines the job's most recent recorded state (the status with the latest
 * {@code timeOfStateChange}), then decides from the reported state whether a new {@link JobStatus}
 * has to be saved, whether the job is removed from {@code jobMonitorMap}, and whether the outflow
 * tasks have to be started. QUEUED, ACTIVE, FAILED and CANCELED reports are ignored once the job has
 * already been recorded as COMPLETE.
 *
 * @param jobStatusResult status parsed from the email (job id, job name, state, authoritative flag)
 * @param taskContext     context of the monitoring task; supplies the parent process context and its job model
 */
private void process(JobStatusResult jobStatusResult, TaskContext taskContext) {
    canceledJobs.remove(jobStatusResult.getJobId());
    JobState resultState = jobStatusResult.getState();
    // TODO : update job state on process context
    boolean runOutflowTasks = false;
    // Stays without a job state unless one of the branches below decides a status change must be saved.
    JobStatus jobStatus = new JobStatus();
    ProcessContext parentProcessContext = taskContext.getParentProcessContext();
    JobModel jobModel = parentProcessContext.getJobModel();
    String jobDetails = "JobName : " + jobStatusResult.getJobName() + ", JobId : " + jobStatusResult.getJobId();

    // Find the most recently recorded state; on equal timestamps the later list entry wins.
    JobState currentState = null;
    List<JobStatus> jobStatusList = jobModel.getJobStatuses();
    if (jobStatusList != null && !jobStatusList.isEmpty()) {
        JobStatus lastStatus = jobStatusList.get(0);
        for (JobStatus temp : jobStatusList) {
            if (temp.getTimeOfStateChange() >= lastStatus.getTimeOfStateChange()) {
                lastStatus = temp;
            }
        }
        currentState = lastStatus.getJobState();
    }

    // FIXME - What if non-authoritative email comes later (getting accumulated in the email account)
    if (resultState == JobState.COMPLETE) {
        if (jobStatusResult.isAuthoritative()) {
            // An authoritative completion always ends monitoring for this job.
            jobMonitorMap.remove(jobStatusResult.getJobId());
            if (currentState == JobState.COMPLETE) {
                // Completion was already recorded (early custom email): no new status, no second outflow run.
                log.info("[EJM]: Authoritative job Complete email received after early Airavata custom complete email,"
                        + " removed job from job monitoring. {}", jobDetails);
            } else {
                runOutflowTasks = true;
                jobStatus.setJobState(JobState.COMPLETE);
                jobStatus.setReason("Complete email received");
                jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                log.info("[EJM]: Authoritative job Complete email received , removed job from job monitoring. {}", jobDetails);
            }
        } else {
            // Non-authoritative completion: record it and run outflow, but keep the job in the monitor map.
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.COMPLETE);
            jobStatus.setReason("Complete email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Non Authoritative Job Complete email received. {}", jobDetails);
        }
    } else if (resultState == JobState.QUEUED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            // nothing special thing to do, update the status change to rabbit mq at the end of this method.
            jobStatus.setJobState(JobState.QUEUED);
            jobStatus.setReason("Queue email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Queued email received, {}", jobDetails);
        }
    } else if (resultState == JobState.ACTIVE) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            // nothing special thing to do, update the status change to rabbit mq at the end of this method.
            jobStatus.setJobState(JobState.ACTIVE);
            jobStatus.setReason("Active email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Active email received, {}", jobDetails);
        }
    } else if (resultState == JobState.FAILED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.FAILED);
            jobStatus.setReason("Failed email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job failed email received , removed job from job monitoring. {}", jobDetails);
        }
    } else if (resultState == JobState.CANCELED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            jobStatus.setJobState(JobState.CANCELED);
            jobStatus.setReason("Canceled email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job canceled mail received, removed job from job monitoring. {}", jobDetails);
            // we run out flow and this will move process to cancel state.
            runOutflowTasks = true;
        }
    }

    // Persist and publish only when one of the branches above produced a new status.
    if (jobStatus.getJobState() != null) {
        try {
            jobModel.setJobStatuses(Arrays.asList(jobStatus));
            log.info("[EJM]: Publishing status changes to amqp. {}", jobDetails);
            GFacUtils.saveJobStatus(parentProcessContext, jobModel);
        } catch (GFacException e) {
            // The trailing exception argument makes the logger print the stack trace instead of dropping it.
            log.error("expId: {}, processId: {}, taskId: {}, jobId: {} :- Error while save and publishing Job " + "status {}", taskContext.getExperimentId(), taskContext.getProcessId(), jobModel.getTaskId(), jobModel.getJobId(), jobStatus.getJobState(), e);
        }
    }

    if (runOutflowTasks) {
        log.info("[EJM]: Calling Out Handler chain of {}", jobDetails);
        try {
            // The monitoring task itself is finished regardless of the job's final state.
            TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
            taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            taskStatus.setReason("Job monitoring completed with final state: " + TaskState.COMPLETED.name());
            taskContext.setTaskStatus(taskStatus);
            GFacUtils.saveAndPublishTaskStatus(taskContext);
            if (parentProcessContext.isCancel()) {
                ProcessStatus processStatus = new ProcessStatus(ProcessState.CANCELLING);
                processStatus.setReason("Process has been cancelled");
                parentProcessContext.setProcessStatus(processStatus);
                GFacUtils.saveAndPublishProcessStatus(parentProcessContext);
            }
            GFacThreadPoolExecutor.getCachedThreadPool().execute(new GFacWorker(parentProcessContext));
        } catch (GFacException e) {
            // A failure to start the outflow chain is an error, not an informational event.
            log.error("[EJM]: Error while running output tasks", e);
        }
    }
}
Also used : GFacException(org.apache.airavata.gfac.core.GFacException) GFacWorker(org.apache.airavata.gfac.impl.GFacWorker) JobModel(org.apache.airavata.model.job.JobModel) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext)

Example 12 with JobModel

use of org.apache.airavata.model.job.JobModel in project airavata by apache.

the class ExperimentRegistry method getJobIds.

/**
 * Returns the ids of all jobs selected by the given field/value filter.
 *
 * @param fieldName name of the job field to filter on; handed unchanged to {@code getJobList}
 * @param value     value the field is matched against; handed unchanged to {@code getJobList}
 * @return the job ids, in the order in which {@code getJobList} returned the jobs
 * @throws RegistryException declared for the underlying {@code getJobList} lookup
 */
public List<String> getJobIds(String fieldName, Object value) throws RegistryException {
    List<JobModel> matchingJobs = getJobList(fieldName, value);
    List<String> ids = new ArrayList<>();
    for (JobModel model : matchingJobs) {
        String id = model.getJobId();
        ids.add(id);
    }
    return ids;
}
Also used : JobModel(org.apache.airavata.model.job.JobModel)

Example 13 with JobModel

use of org.apache.airavata.model.job.JobModel in project airavata by apache.

the class RegistryServerHandler method getJobDetails.

/**
 * Get Job Details for all the jobs within an Experiment.
 * This method is to be used when the job details for one or many jobs of an Experiment are needed.
 *
 * <p>Jobs are collected by walking every process of the experiment, then every task of each process,
 * and looking up the jobs registered under each task id.
 *
 * @param airavataExperimentId identifier of the experiment whose jobs are requested
 * @return the job models found under the experiment's tasks; empty when there are none
 * @throws RegistryServiceException if the lookup fails for any reason, including when the experiment does
 *                                  not exist (the not-found exception raised inside the method is caught
 *                                  by the catch-all below and reported through this type)
 */
@Override
public List<JobModel> getJobDetails(String airavataExperimentId) throws RegistryServiceException, TException {
    try {
        experimentCatalog = RegistryFactory.getDefaultExpCatalog();
        if (!experimentCatalog.isExist(ExperimentCatalogModelType.EXPERIMENT, airavataExperimentId)) {
            // The message template must be the first argument; passing the id first made it the format string.
            logger.error("Error while retrieving job details, experiment {} doesn't exist.", airavataExperimentId);
            throw new ExperimentNotFoundException("Requested experiment id " + airavataExperimentId + " does not exist in the system..");
        }
        List<Object> processModels = experimentCatalog.get(ExperimentCatalogModelType.PROCESS, Constants.FieldConstants.ProcessConstants.EXPERIMENT_ID, airavataExperimentId);
        List<JobModel> jobList = new ArrayList<>();
        if (processModels != null && !processModels.isEmpty()) {
            for (Object process : processModels) {
                ProcessModel processModel = (ProcessModel) process;
                List<TaskModel> tasks = processModel.getTasks();
                if (tasks != null && !tasks.isEmpty()) {
                    for (TaskModel taskModel : tasks) {
                        String taskId = taskModel.getTaskId();
                        List<Object> jobs = experimentCatalog.get(ExperimentCatalogModelType.JOB, Constants.FieldConstants.JobConstants.TASK_ID, taskId);
                        // A task without registered jobs must not abort the whole request.
                        if (jobs != null) {
                            for (Object jobObject : jobs) {
                                jobList.add((JobModel) jobObject);
                            }
                        }
                    }
                }
            }
        }
        logger.debug("Airavata retrieved job models for experiment with experiment id : {}", airavataExperimentId);
        return jobList;
    } catch (Exception e) {
        // Exception passed last so the logger records the stack trace alongside the message.
        logger.error("Error while retrieving the job details for experiment {}", airavataExperimentId, e);
        RegistryServiceException exception = new RegistryServiceException();
        exception.setMessage("Error while retrieving the job details. More info : " + e.getMessage());
        throw exception;
    }
}
Also used : ProcessModel(org.apache.airavata.model.process.ProcessModel) RegistryServiceException(org.apache.airavata.registry.api.exception.RegistryServiceException) JobModel(org.apache.airavata.model.job.JobModel) TaskModel(org.apache.airavata.model.task.TaskModel) RegistryServiceException(org.apache.airavata.registry.api.exception.RegistryServiceException) TException(org.apache.thrift.TException) ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException)

Example 14 with JobModel

use of org.apache.airavata.model.job.JobModel in project airavata by apache.

the class RegistryServerHandler method getDetailedExperimentTree.

/**
 * Get Complete Experiment Details
 * Fetch the complete nested tree structure of previously created experiment metadata, which includes
 * processes -> tasks -> jobs information.
 *
 * <p>For every task of every process, the jobs registered under the task id are loaded, ordered by
 * ascending creation time and attached to the task. A registry failure while loading one task's jobs
 * is logged and that task is left without jobs; the remaining tasks are still processed.
 *
 * @param airavataExperimentId The identifier for the requested experiment. This is returned during the create experiment step.
 * @return ExperimentModel
 * This method will return the previously stored experiment metadata including application input parameters, computational resource scheduling
 * information, special input output handling and additional quality of service parameters.
 * @throws RegistryServiceException if retrieving the experiment or its processes fails for any reason; the
 *                                  message carries the message of the underlying exception.
 */
@Override
public ExperimentModel getDetailedExperimentTree(String airavataExperimentId) throws RegistryServiceException, TException {
    try {
        ExperimentModel experimentModel = getExperimentInternal(airavataExperimentId);
        experimentCatalog = RegistryFactory.getDefaultExpCatalog();
        List<Object> processObjects = experimentCatalog.get(ExperimentCatalogModelType.PROCESS, Constants.FieldConstants.ExperimentConstants.EXPERIMENT_ID, experimentModel.getExperimentId());
        List<ProcessModel> processList = new ArrayList<>();
        if (processObjects != null) {
            for (Object processObject : processObjects) {
                ProcessModel processModel = (ProcessModel) processObject;
                // Process already has the task object
                List<TaskModel> tasks = processModel.getTasks();
                if (tasks != null) {
                    for (TaskModel task : tasks) {
                        try {
                            List<Object> jobObjects = experimentCatalog.get(ExperimentCatalogModelType.JOB, Constants.FieldConstants.JobConstants.TASK_ID, task.getTaskId());
                            if (jobObjects != null) {
                                List<JobModel> jobList = new ArrayList<>(jobObjects.size());
                                for (Object jobObject : jobObjects) {
                                    jobList.add((JobModel) jobObject);
                                }
                                // Compare the timestamps directly: casting their difference to int can
                                // truncate and flip the sign, producing a wrong order.
                                jobList.sort(Comparator.comparingLong(JobModel::getCreationTime));
                                task.setJobs(jobList);
                            }
                        } catch (RegistryException e) {
                            // Best effort: keep building the tree even if one task's jobs cannot be loaded.
                            logger.error(e.getMessage(), e);
                        }
                    }
                }
                processList.add(processModel);
            }
            experimentModel.setProcesses(processList);
        }
        logger.debug("Airavata retrieved detailed experiment with experiment id : {}", airavataExperimentId);
        return experimentModel;
    } catch (Exception e) {
        logger.error("Error while retrieving the experiment", e);
        RegistryServiceException exception = new RegistryServiceException();
        exception.setMessage("Error while retrieving the experiment. More info : " + e.getMessage());
        throw exception;
    }
}
Also used : ProcessModel(org.apache.airavata.model.process.ProcessModel) RegistryServiceException(org.apache.airavata.registry.api.exception.RegistryServiceException) RegistryServiceException(org.apache.airavata.registry.api.exception.RegistryServiceException) TException(org.apache.thrift.TException) ApplicationSettingsException(org.apache.airavata.common.exception.ApplicationSettingsException) JobModel(org.apache.airavata.model.job.JobModel)

Example 15 with JobModel

use of org.apache.airavata.model.job.JobModel in project airavata by apache.

the class GFacEngineImpl method populateProcessContext.

/**
 * Builds the {@link ProcessContext} for a process and fills it with the catalog data used later on.
 *
 * <p>Steps, in order: build the context from the app/experiment catalogs and gateway preferences, call
 * {@code checkpoint}, optionally apply user-level resource preferences, resolve the storage resource
 * (failing the process when it is missing), attach compute resource / deployment / interface
 * descriptions, default the STDOUT and STDERR output locations, update the process in the experiment
 * catalog, configure monitoring or remote clusters depending on the job submission protocol, set the
 * local working directory, and attach the first job model registered for the process, if any.
 *
 * @param processId id of the process to load from the experiment catalog
 * @param gatewayId id of the gateway whose profile and preferences are applied
 * @param tokenId   passed to the context builder (NOTE(review): presumably a credential store token - confirm)
 * @return the populated process context
 * @throws GFacException            if the storage resource cannot be found, or wrapping any
 *                                  {@code AppCatalogException}, {@code RegistryException} or
 *                                  {@code AiravataException} raised while populating the context
 * @throws CredentialStoreException declared by the signature; the visible body does not throw it directly
 */
@Override
public ProcessContext populateProcessContext(String processId, String gatewayId, String tokenId) throws GFacException, CredentialStoreException {
    // NOTE: Process context gives precedence to data come with process Computer resources;
    // Stays null until builder.build() succeeds, so the catch blocks below may receive a null context.
    ProcessContext processContext = null;
    ProcessContext.ProcessContextBuilder builder = new ProcessContext.ProcessContextBuilder(processId, gatewayId, tokenId);
    try {
        AppCatalog appCatalog = Factory.getDefaultAppCatalog();
        ExperimentCatalog expCatalog = Factory.getDefaultExpCatalog();
        ProcessModel processModel = (ProcessModel) expCatalog.get(ExperimentCatalogModelType.PROCESS, processId);
        builder.setAppCatalog(appCatalog).setExperimentCatalog(expCatalog).setCuratorClient(Factory.getCuratorClient()).setStatusPublisher(Factory.getStatusPublisher()).setProcessModel(processModel).setGatewayResourceProfile(appCatalog.getGatewayProfile().getGatewayProfile(gatewayId)).setGatewayComputeResourcePreference(appCatalog.getGatewayProfile().getComputeResourcePreference(gatewayId, processModel.getComputeResourceId())).setGatewayStorageResourcePreference(appCatalog.getGatewayProfile().getStoragePreference(gatewayId, processModel.getStorageResourceId()));
        processContext = builder.build();
        /* check point */
        checkpoint(processContext);
        // Apply user-level profile and compute preference only when the process asks for them.
        if (processModel.isUseUserCRPref()) {
            setUserResourceProfile(gatewayId, processContext);
            setUserComputeResourcePreference(gatewayId, processContext);
        }
        // Working directory is <scratch>/<processId>/; used below to default the stdout/stderr paths.
        String scratchLocation = processContext.getScratchLocation();
        String workingDirectory = scratchLocation + File.separator + processId + File.separator;
        StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
        if (storageResource != null) {
            processContext.setStorageResource(storageResource);
        } else {
            // we need to fail the process which will fail the experiment
            processContext.setProcessStatus(new ProcessStatus(ProcessState.FAILED));
            GFacUtils.saveAndPublishProcessStatus(processContext);
            throw new GFacException("expId: " + processModel.getExperimentId() + ", processId: " + processId + ":- Couldn't find storage resource for storage resource id :" + processModel.getStorageResourceId());
        }
        /*            StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
            if (storageResource != null){
                processContext.setStorageResource(storageResource);
            }*/
        processContext.setComputeResourceDescription(appCatalog.getComputeResource().getComputeResource(processContext.getComputeResourceId()));
        processContext.setApplicationDeploymentDescription(appCatalog.getApplicationDeployment().getApplicationDeployement(processModel.getApplicationDeploymentId()));
        ApplicationInterfaceDescription applicationInterface = appCatalog.getApplicationInterface().getApplicationInterface(processModel.getApplicationInterfaceId());
        processContext.setApplicationInterfaceDescription(applicationInterface);
        List<OutputDataObjectType> applicationOutputs = applicationInterface.getApplicationOutputs();
        // For STDOUT/STDERR outputs without a value, default to <workingDirectory><applicationName>.stdout/.stderr;
        // otherwise use the configured value as the location.
        if (applicationOutputs != null && !applicationOutputs.isEmpty()) {
            for (OutputDataObjectType outputDataObjectType : applicationOutputs) {
                if (outputDataObjectType.getType().equals(DataType.STDOUT)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        outputDataObjectType.setValue(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                        processContext.setStdoutLocation(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                    } else {
                        processContext.setStdoutLocation(outputDataObjectType.getValue());
                    }
                }
                if (outputDataObjectType.getType().equals(DataType.STDERR)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        String stderrLocation = workingDirectory + applicationInterface.getApplicationName() + ".stderr";
                        outputDataObjectType.setValue(stderrLocation);
                        processContext.setStderrLocation(stderrLocation);
                    } else {
                        processContext.setStderrLocation(outputDataObjectType.getValue());
                    }
                }
            }
        }
        // NOTE(review): the catalog update runs BEFORE the outputs are set on the model, so the outputs
        // set on the next line are not part of this update call - confirm this order is intended.
        expCatalog.update(ExperimentCatalogModelType.PROCESS, processModel, processId);
        processModel.setProcessOutputs(applicationOutputs);
        if (processContext.getJobSubmissionProtocol() == JobSubmissionProtocol.UNICORE) {
            // process monitor mode set in getResourceJobManager method, but unicore doesn't have resource job manager.
            // hence we set process monitor mode here.
            processContext.setMonitorMode(MonitorMode.FORK);
        } else {
            processContext.setResourceJobManager(getResourceJobManager(processContext));
            processContext.setJobSubmissionRemoteCluster(Factory.getJobSubmissionRemoteCluster(processContext));
            processContext.setDataMovementRemoteCluster(Factory.getDataMovementRemoteCluster(processContext));
        }
        // Local working dir is <localDataLocation>/<processId>; left unset when no local data location is configured.
        String inputPath = ServerSettings.getLocalDataLocation();
        if (inputPath != null) {
            processContext.setLocalWorkingDir((inputPath.endsWith("/") ? inputPath : inputPath + "/") + processContext.getProcessId());
        }
        // Attach an already-registered job model, if any; only the first one is used.
        List<Object> jobModels = expCatalog.get(ExperimentCatalogModelType.JOB, "processId", processId);
        if (jobModels != null && !jobModels.isEmpty()) {
            if (jobModels.size() > 1) {
                log.warn("Process has more than one job model, take first one");
            }
            processContext.setJobModel(((JobModel) jobModels.get(0)));
        }
        return processContext;
    } catch (AppCatalogException e) {
        // Each handler records the error, marks the process as failed and rethrows as GFacException with the cause.
        String msg = "App catalog access exception ";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (RegistryException e) {
        String msg = "Registry access exception";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (AiravataException e) {
        String msg = "Remote cluster initialization error";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    }
}
Also used : ProcessModel(org.apache.airavata.model.process.ProcessModel) ExperimentCatalog(org.apache.airavata.registry.cpi.ExperimentCatalog) ProcessStatus(org.apache.airavata.model.status.ProcessStatus) AppCatalog(org.apache.airavata.registry.cpi.AppCatalog) RegistryException(org.apache.airavata.registry.cpi.RegistryException) ProcessContext(org.apache.airavata.gfac.core.context.ProcessContext) AppCatalogException(org.apache.airavata.registry.cpi.AppCatalogException) StorageResourceDescription(org.apache.airavata.model.appcatalog.storageresource.StorageResourceDescription) GFacException(org.apache.airavata.gfac.core.GFacException) OutputDataObjectType(org.apache.airavata.model.application.io.OutputDataObjectType) ApplicationInterfaceDescription(org.apache.airavata.model.appcatalog.appinterface.ApplicationInterfaceDescription) JobModel(org.apache.airavata.model.job.JobModel) AiravataException(org.apache.airavata.common.exception.AiravataException)

Aggregations

JobModel (org.apache.airavata.model.job.JobModel)18 ProcessContext (org.apache.airavata.gfac.core.context.ProcessContext)10 ApplicationSettingsException (org.apache.airavata.common.exception.ApplicationSettingsException)7 GFacException (org.apache.airavata.gfac.core.GFacException)5 JobStatus (org.apache.airavata.model.status.JobStatus)5 AppCatalogException (org.apache.airavata.registry.cpi.AppCatalogException)5 IOException (java.io.IOException)4 RemoteCluster (org.apache.airavata.gfac.core.cluster.RemoteCluster)4 ErrorModel (org.apache.airavata.model.commons.ErrorModel)4 TaskStatus (org.apache.airavata.model.status.TaskStatus)4 File (java.io.File)3 JobSubmissionOutput (org.apache.airavata.gfac.core.cluster.JobSubmissionOutput)3 ResourceJobManager (org.apache.airavata.model.appcatalog.computeresource.ResourceJobManager)3 ProcessModel (org.apache.airavata.model.process.ProcessModel)3 RegistryException (org.apache.airavata.registry.cpi.RegistryException)3 ArrayList (java.util.ArrayList)2 AiravataException (org.apache.airavata.common.exception.AiravataException)2 TaskException (org.apache.airavata.gfac.core.task.TaskException)2 GFacWorker (org.apache.airavata.gfac.impl.GFacWorker)2 OutputDataObjectType (org.apache.airavata.model.application.io.OutputDataObjectType)2