Use of org.apache.airavata.model.job.JobModel in project airavata by apache.
The class EmailBasedMonitor, method process:
private void process(JobStatusResult jobStatusResult, TaskContext taskContext) {
    canceledJobs.remove(jobStatusResult.getJobId());
    JobState resultState = jobStatusResult.getState();
    // TODO : update job state on process context
    boolean runOutflowTasks = false;
    JobStatus jobStatus = new JobStatus();
    ProcessContext parentProcessContext = taskContext.getParentProcessContext();
    JobModel jobModel = parentProcessContext.getJobModel();
    String jobDetails = "JobName : " + jobStatusResult.getJobName() + ", JobId : " + jobStatusResult.getJobId();
    JobState currentState = null;
    List<JobStatus> jobStatusList = jobModel.getJobStatuses();
    if (jobStatusList != null && jobStatusList.size() > 0) {
        JobStatus lastStatus = jobStatusList.get(0);
        for (JobStatus temp : jobStatusList) {
            if (temp.getTimeOfStateChange() >= lastStatus.getTimeOfStateChange()) {
                lastStatus = temp;
            }
        }
        currentState = lastStatus.getJobState();
    }
    // FIXME - What if a non-authoritative email arrives later (accumulating in the email account)?
    if (resultState == JobState.COMPLETE) {
        if (jobStatusResult.isAuthoritative()) {
            if (currentState != null && currentState == JobState.COMPLETE) {
                jobMonitorMap.remove(jobStatusResult.getJobId());
                runOutflowTasks = false;
                log.info("[EJM]: Authoritative job Complete email received after early Airavata custom complete email,"
                        + " removed job from job monitoring. " + jobDetails);
            } else {
                jobMonitorMap.remove(jobStatusResult.getJobId());
                runOutflowTasks = true;
                jobStatus.setJobState(JobState.COMPLETE);
                jobStatus.setReason("Complete email received");
                jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                log.info("[EJM]: Authoritative job Complete email received, removed job from job monitoring. " + jobDetails);
            }
        } else {
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.COMPLETE);
            jobStatus.setReason("Complete email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Non Authoritative Job Complete email received. " + jobDetails);
        }
    } else if (resultState == JobState.QUEUED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            // nothing special to do; the status change is published to RabbitMQ at the end of this method.
            jobStatus.setJobState(JobState.QUEUED);
            jobStatus.setReason("Queue email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Queued email received, " + jobDetails);
        }
    } else if (resultState == JobState.ACTIVE) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            // nothing special to do; the status change is published to RabbitMQ at the end of this method.
            jobStatus.setJobState(JobState.ACTIVE);
            jobStatus.setReason("Active email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job Active email received, " + jobDetails);
        }
    } else if (resultState == JobState.FAILED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            runOutflowTasks = true;
            jobStatus.setJobState(JobState.FAILED);
            jobStatus.setReason("Failed email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job failed email received, removed job from job monitoring. " + jobDetails);
        }
    } else if (resultState == JobState.CANCELED) {
        // scheduler
        if (currentState != JobState.COMPLETE) {
            jobMonitorMap.remove(jobStatusResult.getJobId());
            jobStatus.setJobState(JobState.CANCELED);
            jobStatus.setReason("Canceled email received");
            jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            log.info("[EJM]: Job canceled mail received, removed job from job monitoring. " + jobDetails);
            // running the outflow moves the process to the cancelled state.
            runOutflowTasks = true;
        }
    }
    if (jobStatus.getJobState() != null) {
        try {
            jobModel.setJobStatuses(Arrays.asList(jobStatus));
            log.info("[EJM]: Publishing status changes to amqp. " + jobDetails);
            GFacUtils.saveJobStatus(parentProcessContext, jobModel);
        } catch (GFacException e) {
            log.error("expId: {}, processId: {}, taskId: {}, jobId: {} :- Error while saving and publishing job status {}",
                    taskContext.getExperimentId(), taskContext.getProcessId(), jobModel.getTaskId(), jobModel.getJobId(), jobStatus.getJobState());
        }
    }
    if (runOutflowTasks) {
        log.info("[EJM]: Calling Out Handler chain of " + jobDetails);
        try {
            TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
            taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            taskStatus.setReason("Job monitoring completed with final state: " + TaskState.COMPLETED.name());
            taskContext.setTaskStatus(taskStatus);
            GFacUtils.saveAndPublishTaskStatus(taskContext);
            if (parentProcessContext.isCancel()) {
                ProcessStatus processStatus = new ProcessStatus(ProcessState.CANCELLING);
                processStatus.setReason("Process has been cancelled");
                parentProcessContext.setProcessStatus(processStatus);
                GFacUtils.saveAndPublishProcessStatus(parentProcessContext);
            }
            GFacThreadPoolExecutor.getCachedThreadPool().execute(new GFacWorker(parentProcessContext));
        } catch (GFacException e) {
            log.error("[EJM]: Error while running output tasks", e);
        }
    }
}
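The method above resolves the job's current state by scanning JobModel.getJobStatuses() for the entry with the latest time of state change. A minimal, self-contained sketch of that lookup (the JobStateResolver class is illustrative, not part of Airavata; the import paths assume the org.apache.airavata.model.status package used elsewhere in the project):

import java.util.Comparator;
import java.util.List;
import org.apache.airavata.model.job.JobModel;
import org.apache.airavata.model.status.JobState;
import org.apache.airavata.model.status.JobStatus;

// Hypothetical helper mirroring the latest-status loop in process(): returns the most
// recent JobState recorded on a JobModel, or null when no statuses are stored yet.
final class JobStateResolver {

    static JobState latestState(JobModel jobModel) {
        List<JobStatus> statuses = jobModel.getJobStatuses();
        if (statuses == null || statuses.isEmpty()) {
            return null;
        }
        return statuses.stream()
                .max(Comparator.comparingLong(JobStatus::getTimeOfStateChange))
                .map(JobStatus::getJobState)
                .orElse(null);
    }
}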
Use of org.apache.airavata.model.job.JobModel in project airavata by apache.
The class ExperimentRegistry, method getJobIds:
public List<String> getJobIds(String fieldName, Object value) throws RegistryException {
    List<String> jobIds = new ArrayList<String>();
    List<JobModel> jobs = getJobList(fieldName, value);
    for (JobModel job : jobs) {
        jobIds.add(job.getJobId());
    }
    return jobIds;
}
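The same projection can be written with streams; a minimal sketch that assumes only the JobModel.getJobId() accessor used above (the JobIdProjection class itself is illustrative):

import java.util.List;
import java.util.stream.Collectors;
import org.apache.airavata.model.job.JobModel;

// Stream-based equivalent of the loop in getJobIds(): map each JobModel to its job id.
final class JobIdProjection {

    static List<String> toJobIds(List<JobModel> jobs) {
        return jobs.stream()
                .map(JobModel::getJobId)
                .collect(Collectors.toList());
    }
}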
Use of org.apache.airavata.model.job.JobModel in project airavata by apache.
The class RegistryServerHandler, method getJobDetails:
/**
 * Get job details for all the jobs within an experiment.
 * Use this method when the job details for one or many jobs of an experiment are needed.
 *
 * @param airavataExperimentId the identifier of the experiment whose jobs are requested
 * @return list of JobModel objects containing the job details
 */
@Override
public List<JobModel> getJobDetails(String airavataExperimentId) throws RegistryServiceException, TException {
    try {
        experimentCatalog = RegistryFactory.getDefaultExpCatalog();
        if (!experimentCatalog.isExist(ExperimentCatalogModelType.EXPERIMENT, airavataExperimentId)) {
            logger.error(airavataExperimentId, "Error while retrieving job details, experiment {} doesn't exist.", airavataExperimentId);
            throw new ExperimentNotFoundException("Requested experiment id " + airavataExperimentId + " does not exist in the system.");
        }
        List<Object> processModels = experimentCatalog.get(ExperimentCatalogModelType.PROCESS,
                Constants.FieldConstants.ProcessConstants.EXPERIMENT_ID, airavataExperimentId);
        List<JobModel> jobList = new ArrayList<>();
        if (processModels != null && !processModels.isEmpty()) {
            for (Object process : processModels) {
                ProcessModel processModel = (ProcessModel) process;
                List<TaskModel> tasks = processModel.getTasks();
                if (tasks != null && !tasks.isEmpty()) {
                    for (TaskModel taskModel : tasks) {
                        String taskId = taskModel.getTaskId();
                        List<Object> jobs = experimentCatalog.get(ExperimentCatalogModelType.JOB,
                                Constants.FieldConstants.JobConstants.TASK_ID, taskId);
                        for (Object jobObject : jobs) {
                            jobList.add((JobModel) jobObject);
                        }
                    }
                }
            }
        }
        logger.debug("Airavata retrieved job models for experiment with experiment id : " + airavataExperimentId);
        return jobList;
    } catch (Exception e) {
        logger.error(airavataExperimentId, "Error while retrieving the job details", e);
        RegistryServiceException exception = new RegistryServiceException();
        exception.setMessage("Error while retrieving the job details. More info : " + e.getMessage());
        throw exception;
    }
}
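A sketch of a caller consuming the list returned by getJobDetails(), grouping jobs by the task that produced them (the JobsByTask class is illustrative; it relies only on the getTaskId() accessor that JobModel already exposes):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.airavata.model.job.JobModel;

// Hypothetical consumer of getJobDetails(): groups the returned JobModels by task id
// so a caller can see which jobs each task of the experiment produced.
final class JobsByTask {

    static Map<String, List<JobModel>> group(List<JobModel> jobs) {
        return jobs.stream().collect(Collectors.groupingBy(JobModel::getTaskId));
    }
}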
Use of org.apache.airavata.model.job.JobModel in project airavata by apache.
The class RegistryServerHandler, method getDetailedExperimentTree:
/**
 * Get Complete Experiment Details
 * Fetch the complete nested tree structure of previously created experiment metadata, which includes
 * processes -> tasks -> jobs information.
 *
 * @param airavataExperimentId The identifier for the requested experiment. This is returned during the create experiment step.
 * @return ExperimentModel
 * This method will return the previously stored experiment metadata including application input parameters, computational resource scheduling
 * information, special input output handling and additional quality of service parameters.
 * @throws InvalidRequestException For any incorrect forming of the request itself.
 * @throws ExperimentNotFoundException If the specified experiment has not previously been created, an Experiment Not Found Exception is thrown.
 * @throws AiravataClientException The following list of exceptions is thrown, for which the Airavata client can take corrective action:
 * <p>
 * UNKNOWN_GATEWAY_ID - If a gateway is not registered with Airavata as a one-time administrative
 * step, the Airavata Registry will not have a provenance area set up. The client has to follow the
 * gateway registration steps and retry this request.
 * <p>
 * AUTHENTICATION_FAILURE - How authentication will be implemented is yet to be determined.
 * For now this is a placeholder.
 * <p>
 * INVALID_AUTHORIZATION - This will throw an authorization exception. When a more robust security handshake
 * is implemented, the authorization will be more substantial.
 * @throws AiravataSystemException This exception will be thrown for any Airavata server-side issue that cannot be corrected by the client;
 * an Airavata administrator will be notified to take corrective action.
 */
@Override
public ExperimentModel getDetailedExperimentTree(String airavataExperimentId) throws RegistryServiceException, TException {
    try {
        ExperimentModel experimentModel = getExperimentInternal(airavataExperimentId);
        experimentCatalog = RegistryFactory.getDefaultExpCatalog();
        List<Object> processObjects = experimentCatalog.get(ExperimentCatalogModelType.PROCESS,
                Constants.FieldConstants.ExperimentConstants.EXPERIMENT_ID, experimentModel.getExperimentId());
        List<ProcessModel> processList = new ArrayList<>();
        if (processObjects != null) {
            processObjects.stream().forEach(p -> {
                // Process already has the task objects
                ((ProcessModel) p).getTasks().stream().forEach(t -> {
                    try {
                        List<Object> jobObjects = experimentCatalog.get(ExperimentCatalogModelType.JOB,
                                Constants.FieldConstants.JobConstants.TASK_ID, ((TaskModel) t).getTaskId());
                        List<JobModel> jobList = new ArrayList<JobModel>();
                        if (jobObjects != null) {
                            jobObjects.stream().forEach(j -> jobList.add((JobModel) j));
                            // sort jobs by creation time; Long.compare avoids the overflow risk of casting a long difference to int
                            jobList.sort((o1, o2) -> Long.compare(o1.getCreationTime(), o2.getCreationTime()));
                            t.setJobs(jobList);
                        }
                    } catch (RegistryException e) {
                        logger.error(e.getMessage(), e);
                    }
                });
                processList.add((ProcessModel) p);
            });
            experimentModel.setProcesses(processList);
        }
        logger.debug("Airavata retrieved detailed experiment with experiment id : " + airavataExperimentId);
        return experimentModel;
    } catch (Exception e) {
        logger.error("Error while retrieving the experiment", e);
        RegistryServiceException exception = new RegistryServiceException();
        exception.setMessage("Error while retrieving the experiment. More info : " + e.getMessage());
        throw exception;
    }
}
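A sketch of walking the tree returned by getDetailedExperimentTree(), experiment -> processes -> tasks -> jobs (the ExperimentTreeWalker class is illustrative, not part of Airavata; the model import paths are assumptions based on the Airavata data-model packages):

import java.util.Collections;
import java.util.List;
import org.apache.airavata.model.experiment.ExperimentModel;
import org.apache.airavata.model.job.JobModel;
import org.apache.airavata.model.process.ProcessModel;
import org.apache.airavata.model.task.TaskModel;

// Hypothetical walker over the detailed experiment tree: prints one line per job,
// prefixed by the process and task that own it.
final class ExperimentTreeWalker {

    static void printJobs(ExperimentModel experiment) {
        List<ProcessModel> processes = experiment.getProcesses();
        if (processes == null) {
            return;
        }
        for (ProcessModel process : processes) {
            List<TaskModel> tasks = process.getTasks() == null ? Collections.<TaskModel>emptyList() : process.getTasks();
            for (TaskModel task : tasks) {
                List<JobModel> jobs = task.getJobs() == null ? Collections.<JobModel>emptyList() : task.getJobs();
                for (JobModel job : jobs) {
                    System.out.println(process.getProcessId() + " / " + task.getTaskId() + " / " + job.getJobId());
                }
            }
        }
    }
}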
Use of org.apache.airavata.model.job.JobModel in project airavata by apache.
The class GFacEngineImpl, method populateProcessContext:
@Override
public ProcessContext populateProcessContext(String processId, String gatewayId, String tokenId) throws GFacException, CredentialStoreException {
    // NOTE: the process context gives precedence to data that comes with the process's compute resources.
    ProcessContext processContext = null;
    ProcessContext.ProcessContextBuilder builder = new ProcessContext.ProcessContextBuilder(processId, gatewayId, tokenId);
    try {
        AppCatalog appCatalog = Factory.getDefaultAppCatalog();
        ExperimentCatalog expCatalog = Factory.getDefaultExpCatalog();
        ProcessModel processModel = (ProcessModel) expCatalog.get(ExperimentCatalogModelType.PROCESS, processId);
        builder.setAppCatalog(appCatalog)
                .setExperimentCatalog(expCatalog)
                .setCuratorClient(Factory.getCuratorClient())
                .setStatusPublisher(Factory.getStatusPublisher())
                .setProcessModel(processModel)
                .setGatewayResourceProfile(appCatalog.getGatewayProfile().getGatewayProfile(gatewayId))
                .setGatewayComputeResourcePreference(appCatalog.getGatewayProfile().getComputeResourcePreference(gatewayId, processModel.getComputeResourceId()))
                .setGatewayStorageResourcePreference(appCatalog.getGatewayProfile().getStoragePreference(gatewayId, processModel.getStorageResourceId()));
        processContext = builder.build();
        /* check point */
        checkpoint(processContext);
        if (processModel.isUseUserCRPref()) {
            setUserResourceProfile(gatewayId, processContext);
            setUserComputeResourcePreference(gatewayId, processContext);
        }
        String scratchLocation = processContext.getScratchLocation();
        String workingDirectory = scratchLocation + File.separator + processId + File.separator;
        StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
        if (storageResource != null) {
            processContext.setStorageResource(storageResource);
        } else {
            // we need to fail the process, which will fail the experiment
            processContext.setProcessStatus(new ProcessStatus(ProcessState.FAILED));
            GFacUtils.saveAndPublishProcessStatus(processContext);
            throw new GFacException("expId: " + processModel.getExperimentId() + ", processId: " + processId
                    + ":- Couldn't find storage resource for storage resource id :" + processModel.getStorageResourceId());
        }
        /* StorageResourceDescription storageResource = appCatalog.getStorageResource().getStorageResource(processModel.getStorageResourceId());
        if (storageResource != null) {
            processContext.setStorageResource(storageResource);
        } */
        processContext.setComputeResourceDescription(appCatalog.getComputeResource().getComputeResource(processContext.getComputeResourceId()));
        processContext.setApplicationDeploymentDescription(appCatalog.getApplicationDeployment().getApplicationDeployement(processModel.getApplicationDeploymentId()));
        ApplicationInterfaceDescription applicationInterface = appCatalog.getApplicationInterface().getApplicationInterface(processModel.getApplicationInterfaceId());
        processContext.setApplicationInterfaceDescription(applicationInterface);
        List<OutputDataObjectType> applicationOutputs = applicationInterface.getApplicationOutputs();
        if (applicationOutputs != null && !applicationOutputs.isEmpty()) {
            for (OutputDataObjectType outputDataObjectType : applicationOutputs) {
                if (outputDataObjectType.getType().equals(DataType.STDOUT)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        outputDataObjectType.setValue(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                        processContext.setStdoutLocation(workingDirectory + applicationInterface.getApplicationName() + ".stdout");
                    } else {
                        processContext.setStdoutLocation(outputDataObjectType.getValue());
                    }
                }
                if (outputDataObjectType.getType().equals(DataType.STDERR)) {
                    if (outputDataObjectType.getValue() == null || outputDataObjectType.getValue().equals("")) {
                        String stderrLocation = workingDirectory + applicationInterface.getApplicationName() + ".stderr";
                        outputDataObjectType.setValue(stderrLocation);
                        processContext.setStderrLocation(stderrLocation);
                    } else {
                        processContext.setStderrLocation(outputDataObjectType.getValue());
                    }
                }
            }
        }
        expCatalog.update(ExperimentCatalogModelType.PROCESS, processModel, processId);
        processModel.setProcessOutputs(applicationOutputs);
        if (processContext.getJobSubmissionProtocol() == JobSubmissionProtocol.UNICORE) {
            // the process monitor mode is normally set in getResourceJobManager(), but UNICORE doesn't have a
            // resource job manager, hence we set the monitor mode here.
            processContext.setMonitorMode(MonitorMode.FORK);
        } else {
            processContext.setResourceJobManager(getResourceJobManager(processContext));
            processContext.setJobSubmissionRemoteCluster(Factory.getJobSubmissionRemoteCluster(processContext));
            processContext.setDataMovementRemoteCluster(Factory.getDataMovementRemoteCluster(processContext));
        }
        String inputPath = ServerSettings.getLocalDataLocation();
        if (inputPath != null) {
            processContext.setLocalWorkingDir((inputPath.endsWith("/") ? inputPath : inputPath + "/") + processContext.getProcessId());
        }
        List<Object> jobModels = expCatalog.get(ExperimentCatalogModelType.JOB, "processId", processId);
        if (jobModels != null && !jobModels.isEmpty()) {
            if (jobModels.size() > 1) {
                log.warn("Process has more than one job model; taking the first one");
            }
            processContext.setJobModel((JobModel) jobModels.get(0));
        }
        return processContext;
    } catch (AppCatalogException e) {
        String msg = "App catalog access exception";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (RegistryException e) {
        String msg = "Registry access exception";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    } catch (AiravataException e) {
        String msg = "Remote cluster initialization error";
        saveErrorModel(processContext, e, msg);
        updateProcessFailure(processContext, msg);
        throw new GFacException(msg, e);
    }
}
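The stdout/stderr handling above defaults empty output locations to <scratch>/<processId>/<applicationName>.stdout and .stderr. A minimal sketch of that path construction (the StdStreamDefaults class and the example arguments are illustrative, not part of GFac):

import java.io.File;

// Hypothetical helper mirroring the defaulting logic in populateProcessContext():
// builds <scratchLocation>/<processId>/<applicationName><suffix> for ".stdout" or ".stderr".
final class StdStreamDefaults {

    static String defaultLocation(String scratchLocation, String processId, String applicationName, String suffix) {
        String workingDirectory = scratchLocation + File.separator + processId + File.separator;
        return workingDirectory + applicationName + suffix;
    }

    public static void main(String[] args) {
        // prints e.g. /scratch/airavata/process-123/gaussian.stdout on a POSIX file system
        System.out.println(defaultLocation("/scratch/airavata", "process-123", "gaussian", ".stdout"));
    }
}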