use of org.apache.airavata.model.commons.ErrorModel in project airavata by apache.
the class GFacEngineImpl method configureWorkspace.
/**
 * Runs the environment-setup task for the given task context and persists the resulting status.
 *
 * <p>The task status is first set to EXECUTING and published, then the environment-setup task is
 * executed and its resulting status is published as well. If the task ends in the FAILED state the
 * failure is logged, stored as a task error and reported to the caller as a {@link GFacException}.
 *
 * @param taskContext context whose sub task model must be an {@code EnvironmentSetupTaskModel}
 * @param recover     forwarded unchanged to {@code executeTask}
 * @return always {@code false} when the method completes without throwing
 * @throws GFacException if the security protocol is neither SSH_KEYS nor LOCAL, if the task ends
 *                       in the FAILED state, or if a {@code TException} is raised while working
 *                       with the task model
 */
private boolean configureWorkspace(TaskContext taskContext, boolean recover) throws GFacException {
    try {
        EnvironmentSetupTaskModel subTaskModel = (EnvironmentSetupTaskModel) taskContext.getSubTaskModel();
        Task envSetupTask = null;
        if (subTaskModel.getProtocol() == SecurityProtocol.SSH_KEYS || subTaskModel.getProtocol() == SecurityProtocol.LOCAL) {
            envSetupTask = new EnvironmentSetupTask();
        } else {
            throw new GFacException("Unsupported security protocol, Airavata doesn't support " + subTaskModel.getProtocol().name() + " protocol yet.");
        }

        // Publish the EXECUTING state before the task actually runs.
        TaskStatus status = new TaskStatus(TaskState.EXECUTING);
        status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        taskContext.setTaskStatus(status);
        GFacUtils.saveAndPublishTaskStatus(taskContext);

        // Run the task and publish whatever state it ended in.
        TaskStatus taskStatus = executeTask(taskContext, envSetupTask, recover);
        taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        taskContext.setTaskStatus(taskStatus);
        GFacUtils.saveAndPublishTaskStatus(taskContext);

        if (taskStatus.getState() == TaskState.FAILED) {
            // This is the environment-setup step, so the log line names it as such
            // (it previously reported "Input staging failed").
            log.error("expId: {}, processId: {}, taskId: {} type: {},:- Environment Setup failed, reason: {}", taskContext.getParentProcessContext().getExperimentId(), taskContext.getParentProcessContext().getProcessId(), taskContext.getTaskId(), envSetupTask.getType().name(), taskStatus.getReason());
            ProcessContext processContext = taskContext.getParentProcessContext();
            String errorMsg = new StringBuilder("expId: ").append(processContext.getExperimentId()).append(", processId: ").append(processContext.getProcessId()).append(", taskId: ").append(taskContext.getTaskId()).append(", type: ").append(taskContext.getTaskType().name()).append(" :- Environment Setup failed. Reason: ").append(taskStatus.getReason()).toString();
            ErrorModel errorModel = new ErrorModel();
            errorModel.setUserFriendlyMessage("Error while environment setup");
            errorModel.setActualErrorMessage(errorMsg);
            GFacUtils.saveTaskError(taskContext, errorModel);
            throw new GFacException("Error while environment setup");
        }
    } catch (TException e) {
        throw new GFacException("Couldn't get environment setup task model", e);
    }
    return false;
}
use of org.apache.airavata.model.commons.ErrorModel in project airavata by apache.
the class GFacEngineImpl method checkFailures.
/**
 * Turns a FAILED task status into a persisted task error and a {@link GFacException}.
 *
 * <p>Does nothing when the status is in any other state.
 *
 * @param taskContext context of the task that was executed
 * @param taskStatus  status produced by the task
 * @param task        the task that was executed; its type is used in the messages
 * @throws GFacException if {@code taskStatus} is in the FAILED state
 */
private void checkFailures(TaskContext taskContext, TaskStatus taskStatus, Task task) throws GFacException {
    if (taskStatus.getState() != TaskState.FAILED) {
        return;
    }

    final String taskTypeLabel = task.getType().toString();
    log.error("expId: {}, processId: {}, taskId: {} type: {},:- " + taskTypeLabel + " failed, reason: {}",
            taskContext.getParentProcessContext().getExperimentId(),
            taskContext.getParentProcessContext().getProcessId(),
            taskContext.getTaskId(),
            task.getType().name(),
            taskStatus.getReason());

    // Detailed message stored as the actual error of the task.
    final String failureDetail = "expId: " + taskContext.getParentProcessContext().getExperimentId()
            + ", processId: " + taskContext.getParentProcessContext().getProcessId()
            + ", taskId: " + taskContext.getTaskId()
            + ", type: " + taskContext.getTaskType().name()
            + " :- " + taskTypeLabel + " failed. Reason: " + taskStatus.getReason();

    ErrorModel failure = new ErrorModel();
    failure.setUserFriendlyMessage("Error while executing " + taskTypeLabel + " task");
    failure.setActualErrorMessage(failureDetail);
    GFacUtils.saveTaskError(taskContext, failure);

    throw new GFacException("Error: userFriendly msg :" + failure.getUserFriendlyMessage()
            + ", actual msg :" + failure.getActualErrorMessage());
}
use of org.apache.airavata.model.commons.ErrorModel in project airavata by apache.
the class GFacWorker method run.
/**
 * Drives the process held by this worker according to its current {@link ProcessState}.
 *
 * <p>New processes are executed, in-flight processes are either continued or recovered depending
 * on {@code continueTaskFlow}, completed processes are finalised and cancelling processes are
 * cancelled. Afterwards, if the process is flagged as cancelled, the state is re-read to decide
 * whether to cancel, acknowledge, or wait.
 *
 * <p>A {@link GFacException} raised along the way is logged, the process is marked FAILED, the
 * error is saved on both experiment and process, and an acknowledgement is sent.
 */
@Override
public void run() {
    try {
        ProcessState processState = processContext.getProcessState();
        switch (processState) {
            case CREATED:
            case VALIDATED:
            case STARTED:
                executeProcess();
                break;
            case PRE_PROCESSING:
            case CONFIGURING_WORKSPACE:
            case INPUT_DATA_STAGING:
            case EXECUTING:
            case MONITORING:
            case OUTPUT_DATA_STAGING:
            case POST_PROCESSING:
                if (continueTaskFlow) {
                    continueTaskExecution();
                } else {
                    recoverProcess();
                }
                break;
            case COMPLETED:
                completeProcess();
                break;
            case CANCELLING:
                cancelProcess();
                break;
            case CANCELED:
                // TODO - implement cancel scenario
                break;
            case FAILED:
                // TODO - implement failed scenario
                break;
            default:
                throw new GFacException("process Id : " + processId + " Couldn't identify process type");
        }
        if (processContext.isCancel()) {
            // The state may have changed while the process was being handled above, so read it again.
            processState = processContext.getProcessState();
            switch (processState) {
                case MONITORING:
                case EXECUTING:
                    // don't send ack if the process is in MONITORING or EXECUTING states, wait until cancel email comes to airavata
                    break;
                case CANCELLING:
                    cancelProcess();
                    break;
                default:
                    sendAck();
                    Factory.getGfacContext().removeProcess(processContext.getProcessId());
                    break;
            }
        }
    } catch (GFacException e) {
        log.error("GFac Worker throws an exception", e);
        ProcessStatus status = new ProcessStatus(ProcessState.FAILED);
        status.setReason(e.getMessage());
        status.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        processContext.setProcessStatus(status);

        // Capture the full stack trace as text so it can be stored in the error model.
        StringWriter errors = new StringWriter();
        e.printStackTrace(new PrintWriter(errors));
        ErrorModel errorModel = new ErrorModel();
        errorModel.setUserFriendlyMessage("GFac Worker throws an exception");
        errorModel.setActualErrorMessage(errors.toString());
        errorModel.setCreationTime(AiravataUtils.getCurrentTimestamp().getTime());
        try {
            GFacUtils.saveAndPublishProcessStatus(processContext);
            GFacUtils.saveExperimentError(processContext, errorModel);
            GFacUtils.saveProcessError(processContext, errorModel);
        } catch (GFacException e1) {
            // Pass e1 as the trailing argument so the logger records why persisting failed
            // instead of silently dropping the cause.
            log.error("expId: {}, processId: {} :- Couldn't save and publish process status {}", processContext.getExperimentId(), processContext.getProcessId(), processContext.getProcessState(), e1);
        }
        sendAck();
    }
}
use of org.apache.airavata.model.commons.ErrorModel in project airavata by apache.
the class DefaultJobSubmissionTask method execute.
/**
 * Submits the job described by the task context to the remote cluster and records the outcome.
 *
 * <p>A job file is generated and submitted as a batch job. Depending on the submission output the
 * method either
 * <ul>
 *   <li>marks the task as FAILED (non-zero exit code or reported submission failure), saves the
 *       error on experiment, process and task, publishes the status and returns immediately;</li>
 *   <li>stores the returned job id, marks the job SUBMITTED (and QUEUED when verification by job id
 *       succeeds) and, if enabled on the compute resource, runs the gateway usage reporting
 *       command; or</li>
 *   <li>when no job id was returned, tries up to three times to discover it through
 *       {@code verifyJobSubmission}, sleeping 10, 20 and 30 seconds after the failed attempts.</li>
 * </ul>
 * Exceptions do not escape this method: they are logged and converted into a FAILED status with an
 * {@link ErrorModel} attached to the task model. If the thread is interrupted while waiting, the
 * interrupt flag is restored before returning.
 *
 * @param taskContext context of the job submission task; its parent process context supplies the
 *                    job model, the remote cluster and the working directory
 * @return the final task status, which has also been set on {@code taskContext} and passed to
 *         {@code GFacUtils.saveAndPublishTaskStatus}
 */
@Override
public TaskStatus execute(TaskContext taskContext) {
    // Start optimistically as COMPLETED; every failure path below downgrades it to FAILED.
    TaskStatus taskStatus = new TaskStatus(TaskState.COMPLETED);
    // Remembers an interrupt so the flag can be restored after the status has been persisted.
    boolean interrupted = false;
    try {
        ProcessContext processContext = taskContext.getParentProcessContext();
        JobModel jobModel = processContext.getJobModel();
        jobModel.setTaskId(taskContext.getTaskId());
        RemoteCluster remoteCluster = processContext.getJobSubmissionRemoteCluster();
        GroovyMap groovyMap = GFacUtils.createGroovyMap(processContext, taskContext);
        groovyMap.getStringValue(Script.JOB_NAME).ifPresent(jobName -> jobModel.setJobName(jobName));
        ResourceJobManager resourceJobManager = GFacUtils.getResourceJobManager(processContext);
        JobManagerConfiguration jConfig = null;
        if (resourceJobManager != null) {
            jConfig = Factory.getJobManagerConfiguration(resourceJobManager);
        }
        JobStatus jobStatus = new JobStatus();
        File jobFile = GFacUtils.createJobFile(groovyMap, taskContext, jConfig);
        if (jobFile != null && jobFile.exists()) {
            jobModel.setJobDescription(FileUtils.readFileToString(jobFile));
            JobSubmissionOutput jobSubmissionOutput = remoteCluster.submitBatchJob(jobFile.getPath(), processContext.getWorkingDir());
            int exitCode = jobSubmissionOutput.getExitCode();
            jobModel.setExitCode(exitCode);
            jobModel.setStdErr(jobSubmissionOutput.getStdErr());
            jobModel.setStdOut(jobSubmissionOutput.getStdOut());
            String jobId = jobSubmissionOutput.getJobId();
            String experimentId = taskContext.getExperimentId();
            if (exitCode != 0 || jobSubmissionOutput.isJobSubmissionFailed()) {
                // Submission failed: record the failure everywhere and return straight away.
                jobModel.setJobId(DEFAULT_JOB_ID);
                if (jobSubmissionOutput.isJobSubmissionFailed()) {
                    List<JobStatus> statusList = new ArrayList<>();
                    statusList.add(new JobStatus(JobState.FAILED));
                    statusList.get(0).setReason(jobSubmissionOutput.getFailureReason());
                    jobModel.setJobStatuses(statusList);
                    GFacUtils.saveJobModel(processContext, jobModel);
                    log.error("expId: {}, processid: {}, taskId: {} :- Job submission failed for job name {}", experimentId, taskContext.getProcessId(), taskContext.getTaskId(), jobModel.getJobName());
                    ErrorModel errorModel = new ErrorModel();
                    errorModel.setUserFriendlyMessage(jobSubmissionOutput.getFailureReason());
                    errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
                    GFacUtils.saveExperimentError(processContext, errorModel);
                    GFacUtils.saveProcessError(processContext, errorModel);
                    GFacUtils.saveTaskError(taskContext, errorModel);
                    taskStatus.setState(TaskState.FAILED);
                    taskStatus.setReason("Job submission command didn't return a jobId");
                    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    taskContext.setTaskStatus(taskStatus);
                } else {
                    String msg;
                    GFacUtils.saveJobModel(processContext, jobModel);
                    ErrorModel errorModel = new ErrorModel();
                    if (exitCode != Integer.MIN_VALUE) {
                        msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " return non zero exit code:" + exitCode + " for JobName:" + jobModel.getJobName() + ", with failure reason : " + jobSubmissionOutput.getFailureReason() + " Hence changing job state to Failed.";
                        errorModel.setActualErrorMessage(jobSubmissionOutput.getFailureReason());
                    } else {
                        msg = "expId:" + processContext.getProcessModel().getExperimentId() + ", processId:" + processContext.getProcessId() + ", taskId: " + taskContext.getTaskId() + " doesn't return valid job submission exit code for JobName:" + jobModel.getJobName() + ", with failure reason : stdout ->" + jobSubmissionOutput.getStdOut() + " stderr -> " + jobSubmissionOutput.getStdErr() + " Hence changing job state to Failed.";
                        errorModel.setActualErrorMessage(msg);
                    }
                    log.error(msg);
                    errorModel.setUserFriendlyMessage(msg);
                    GFacUtils.saveExperimentError(processContext, errorModel);
                    GFacUtils.saveProcessError(processContext, errorModel);
                    GFacUtils.saveTaskError(taskContext, errorModel);
                    taskStatus.setState(TaskState.FAILED);
                    taskStatus.setReason(msg);
                    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    taskContext.setTaskStatus(taskStatus);
                }
                try {
                    GFacUtils.saveAndPublishTaskStatus(taskContext);
                } catch (GFacException e) {
                    log.error("Error while saving task status", e);
                }
                return taskStatus;
            } else if (jobId != null && !jobId.isEmpty()) {
                // Submission returned a job id: persist it and mark the job SUBMITTED.
                jobModel.setJobId(jobId);
                GFacUtils.saveJobModel(processContext, jobModel);
                jobStatus.setJobState(JobState.SUBMITTED);
                ComputeResourceDescription computeResourceDescription = taskContext.getParentProcessContext().getComputeResourceDescription();
                jobStatus.setReason("Successfully Submitted to " + computeResourceDescription.getHostName());
                jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                jobModel.setJobStatuses(Arrays.asList(jobStatus));
                GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                if (verifyJobSubmissionByJobId(remoteCluster, jobId)) {
                    jobStatus.setJobState(JobState.QUEUED);
                    jobStatus.setReason("Verification step succeeded");
                    jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                    jobModel.setJobStatuses(Arrays.asList(jobStatus));
                    GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                }
                // doing gateway reporting
                if (computeResourceDescription.isGatewayUsageReporting()) {
                    String loadCommand = computeResourceDescription.getGatewayUsageModuleLoadCommand();
                    String usageExecutable = computeResourceDescription.getGatewayUsageExecutable();
                    ExperimentModel experiment = (ExperimentModel) taskContext.getParentProcessContext().getExperimentCatalog().get(ExperimentCatalogModelType.EXPERIMENT, experimentId);
                    String username = experiment.getUserName() + "@" + taskContext.getParentProcessContext().getUsageReportingGatewayId();
                    // NOTE(review): the command line is assembled by string concatenation; confirm
                    // that user name and gateway id cannot contain shell metacharacters.
                    RawCommandInfo rawCommandInfo = new RawCommandInfo(loadCommand + " && " + usageExecutable + " -gateway_user " + username + " -submit_time \"`date '+%F %T %:z'`\" -jobid " + jobId);
                    remoteCluster.execute(rawCommandInfo);
                }
                taskStatus = new TaskStatus(TaskState.COMPLETED);
                taskStatus.setReason("Submitted job to compute resource");
                taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            } else {
                // No job id came back: try to discover it, backing off 10s, 20s, 30s between tries.
                int verificationTryCount = 0;
                while (verificationTryCount++ < 3) {
                    String verifyJobId = verifyJobSubmission(remoteCluster, jobModel);
                    if (verifyJobId != null && !verifyJobId.isEmpty()) {
                        // JobStatus either changed from SUBMITTED to QUEUED or directly to QUEUED
                        jobId = verifyJobId;
                        jobModel.setJobId(jobId);
                        GFacUtils.saveJobModel(processContext, jobModel);
                        jobStatus.setJobState(JobState.QUEUED);
                        jobStatus.setReason("Verification step succeeded");
                        jobStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                        jobModel.setJobStatuses(Arrays.asList(jobStatus));
                        GFacUtils.saveJobStatus(taskContext.getParentProcessContext(), jobModel);
                        taskStatus.setState(TaskState.COMPLETED);
                        taskStatus.setReason("Submitted job to compute resource");
                        taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
                        break;
                    }
                    log.info("Verify step return invalid jobId, retry verification step in {} secs", verificationTryCount * 10);
                    Thread.sleep(verificationTryCount * 10000);
                }
            }
            if (jobId == null || jobId.isEmpty()) {
                // Neither submission nor verification produced a job id.
                jobModel.setJobId(DEFAULT_JOB_ID);
                GFacUtils.saveJobModel(processContext, jobModel);
                String msg = "expId:" + processContext.getProcessModel().getExperimentId() + " Couldn't find " + "remote jobId for JobName:" + jobModel.getJobName() + ", both submit and verify steps " + "doesn't return a valid JobId. " + "Hence changing experiment state to Failed";
                log.error(msg);
                ErrorModel errorModel = new ErrorModel();
                errorModel.setUserFriendlyMessage(msg);
                errorModel.setActualErrorMessage(msg);
                GFacUtils.saveExperimentError(processContext, errorModel);
                GFacUtils.saveProcessError(processContext, errorModel);
                GFacUtils.saveTaskError(taskContext, errorModel);
                taskStatus.setState(TaskState.FAILED);
                taskStatus.setReason("Couldn't find job id in both submitted and verified steps");
                taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
            } else {
                GFacUtils.saveJobModel(processContext, jobModel);
            }
        } else {
            taskStatus.setState(TaskState.FAILED);
            if (jobFile == null) {
                taskStatus.setReason("JobFile is null");
            } else {
                taskStatus.setReason("Job file doesn't exist");
            }
            // Stamp the state change like every other failure path in this method does.
            taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
        }
    } catch (AppCatalogException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while instantiating app catalog", e);
    } catch (ApplicationSettingsException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while creating job descriptor", e);
    } catch (GFacException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while submitting the job", e);
    } catch (IOException e) {
        recordSubmissionFailure(taskContext, taskStatus, "Error while reading the content of the job file", e);
    } catch (InterruptedException e) {
        interrupted = true;
        recordSubmissionFailure(taskContext, taskStatus, "Error occurred while verifying the job submission", e);
    } catch (Throwable e) {
        recordSubmissionFailure(taskContext, taskStatus, "JobSubmission failed", e);
    }
    taskContext.setTaskStatus(taskStatus);
    try {
        GFacUtils.saveAndPublishTaskStatus(taskContext);
    } catch (GFacException e) {
        log.error("Error while saving task status", e);
    }
    if (interrupted) {
        // Restore the flag so callers can observe the interruption. This is done after the
        // status save above in case that call performs interruptible blocking work.
        Thread.currentThread().interrupt();
    }
    return taskStatus;
}

/**
 * Logs a submission failure, marks the given status as FAILED and attaches an error to the task model.
 *
 * @param taskContext context whose task model receives the error
 * @param taskStatus  status object to downgrade to FAILED
 * @param msg         user-friendly message, also used as the status reason and log message
 * @param cause       the exception that aborted the submission
 */
private void recordSubmissionFailure(TaskContext taskContext, TaskStatus taskStatus, String msg, Throwable cause) {
    log.error(msg, cause);
    taskStatus.setState(TaskState.FAILED);
    taskStatus.setReason(msg);
    taskStatus.setTimeOfStateChange(AiravataUtils.getCurrentTimestamp().getTime());
    ErrorModel errorModel = new ErrorModel();
    errorModel.setActualErrorMessage(cause.getMessage());
    errorModel.setUserFriendlyMessage(msg);
    taskContext.getTaskModel().setTaskErrors(Arrays.asList(errorModel));
}
use of org.apache.airavata.model.commons.ErrorModel in project airavata by apache.
the class ExperimentExecution method processMessage.
/**
 * Handles a status message and writes the results of finished experiments to {@code resultWriter}.
 *
 * <p>For EXPERIMENT messages the event is decoded; a COMPLETED experiment has its outputs fetched
 * and listed, a FAILED experiment has its errors fetched and listed. JOB messages are decoded but
 * not otherwise used. The writer is flushed at the end in every case.
 *
 * @param message the received message; only the EXPERIMENT and JOB types are looked at
 */
private void processMessage(MessageContext message) {
    if (message.getType().equals(MessageType.EXPERIMENT)) {
        try {
            ExperimentStatusChangeEvent event = new ExperimentStatusChangeEvent();
            TBase messageEvent = message.getEvent();
            byte[] bytes = ThriftUtils.serializeThriftObject(messageEvent);
            ThriftUtils.createThriftFromBytes(bytes, event);
            ExperimentState expState = event.getState();
            String expId = event.getExperimentId();
            String gatewayId = event.getGatewayId();
            // Constant-first comparison so an event without a state is ignored instead of
            // raising a NullPointerException.
            if (ExperimentState.COMPLETED.equals(expState)) {
                resultWriter.println("Results for experiment : " + expId + " of gateway Id : " + gatewayId);
                resultWriter.println("=====================================================================");
                resultWriter.println("Status : " + ExperimentState.COMPLETED.toString());
                // check file transfers
                List<OutputDataObjectType> experimentOutputs = airavata.getExperimentOutputs(authzToken, expId);
                int i = 1;
                for (OutputDataObjectType output : experimentOutputs) {
                    System.out.println("################ Experiment : " + expId + " COMPLETES ###################");
                    System.out.println("Output " + i + " : " + output.getValue());
                    resultWriter.println("Output " + i + " : " + output.getValue());
                    i++;
                }
                resultWriter.println("End of Results for Experiment : " + expId);
                resultWriter.println("=====================================================================");
            } else if (ExperimentState.FAILED.equals(expState)) {
                resultWriter.println("Results for experiment : " + expId + " of gateway Id : " + gatewayId);
                resultWriter.println("=====================================================================");
                int j = 1;
                resultWriter.println("Status : " + ExperimentState.FAILED.toString());
                System.out.println("################ Experiment : " + expId + " FAILED ###################");
                ExperimentModel experiment = airavata.getExperiment(authzToken, expId);
                List<ErrorModel> errors = experiment.getErrors();
                if (errors != null && !errors.isEmpty()) {
                    for (ErrorModel errorDetails : errors) {
                        System.out.println(errorDetails.getActualErrorMessage());
                        resultWriter.println("Actual Error : " + j + " : " + errorDetails.getActualErrorMessage());
                        resultWriter.println("User Friendly Message : " + j + " : " + errorDetails.getUserFriendlyMessage());
                        // Advance the counter so each error gets its own number
                        // (it previously stayed at 1 for every entry).
                        j++;
                    }
                }
                resultWriter.println("End of Results for Experiment : " + expId);
                resultWriter.println("=====================================================================");
            }
        } catch (TException e) {
            logger.error(e.getMessage(), e);
        }
    } else if (message.getType().equals(MessageType.JOB)) {
        try {
            // The job event is only decoded here; nothing is reported for it.
            JobStatusChangeEvent event = new JobStatusChangeEvent();
            TBase messageEvent = message.getEvent();
            byte[] bytes = ThriftUtils.serializeThriftObject(messageEvent);
            ThriftUtils.createThriftFromBytes(bytes, event);
        } catch (TException e) {
            logger.error(e.getMessage(), e);
        }
    }
    resultWriter.flush();
}
Aggregations