use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.
the class GATJob method processMetricEvent.
// MetricListener interface implementation
/**
 * Reacts to a job state notification delivered by the GAT metric mechanism.
 *
 * Only two transitions are handled; every other state is ignored:
 * <ul>
 * <li>{@code STOPPED}: the job is reported as completed or failed. With the globus adaptor the decision is
 * based on whether the standard error file is non-empty; otherwise it is based on the job exit status.</li>
 * <li>{@code SUBMISSION_ERROR}: the job is reported as failed, unless the globus adaptor is in use and the
 * resource manager reports {@code NO_ERROR}, in which case it is reported as completed.</li>
 * </ul>
 * In both cases the tracer slot is released (when tracing is active) and this job is removed from
 * {@code RUNNING_JOBS} before the listener is notified.
 *
 * @param value the metric event; its source is the GAT job and its value is the new job state
 */
@Override
public void processMetricEvent(MetricEvent value) {
    Job job = (Job) value.getSource();
    JobState newJobState = (JobState) value.getValue();
    JobDescription jd = (JobDescription) job.getJobDescription();
    SoftwareDescription sd = jd.getSoftwareDescription();
    Integer jobId = (Integer) sd.getAttributes().get("jobId");
    logger.debug("Processing job ID = " + jobId);

    /*
     * Check if either the job has finished or there has been a submission error. We don't care about other state
     * transitions
     */
    boolean stopped = (newJobState == JobState.STOPPED);
    boolean submissionError = (newJobState == JobState.SUBMISSION_ERROR);
    if (!stopped && !submissionError) {
        return;
    }

    // Both terminal transitions release the tracing slot taken by this job
    if (Tracer.isActivated()) {
        Integer slot = (Integer) sd.getAttributes().get("slot");
        String host = getResourceNode().getHost();
        Tracer.freeSlot(host, slot);
    }

    if (stopped) {
        /*
         * We must check whether the chosen adaptor is globus. In that case, since globus doesn't provide the exit
         * status of a job, we must examine the standard error file
         */
        try {
            if (usingGlobus) {
                File errFile = sd.getStderr();
                // Error file should always be in the same host as the IT
                File localFile = GAT.createFile(context, errFile.toGATURI());
                if (localFile.length() > 0) {
                    // Non-empty stderr is treated as an execution failure
                    GATjob = null;
                    RUNNING_JOBS.remove(this);
                    ErrorManager.warn("Error when creating file.");
                    listener.jobFailed(this, JobEndStatus.EXECUTION_FAILED);
                } else {
                    // Keep the (empty) error file around only when debugging
                    if (!debug) {
                        localFile.delete();
                    }
                    RUNNING_JOBS.remove(this);
                    listener.jobCompleted(this);
                }
            } else {
                if (job.getExitStatus() == 0) {
                    RUNNING_JOBS.remove(this);
                    listener.jobCompleted(this);
                } else {
                    GATjob = null;
                    RUNNING_JOBS.remove(this);
                    listener.jobFailed(this, JobEndStatus.EXECUTION_FAILED);
                }
            }
        } catch (Exception e) {
            ErrorManager.fatal(CALLBACK_PROCESSING_ERR + ": " + this, e);
        }
    } else {
        try {
            // Constant-first equals: a missing "resManError" entry must count as a failure instead of
            // raising a NullPointerException that would escape this callback
            if (usingGlobus && "NO_ERROR".equals(job.getInfo().get("resManError"))) {
                RUNNING_JOBS.remove(this);
                listener.jobCompleted(this);
            } else {
                GATjob = null;
                RUNNING_JOBS.remove(this);
                listener.jobFailed(this, JobEndStatus.SUBMISSION_FAILED);
            }
        } catch (GATInvocationException e) {
            ErrorManager.fatal(CALLBACK_PROCESSING_ERR + ": " + this, e);
        }
    }
}
use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.
the class GATJob method submit.
/**
 * Builds the GAT job description for this job and submits it through the resource broker associated with
 * the destination found in the description's resource attributes.
 *
 * Brokers are looked up in {@code brokers} by destination and created (then stored) on first use. This
 * object is registered as the metric listener for job state transitions. On a successful submission the job
 * is added to {@code RUNNING_JOBS} and remembered in {@code GATjob}.
 *
 * @throws Exception if preparing the job, creating the broker or submitting the job fails; when tracing is
 *         active, the slot taken by the job is released before a submission failure is rethrown
 */
@Override
public void submit() throws Exception {
    logger.info("Submit GATJob with ID " + jobId);

    // Prepare the job
    JobDescription description = prepareJob();

    // Get a broker for the host, creating and storing it on first use
    String destination = (String) description.getResourceDescription().getResourceAttribute(RES_ATTR);
    ResourceBroker resourceBroker = brokers.get(destination);
    if (resourceBroker == null) {
        resourceBroker = GAT.createResourceBroker(context, new URI(destination));
        brokers.put(destination, resourceBroker);
    }

    // Submit the job, registering this object for notifications of job state transitions
    Job submitted;
    try {
        submitted = resourceBroker.submitJob(description, this, JOB_STATUS);
        RUNNING_JOBS.add(this);
    } catch (Exception e) {
        if (Tracer.isActivated()) {
            String host = ((GATWorkerNode) worker.getNode()).getHost();
            Integer slot = (Integer) description.getSoftwareDescription().getAttributes().get("slot");
            Tracer.freeSlot(host, slot);
        }
        throw e;
    }

    // Update mapping
    GATjob = submitted;
}
use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.
the class LsfResourceBrokerAdaptor method submitJob.
/**
 * Submits a job by running it locally through a "blaunch" command line that targets the host of the
 * broker URI.
 *
 * Steps, in order: validate the description, create the sandbox and the LsfJob, register the metric
 * listener, pre-stage the sandbox, build the blaunch process bundle, start it, wire stderr/stdout/stdin
 * according to the software description, and finally start state monitoring.
 *
 * @param abstractDescription the job description; must be a JobDescription
 * @param listener optional listener for job metrics (may be null)
 * @param metricDefinitionName name of the metric the listener is registered for (may be null)
 * @return the submitted job: the LsfJob itself, or a WrapperJobCpi around it when the description is a
 *         WrapperJobDescription
 * @throws GATInvocationException if the description is not supported, the user home or sandbox directory
 *         cannot be found, a stream cannot be created, or the command cannot be started
 *
 * @see org.gridlab.gat.resources.ResourceBroker#submitJob(org.gridlab.gat.resources.JobDescription)
 */
public Job submitJob(AbstractJobDescription abstractDescription, MetricListener listener, String metricDefinitionName) throws GATInvocationException {
    // --- Validation of the incoming description ---
    if (!(abstractDescription instanceof JobDescription)) {
        throw new GATInvocationException("can only handle JobDescriptions: " + abstractDescription.getClass());
    }
    JobDescription description = (JobDescription) abstractDescription;
    SoftwareDescription sd = description.getSoftwareDescription();
    if (sd == null) {
        throw new GATInvocationException("The job description does not contain a software description");
    }
    if (description.getProcessCount() < 1) {
        throw new GATInvocationException("Adaptor cannot handle: process count < 1: " + description.getProcessCount());
    }
    // NOTE(review): the check rejects any count other than 1 (including 0), but the message says "> 1"
    if (description.getResourceCount() != 1) {
        throw new GATInvocationException("Adaptor cannot handle: resource count > 1: " + description.getResourceCount());
    }
    String home = System.getProperty("user.home");
    if (home == null) {
        throw new GATInvocationException("lsf broker could not get user home dir");
    }
    // --- Sandbox and job objects ---
    Sandbox sandbox = new Sandbox(gatContext, description, "localhost", home, true, true, false, false);
    LsfJob lsfJob = new LsfJob(gatContext, description, sandbox);
    Job job = null;
    if (description instanceof WrapperJobDescription) {
        // The wrapper becomes both the returned job and the listener registered on the LsfJob below
        WrapperJobCpi tmp = new WrapperJobCpi(gatContext, lsfJob, listener, metricDefinitionName);
        listener = tmp;
        job = tmp;
    } else {
        job = lsfJob;
    }
    if (listener != null && metricDefinitionName != null) {
        Metric metric = lsfJob.getMetricDefinitionByName(metricDefinitionName).createMetric(null);
        lsfJob.addMetricListener(listener, metric);
    }
    // --- Pre-staging ---
    lsfJob.setState(Job.JobState.PRE_STAGING);
    lsfJob.waitForTrigger(Job.JobState.PRE_STAGING);
    sandbox.prestage();
    // Prefer the executable resolved by the sandbox; otherwise take it from the description
    String exe;
    if (sandbox.getResolvedExecutable() != null) {
        exe = sandbox.getResolvedExecutable().getPath();
    // try to set the executable bit, it might be lost
    /* CDIAZ: The command "exe" can be also in a remote host
    * The command must have the right permissions in the remote host
    try {
    new CommandRunner("chmod", "+x", exe);
    } catch (Throwable t) {
    // ignore
    }
    */
    } else {
        exe = getExecutable(description);
    }
    String[] args = getArgumentsArray(description);
    // Directory where the lsf command will be executed
    java.io.File f = new java.io.File(sandbox.getSandboxPath());
    if (!f.exists()) {
        throw new GATInvocationException("Unable to find directory " + f.getAbsolutePath());
    }
    // Check and set the environment for a blaunch command
    Map<String, Object> env = sd.getEnvironment();
    this.prepareBLaunchEnv(env);
    // Encapsulate the original command into a blaunch command
    String host = brokerURI.getHost();
    String blExe = this.getBlaunchCommand();
    String[] blArgs = this.getBlaunchArgs(host, exe, args);
    ProcessBundle bundle = new ProcessBundle(description.getProcessCount(), blExe, blArgs, f, env);
    // --- Launch ---
    lsfJob.setSubmissionTime();
    lsfJob.setState(Job.JobState.SCHEDULED);
    try {
        // The job is marked RUNNING before the bundle is actually started
        lsfJob.setState(Job.JobState.RUNNING);
        lsfJob.waitForTrigger(Job.JobState.RUNNING);
        lsfJob.setStartTime();
        bundle.startBundle();
        lsfJob.setProcess(bundle);
    } catch (IOException e) {
        throw new CommandNotFoundException("LsfResourceBrokerAdaptor", e);
    }
    // --- Standard error: forwarded to a file or discarded, unless streaming is enabled ---
    if (!sd.streamingStderrEnabled()) {
        try {
            if (sd.getStderr() != null) {
                OutputStream err = GAT.createFileOutputStream(gatContext, sd.getStderr());
                // to file
                StreamForwarder forwarder = new StreamForwarder(bundle.getStderr(), err, sd.getExecutable() + " [stderr]");
                lsfJob.setErrorStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stderr forwarder to file " + sd.getStderr());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStderr(), null, sd.getExecutable() + " [stderr]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stderr!", e);
        }
    }
    // --- Standard output: forwarded to a file or discarded, unless streaming is enabled ---
    if (!sd.streamingStdoutEnabled()) {
        // read away the stdout
        try {
            if (sd.getStdout() != null) {
                // to file
                OutputStream out = GAT.createFileOutputStream(gatContext, sd.getStdout());
                StreamForwarder forwarder = new StreamForwarder(bundle.getStdout(), out, sd.getExecutable() + " [stdout]");
                lsfJob.setOutputStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stdout forwarder to file " + sd.getStdout());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStdout(), null, sd.getExecutable() + " [stdout]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stdout!", e);
        }
    }
    // --- Standard input: fed from a file when one is configured and streaming is disabled ---
    if (!sd.streamingStdinEnabled() && sd.getStdin() != null) {
        // forward the stdin from file
        try {
            InputStream in = GAT.createFileInputStream(gatContext, sd.getStdin());
            bundle.setStdin(sd.getExecutable(), in);
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file input stream for stdin!", e);
        }
    }
    lsfJob.monitorState();
    return job;
}
use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.
the class SshLSFJob method getJobState.
/**
 * Queries LSF for the status of the given job and updates this job's state accordingly.
 *
 * The status is obtained by submitting a helper job that runs {@code bjobs -noheader <jobID>} and writes
 * the status column to a local temporary file, which is then read and mapped to a GAT job state. Only one
 * query runs at a time: concurrent callers wait on this object until {@code jobStateBusy} is cleared.
 *
 * Nothing is done when the job is already in POST_STAGING, STOPPED or SUBMISSION_ERROR. A mapped state of
 * STOPPED is recorded as POST_STAGING; any other mapped state is recorded as is.
 *
 * @param jobID the LSF job identifier passed to {@code bjobs}
 * @throws GATInvocationException if the helper job cannot be set up, does not finish successfully, or its
 *         result cannot be read
 */
protected void getJobState(String jobID) throws GATInvocationException {
    // Serialize status queries: wait until no other query is in progress, then claim the flag
    synchronized (this) {
        while (jobStateBusy) {
            try {
                wait();
            } catch (InterruptedException e) {
                // ignored
            }
        }
        jobStateBusy = true;
    }
    JobState resultState;
    try {
        if (state == JobState.POST_STAGING || state == JobState.STOPPED || state == JobState.SUBMISSION_ERROR) {
            return;
        }
        logger.debug("Getting task status in setState()");
        // getting the status via ssh ... bjobs
        java.io.File squeueResultFile = null;
        try {
            // Create the status query job
            SoftwareDescription sd = new SoftwareDescription();
            // Use /bin/sh, so that $USER gets expanded.
            sd.setExecutable("/bin/sh");
            sd.setArguments("-c", "bjobs -noheader " + jobID + " | awk {' print $3 '}");
            sd.addAttribute(SoftwareDescription.SANDBOX_USEROOT, "true");
            squeueResultFile = java.io.File.createTempFile("GAT", "tmp");
            try {
                sd.setStdout(GAT.createFile(subContext, new URI("file:///" + squeueResultFile.getAbsolutePath().replace(File.separatorChar, '/'))));
            } catch (Throwable e1) {
                throw new GATInvocationException("Could not create GAT object for temporary " + squeueResultFile.getAbsolutePath(), e1);
            }
            JobDescription jd = new JobDescription(sd);
            Job job = jobHelper.submitJob(jd, this, "job.status");
            // Block until the helper job reaches a terminal state
            synchronized (job) {
                while (job.getState() != Job.JobState.STOPPED && job.getState() != Job.JobState.SUBMISSION_ERROR) {
                    try {
                        job.wait();
                    } catch (InterruptedException e) {
                        // ignore
                    }
                }
            }
            if (job.getState() != Job.JobState.STOPPED || job.getExitStatus() != 0) {
                throw new GATInvocationException("Could not submit squeue job " + sd.toString());
            }
            // submit success.
            String status;
            BufferedReader in = new BufferedReader(new FileReader(squeueResultFile.getAbsolutePath()));
            try {
                status = in.readLine();
            } finally {
                // Always release the file handle; this method is called repeatedly while polling
                in.close();
            }
            // No output line at all: treat it as an empty status string
            if (status == null) {
                status = "";
            }
            if (logger.isDebugEnabled()) {
                logger.debug("squeue line: " + status);
            }
            resultState = mapLSFStatetoGAT(status);
        } catch (IOException e) {
            logger.debug("retrieving job status sshslurmjob failed", e);
            throw new GATInvocationException("Unable to retrieve the Job Status", e);
        } finally {
            // The temp file may not exist if createTempFile itself failed; do not mask that error
            if (squeueResultFile != null) {
                squeueResultFile.delete();
            }
        }
    } finally {
        synchronized (this) {
            jobStateBusy = false;
            notifyAll();
        }
    }
    if (resultState != JobState.STOPPED) {
        setState(resultState);
    } else {
        setState(JobState.POST_STAGING);
    }
}
use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.
the class SshLsfResourceBrokerAdaptor method sshLsfSubmission.
/*private java.io.File createJobScript(JobDescription description)
throws GATInvocationException {
java.io.File temp;
SoftwareDescription sd = description.getSoftwareDescription();
try {
temp = java.io.File.createTempFile("lsf-sub", null);
} catch (IOException e) {
throw new GATInvocationException("Cannot create file", e);
}
PrintWriter job = null;
try {
job = new PrintWriter(new BufferedWriter(new FileWriter(temp)));
job.print("#!/bin/sh\n");
job.print("# job script\n");
// Support DIRECTORY
String dir = sd.getStringAttribute(SoftwareDescription.DIRECTORY, null);
if (dir != null) {
job.print("cd " + dir + "\n");
}
// Support environment.
Map<String, Object> env = sd.getEnvironment();
if (env != null) {
Set<String> s = env.keySet();
Object[] keys = s.toArray();
for (int i = 0; i < keys.length; i++) {
String val = (String) env.get(keys[i]);
job.print(keys[i] + "=" + val + " && export " + keys[i] + "\n");
}
}
// Construct command.
StringBuffer cmd = new StringBuffer();
cmd.append(sd.getExecutable().toString());
if (sd.getArguments() != null) {
String[] args = sd.getArguments();
for (int i = 0; i < args.length; ++i) {
cmd.append(" ");
cmd.append(args[i]);
}
}
job.print(cmd.toString() + "\n");
//job.print("exit $?\n");
} catch (Throwable e) {
throw new GATInvocationException(
"Cannot create temporary job script file "
+ temp.getAbsolutePath(), e);
} finally {
if (job != null)
job.close();
}
return temp;
}*/
/**
 * Pre-stages the sandbox and submits the given bsub script through the sub-broker by running
 * {@code sh -c "bsub < script 2>submit.err"}, then extracts the LSF job identifier from the first line
 * bsub wrote to standard output.
 *
 * The sandbox directory is removed (best effort) whenever the submission fails. The temporary result file
 * and the bsub script file are always deleted before returning.
 *
 * @param lsfJob the LSF job being submitted (not used by this method)
 * @param description the original job description; its software attributes are copied to the bsub job
 * @param bsubFile local file containing the bsub script; it is pre-staged and deleted afterwards
 * @param subBroker broker used to run the bsub command
 * @param sandbox sandbox of the job
 * @return the text between {@code "Job <"} and the following {@code ">"} in the first output line, or the
 *         whole line when that pattern is not present
 * @throws GATInvocationException if pre-staging fails, the bsub job cannot be created or does not finish
 *         successfully, or its output is missing or cannot be read
 */
private String sshLsfSubmission(SshLSFJob lsfJob, JobDescription description, java.io.File bsubFile, ResourceBroker subBroker, Sandbox sandbox) throws GATInvocationException {
    java.io.File slurmResultFile = null;
    try {
        if (logger.isDebugEnabled()) {
            logger.debug("***** Doing sandbox prestage " + sandbox.getSandboxPath());
        }
        sandbox.prestage();
        if (logger.isDebugEnabled()) {
            logger.debug("***** Sandbox prestage done " + sandbox.getSandboxPath());
        }
        // Create the bsub job
        SoftwareDescription sd = new SoftwareDescription();
        sd.setExecutable("sh");
        sd.setArguments("-c", "bsub < " + bsubFile.getName() + " 2>submit.err");
        // + " && rm -rf " + bsubFile.getName() + " submit.err");
        sd.setAttributes(description.getSoftwareDescription().getAttributes());
        sd.addAttribute(SoftwareDescription.SANDBOX_USEROOT, "true");
        slurmResultFile = java.io.File.createTempFile("GAT", "tmp");
        try {
            sd.setStdout(GAT.createFile(gatContext, new URI(slurmResultFile.toURI())));
            sd.addPreStagedFile(GAT.createFile(gatContext, new URI(bsubFile.toURI())));
        } catch (Throwable e1) {
            removeSandboxQuietly(sandbox);
            throw new GATInvocationException("Could not create GAT object for temporary " + slurmResultFile.getAbsolutePath(), e1);
        }
        JobDescription jd = new JobDescription(sd);
        if (logger.isDebugEnabled()) {
            logger.debug("Submitting lsf job: " + sd);
        }
        Job job = subBroker.submitJob(jd, this, "job.status");
        if (logger.isDebugEnabled()) {
            logger.debug("Job submitted.");
        }
        // Block until the bsub job reaches a terminal state
        synchronized (job) {
            while (job.getState() != Job.JobState.STOPPED && job.getState() != Job.JobState.SUBMISSION_ERROR) {
                try {
                    job.wait();
                } catch (InterruptedException e) {
                    // ignore
                }
            }
        }
        if (job.getState() != Job.JobState.STOPPED || job.getExitStatus() != 0) {
            removeSandboxQuietly(sandbox);
            logger.debug("jobState = " + job.getState() + ", exit status = " + job.getExitStatus());
            throw new GATInvocationException("Could not submit LSF job");
        }
        // submit success.
        String result;
        BufferedReader in = new BufferedReader(new FileReader(slurmResultFile.getAbsolutePath()));
        try {
            result = in.readLine();
        } finally {
            // Always release the file handle
            in.close();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("bsub result line = " + result);
        }
        if (result == null) {
            // bsub wrote nothing to stdout, so there is no job identifier to return
            removeSandboxQuietly(sandbox);
            throw new GATInvocationException("Could not submit LSF job: bsub produced no output");
        }
        // Check for LSF bsub result ...
        // TODO Check if LSF return the same
        String jobPrefix = "Job <";
        int prefixPos = result.indexOf(jobPrefix);
        if (prefixPos >= 0) {
            int idStart = prefixPos + jobPrefix.length();
            int idEnd = result.indexOf('>', idStart);
            // Only cut out the identifier when the closing bracket is really there
            if (idEnd >= 0) {
                result = result.substring(idStart, idEnd);
            }
        }
        return result;
    } catch (IOException e) {
        removeSandboxQuietly(sandbox);
        throw new GATInvocationException("Got IOException", e);
    } finally {
        // The result file does not exist yet if prestage or createTempFile failed; do not mask that error
        if (slurmResultFile != null) {
            slurmResultFile.delete();
        }
        bsubFile.delete();
    }
}

/** Removes the sandbox directory, deliberately ignoring any failure (best-effort cleanup). */
private void removeSandboxQuietly(Sandbox sandbox) {
    try {
        sandbox.removeSandboxDir();
    } catch (Throwable ignored) {
        // ignore: the caller is already reporting the primary failure
    }
}
Aggregations