use of org.gridlab.gat.GATInvocationException in project compss by bsc-wdc.
the class LsfJob method waitForTrigger.
/**
 * Waits for the creation of a special trigger file (by the application) for
 * the given state, and removes that file once it has appeared.
 *
 * <p>The trigger file is named {@code jobName + "." +} the first three
 * characters of the state's name, and is expected in {@code triggerDirectory}.
 * The method returns immediately when no trigger directory or no job name is
 * configured. The file waiter is created on first use and then reused.
 *
 * @param state the job state whose trigger file is awaited
 * @throws GATInvocationException if the file waiter or the GAT file object for
 *         the trigger file cannot be created; the creation failure is attached
 *         as the cause
 */
void waitForTrigger(JobState state) throws GATInvocationException {
    if (triggerDirectory == null) {
        return;
    }
    if (jobName == null) {
        return;
    }
    if (waiter == null) {
        try {
            waiter = FileWaiter.createFileWaiter(GAT.createFile(gatContext, triggerDirectory));
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Could not create", e);
        }
    }
    String filename = jobName + "." + state.toString().substring(0, 3);
    File file;
    try {
        file = GAT.createFile(gatContext, triggerDirectory + "/" + filename);
    } catch (GATObjectCreationException e) {
        // Keep the cause, consistent with the waiter creation above.
        throw new GATInvocationException("Could not create", e);
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Waiting for " + filename + " in directory " + triggerDirectory);
    }
    waiter.waitFor(filename);
    if (logger.isDebugEnabled()) {
        logger.debug("Finished waiting for " + filename + " in directory " + triggerDirectory);
    }
    // Removal of the trigger file is serialized on the runtime class object.
    synchronized (this.getClass()) {
        if (!file.delete()) {
            // Best effort: a failed removal is only reported at debug level.
            if (logger.isDebugEnabled()) {
                logger.debug("Could not remove " + file.toGATURI());
            }
        }
    }
}
use of org.gridlab.gat.GATInvocationException in project compss by bsc-wdc.
the class LsfResourceBrokerAdaptor method submitJob.
/**
 * Submits the described job by wrapping its executable in a blaunch command
 * and starting it as a local process bundle.
 *
 * <p>Steps, in order: validate the description, create the sandbox and the
 * {@code LsfJob}, register the metric listener (if any), pre-stage the
 * sandbox, build and start the blaunch process bundle, wire up
 * stdout/stderr/stdin according to the software description, and start state
 * monitoring. The job moves through {@code PRE_STAGING}, {@code SCHEDULED}
 * and {@code RUNNING}; trigger files are awaited for {@code PRE_STAGING} and
 * {@code RUNNING}.
 *
 * @param abstractDescription the job description; must be a {@code JobDescription}
 *        with a software description, a process count of at least 1 and a
 *        resource count of exactly 1
 * @param listener metric listener to attach to the job, may be {@code null}
 * @param metricDefinitionName name of the metric the listener is registered
 *        for, may be {@code null}
 * @return the {@code LsfJob}, or a {@code WrapperJobCpi} around it when the
 *         description is a {@code WrapperJobDescription}
 * @throws GATInvocationException if the description is not supported, the user
 *         home or sandbox directory is unavailable, the process bundle cannot
 *         be started, or a stream to/from a stdio file cannot be created
 * @see org.gridlab.gat.resources.ResourceBroker
 */
public Job submitJob(AbstractJobDescription abstractDescription, MetricListener listener, String metricDefinitionName) throws GATInvocationException {
    if (!(abstractDescription instanceof JobDescription)) {
        throw new GATInvocationException("can only handle JobDescriptions: " + abstractDescription.getClass());
    }
    JobDescription description = (JobDescription) abstractDescription;
    SoftwareDescription sd = description.getSoftwareDescription();
    if (sd == null) {
        throw new GATInvocationException("The job description does not contain a software description");
    }
    if (description.getProcessCount() < 1) {
        throw new GATInvocationException("Adaptor cannot handle: process count < 1: " + description.getProcessCount());
    }
    if (description.getResourceCount() != 1) {
        // The check rejects every count other than 1 (including 0), so say so.
        throw new GATInvocationException("Adaptor cannot handle: resource count != 1: " + description.getResourceCount());
    }
    String home = System.getProperty("user.home");
    if (home == null) {
        throw new GATInvocationException("lsf broker could not get user home dir");
    }
    Sandbox sandbox = new Sandbox(gatContext, description, "localhost", home, true, true, false, false);
    LsfJob lsfJob = new LsfJob(gatContext, description, sandbox);
    Job job = null;
    if (description instanceof WrapperJobDescription) {
        // The wrapper is returned to the caller and takes the caller's place
        // as the listener registered on the underlying LsfJob.
        WrapperJobCpi tmp = new WrapperJobCpi(gatContext, lsfJob, listener, metricDefinitionName);
        listener = tmp;
        job = tmp;
    } else {
        job = lsfJob;
    }
    if (listener != null && metricDefinitionName != null) {
        Metric metric = lsfJob.getMetricDefinitionByName(metricDefinitionName).createMetric(null);
        lsfJob.addMetricListener(listener, metric);
    }
    lsfJob.setState(Job.JobState.PRE_STAGING);
    lsfJob.waitForTrigger(Job.JobState.PRE_STAGING);
    sandbox.prestage();
    String exe;
    if (sandbox.getResolvedExecutable() != null) {
        exe = sandbox.getResolvedExecutable().getPath();
        // The executable bit is deliberately not (re)set here (CDIAZ): the
        // command may also live on a remote host, where it must already have
        // the right permissions.
    } else {
        exe = getExecutable(description);
    }
    String[] args = getArgumentsArray(description);
    // Directory where the lsf command will be executed
    java.io.File f = new java.io.File(sandbox.getSandboxPath());
    if (!f.exists()) {
        throw new GATInvocationException("Unable to find directory " + f.getAbsolutePath());
    }
    // Check and set the environment for a blaunch command
    Map<String, Object> env = sd.getEnvironment();
    this.prepareBLaunchEnv(env);
    // Encapsulate the original command into a blaunch command
    String host = brokerURI.getHost();
    String blExe = this.getBlaunchCommand();
    String[] blArgs = this.getBlaunchArgs(host, exe, args);
    ProcessBundle bundle = new ProcessBundle(description.getProcessCount(), blExe, blArgs, f, env);
    lsfJob.setSubmissionTime();
    lsfJob.setState(Job.JobState.SCHEDULED);
    try {
        lsfJob.setState(Job.JobState.RUNNING);
        lsfJob.waitForTrigger(Job.JobState.RUNNING);
        lsfJob.setStartTime();
        bundle.startBundle();
        lsfJob.setProcess(bundle);
    } catch (IOException e) {
        throw new CommandNotFoundException("LsfResourceBrokerAdaptor", e);
    }
    if (!sd.streamingStderrEnabled()) {
        // stderr is not streamed to the caller, so it has to be read away here
        try {
            if (sd.getStderr() != null) {
                // to file
                OutputStream err = GAT.createFileOutputStream(gatContext, sd.getStderr());
                StreamForwarder forwarder = new StreamForwarder(bundle.getStderr(), err, sd.getExecutable() + " [stderr]");
                lsfJob.setErrorStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stderr forwarder to file " + sd.getStderr());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStderr(), null, sd.getExecutable() + " [stderr]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stderr!", e);
        }
    }
    if (!sd.streamingStdoutEnabled()) {
        // read away the stdout
        try {
            if (sd.getStdout() != null) {
                // to file
                OutputStream out = GAT.createFileOutputStream(gatContext, sd.getStdout());
                StreamForwarder forwarder = new StreamForwarder(bundle.getStdout(), out, sd.getExecutable() + " [stdout]");
                lsfJob.setOutputStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stdout forwarder to file " + sd.getStdout());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStdout(), null, sd.getExecutable() + " [stdout]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stdout!", e);
        }
    }
    if (!sd.streamingStdinEnabled() && sd.getStdin() != null) {
        // forward the stdin from file
        try {
            InputStream in = GAT.createFileInputStream(gatContext, sd.getStdin());
            bundle.setStdin(sd.getExecutable(), in);
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file input stream for stdin!", e);
        }
    }
    lsfJob.monitorState();
    return job;
}
use of org.gridlab.gat.GATInvocationException in project compss by bsc-wdc.
the class SshLSFJob method getJobState.
/**
 * Queries the status of the given LSF job and updates this job's state.
 *
 * <p>The status is obtained by submitting a helper job (through
 * {@code jobHelper}) that runs {@code bjobs -noheader <jobID>} and writes the
 * third output column to a local temporary file. The first line of that file
 * is mapped to a GAT state with {@code mapLSFStatetoGAT}. A mapped state of
 * {@code STOPPED} is recorded as {@code POST_STAGING}; any other state is set
 * as is. Nothing is queried when the job is already in {@code POST_STAGING},
 * {@code STOPPED} or {@code SUBMISSION_ERROR}.
 *
 * <p>Only one query runs at a time per job object: concurrent callers wait
 * until {@code jobStateBusy} is cleared.
 *
 * @param jobID the LSF job identifier passed to {@code bjobs}
 * @throws GATInvocationException if the helper job cannot be set up, does not
 *         finish with exit status 0, or its output cannot be read
 */
protected void getJobState(String jobID) throws GATInvocationException {
    // Serialize status queries: wait for a running query to finish, then claim the flag.
    synchronized (this) {
        while (jobStateBusy) {
            try {
                wait();
            } catch (InterruptedException e) {
                // ignored
            }
        }
        jobStateBusy = true;
    }
    JobState resultState;
    try {
        if (state == JobState.POST_STAGING || state == JobState.STOPPED || state == JobState.SUBMISSION_ERROR) {
            return;
        }
        logger.debug("Getting task status in setState()");
        // getting the status via ssh ... bjobs
        java.io.File statusFile = null;
        try {
            // Create bjobs job
            SoftwareDescription sd = new SoftwareDescription();
            // Use /bin/sh, so that the pipe into awk is interpreted by a shell.
            sd.setExecutable("/bin/sh");
            sd.setArguments("-c", "bjobs -noheader " + jobID + " | awk {' print $3 '}");
            sd.addAttribute(SoftwareDescription.SANDBOX_USEROOT, "true");
            statusFile = java.io.File.createTempFile("GAT", "tmp");
            try {
                sd.setStdout(GAT.createFile(subContext, new URI("file:///" + statusFile.getAbsolutePath().replace(File.separatorChar, '/'))));
            } catch (Throwable e1) {
                throw new GATInvocationException("Could not create GAT object for temporary " + statusFile.getAbsolutePath(), e1);
            }
            JobDescription jd = new JobDescription(sd);
            Job job = jobHelper.submitJob(jd, this, "job.status");
            // Block until the helper job has reached a final state.
            synchronized (job) {
                while (job.getState() != Job.JobState.STOPPED && job.getState() != Job.JobState.SUBMISSION_ERROR) {
                    try {
                        job.wait();
                    } catch (InterruptedException e) {
                        // ignore
                    }
                }
            }
            if (job.getState() != Job.JobState.STOPPED || job.getExitStatus() != 0) {
                throw new GATInvocationException("Could not submit squeue job " + sd.toString());
            }
            // submit success.
            String status;
            BufferedReader in = new BufferedReader(new FileReader(statusFile.getAbsolutePath()));
            try {
                status = in.readLine();
            } finally {
                // Always release the file handle, also when reading fails.
                in.close();
            }
            // No output line: presumably the job is unknown to bjobs or has
            // finished. Set to "" in this case. --Ceriel
            if (status == null) {
                status = "";
            }
            if (logger.isDebugEnabled()) {
                logger.debug("squeue line: " + status);
            }
            resultState = mapLSFStatetoGAT(status);
        } catch (IOException e) {
            logger.debug("retrieving job status sshslurmjob failed", e);
            throw new GATInvocationException("Unable to retrieve the Job Status", e);
        } finally {
            // statusFile is still null when createTempFile itself failed;
            // guard so that an NPE here does not mask the real exception.
            if (statusFile != null) {
                statusFile.delete();
            }
        }
    } finally {
        // Release the flag on every exit path and wake up waiting callers.
        synchronized (this) {
            jobStateBusy = false;
            notifyAll();
        }
    }
    if (resultState != JobState.STOPPED) {
        setState(resultState);
    } else {
        setState(JobState.POST_STAGING);
    }
}
use of org.gridlab.gat.GATInvocationException in project compss by bsc-wdc.
the class FileExample method start.
/**
 * Demonstrates basic GAT file operations: copies the file at {@code uri1} to
 * {@code uri2}, deletes the original, and then moves the copy to {@code uri3}.
 *
 * <p>Progress is printed to standard output and failures to standard error.
 * A failed create, copy or move aborts the remaining steps; a failed delete
 * is reported and the example continues with the move.
 *
 * @param uri1 location of the source file
 * @param uri2 location the source file is copied to
 * @param uri3 location the copy is moved to
 */
public void start(URI uri1, URI uri2, URI uri3) {
    File file1;
    try {
        file1 = GAT.createFile(uri1);
    } catch (GATObjectCreationException e) {
        System.err.println("failed to create file1 at location '" + uri1 + "': " + e);
        return;
    }
    try {
        file1.copy(uri2);
        System.out.println("file1 at location '" + uri1 + "' copied to file2 at location '" + uri2 + "'");
    } catch (GATInvocationException e) {
        System.err.println("failed to copy file1 at location '" + uri1 + "' to file2 at location '" + uri2 + "': " + e);
        return;
    }
    // delete() reports failure through its return value, so only claim
    // success when the file was actually removed.
    if (file1.delete()) {
        System.out.println("file1 at location '" + uri1 + "' deleted");
    } else {
        System.err.println("failed to delete file1 at location '" + uri1 + "'");
    }
    File file2;
    try {
        file2 = GAT.createFile(uri2);
    } catch (GATObjectCreationException e) {
        System.err.println("failed to create file2 at location '" + uri2 + "': " + e);
        return;
    }
    try {
        file2.move(uri3);
        System.out.println("file2 at location '" + uri2 + "' moved to file3 at location '" + uri3 + "'");
    } catch (GATInvocationException e) {
        System.err.println("failed to move file2 at location '" + uri2 + "' to file3 at location '" + uri3 + "': " + e);
    }
}
use of org.gridlab.gat.GATInvocationException in project compss by bsc-wdc.
the class SshLsfResourceBrokerAdaptor method createBsubScript.
/**
 * Writes the bsub submission script for the given job to a new local file.
 *
 * <p>When the {@code SSHLSF_SCRIPT} preference names a user script, that
 * script is copied line by line and overrides everything except pre-staging,
 * post-staging and the exit status. Otherwise a {@code /bin/sh} script is
 * generated with bsub options for the queue ({@code q}), core count
 * ({@code n}, plus a single-node {@code span[ptile=...]} resource request),
 * run limit ({@code W}, 60 when no positive walltime is given), native flags
 * (split on {@code ##}), job name ({@code J}) and stdout/stderr files
 * ({@code oo}/{@code eo}), followed by the execution part added by
 * {@code addScriptExecution}. In both cases a final line stores the exit
 * status of the last command in {@code returnValueFile}.
 *
 * <p>The file is named {@code "lsf"} followed by a random number and is
 * created relative to the current working directory.
 *
 * @param description job description supplying the software and (optional)
 *        resource description
 * @param returnValueFile path, as seen by the script, of the file that
 *        receives the {@code retvalue = <status>} line
 * @param nproc not used by this method
 * @return the script file that was written
 * @throws GATInvocationException if anything goes wrong while writing the
 *         script; the original failure is attached as the cause
 */
private java.io.File createBsubScript(JobDescription description, String returnValueFile, int nproc) throws GATInvocationException {
    LSFScriptWriter job = null;
    SoftwareDescription sd = description.getSoftwareDescription();
    ResourceDescription rd = description.getResourceDescription();
    // Corrected initialization of the attribute map: rd may be null ... --Ceriel
    HashMap<String, Object> resourceAttributes = null;
    if (rd != null) {
        resourceAttributes = (HashMap<String, Object>) rd.getDescription();
    }
    if (resourceAttributes == null) {
        resourceAttributes = new HashMap<String, Object>();
    }
    java.io.File temp = new java.io.File("lsf" + Math.random());
    try {
        job = new LSFScriptWriter(new BufferedWriter(new FileWriter(temp)));
        String userScript = (String) gatContext.getPreferences().get(SSHLSF_SCRIPT);
        if (userScript != null) {
            // a specified job script overrides everything, except for
            // pre-staging, post-staging,
            // and exit status.
            BufferedReader f = new BufferedReader(new FileReader(userScript));
            try {
                for (;;) {
                    String s = f.readLine();
                    if (s == null) {
                        break;
                    }
                    job.print(s + "\n");
                }
            } finally {
                // Release the user script's file handle on every path.
                f.close();
            }
        } else {
            job.print("#!/bin/sh\n");
            job.print("# bsub script automatically generated by GAT SshLsf adaptor\n");
            // Resources: queue, walltime, memory size, et cetera.
            String queue = (String) resourceAttributes.get("machine.queue");
            if (queue == null) {
                queue = sd.getStringAttribute(SoftwareDescription.JOB_QUEUE, null);
            }
            if (queue != null) {
                job.addOption("q", queue);
            }
            long walltime = sd.getLongAttribute(SoftwareDescription.WALLTIME_MAX, -1L);
            Integer cpus = (Integer) resourceAttributes.get(HardwareResourceDescription.CPU_COUNT);
            if (cpus == null) {
                cpus = sd.getIntAttribute("coreCount", 1);
            }
            job.addOption("n", cpus);
            // In a single node
            job.addOption("R", "\"span[ptile=" + cpus + "]\"");
            if (walltime > 0) {
                // Pass the complete walltime. Taking it modulo 60 dropped all
                // whole hours (60 became 0, 90 became 30). bsub -W takes
                // [hour:]minute and accepts minute values above 59.
                // NOTE(review): assumes WALLTIME_MAX is expressed in minutes - confirm.
                job.addOption("W", Long.toString(walltime));
            } else {
                job.addOption("W", 60);
            }
            // Native flags: resource description first, then software
            // description, then the context preferences.
            String nativeFlags = null;
            Object o = rd == null ? null : rd.getResourceAttribute(SSHLSF_NATIVE_FLAGS);
            if (o != null && o instanceof String) {
                nativeFlags = (String) o;
            } else {
                String s = sd == null ? null : sd.getStringAttribute(SSHLSF_NATIVE_FLAGS, null);
                if (s != null) {
                    nativeFlags = s;
                } else {
                    o = gatContext.getPreferences().get(SSHLSF_NATIVE_FLAGS);
                    if (o != null && o instanceof String) {
                        nativeFlags = (String) o;
                    }
                }
            }
            if (nativeFlags != null) {
                String[] splits = nativeFlags.split("##");
                for (String s : splits) {
                    job.addString(s);
                }
            }
            String path = sd.getStringAttribute(SoftwareDescription.SANDBOX_ROOT, "");
            if (!path.isEmpty() && !path.endsWith(File.separator)) {
                path = path + File.separator;
            }
            // Set working dir.
            // job.addOption("cwd", path);
            // Name for the job.
            String jobname = (String) resourceAttributes.get("Jobname");
            if (jobname == null) {
                jobname = brokerURI.getUserInfo();
                if (jobname == null || "".equals(jobname)) {
                    jobname = "compss_remotejob_" + System.getProperty("user.name");
                }
            }
            if (jobname != null) {
                job.addOption("J", jobname);
            }
            if (sd.getStdout() != null) {
                job.addOption("oo", path + sd.getStdout().getName());
            }
            if (sd.getStderr() != null) {
                job.addOption("eo", path + sd.getStderr().getName());
            }
            addScriptExecution(job, sd, rd);
        }
        job.print("echo retvalue = $? > " + returnValueFile + "\n");
    } catch (Throwable e) {
        throw new GATInvocationException("Cannot create temporary bsub file " + temp.getAbsolutePath(), e);
    } finally {
        if (job != null) {
            job.close();
        }
    }
    return temp;
}
Aggregations