Search in sources:

Example 1 with JobDescription

use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.

the class GATJob method processMetricEvent.

/**
 * MetricListener interface implementation: reacts to a state change of the GAT job behind this object.
 *
 * <p>Only the {@code STOPPED} and {@code SUBMISSION_ERROR} states are handled; every other transition is
 * ignored. For both handled states the tracer slot is released (when tracing is active), this job is removed
 * from {@code RUNNING_JOBS} and the listener is told whether the job completed or failed.
 *
 * @param value metric event whose source is the GAT {@code Job} and whose value is its new {@code JobState}
 */
@Override
public void processMetricEvent(MetricEvent value) {
    Job job = (Job) value.getSource();
    JobState newJobState = (JobState) value.getValue();
    JobDescription jd = (JobDescription) job.getJobDescription();
    SoftwareDescription sd = jd.getSoftwareDescription();
    Integer jobId = (Integer) sd.getAttributes().get("jobId");
    logger.debug("Processing job ID = " + jobId);

    /*
     * Check if either the job has finished or there has been a submission error. We don't care about other state
     * transitions
     */
    boolean stopped = newJobState == JobState.STOPPED;
    boolean submissionError = newJobState == JobState.SUBMISSION_ERROR;
    if (!stopped && !submissionError) {
        return;
    }

    // Both handled transitions end the job, so its tracing slot is released first
    if (Tracer.isActivated()) {
        Integer slot = (Integer) sd.getAttributes().get("slot");
        String host = getResourceNode().getHost();
        Tracer.freeSlot(host, slot);
    }

    if (stopped) {
        /*
         * We must check whether the chosen adaptor is globus. In that case, since globus doesn't provide the exit
         * status of a job, we must examine the standard error file
         */
        try {
            if (usingGlobus) {
                File errFile = sd.getStderr();
                // Error file should always be in the same host as the IT
                File localFile = GAT.createFile(context, errFile.toGATURI());
                if (localFile.length() > 0) {
                    // A non-empty stderr file is treated as a failed execution
                    GATjob = null;
                    RUNNING_JOBS.remove(this);
                    ErrorManager.warn("Error when creating file.");
                    listener.jobFailed(this, JobEndStatus.EXECUTION_FAILED);
                } else {
                    // The empty stderr file is kept only when debugging
                    if (!debug) {
                        localFile.delete();
                    }
                    RUNNING_JOBS.remove(this);
                    listener.jobCompleted(this);
                }
            } else {
                if (job.getExitStatus() == 0) {
                    RUNNING_JOBS.remove(this);
                    listener.jobCompleted(this);
                } else {
                    GATjob = null;
                    RUNNING_JOBS.remove(this);
                    listener.jobFailed(this, JobEndStatus.EXECUTION_FAILED);
                }
            }
        } catch (Exception e) {
            ErrorManager.fatal(CALLBACK_PROCESSING_ERR + ": " + this, e);
        }
    } else {
        try {
            // Constant-first equals: a missing "resManError" entry must count as a failed submission
            // instead of raising a NullPointerException inside the callback.
            if (usingGlobus && "NO_ERROR".equals(job.getInfo().get("resManError"))) {
                RUNNING_JOBS.remove(this);
                listener.jobCompleted(this);
            } else {
                GATjob = null;
                RUNNING_JOBS.remove(this);
                listener.jobFailed(this, JobEndStatus.SUBMISSION_FAILED);
            }
        } catch (GATInvocationException e) {
            ErrorManager.fatal(CALLBACK_PROCESSING_ERR + ": " + this, e);
        }
    }
}
Also used : JobDescription(org.gridlab.gat.resources.JobDescription) GATInvocationException(org.gridlab.gat.GATInvocationException) JobState(org.gridlab.gat.resources.Job.JobState) Job(org.gridlab.gat.resources.Job) File(org.gridlab.gat.io.File) SoftwareDescription(org.gridlab.gat.resources.SoftwareDescription) GATInvocationException(org.gridlab.gat.GATInvocationException)

Example 2 with JobDescription

use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.

the class GATJob method submit.

/**
 * Submits this job through the GAT resource broker of its destination host.
 *
 * <p>The broker for the destination is taken from {@code brokers}, or created and cached there on first use.
 * This object is registered as listener for the {@code JOB_STATUS} metric and, once the submission succeeds,
 * is added to {@code RUNNING_JOBS}.
 *
 * @throws Exception if the job cannot be prepared, the broker cannot be created or the submission fails;
 *         on a failed submission the tracer slot is released before the exception is rethrown
 */
@Override
public void submit() throws Exception {
    logger.info("Submit GATJob with ID " + jobId);

    // Build the GAT description of the job
    JobDescription description = prepareJob();

    // Look up the broker of the destination host, creating and caching it on a miss
    String target = (String) description.getResourceDescription().getResourceAttribute(RES_ATTR);
    ResourceBroker hostBroker = brokers.get(target);
    if (hostBroker == null) {
        hostBroker = GAT.createResourceBroker(context, new URI(target));
        brokers.put(target, hostBroker);
    }

    // Submit, registering this object for notifications of job state transitions
    Job submitted;
    try {
        submitted = hostBroker.submitJob(description, this, JOB_STATUS);
        RUNNING_JOBS.add(this);
    } catch (Exception e) {
        if (Tracer.isActivated()) {
            String host = ((GATWorkerNode) worker.getNode()).getHost();
            Integer slot = (Integer) description.getSoftwareDescription().getAttributes().get("slot");
            Tracer.freeSlot(host, slot);
        }
        throw e;
    }

    // Remember the GAT job backing this object
    GATjob = submitted;
}
Also used : JobDescription(org.gridlab.gat.resources.JobDescription) ResourceBroker(org.gridlab.gat.resources.ResourceBroker) Job(org.gridlab.gat.resources.Job) URI(org.gridlab.gat.URI) GATInvocationException(org.gridlab.gat.GATInvocationException)

Example 3 with JobDescription

use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.

the class LsfResourceBrokerAdaptor method submitJob.

/**
 * Submits a job by wrapping its executable into a blaunch command and starting it as a local process bundle.
 *
 * <p>Steps, in order: validate the description, create the sandbox and the {@code LsfJob}, register the
 * metric listener, pre-stage the sandbox, build the blaunch command line, start the process bundle, wire
 * stderr/stdout/stdin according to the software description, and start state monitoring.
 *
 * @param abstractDescription job description; must be a {@code JobDescription}
 * @param listener listener to register on the job, may be {@code null}
 * @param metricDefinitionName name of the metric definition to listen to, may be {@code null}
 * @return the {@code LsfJob}, or a {@code WrapperJobCpi} around it when the description is a
 *         {@code WrapperJobDescription}
 * @throws GATInvocationException if the description is unsupported or incomplete, {@code user.home} is not
 *         set, the sandbox directory does not exist, or a stream for stderr/stdout/stdin cannot be created;
 *         an {@code IOException} while starting the bundle is rethrown as {@code CommandNotFoundException}
 * @see org.gridlab.gat.resources.ResourceBroker#submitJob(org.gridlab.gat.resources.JobDescription)
 */
public Job submitJob(AbstractJobDescription abstractDescription, MetricListener listener, String metricDefinitionName) throws GATInvocationException {
    // --- Validation of the incoming description ---
    if (!(abstractDescription instanceof JobDescription)) {
        throw new GATInvocationException("can only handle JobDescriptions: " + abstractDescription.getClass());
    }
    JobDescription description = (JobDescription) abstractDescription;
    SoftwareDescription sd = description.getSoftwareDescription();
    if (sd == null) {
        throw new GATInvocationException("The job description does not contain a software description");
    }
    if (description.getProcessCount() < 1) {
        throw new GATInvocationException("Adaptor cannot handle: process count < 1: " + description.getProcessCount());
    }
    // NOTE(review): the check rejects every resource count other than 1, although the message only mentions "> 1"
    if (description.getResourceCount() != 1) {
        throw new GATInvocationException("Adaptor cannot handle: resource count > 1: " + description.getResourceCount());
    }
    String home = System.getProperty("user.home");
    if (home == null) {
        throw new GATInvocationException("lsf broker could not get user home dir");
    }
    // --- Sandbox and job objects ---
    Sandbox sandbox = new Sandbox(gatContext, description, "localhost", home, true, true, false, false);
    LsfJob lsfJob = new LsfJob(gatContext, description, sandbox);
    Job job = null;
    if (description instanceof WrapperJobDescription) {
        // The wrapper takes over both roles: it is the job returned to the caller and the listener
        // that gets registered on the underlying LsfJob below
        WrapperJobCpi tmp = new WrapperJobCpi(gatContext, lsfJob, listener, metricDefinitionName);
        listener = tmp;
        job = tmp;
    } else {
        job = lsfJob;
    }
    if (listener != null && metricDefinitionName != null) {
        Metric metric = lsfJob.getMetricDefinitionByName(metricDefinitionName).createMetric(null);
        lsfJob.addMetricListener(listener, metric);
    }
    // --- Pre-staging ---
    lsfJob.setState(Job.JobState.PRE_STAGING);
    lsfJob.waitForTrigger(Job.JobState.PRE_STAGING);
    sandbox.prestage();
    // Prefer the executable resolved by the sandbox; otherwise take it from the description
    String exe;
    if (sandbox.getResolvedExecutable() != null) {
        exe = sandbox.getResolvedExecutable().getPath();
    // try to set the executable bit, it might be lost
    /* CDIAZ: The command "exe" can be also in a remote host
             * 		  The command must have the right permissions in the remote host
            try {
                new CommandRunner("chmod", "+x", exe);
            } catch (Throwable t) {
                // ignore
            }
            */
    } else {
        exe = getExecutable(description);
    }
    String[] args = getArgumentsArray(description);
    // Directory where the lsf command will be executed
    java.io.File f = new java.io.File(sandbox.getSandboxPath());
    if (!f.exists()) {
        throw new GATInvocationException("Unable to find directory " + f.getAbsolutePath());
    }
    // Check and set the environment for a blaunch command
    Map<String, Object> env = sd.getEnvironment();
    this.prepareBLaunchEnv(env);
    // Encapsulate the original command into a blaunch command
    String host = brokerURI.getHost();
    String blExe = this.getBlaunchCommand();
    String[] blArgs = this.getBlaunchArgs(host, exe, args);
    ProcessBundle bundle = new ProcessBundle(description.getProcessCount(), blExe, blArgs, f, env);
    // --- Launch ---
    lsfJob.setSubmissionTime();
    lsfJob.setState(Job.JobState.SCHEDULED);
    try {
        // The job is marked RUNNING (and its trigger awaited) before the bundle is actually started
        lsfJob.setState(Job.JobState.RUNNING);
        lsfJob.waitForTrigger(Job.JobState.RUNNING);
        lsfJob.setStartTime();
        bundle.startBundle();
        lsfJob.setProcess(bundle);
    } catch (IOException e) {
        throw new CommandNotFoundException("LsfResourceBrokerAdaptor", e);
    }
    // --- Stream wiring: only done here when streaming of the given stream is not enabled ---
    if (!sd.streamingStderrEnabled()) {
        // read away the stderr
        try {
            if (sd.getStderr() != null) {
                OutputStream err = GAT.createFileOutputStream(gatContext, sd.getStderr());
                // to file
                StreamForwarder forwarder = new StreamForwarder(bundle.getStderr(), err, sd.getExecutable() + " [stderr]");
                lsfJob.setErrorStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stderr forwarder to file " + sd.getStderr());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStderr(), null, sd.getExecutable() + " [stderr]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stderr!", e);
        }
    }
    if (!sd.streamingStdoutEnabled()) {
        // read away the stdout
        try {
            if (sd.getStdout() != null) {
                // to file
                OutputStream out = GAT.createFileOutputStream(gatContext, sd.getStdout());
                StreamForwarder forwarder = new StreamForwarder(bundle.getStdout(), out, sd.getExecutable() + " [stdout]");
                lsfJob.setOutputStream(forwarder);
                if (logger.isDebugEnabled()) {
                    logger.debug("Created stdout forwarder to file " + sd.getStdout());
                }
            } else {
                // or throw it away
                new StreamForwarder(bundle.getStdout(), null, sd.getExecutable() + " [stdout]");
            }
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file output stream for stdout!", e);
        }
    }
    if (!sd.streamingStdinEnabled() && sd.getStdin() != null) {
        // forward the stdin from file
        try {
            InputStream in = GAT.createFileInputStream(gatContext, sd.getStdin());
            bundle.setStdin(sd.getExecutable(), in);
        } catch (GATObjectCreationException e) {
            throw new GATInvocationException("Unable to create file input stream for stdin!", e);
        }
    }
    // Start tracking the job state; the returned object is the wrapper when one was created above
    lsfJob.monitorState();
    return job;
}
Also used : ProcessBundle(org.gridlab.gat.engine.util.ProcessBundle) GATObjectCreationException(org.gridlab.gat.GATObjectCreationException) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) StreamForwarder(org.gridlab.gat.engine.util.StreamForwarder) IOException(java.io.IOException) SoftwareDescription(org.gridlab.gat.resources.SoftwareDescription) Sandbox(org.gridlab.gat.resources.cpi.Sandbox) WrapperJobDescription(org.gridlab.gat.resources.WrapperJobDescription) JobDescription(org.gridlab.gat.resources.JobDescription) AbstractJobDescription(org.gridlab.gat.resources.AbstractJobDescription) GATInvocationException(org.gridlab.gat.GATInvocationException) WrapperJobCpi(org.gridlab.gat.resources.cpi.WrapperJobCpi) Metric(org.gridlab.gat.monitoring.Metric) Job(org.gridlab.gat.resources.Job) WrapperJobDescription(org.gridlab.gat.resources.WrapperJobDescription) CommandNotFoundException(org.gridlab.gat.CommandNotFoundException)

Example 4 with JobDescription

use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.

the class SshLSFJob method getJobState.

/**
 * Queries LSF for the status of the given job and updates this job's state accordingly.
 *
 * <p>A helper job running {@code bjobs} is submitted through {@code jobHelper}; its standard output is
 * written to a local temporary file whose first line is mapped to a GAT state with
 * {@code mapLSFStatetoGAT}. Only one query runs at a time, guarded by {@code jobStateBusy}. Nothing is
 * queried when the current state is already {@code POST_STAGING}, {@code STOPPED} or
 * {@code SUBMISSION_ERROR}. A resulting {@code STOPPED} state is recorded as {@code POST_STAGING}.
 *
 * @param jobID LSF identifier of the job to query
 * @throws GATInvocationException if the helper job cannot be set up, does not finish successfully, or its
 *         output cannot be read
 */
protected void getJobState(String jobID) throws GATInvocationException {
    // Serialize status queries: wait until no other thread is running one
    synchronized (this) {
        while (jobStateBusy) {
            try {
                wait();
            } catch (InterruptedException e) {
                // ignored
            }
        }
        jobStateBusy = true;
    }
    JobState resultState;
    try {
        if (state == JobState.POST_STAGING || state == JobState.STOPPED || state == JobState.SUBMISSION_ERROR) {
            return;
        }
        logger.debug("Getting task status in setState()");
        // Local file that receives the output of the status command
        java.io.File squeueResultFile = null;
        try {
            // Create the status job
            SoftwareDescription sd = new SoftwareDescription();
            // Run through /bin/sh so that the pipe to awk is interpreted by a shell
            sd.setExecutable("/bin/sh");
            sd.setArguments("-c", "bjobs -noheader " + jobID + " | awk {' print $3 '}");
            sd.addAttribute(SoftwareDescription.SANDBOX_USEROOT, "true");
            squeueResultFile = java.io.File.createTempFile("GAT", "tmp");
            try {
                sd.setStdout(GAT.createFile(subContext, new URI("file:///" + squeueResultFile.getAbsolutePath().replace(File.separatorChar, '/'))));
            } catch (Throwable e1) {
                throw new GATInvocationException("Could not create GAT object for temporary " + squeueResultFile.getAbsolutePath(), e1);
            }
            JobDescription jd = new JobDescription(sd);
            Job job = jobHelper.submitJob(jd, this, "job.status");
            // Block until the helper job reaches a final state
            synchronized (job) {
                while (job.getState() != Job.JobState.STOPPED && job.getState() != Job.JobState.SUBMISSION_ERROR) {
                    try {
                        job.wait();
                    } catch (InterruptedException e) {
                        // ignore
                    }
                }
            }
            if (job.getState() != Job.JobState.STOPPED || job.getExitStatus() != 0) {
                throw new GATInvocationException("Could not submit squeue job " + sd.toString());
            }
            // submit success.
            String status;
            BufferedReader in = new BufferedReader(new FileReader(squeueResultFile.getAbsolutePath()));
            try {
                status = in.readLine();
            } finally {
                // Close the reader before the temporary file is deleted; it was previously leaked
                in.close();
            }
            // No output line at all: presumably the job is no longer known to LSF or has finished
            // (TODO confirm). Set to "" in this case.
            if (status == null) {
                status = "";
            }
            if (logger.isDebugEnabled()) {
                logger.debug("squeue line: " + status);
            }
            resultState = mapLSFStatetoGAT(status);
        } catch (IOException e) {
            logger.debug("retrieving job status sshslurmjob failed", e);
            throw new GATInvocationException("Unable to retrieve the Job Status", e);
        } finally {
            // Still null when createTempFile itself failed; guarding here keeps the real exception
            // from being replaced by a NullPointerException
            if (squeueResultFile != null) {
                squeueResultFile.delete();
            }
        }
    } finally {
        // Release the query slot and wake up any waiting thread
        synchronized (this) {
            jobStateBusy = false;
            notifyAll();
        }
    }
    // A job reported as stopped still goes through post-staging
    if (resultState != JobState.STOPPED) {
        setState(resultState);
    } else {
        setState(JobState.POST_STAGING);
    }
}
Also used : IOException(java.io.IOException) URI(org.gridlab.gat.URI) File(java.io.File) SoftwareDescription(org.gridlab.gat.resources.SoftwareDescription) JobDescription(org.gridlab.gat.resources.JobDescription) GATInvocationException(org.gridlab.gat.GATInvocationException) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SshLSFJob(org.gridlab.gat.resources.cpi.sshlsf.SshLSFJob) Job(org.gridlab.gat.resources.Job)

Example 5 with JobDescription

use of org.gridlab.gat.resources.JobDescription in project compss by bsc-wdc.

the class SshLsfResourceBrokerAdaptor method sshLsfSubmission.

/*private java.io.File createJobScript(JobDescription description)
			throws GATInvocationException {

		java.io.File temp;

		SoftwareDescription sd = description.getSoftwareDescription();

		try {
			temp = java.io.File.createTempFile("lsf-sub", null);
		} catch (IOException e) {
			throw new GATInvocationException("Cannot create file", e);
		}
		PrintWriter job = null;
		try {
			job = new PrintWriter(new BufferedWriter(new FileWriter(temp)));

			job.print("#!/bin/sh\n");
			job.print("# job script\n");

			// Support DIRECTORY
			String dir = sd.getStringAttribute(SoftwareDescription.DIRECTORY, null);
			if (dir != null) {
				job.print("cd " + dir + "\n");
			}

			// Support environment.
			Map<String, Object> env = sd.getEnvironment();
			if (env != null) {
				Set<String> s = env.keySet();
				Object[] keys = s.toArray();

				for (int i = 0; i < keys.length; i++) {
					String val = (String) env.get(keys[i]);
					job.print(keys[i] + "=" + val + " && export " + keys[i] + "\n");
				}
			}

			// Construct command.
			StringBuffer cmd = new StringBuffer();

			cmd.append(sd.getExecutable().toString());
			if (sd.getArguments() != null) {
				String[] args = sd.getArguments();
				for (int i = 0; i < args.length; ++i) {
					cmd.append(" ");
					cmd.append(args[i]);
				}
			}
			job.print(cmd.toString() + "\n");
			//job.print("exit $?\n");
		} catch (Throwable e) {
			throw new GATInvocationException(
					"Cannot create temporary job script file "
							+ temp.getAbsolutePath(), e);
		} finally {
			if (job != null)
				job.close();
		}
		return temp;
	}*/
/**
 * Pre-stages the sandbox, submits the given bsub script through {@code subBroker} and returns the LSF job id.
 *
 * <p>The script is fed to {@code bsub} by a helper job whose standard output is captured in a local
 * temporary file. The first line of that output is inspected: the text between {@code "Job <"} and the
 * following {@code ">"} is returned when both are present, otherwise the whole line is returned. The
 * temporary result file and {@code bsubFile} are deleted before returning, whatever the outcome; on failure
 * the sandbox directory is removed as well (best effort).
 *
 * @param lsfJob job being submitted (not used by this method)
 * @param description original job description; its software attributes are copied to the helper job
 * @param bsubFile local bsub script, pre-staged for the helper job and deleted afterwards
 * @param subBroker broker used to run the helper job
 * @param sandbox sandbox of the job
 * @return the LSF job id, or the raw first output line of bsub when it has no {@code "Job <id>"} part
 * @throws GATInvocationException if pre-staging fails, the helper job cannot be set up or does not finish
 *         successfully, or bsub produced no readable output
 */
private String sshLsfSubmission(SshLSFJob lsfJob, JobDescription description, java.io.File bsubFile, ResourceBroker subBroker, Sandbox sandbox) throws GATInvocationException {
    java.io.File slurmResultFile = null;
    try {
        if (logger.isDebugEnabled()) {
            logger.debug("***** Doing sandbox prestage " + sandbox.getSandboxPath());
        }
        sandbox.prestage();
        if (logger.isDebugEnabled()) {
            logger.debug("***** Sandbox prestage done " + sandbox.getSandboxPath());
        }
        // Create the helper job that pipes the script into bsub
        SoftwareDescription sd = new SoftwareDescription();
        sd.setExecutable("sh");
        sd.setArguments("-c", "bsub < " + bsubFile.getName() + " 2>submit.err");
        sd.setAttributes(description.getSoftwareDescription().getAttributes());
        sd.addAttribute(SoftwareDescription.SANDBOX_USEROOT, "true");
        slurmResultFile = java.io.File.createTempFile("GAT", "tmp");
        try {
            sd.setStdout(GAT.createFile(gatContext, new URI(slurmResultFile.toURI())));
            sd.addPreStagedFile(GAT.createFile(gatContext, new URI(bsubFile.toURI())));
        } catch (Throwable e1) {
            try {
                sandbox.removeSandboxDir();
            } catch (Throwable e) {
                // ignore
            }
            throw new GATInvocationException("Could not create GAT object for temporary " + slurmResultFile.getAbsolutePath(), e1);
        }
        JobDescription jd = new JobDescription(sd);
        if (logger.isDebugEnabled()) {
            logger.debug("Submitting lsf job: " + sd);
        }
        Job job = subBroker.submitJob(jd, this, "job.status");
        if (logger.isDebugEnabled()) {
            logger.debug("Job submitted.");
        }
        // Block until the helper job reaches a final state
        synchronized (job) {
            while (job.getState() != Job.JobState.STOPPED && job.getState() != Job.JobState.SUBMISSION_ERROR) {
                try {
                    job.wait();
                } catch (InterruptedException e) {
                    // ignore
                }
            }
        }
        if (job.getState() != Job.JobState.STOPPED || job.getExitStatus() != 0) {
            try {
                sandbox.removeSandboxDir();
            } catch (Throwable e) {
                // ignore
            }
            logger.debug("jobState = " + job.getState() + ", exit status = " + job.getExitStatus());
            throw new GATInvocationException("Could not submit LSF job");
        }
        // submit success.
        String result;
        BufferedReader in = new BufferedReader(new FileReader(slurmResultFile.getAbsolutePath()));
        try {
            result = in.readLine();
        } finally {
            // Close the reader before the temporary file is deleted; it was previously leaked
            in.close();
        }
        if (logger.isDebugEnabled()) {
            logger.debug("bsub result line = " + result);
        }
        if (result == null) {
            // Empty output: there is no job id to return, so fail like the other submission errors
            // instead of hitting a NullPointerException below
            try {
                sandbox.removeSandboxDir();
            } catch (Throwable e) {
                // ignore
            }
            throw new GATInvocationException("Could not submit LSF job: bsub produced no output");
        }
        // Check for LSF bsub result ...
        // TODO Check if LSF return the same
        String job_prefix = "Job <";
        int prefixPos = result.indexOf(job_prefix);
        if (prefixPos >= 0) {
            int idStart = prefixPos + job_prefix.length();
            int idEnd = result.indexOf(">", idStart);
            // Only extract the id when the closing '>' exists; otherwise keep the raw line
            if (idEnd >= idStart) {
                result = result.substring(idStart, idEnd);
            }
        }
        return result;
    } catch (IOException e) {
        try {
            sandbox.removeSandboxDir();
        } catch (Throwable e1) {
            // ignore
        }
        throw new GATInvocationException("Got IOException", e);
    } finally {
        // Still null when pre-staging or createTempFile failed; guarding here keeps the real exception
        // from being replaced by a NullPointerException
        if (slurmResultFile != null) {
            slurmResultFile.delete();
        }
        bsubFile.delete();
    }
}
Also used : IOException(java.io.IOException) URI(org.gridlab.gat.URI) File(java.io.File) SoftwareDescription(org.gridlab.gat.resources.SoftwareDescription) JobDescription(org.gridlab.gat.resources.JobDescription) WrapperJobDescription(org.gridlab.gat.resources.WrapperJobDescription) AbstractJobDescription(org.gridlab.gat.resources.AbstractJobDescription) GATInvocationException(org.gridlab.gat.GATInvocationException) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) Job(org.gridlab.gat.resources.Job)

Aggregations

JobDescription (org.gridlab.gat.resources.JobDescription)33 SoftwareDescription (org.gridlab.gat.resources.SoftwareDescription)31 URI (org.gridlab.gat.URI)28 ResourceBroker (org.gridlab.gat.resources.ResourceBroker)26 Job (org.gridlab.gat.resources.Job)25 GATInvocationException (org.gridlab.gat.GATInvocationException)24 GATObjectCreationException (org.gridlab.gat.GATObjectCreationException)18 URISyntaxException (java.net.URISyntaxException)17 IOException (java.io.IOException)10 Preferences (org.gridlab.gat.Preferences)8 File (org.gridlab.gat.io.File)8 BufferedReader (java.io.BufferedReader)6 WrapperJobDescription (org.gridlab.gat.resources.WrapperJobDescription)5 InputStreamReader (java.io.InputStreamReader)4 AbstractJobDescription (org.gridlab.gat.resources.AbstractJobDescription)4 File (java.io.File)3 InputStream (java.io.InputStream)3 HashMap (java.util.HashMap)3 GATContext (org.gridlab.gat.GATContext)3 Metric (org.gridlab.gat.monitoring.Metric)3