use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.
the class PluginDescriptor method packParameters.
/**
 * Packs the parameters entered by the user into the parameter set required by this plugin.
 * <p>
 * Parameters are matched positionally against the configurable fields of the plugin.
 * When a field is flagged as a file browser, or is flagged as a credential and its value is
 * a {@code String}, the value is interpreted as a file path and replaced by the content of
 * that file as a byte array (the file is loaded on the caller side). A {@code null} or empty
 * path is propagated as {@code null}; the plugin then decides whether that is acceptable.
 * All other values are passed through unchanged.
 *
 * @param parameters input parameters, one per configurable field; {@code null} is treated
 *                   as an empty array
 * @return output parameters, in the same order as the configurable fields
 * @throws RMException when the number of parameters does not match the number of
 *                     configurable fields, or when a referenced file cannot be loaded
 */
public Object[] packParameters(Object[] parameters) throws RMException {
    int configurableFieldsSize = configurableFields.size();
    int providedSize = parameters == null ? 0 : parameters.length;
    if (providedSize != configurableFieldsSize) {
        throw new RMException("Incorrect number of parameters: expected " + configurableFieldsSize + ", provided " + providedSize);
    }
    // allocate only once the input is known to be valid
    List<Object> resultParams = new ArrayList<>(configurableFieldsSize);
    int counter = 0;
    for (ConfigurableField field : configurableFields) {
        Object value = parameters[counter++];
        Configurable configurable = field.getMeta();
        boolean credentialsFilePath = configurable.credential() && value instanceof String;
        if (configurable.fileBrowser() || credentialsFilePath) {
            // a missing value is handled like an empty path instead of failing with a NullPointerException
            String path = value == null ? "" : value.toString();
            if (path.isEmpty()) {
                // in case the file path is not specified propagate null to the plugin,
                // it will decide then if it's acceptable or not
                value = null;
            } else {
                try {
                    value = FileToBytesConverter.convertFileToByteArray(new File(path));
                } catch (IOException e) {
                    throw new RMException("Cannot load file", e);
                }
            }
        }
        resultParams.add(value);
    }
    return resultParams.toArray();
}
use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.
the class NodeSourceParameterHelper method getParametersWithDynamicParametersUpdatedOnly.
/**
 * Builds a new parameter list that starts as a copy of {@code oldParameters} and is then
 * revised field by field.
 * <p>
 * For every configurable field, the string form of the new and of the old parameter at the
 * field's index is computed and handed, together with the copy, to
 * {@code updateDynamicParameterIfNotEqual}. Judging by its name, that helper replaces the
 * entry only for dynamic fields whose value changed; confirm against the helper itself.
 * {@code oldParameters} is not modified by this method directly.
 *
 * @param configurableFields the fields describing each parameter position
 * @param newParameters the newly provided parameter values
 * @param oldParameters the previously stored parameter values
 * @return the copied list after the per-field update step
 */
public List<Serializable> getParametersWithDynamicParametersUpdatedOnly(Collection<ConfigurableField> configurableFields, Object[] newParameters, List<Serializable> oldParameters) {
    List<Serializable> mergedParameters = new LinkedList<>(oldParameters);
    // snapshot the old values once instead of copying the whole list for every field
    Object[] oldParametersArray = oldParameters.toArray();
    Lambda.forEachWithIndex(configurableFields, (configurableField, index) -> {
        Configurable meta = configurableField.getMeta();
        String newValue = getStringValue(newParameters, index, meta);
        String oldValue = getStringValue(oldParametersArray, index, meta);
        this.updateDynamicParameterIfNotEqual(mergedParameters, newValue, oldValue, index, configurableField);
    });
    return mergedParameters;
}
use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.
the class SchedulerTHelper method testJobSubmission.
/**
 * Creates and submits a job, and checks, with assertions,
 * the events related to this job submission:
 * 1/ job submitted event
 * 2/ job passing from pending to running (with state set to running).
 * 3/ job passing from running to finished (with state set to finished).
 * 4/ every task finished with or without error (configurable)
 * <p>
 * Then returns.
 * <p>
 * This is the simplest events sequence of a job submission. If you need to test
 * specific events or task states (failures, rescheduling etc.), you must not use this
 * helper and check the events sequence with waitForEvent**() functions.
 * <p>
 * Task results are only inspected when the submitted job is a {@code TaskFlowJob}.
 * Inspection stops at the first task considered in error: a faulty task whose result is
 * missing or carries an exception, a finished task whose result is missing, or a task
 * whose status is neither finished, faulty, nor (when accepted) skipped.
 *
 * @param userInt scheduler interface
 * @param jobToSubmit job object to schedule.
 * @param acceptSkipped if true then skipped task will not fail the test
 * @param failIfTaskError if true then the test will fail if a task was in error
 * @return JobId, the job's identifier.
 * @throws Exception if an error occurs at job submission, or during
 * verification of events sequence.
 */
public JobId testJobSubmission(Scheduler userInt, Job jobToSubmit, boolean acceptSkipped, boolean failIfTaskError) throws Exception {
    JobId id = userInt.submit(jobToSubmit);
    log("Job submitted, id " + id.toString());
    log("Waiting for jobSubmitted");
    JobState receivedState = waitForEventJobSubmitted(id);
    Assert.assertEquals(id, receivedState.getId());
    log("Waiting for job running");
    JobInfo jInfo = waitForEventJobRunning(id);
    // expected value first, so that a failure message reads correctly
    Assert.assertEquals(id, jInfo.getJobId());
    Assert.assertEquals("Job " + jInfo.getJobId(), JobStatus.RUNNING, jInfo.getStatus());
    log("Waiting for job finished");
    jInfo = waitForEventJobFinished(userInt, id);
    Assert.assertEquals("Job " + jInfo.getJobId(), JobStatus.FINISHED, jInfo.getStatus());
    log("Job finished");
    boolean taskError = false;
    String message = "";
    if (jobToSubmit instanceof TaskFlowJob) {
        JobState jobState = userInt.getJobState(id);
        for (TaskState t : jobState.getTasks()) {
            log("Looking at the result of task : " + t.getName());
            if (t.getStatus() == TaskStatus.FAULTY) {
                TaskResult tres = userInt.getTaskResult(jInfo.getJobId(), t.getName());
                if (tres == null) {
                    message = "Task result of " + t.getName() + " should not be null.";
                    taskError = true;
                    break;
                }
                if (tres.getOutput() != null) {
                    System.err.println("Output of failing task (" + t.getName() + ") :");
                    System.err.println(tres.getOutput().getAllLogs(true));
                }
                if (tres.hadException()) {
                    System.err.println("Exception occurred in task (" + t.getName() + ") :");
                    tres.getException().printStackTrace(System.err);
                    message = "Exception occurred in task (" + t.getName() + ")";
                    taskError = true;
                    break;
                }
            } else if (acceptSkipped && t.getStatus() == TaskStatus.SKIPPED) {
                // do nothing
            } else if (t.getStatus() != TaskStatus.FINISHED) {
                message = "Invalid task status for task " + t.getName() + " : " + t.getStatus();
                taskError = true;
                break;
            } else {
                TaskResult tres = userInt.getTaskResult(jInfo.getJobId(), t.getName());
                // same null handling as for faulty tasks, instead of a NullPointerException
                if (tres == null) {
                    message = "Task result of " + t.getName() + " should not be null.";
                    taskError = true;
                    break;
                }
                System.out.println("Output of task (" + t.getName() + ") :");
                if (tres.getOutput() != null) {
                    System.out.println(tres.getOutput().getAllLogs(true));
                }
            }
        }
    }
    if (taskError && failIfTaskError) {
        fail(message);
    }
    return id;
}
use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.
the class PluginDescriptor method findConfigurableFileds.
/*
 * Collects the configurable skeleton of the plugin represented by cls.
 *
 * Superclasses (up to, but excluding, Object) are visited first, then every field
 * declared by cls that carries the Configurable annotation is registered with its name,
 * a string value and the annotation itself. The string value is the current value of the
 * field read from instance; when that value is null or cannot be read, the entry of
 * defaultValues for the field name is used, or the empty string if there is none.
 */
private void findConfigurableFileds(Class<?> cls, Object instance) {
    Class<?> parent = cls.getSuperclass();
    if (parent != null && parent != Object.class) {
        findConfigurableFileds(parent, instance);
    }
    for (Field declared : cls.getDeclaredFields()) {
        Configurable annotation = declared.getAnnotation(Configurable.class);
        if (annotation == null) {
            // not a configurable field
            continue;
        }
        String fieldName = declared.getName();
        declared.setAccessible(true);
        Object current = null;
        try {
            current = declared.get(instance);
        } catch (Exception ignored) {
            // best effort: an unreadable field is treated as having no current value
        }
        String text;
        if (current != null) {
            text = current.toString();
        } else {
            String fallback = this.defaultValues.get(fieldName);
            text = fallback != null ? fallback : "";
        }
        configurableFields.add(new ConfigurableField(fieldName, text, annotation));
    }
}
use of org.ow2.proactive.resourcemanager.nodesource.common.Configurable in project scheduling by ow2-proactive.
the class BatchJobInfrastructure method startNode.
/**
 * Builds the command line to execute on the PBS frontend and wait for every
 * launched nodes to register. If the node doesn't register (ie. runs
 * {@link #internalRegisterAcquiredNode(Node)} isn't called) before the
 * timeout (configurable) value, an exception is raised. If the qSub command
 * submitted to the PBS frontend fails, the node supposed to be launched is
 * not expected anymore and will be discarded at registration time.
 * <p>
 * The method returns normally only when the node is detected as acquired inside the
 * monitoring loop; every exit from that loop ends with an {@link RMException}.
 *
 * @throws RMException if the ssh command cannot be executed, if monitoring the ssh
 *                     subprocess failed repeatedly (circuit breaker), or if the
 *                     monitoring loop ended without the node being acquired
 */
private void startNode() throws RMException {
    CommandLineBuilder clb = new CommandLineBuilder();
    // generate the node name
    // current rmcore shortID should be added to ensure uniqueness
    String nodeName = getBatchinJobSystemName() + "-" + nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    clb.setNodeName(nodeName);
    clb.setJavaPath(this.javaPath);
    clb.setRmURL(getRmUrl());
    clb.setRmHome(this.schedulingPath);
    clb.setSourceName(this.nodeSource.getName());
    clb.setPaProperties(this.javaOptions);
    // NOTE(review): the three catch blocks below continue after handleFailedDeployment;
    // presumably that helper throws RMException, otherwise host/cmd stay null - confirm
    try {
        clb.setCredentialsValueAndNullOthers(new String(getCredentials().getBase64()));
    } catch (KeyException e) {
        this.handleFailedDeployment(clb, e);
    }
    InetAddress host = null;
    try {
        host = InetAddress.getByName(this.serverName);
    } catch (UnknownHostException e) {
        this.handleFailedDeployment(clb, e);
    }
    String deleteCmd = getDeleteJobCommand();
    String submitCmd = getSubmitJobCommand();
    // build the command: echo "script.sh params"|qsub params
    // cmd is the one actually executed, obfuscatedCmd is the one attached to the deploying node
    String cmd = null;
    String obfuscatedCmd = null;
    try {
        cmd = "echo \\\"" + clb.buildCommandLine(true).replace("\"", "\\\"") + "\\\" | " + submitCmd + " " + this.submitJobOpt;
        obfuscatedCmd = "echo \\\"" + clb.buildCommandLine(false).replace("\"", "\\\"") + "\\\" | " + submitCmd + " " + this.submitJobOpt;
    } catch (IOException e) {
        this.handleFailedDeployment(clb, e);
    }
    // add a deploying node.
    final String dnURL = super.addDeployingNode(nodeName, obfuscatedCmd, "Deploying node on " + getBatchinJobSystemName() + " scheduler", this.nodeTimeOut);
    putPnTimeout(dnURL, Boolean.FALSE);
    // executing the command
    Process p;
    try {
        p = Utils.runSSHCommand(host, cmd, this.sshOptions);
    } catch (IOException e1) {
        throw new RMException("Cannot execute ssh command: " + cmd + " on host: " + this.serverName, e1);
    }
    // recover the Job ID through stdout (read until end of stream)
    StringBuilder submitOutput = new StringBuilder();
    InputStream in = p.getInputStream();
    try {
        int b;
        while ((b = in.read()) > -1) {
            submitOutput.append((char) b);
        }
    } catch (IOException e) {
        // keep going with whatever was read so far, but do not hide the failure
        logger.warn("Cannot fully read the output of the submit command for node " + nodeName, e);
    }
    String id = submitOutput.toString();
    // check for registration
    // at this point, the ssh process should have already exited because it
    // only handle the job submission, not the execution... furthermore
    // the "id" is defined
    String lf = System.lineSeparator();
    final long timeout = nodeTimeOut;
    long t1 = System.currentTimeMillis();
    // Hack. SSHClient fails but qSub succeeds.
    boolean isJobIDValid = false;
    // Tries to wait for this node
    // registration...
    // the loop gives up after this many exceptions raised by the wait step below
    int circuitBreakerThreshold = 5;
    while (!getPnTimeout(dnURL) && circuitBreakerThreshold > 0) {
        try {
            int exitCode = p.exitValue();
            if (exitCode != 0 && !isJobIDValid) {
                logger.warn("SSH subprocess at " + host.getHostName() + " exit code != 0 but IM tries to recover from this error...Current submit command's output: " + id + " and associated node's name: " + nodeName);
                String extractedID = this.extractSubmitOutput(id);
                String errput = this.extractProcessErrput(p);
                final String description = "SSH command failed to launch node on " + getBatchinJobSystemName() + " scheduler" + lf + " >Error code: " + exitCode + lf + " >Errput: " + errput + " >Output: " + id;
                // a non-empty extracted ID marks the job ID as valid, so this recovery
                // branch is not entered again on later iterations
                if (extractedID != null && !extractedID.equals("")) {
                    isJobIDValid = true;
                }
                // defines how to recover from this state
                // throws a RMException if we can't
                handleWrongJobTermination(isJobIDValid, nodeName, dnURL, host, id, description, exitCode, submitCmd, deleteCmd);
            }
        } catch (IllegalThreadStateException e) {
            // process has not returned yet
            logger.trace("Waiting for ssh process to exit in BatchJobInfrastructure");
        }
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok
            p.destroy();
            addNodeAndDecrementDeployingNode(nodeName, this.extractSubmitOutput(id));
            return;
        }
        try {
            logger.debug("Waiting for node " + nodeName + " registration... time to timeout: " + (timeout - (System.currentTimeMillis() - t1)));
            Thread.sleep(BatchJobInfrastructure.NODE_ACQUISITION_CHECK_RATE);
        } catch (Exception e) {
            // NOTE(review): this also catches InterruptedException without restoring the
            // interrupt flag; left unchanged because re-interrupting would alter the loop timing
            circuitBreakerThreshold--;
            logger.error("While monitoring ssh subprocess.", e);
        }
    }
    // end of while loop, either deploying node timeout/removed or
    // threshold reached
    // the node is not expected anymore
    atomicRemovePnTimeoutAndJob(nodeName, dnURL, p, id);
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring ssh subprocess.");
        throw new RMException("Several exceptions occurred while monitoring ssh subprocess.");
    }
    // if we are here we reached an invalid state
    throw new RMException("Invalid state, exit from a control loop with threshold > 0 and expected deploying node");
}
Aggregations