Use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.
The class RMTestUser, method connect.
/**
 * Connects the given test user to the Resource Manager located at {@code rmUrl}.
 * <p>
 * The user name, password and URL are remembered on this object, any previous
 * connection is dropped through {@code disconnectFromRM()}, and the RM proxy
 * (an active object wrapping a monitor event receiver) is created on first use.
 * The call returns only once the proxy initialization has completed.
 *
 * @param user  the test user whose {@code username} and {@code password} are used to log in
 * @param rmUrl the URL of the Resource Manager to connect to
 * @throws RMException                   declared by the original signature
 * @throws KeyException                  declared by the original signature
 * @throws LoginException                declared by the original signature
 * @throws ActiveObjectCreationException declared by the original signature
 * @throws NodeException                 declared by the original signature
 */
public void connect(TestUsers user, String rmUrl) throws RMException, KeyException, LoginException, ActiveObjectCreationException, NodeException {
    connectedUserName = user.username;
    connectedUserPassword = user.password;
    this.rmUrl = rmUrl;

    disconnectFromRM();

    // Lazily create the proxy: it is kept across successive connections.
    if (rmProxy == null) {
        monitorsHandler = new RMMonitorsHandler();
        rmProxy = PAActiveObject.turnActive(new RMMonitorEventReceiver(monitorsHandler));
        RMTHelper.log("RM Proxy initialized : " + PAActiveObject.getUrl(rmProxy));
    }

    RMTHelper.log("Connecting user " + connectedUserName + " to the Resource Manager at " + rmUrl);

    final String login = CredData.parseLogin(connectedUserName);
    final String domain = CredData.parseDomain(connectedUserName);
    final CredData credentials = new CredData(login, domain, connectedUserPassword);

    // Block until init has finished, so that the calling thread never starts
    // working with a proxy that is still being initialized.
    PAFuture.waitFor(rmProxy.init(rmUrl, credentials));
}
Use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.
The class NodeSource, method removeNode.
/**
 * Removes the node from the node source.
 * <p>
 * A node found in {@code nodes} is also removed from the topology manager and
 * handed to the infrastructure manager for removal; a failure of the
 * infrastructure manager is logged and does not prevent the removal from being
 * reported as successful. A node found only in {@code downNodes} is simply
 * forgotten. When the node source is flagged for shutdown and {@code nodes} is
 * empty after the removal, the node source services are shut down.
 *
 * @param nodeUrl   the url of the node to be released
 * @param initiator the client on whose behalf the removal is performed; forwarded to
 *                  {@code shutdownNodeSourceServices} when the shutdown is triggered
 * @return a wrapper holding {@code true} when the node was known to this node source
 *         (either up or down), {@code false} when the url does not belong to it
 */
public BooleanWrapper removeNode(String nodeUrl, Client initiator) {
    // node could have fallen between remove request and the confirm
    if (this.nodes.containsKey(nodeUrl)) {
        logger.info("[" + name + "] removing node : " + nodeUrl);
        Node node = nodes.remove(nodeUrl);
        RMCore.topologyManager.removeNode(node);
        try {
            infrastructureManager.internalRemoveNode(node);
        } catch (RMException e) {
            // An RMException does not necessarily wrap another exception:
            // fall back to its own message instead of dereferencing a null cause.
            Throwable cause = e.getCause();
            String message = (cause != null) ? cause.getMessage() : e.getMessage();
            logger.error(message, e);
        }
    } else {
        Node downNode = downNodes.remove(nodeUrl);
        if (downNode != null) {
            logger.info("[" + name + "] removing down node : " + nodeUrl);
        } else {
            logger.error("[" + name + "] removing node : " + nodeUrl + " which not belongs to this node source");
            return new BooleanWrapper(false);
        }
    }
    // A pending shutdown is completed as soon as the last node is gone.
    if (this.toShutdown && this.nodes.size() == 0) {
        this.shutdownNodeSourceServices(initiator);
    }
    return new BooleanWrapper(true);
}
Use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.
The class NodeSource, method detectedPingedDownNode.
/**
 * Marks a node as down.
 * <p>
 * The node is taken out of this node source's set of nodes and, when it was
 * actually present there, remembered in {@code downNodes} and removed from the
 * topology manager. The infrastructure manager is notified in both cases, and
 * the RM core is finally told that the node is down. The node remains in the
 * rmcore nodes list until the user decides to remove it or the node source is
 * shut down.
 * <p>
 * The request is ignored (with a warning) while the node source is shutting down.
 *
 * @param nodeName the name of the node detected as down
 * @param nodeUrl  the url of the node detected as down
 */
public void detectedPingedDownNode(String nodeName, String nodeUrl) {
    final String logPrefix = "[" + name + "] ";

    if (toShutdown) {
        logger.warn(logPrefix + "detectedPingedDownNode request discarded because node source is shutting down");
        return;
    }

    logger.warn(logPrefix + "Detected down node: " + nodeUrl);

    final Node removedNode = nodes.remove(nodeUrl);
    if (removedNode == null) {
        // almost no information about the node apart from its name and url
        try {
            infrastructureManager.internalNotifyDownNode(nodeName, nodeUrl, null);
        } catch (RMException notificationFailure) {
            logger.error("New empty node " + nodeUrl + " could not be created to handle down node", notificationFailure);
        }
    } else {
        downNodes.put(nodeUrl, removedNode);
        try {
            RMCore.topologyManager.removeNode(removedNode);
            infrastructureManager.internalNotifyDownNode(nodeName, nodeUrl, removedNode);
        } catch (RMException removalFailure) {
            logger.error("Error while removing down node: " + nodeUrl, removalFailure);
        }
    }

    rmcore.setDownNode(nodeUrl);
}
Use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.
The class AutoUpdateInfrastructure, method startNodeImpl.
/**
 * Internal node acquisition method.
 * <p>
 * Starts a PA runtime on remote host using a custom script, register it
 * manually in the nodesource.
 * <p>
 * The configured command is filled in with node specific properties (node
 * name, host name, node source credentials and name, number of nodes) and
 * then with the JVM system properties, and is launched through
 * {@code bash -c}. The launched process is then polled once per second until
 * one of the following happens:
 * <ul>
 * <li>the process exits (normally or not): the deploying nodes are declared
 * lost, or an {@link RMException} is thrown;</li>
 * <li>the node is reported as acquired: the process tree is killed and the
 * method returns;</li>
 * <li>one of the deploying nodes times out: the process is destroyed and an
 * {@link RMException} is thrown;</li>
 * <li>five exceptions occurred while sleeping between two polls: an
 * {@link RMException} is thrown.</li>
 * </ul>
 * If the credentials of the node source administrator cannot be read, the
 * error is logged and the method returns without deploying anything.
 * <p>
 * An interruption received while waiting does not stop the monitoring, but
 * the interrupt status of the calling thread is restored before returning.
 *
 * @param hostTracker The host on which one the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of deploying or lost nodes urls created
 * @throws org.ow2.proactive.resourcemanager.exception.RMException
 *             acquisition failed
 */
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs) throws RMException {
    final String nodeName = this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    String credentials = "";
    try {
        credentials = new String(nodeSource.getAdministrator().getCredentials().getBase64());
    } catch (KeyException e) {
        // keep the cause in the log: it is the only trace of why nothing gets deployed
        logger.error("Invalid credentials", e);
        return;
    }
    Properties localProperties = new Properties();
    localProperties.put(NODE_NAME, nodeName);
    localProperties.put(HOST_NAME, hostTracker.getResolvedAddress().getHostName());
    localProperties.put(NODESOURCE_CREDENTIALS, credentials);
    localProperties.put(NODESOURCE_NAME, nodeSource.getName());
    // Properties is meant to hold String values only: a non-String value is
    // invisible to Properties.getProperty, so the count is stored as a String.
    localProperties.put(NB_NODES, String.valueOf(nbNodes));
    String filledCommand = replaceProperties(command, localProperties);
    filledCommand = replaceProperties(filledCommand, System.getProperties());
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, filledCommand, "Deploying node on host " + hostTracker.getResolvedAddress(), this.nodeTimeOut));
    addTimeouts(depNodeURLs);
    Process p;
    try {
        logger.debug("Deploying node: " + nodeName);
        logger.debug("Launching the command: " + filledCommand);
        p = Runtime.getRuntime().exec(new String[] { "bash", "-c", filledCommand });
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "Cannot run command: " + filledCommand + " - \n The following exception occurred: " + getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + filledCommand, e1);
    }
    String lf = System.lineSeparator();
    // number of exceptions tolerated while sleeping between two polls
    int circuitBreakerThreshold = 5;
    // remembers an interruption swallowed while sleeping, so that the interrupt
    // status can be handed back to the caller once the monitoring is over
    boolean interrupted = false;
    try {
        while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
            try {
                // exitValue() throws IllegalThreadStateException while the process is still running
                int exitCode = p.exitValue();
                if (exitCode != 0) {
                    logger.error("Child process at " + hostTracker.getResolvedAddress().getHostName() + " exited abnormally (" + exitCode + ").");
                } else {
                    logger.error("Launching node script has exited normally whereas it shouldn't.");
                }
                String pOutPut = Utils.extractProcessOutput(p);
                String pErrPut = Utils.extractProcessErrput(p);
                final String description = "Script failed to launch a node on host " + hostTracker.getResolvedAddress().getHostName() + lf + " >Error code: " + exitCode + lf + " >Errput: " + pErrPut + " >Output: " + pOutPut;
                logger.error(description);
                if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {
                    @Override
                    public void run() {
                        multipleDeclareDeployingNodeLost(depNodeURLs, description);
                    }
                })) {
                    return;
                } else {
                    // there isn't any race regarding node registration
                    throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
                }
            } catch (IllegalThreadStateException e) {
                // the process is still alive: keep waiting for the registration
                logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
            }
            if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
                // registration is ok, we destroy the process
                logger.debug("Destroying the process: " + p);
                try {
                    ProcessTree.get().get(p).kill();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
                return;
            }
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // sleep() cleared the interrupt status: restore it on exit
                    interrupted = true;
                }
                circuitBreakerThreshold--;
                logger.trace("An exception occurred while monitoring a child process", e);
            }
        }
        // if we exit because of a timeout
        if (anyTimedOut(depNodeURLs)) {
            // we remove it
            removeTimeouts(depNodeURLs);
            // we destroy the process
            p.destroy();
            throw new RMException("Deploying Node " + nodeName + " not expected any more");
        }
        if (circuitBreakerThreshold <= 0) {
            logger.error("Circuit breaker threshold reached while monitoring a child process.");
            throw new RMException("Several exceptions occurred while monitoring a child process.");
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.
The class BatchJobInfrastructure, method deleteJob.
/**
 * Runs a {@link #getDeleteJobCommand()} command on the remote host for the
 * given jobID and monitors the exit.
 * <p>
 * The command is issued over ssh on {@code serverName} and its exit code is
 * polled once per second. The method returns as soon as the command exits
 * with code 0. If the command has not exited after {@code nodeTimeOut}
 * milliseconds, the ssh process is destroyed and the deletion is reported as
 * failed.
 * <p>
 * An interruption received while waiting does not abort the wait, but the
 * interrupt status of the calling thread is restored before returning.
 *
 * @param jobID
 *            the jobID string to delete
 * @throws RMException
 *             if the {@link #getDeleteJobCommand()} command could not be
 *             launched, returned a non-zero exit code, or timed out
 */
private void deleteJob(String jobID) throws RMException {
    String deleteCmd = getDeleteJobCommand();
    String cmd = deleteCmd + " " + jobID;
    Process del;
    try {
        del = Utils.runSSHCommand(InetAddress.getByName(this.serverName), cmd, this.sshOptions);
    } catch (Exception e1) {
        String message = "Cannot ssh " + this.serverName + " to issue " + deleteCmd + " command. job with jobID: " + jobID + " won't be deleted.";
        logger.warn(message, e1);
        throw new RMException(message, e1);
    }
    long timeStamp = System.currentTimeMillis();
    // remembers an interruption swallowed while sleeping, so that the interrupt
    // status can be handed back to the caller once the wait is over
    boolean interrupted = false;
    try {
        while (true) {
            try {
                // exitValue() throws IllegalThreadStateException while the process is still running
                int exitCode = del.exitValue();
                if (exitCode != 0) {
                    String message = "Cannot delete job " + jobID + ". " + deleteCmd + " command returned != 0 -> " + exitCode;
                    logger.error(message);
                    throw new RMException(message);
                } else {
                    logger.debug("Job " + jobID + " deleted.");
                    return;
                }
            } catch (IllegalThreadStateException e) {
                // the process hasn't exited yet... don't eat exception, trace
                // it...
                logger.trace("waiting for " + deleteCmd + " exit code.", e);
            }
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // keep waiting for the command, but do not lose the interruption:
                // sleep() cleared the interrupt status, it is restored on exit
                interrupted = true;
                logger.trace("sleep interrupted while waiting for " + deleteCmd + " to exit.", e);
            }
            if ((System.currentTimeMillis() - timeStamp) >= nodeTimeOut) {
                // do not leave a hung ssh process behind
                del.destroy();
                String message = "Cannot delete job " + jobID + ". " + deleteCmd + " command timed out.";
                logger.error(message);
                throw new RMException(message);
            }
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
Aggregations