use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.
the class NodesRecoveryManager method addRMNodeToCoreAndSource.
/**
 * Re-attaches a recovered node to its node source and to the RM core.
 * <p>
 * The node is first added to the node source through
 * {@code internalAddNodeAfterRecovery} and registered as available in the
 * core. A node that is a {@link FakeDownNodeForRecovery} only gets its lock
 * restored and the down node hook triggered; any other node additionally has
 * its access token property read and is added to the topology manager (a
 * topology failure is logged, not propagated).
 *
 * @param nodeSource the node source that owns the recovered node
 * @param nodeStates per-state counters, updated with the state of the recovered node
 * @param rmNodeData the persisted data of the node
 * @param nodeUrl the url of the node, used for the down node notification
 * @param node the recovered node, possibly a {@link FakeDownNodeForRecovery}
 * @param previousState the state forwarded to the down node hook
 * @return the {@link RMNode} created by the node source for this node
 * @throws AddingNodesException if reading the access token of the node fails
 */
private synchronized RMNode addRMNodeToCoreAndSource(NodeSource nodeSource, Map<NodeState, Integer> nodeStates,
        RMNodeData rmNodeData, String nodeUrl, Node node, NodeState previousState) {
    RMNode recoveredNode = nodeSource.internalAddNodeAfterRecovery(node, rmNodeData);

    // the node source protects its nodes as soon as it declares at least one token
    boolean sourceDeclaresToken = false;
    if (nodeSource.getNodeUserAccessType().getTokens() != null) {
        sourceDeclaresToken = nodeSource.getNodeUserAccessType().getTokens().length > 0;
    }

    boolean nodeDeclaresToken = false;
    this.rmCore.registerAvailableNode(recoveredNode);

    if (node instanceof FakeDownNodeForRecovery) {
        // nothing can be asked to a fake down node: restore its lock and notify
        this.nodesLockRestorationManager.handle(recoveredNode, rmNodeData.getProvider());
        logger.info("Triggering down node notification for " + nodeUrl);
        this.triggerDownNodeHookIfNecessary(nodeSource, rmNodeData, nodeUrl, previousState);
    } else {
        try {
            String accessToken = node.getProperty(RMNodeStarter.NODE_ACCESS_TOKEN);
            nodeDeclaresToken = accessToken != null && !accessToken.isEmpty();
            if (nodeDeclaresToken) {
                logger.debug("Node " + node.getNodeInformation().getURL() + " is protected by access token " +
                             accessToken);
            }
        } catch (Exception tokenLookupFailure) {
            throw new AddingNodesException(tokenLookupFailure);
        }

        try {
            RMCore.topologyManager.addNode(recoveredNode.getNode());
        } catch (Exception topologyFailure) {
            // the recovery goes on even if the topology could not be updated
            logger.error("Error occurred when adding recovered node to the topology", topologyFailure);
        }

        this.nodesLockRestorationManager.handle(recoveredNode, rmNodeData.getProvider());
    }

    recoveredNode.setProtectedByToken(nodeDeclaresToken || sourceDeclaresToken);
    this.updateRecoveredNodeStateCounter(nodeStates, recoveredNode.getState());
    return recoveredNode;
}
use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.
the class ListNodeCommand method execute.
/**
 * Fetches the full monitoring state and outputs the node events, optionally
 * restricted to the node source configured on this command.
 * <p>
 * When the {@code nodeSource} field is {@code null} every node event is kept;
 * otherwise only the events whose node source equals it are kept. The
 * selection is pushed on the result stack and written to the output. It is
 * {@code null} when the state carries no node events.
 *
 * @param currentContext the context used to build the request and to report the result
 * @throws CLIException if the request or the processing of its response fails
 */
@Override
public void execute(ApplicationContext currentContext) throws CLIException {
    HttpGet request = new HttpGet(currentContext.getResourceUrl("monitoring/full"));
    HttpResponseWrapper response = execute(request, currentContext);

    if (statusCode(OK) != statusCode(response)) {
        handleError("An error occurred while retrieving nodes:", response, currentContext);
        return;
    }

    RmStateView state = readValue(response, RmStateView.class, currentContext);
    NodeEventView[] allEvents = state.getNodesEvents();

    NodeEventView[] keptEvents;
    if (allEvents == null) {
        keptEvents = null;
    } else if (nodeSource == null) {
        // no filter requested
        keptEvents = allEvents;
    } else {
        List<NodeEventView> matching = new ArrayList<>();
        for (NodeEventView event : allEvents) {
            if (nodeSource.equals(event.getNodeSource())) {
                matching.add(event);
            }
        }
        keptEvents = matching.toArray(new NodeEventView[0]);
    }

    resultStack(currentContext).push(keptEvents);
    writeLine(currentContext, "%s", StringUtility.string(keptEvents));
}
use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.
the class SSHInfrastructure method startNodeImpl.
/**
 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on remote host using SSH, register it manually in the
 * nodesource.
 * <p>
 * The method builds the node starter command line, declares the deploying
 * nodes, launches the command through {@code Utils.runSSHCommand} and then
 * polls once per second until the node is acquired, the SSH subprocess
 * exits, one of the deploying nodes times out, or five exceptions were
 * raised while sleeping between two polls. Whenever the method gives up
 * while the subprocess may still be running, the subprocess is destroyed.
 *
 * @param hostTracker The host on which one the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of deploying or lost nodes urls created
 * @throws RMException
 *             acquisition failed
 */
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs)
        throws RMException {
    String fs = getTargetOSObj().fs;
    CommandLineBuilder clb = super.getDefaultCommandLineBuilder(getTargetOSObj());
    // we take care of spaces in java path
    clb.setJavaPath(this.javaPath);
    // we set the rm.home prop
    clb.setRmHome(schedulingPath);
    // we set the java security policy file, unless the user options already do
    StringBuilder sb = new StringBuilder();
    final boolean containsSpace = schedulingPath.contains(" ");
    String securitycmd = CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine();
    if (!this.javaOptions.contains(securitycmd)) {
        sb.append(securitycmd);
        if (containsSpace) {
            sb.append("\"");
        }
        sb.append(schedulingPath);
        sb.append(fs);
        sb.append("config");
        sb.append(fs);
        sb.append("security.java.policy-client");
        if (containsSpace) {
            sb.append("\"");
        }
        sb.append(" ");
    }
    // we set the log4j configuration file, unless the user options already do
    String log4jcmd = CentralPAPropertyRepository.LOG4J.getCmdLine();
    if (!this.javaOptions.contains(log4jcmd)) {
        sb.append(log4jcmd);
        if (containsSpace) {
            sb.append("\"");
        }
        // log4j only understands urls
        sb.append("file:");
        if (!schedulingPath.startsWith("/")) {
            sb.append("/");
        }
        sb.append(schedulingPath.replace("\\", "/"));
        sb.append("/config/log/node.properties");
        if (containsSpace) {
            sb.append("\"");
        }
        sb.append(" ");
    }
    // we add extra java/PA configuration
    sb.append(this.javaOptions);
    clb.setPaProperties(sb.toString());
    // afterwards, node's name
    // generate the node name
    // current rmcore shortID should be added to ensure uniqueness
    final String nodeName = nodeNameBuilder.generateNodeName(hostTracker);
    clb.setNodeName(nodeName);
    clb.setNumberOfNodes(nbNodes);
    // finally, the credential's value
    final String credString;
    try {
        credString = new String(getCredentials().getBase64());
    } catch (KeyException e1) {
        throw new RMException("Could not get base64 credentials", e1);
    }
    clb.setCredentialsValueAndNullOthers(credString);
    // add an expected node. every unexpected node will be discarded
    String cmdLine;
    String obfuscatedCmdLine;
    try {
        cmdLine = clb.buildCommandLine(true);
        obfuscatedCmdLine = clb.buildCommandLine(false);
    } catch (IOException e2) {
        throw new RMException("Cannot build the " + RMNodeStarter.class.getSimpleName() + "'s command line.", e2);
    }
    // one escape the command to make it runnable through ssh
    if (cmdLine.contains("\"")) {
        cmdLine = cmdLine.replaceAll("\"", "\\\\\"");
    }
    // we create a new deploying node before ssh command ran
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames,
                                                 obfuscatedCmdLine,
                                                 "Deploying nodes on host " + hostTracker.getResolvedAddress(),
                                                 super.nodeTimeOut));
    addTimeouts(depNodeURLs);
    final Process p;
    try {
        p = Utils.runSSHCommand(hostTracker.getResolvedAddress(), cmdLine, sshOptions);
    } catch (IOException e1) {
        // NOTE(review): these messages embed cmdLine, not obfuscatedCmdLine;
        // presumably cmdLine carries the credentials in clear - confirm
        // whether it should be exposed here.
        multipleDeclareDeployingNodeLost(depNodeURLs,
                                         "Cannot run command: " + cmdLine + ", with ssh options: " + sshOptions +
                                                      " -\n The following exception occutred:\n " +
                                                      getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + cmdLine + ", with ssh options: " + sshOptions, e1);
    }
    String lf = System.lineSeparator();
    // number of exceptions tolerated while sleeping between two polls
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            // throws IllegalThreadStateException as long as the subprocess is running
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("SSH subprocess at " + hostTracker.getResolvedAddress().getHostName() +
                             " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node process has exited normally whereas it shouldn't.");
            }
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "SSH command failed to launch node on host " +
                                       hostTracker.getResolvedAddress().getHostName() + lf + " >Error code: " +
                                       exitCode + lf + " >Errput: " + pErrPut + " >Output: " + pOutPut;
            logger.error(description);
            if (super.checkAllNodesAreAcquiredAndDo(createdNodeNames, null, new Runnable() {
                @Override
                public void run() {
                    SSHInfrastructure.this.multipleDeclareDeployingNodeLost(depNodeURLs, description);
                }
            })) {
                return;
            } else {
                // there isn't any race regarding node registration
                throw new RMException("SSH Node " + nodeName + " is not expected anymore because of an error.");
            }
        } catch (IllegalThreadStateException e) {
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        }
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
            p.destroy();
            return;
        }
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
            circuitBreakerThreshold--;
            logger.trace("An exception occurred while monitoring ssh subprocess", e);
        }
    }
    // if we exit because of a timeout
    if (anyTimedOut(depNodeURLs)) {
        // we remove it
        removeTimeouts(depNodeURLs);
        // we destroy the process
        p.destroy();
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    }
    if (circuitBreakerThreshold <= 0) {
        // we give up monitoring: like for the timeout above, the ssh
        // subprocess must not be left running behind us
        p.destroy();
        logger.error("Circuit breaker threshold reached while monitoring ssh subprocess.");
        throw new RMException("Several exceptions occurred while monitoring ssh subprocess.");
    }
}
use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.
the class SSHInfrastructureV2 method startNodeImpl.
/**
 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on remote host using SSH, register it manually in the
 * nodesource.
 * <p>
 * The command line is built from the configured paths, java options and
 * deployment mode, then executed in an "exec" channel of a JSch session. A
 * background task polls once per second until all the created nodes are
 * acquired; the caller waits for it for at most 90% of the node timeout and
 * declares the deploying nodes lost on failure, interruption or timeout.
 * <p>
 * A {@code null} {@code javaOptions} is handled as "no extra option".
 *
 * @param hostTracker The host on which one the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of deploying or lost nodes urls created
 * @throws RMException
 *             acquisition failed
 */
public void startNodeImpl(final HostTracker hostTracker, final int nbNodes, final List<String> depNodeURLs)
        throws RMException {
    String fs = getTargetOSObj().fs;
    ArrayList<String> sb = new ArrayList<>();
    final boolean containsSpace = schedulingPath.contains(" ");
    final boolean nodeJarStartupScriptMode = deploymentMode.equals("useNodeJarStartupScript");
    // javaOptions is optional: the default properties below must be computed
    // even when it is null instead of failing with a NullPointerException
    final String configuredJavaOptions = this.javaOptions == null ? "" : this.javaOptions;
    if (!nodeJarStartupScriptMode) {
        if (containsSpace) {
            sb.add("-Dproactive.home=\"" + schedulingPath + "\"");
        } else {
            sb.add("-Dproactive.home=" + schedulingPath);
        }
    }
    // we set the java security policy file, unless the user options already do
    String securitycmd = CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine();
    if (!configuredJavaOptions.contains(securitycmd) && !nodeJarStartupScriptMode) {
        if (containsSpace) {
            securitycmd += "\"";
        }
        securitycmd += this.schedulingPath + fs + "config" + fs;
        securitycmd += "security.java.policy-client";
        if (containsSpace) {
            securitycmd += "\"";
        }
        sb.add(securitycmd);
    }
    // we set the log4j configuration file, unless the user options already do
    String log4jcmd = CentralPAPropertyRepository.LOG4J.getCmdLine();
    if (!configuredJavaOptions.contains(log4jcmd) && !nodeJarStartupScriptMode) {
        // log4j only understands urls
        if (containsSpace) {
            log4jcmd += "\"";
        }
        log4jcmd += "file:";
        if (!this.schedulingPath.startsWith("/")) {
            log4jcmd += "/";
        }
        log4jcmd += this.schedulingPath.replace("\\", "/");
        log4jcmd += "/config/log/node.properties";
        if (containsSpace) {
            log4jcmd += "\"";
        }
        sb.add(log4jcmd);
    }
    // we add extra java/PA configuration
    final String trimmedJavaOptions = configuredJavaOptions.trim();
    if (!trimmedJavaOptions.isEmpty()) {
        sb.add(trimmedJavaOptions);
    }
    CommandLineBuilder clb = super.getDefaultCommandLineBuilder(getTargetOSObj());
    final boolean deployNodesInDetachedMode = PAResourceManagerProperties.RM_NODES_RECOVERY.getValueAsBoolean() ||
                                              PAResourceManagerProperties.RM_PRESERVE_NODES_ON_SHUTDOWN.getValueAsBoolean();
    if (deployNodesInDetachedMode) {
        // if we do not want to kill the nodes when the RM exits or
        // restarts, then we should launch the nodes in background and
        // ignore the RM termination signal
        clb.setDetached();
    }
    clb.setJavaPath(this.javaPath);
    clb.setRmHome(this.schedulingPath);
    clb.setPaProperties(sb);
    final String nodeName = nodeNameBuilder.generateNodeName(hostTracker);
    clb.setNodeName(nodeName);
    clb.setNumberOfNodes(nbNodes);
    // set the startup script retrieved from NodeCommandLine.properties
    if (!this.deploymentMode.equals("autoGenerated")) {
        clb.setDeploymentMode(deploymentMode);
        clb.setStartupScript((deploymentMode.equals("useStartupScript") ? startupScriptStandard
                                                                        : startupScriptWithNodeJarDownload));
    }
    if (this.deploymentMode.equals("useNodeJarStartupScript")) {
        clb.setNodeJarUrl(nodeJarUrl);
    }
    // finally, the credential's value
    String credString;
    try {
        Client currentClient = super.nodeSource.getAdministrator();
        credString = new String(currentClient.getCredentials().getBase64());
    } catch (KeyException e) {
        throw new RMException("Could not get base64 credentials", e);
    }
    clb.setCredentialsValueAndNullOthers(credString);
    // add an expected node. every unexpected node will be discarded
    String cmdLine;
    String obfuscatedCmdLine;
    try {
        cmdLine = clb.buildCommandLine(true);
        obfuscatedCmdLine = clb.buildCommandLine(false);
    } catch (IOException e) {
        throw new RMException("Cannot build the " + RMNodeStarter.class.getSimpleName() + "'s command line.", e);
    }
    // one escape the command to make it runnable through ssh
    if (cmdLine.contains("\"")) {
        cmdLine = cmdLine.replaceAll("\"", "\\\\\"");
    }
    final String finalCmdLine = cmdLine;
    // The final addDeployingNode() method will initiate a timeout that
    // will declare node as lost and set the description of the failure
    // with a simplistic message, since there is no way to override this
    // mechanism we consider only 90% of timeout to set custom description
    // in case of failure and still allow global timeout
    final int shorterTimeout = Math.round((90 * super.nodeTimeOut) / 100);
    JSch jsch = new JSch();
    final String msg = "deploy on " + hostTracker.getResolvedAddress();
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, obfuscatedCmdLine, msg, super.nodeTimeOut));
    addTimeouts(depNodeURLs);
    Session session;
    try {
        // Create ssh session to the hostname
        session = jsch.getSession(this.sshUsername, hostTracker.getResolvedAddress().getHostName(), this.sshPort);
        if (this.sshPassword == null) {
            jsch.addIdentity(this.sshUsername, this.sshPrivateKey, null, null);
        } else {
            session.setPassword(this.sshPassword);
        }
        session.setConfig(this.sshOptions);
        session.connect(shorterTimeout);
    } catch (JSchException e) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
        throw new RMException("unable to " + msg, e);
    }
    // NOTE(review): finalCmdLine derives from buildCommandLine(true), not from
    // obfuscatedCmdLine; presumably it carries the credentials in clear -
    // confirm whether it should be logged at info level.
    SSHInfrastructureV2.logger.info("Executing SSH command: '" + finalCmdLine + "'");
    ScheduledExecutorService deployService = Executors.newSingleThreadScheduledExecutor();
    try {
        // Create ssh channel to run the cmd; stdout and stderr share one buffer
        ByteArrayOutputStream baos = new ByteArrayOutputStream(DEFAULT_OUTPUT_BUFFER_LENGTH);
        ChannelExec channel;
        try {
            channel = (ChannelExec) session.openChannel("exec");
            channel.setCommand(finalCmdLine);
            channel.setOutputStream(baos);
            channel.setErrStream(baos);
            channel.connect();
        } catch (JSchException e) {
            multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
            throw new RMException("unable to " + msg, e);
        }
        final ChannelExec chan = channel;
        Future<Void> deployResult = deployService.submit(new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                while (!shutDown.get() && !checkAllNodesAreAcquiredAndDo(createdNodeNames, null, null)) {
                    if (anyTimedOut(depNodeURLs)) {
                        throw new IllegalStateException("The upper infrastructure has issued a timeout");
                    }
                    // in detached mode the exit status of the channel is not
                    // checked: processes live completely independently
                    if (!deployNodesInDetachedMode && chan.getExitStatus() != PROCESS_STILL_RUNNING_VALUE) {
                        throw new IllegalStateException("The jvm process of the node has exited prematurely");
                    }
                    try {
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // we know the cause of this interruption just exit
                        return null;
                    }
                }
                // Victory
                return null;
            }
        });
        try {
            deployResult.get(shorterTimeout, TimeUnit.MILLISECONDS);
        } catch (ExecutionException e) {
            declareLostAndThrow("Unable to " + msg + " due to " + e.getCause(), depNodeURLs, channel, baos, e);
        } catch (InterruptedException e) {
            deployResult.cancel(true);
            declareLostAndThrow("Unable to " + msg + " due to an interruption", depNodeURLs, channel, baos, e);
        } catch (TimeoutException e) {
            deployResult.cancel(true);
            declareLostAndThrow("Unable to " + msg + " due to timeout", depNodeURLs, channel, baos, e);
        } finally {
            channel.disconnect();
        }
    } finally {
        removeTimeouts(depNodeURLs);
        session.disconnect();
        deployService.shutdownNow();
    }
}
use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.
the class CLIInfrastructure method startNodeImpl.
/**
 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on remote host using a custom script, register it
 * manually in the nodesource.
 * <p>
 * The deployment script is launched as a child process, then the method
 * polls once per second until the node is acquired, the child process
 * exits, one of the deploying nodes times out, or five exceptions were
 * raised while sleeping between two polls. Whenever the method gives up
 * while the child process may still be running, the process is destroyed.
 *
 * @param hostTracker The host on which one the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of deploying or lost nodes urls created
 * @throws RMException
 *             acquisition failed
 */
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs)
        throws RMException {
    final String nodeName = "SCR-" + this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    // script arguments, in order: host name, node name, node source name, rm url, number of nodes
    final String commandLine = interpreter + " " + deploymentScript.getAbsolutePath() + " " +
                               hostTracker.getResolvedAddress().getHostName() + " " + nodeName + " " +
                               this.nodeSource.getName() + " " + getRmUrl() + " " + nbNodes;
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames,
                                                 commandLine,
                                                 "Deploying node on host " + hostTracker.getResolvedAddress(),
                                                 this.nodeTimeOut));
    addTimeouts(depNodeURLs);
    final Process p;
    try {
        logger.debug("Launching the command: " + commandLine);
        // NOTE(review): Runtime.exec(String) splits the command on whitespace,
        // so an interpreter or script path containing a space is broken into
        // several arguments - confirm this is acceptable for the deployments.
        p = Runtime.getRuntime().exec(commandLine);
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs,
                                         "Cannot run command: " + commandLine +
                                                      " - \n The following exception occured: " +
                                                      getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + commandLine, e1);
    }
    String lf = System.lineSeparator();
    // number of exceptions tolerated while sleeping between two polls
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            // throws IllegalThreadStateException as long as the child process is running
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("Child process at " + hostTracker.getResolvedAddress().getHostName() +
                             " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node script has exited normally whereas it shouldn't.");
            }
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "Script failed to launch a node on host " +
                                       hostTracker.getResolvedAddress().getHostName() + lf + " >Error code: " +
                                       exitCode + lf + " >Errput: " + pErrPut + " >Output: " + pOutPut;
            logger.error(description);
            if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {
                @Override
                public void run() {
                    multipleDeclareDeployingNodeLost(depNodeURLs, description);
                }
            })) {
                return;
            } else {
                // there isn't any race regarding node registration
                throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
            }
        } catch (IllegalThreadStateException e) {
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        }
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
            logger.debug("Destroying the process: " + p);
            p.destroy();
            return;
        }
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
            circuitBreakerThreshold--;
            logger.trace("An exception occurred while monitoring a child process", e);
        }
    }
    // if we exit because of a timeout
    if (this.anyTimedOut(depNodeURLs)) {
        // we remove it
        removeTimeouts(depNodeURLs);
        // we destroy the process
        p.destroy();
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    }
    if (circuitBreakerThreshold <= 0) {
        // we give up monitoring: like for the timeout above, the child
        // process must not be left running behind us
        p.destroy();
        logger.error("Circuit breaker threshold reached while monitoring a child process.");
        throw new RMException("Several exceptions occurred while monitoring a child process.");
    }
}
Aggregations