Search in sources :

Example 56 with NodeSource

use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.

the class AutoUpdateInfrastructure method startNodeImpl.

 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on the remote host using a custom script and registers it
 * manually in the node source.
 * @param hostTracker the host on which the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of the URLs of the deploying or lost nodes created
 * @throws org.ow2.proactive.resourcemanager.exception.RMException
 *             if the acquisition failed
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs) throws RMException {
    final String nodeName = this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    String credentials = "";
    try {
        credentials = new String(nodeSource.getAdministrator().getCredentials().getBase64());
    } catch (KeyException e) {
        logger.error("Invalid credentials");
    Properties localProperties = new Properties();
    localProperties.put(NODE_NAME, nodeName);
    localProperties.put(HOST_NAME, hostTracker.getResolvedAddress().getHostName());
    localProperties.put(NODESOURCE_CREDENTIALS, credentials);
    localProperties.put(NODESOURCE_NAME, nodeSource.getName());
    localProperties.put(NB_NODES, nbNodes);
    String filledCommand = replaceProperties(command, localProperties);
    filledCommand = replaceProperties(filledCommand, System.getProperties());
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, filledCommand, "Deploying node on host " + hostTracker.getResolvedAddress(), this.nodeTimeOut));
    Process p;
    try {
        logger.debug("Deploying node: " + nodeName);
        logger.debug("Launching the command: " + filledCommand);
        p = Runtime.getRuntime().exec(new String[] { "bash", "-c", filledCommand });
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "Cannot run command: " + filledCommand + " - \n The following exception occurred: " + getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + filledCommand, e1);
    String lf = System.lineSeparator();
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("Child process at " + hostTracker.getResolvedAddress().getHostName() + " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node script has exited normally whereas it shouldn't.");
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "Script failed to launch a node on host " + hostTracker.getResolvedAddress().getHostName() + lf + "   >Error code: " + exitCode + lf + "   >Errput: " + pErrPut + "   >Output: " + pOutPut;
            if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {

                public void run() {
                    multipleDeclareDeployingNodeLost(depNodeURLs, description);
            })) {
            } else {
                // there isn't any race regarding node registration
                throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
        } catch (IllegalThreadStateException e) {
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
            logger.debug("Destroying the process: " + p);
            try {
            } catch (InterruptedException e) {
        try {
        } catch (Exception e) {
            logger.trace("An exception occurred while monitoring a child process", e);
    // if we exit because of a timeout
    if (anyTimedOut(depNodeURLs)) {
        // we remove it
        // we destroy the process
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring a child process.");
        throw new RMException("Several exceptions occurred while monitoring a child process.");
Also used : Throwables.getStackTraceAsString( IOException( Properties(java.util.Properties) KeyException( RMException(org.ow2.proactive.resourcemanager.exception.RMException) KeyException( IOException( RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 57 with NodeSource

use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.

the class CLIInfrastructure method startNodeImpl.

 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on the remote host using a custom script and registers it
 * manually in the node source.
 * @param hostTracker the host on which the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of the URLs of the deploying or lost nodes created
 * @throws RMException
 *             if the acquisition failed
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs) throws RMException {
    final String nodeName = "SCR-" + this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    final String commandLine = interpreter + " " + deploymentScript.getAbsolutePath() + " " + hostTracker.getResolvedAddress().getHostName() + " " + nodeName + " " + this.nodeSource.getName() + " " + getRmUrl() + " " + nbNodes;
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, commandLine, "Deploying node on host " + hostTracker.getResolvedAddress(), this.nodeTimeOut));
    Process p;
    try {
        logger.debug("Launching the command: " + commandLine);
        p = Runtime.getRuntime().exec(commandLine);
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "Cannot run command: " + commandLine + " - \n The following exception occured: " + getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + commandLine, e1);
    String lf = System.lineSeparator();
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("Child process at " + hostTracker.getResolvedAddress().getHostName() + " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node script has exited normally whereas it shouldn't.");
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "Script failed to launch a node on host " + hostTracker.getResolvedAddress().getHostName() + lf + "   >Error code: " + exitCode + lf + "   >Errput: " + pErrPut + "   >Output: " + pOutPut;
            if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {

                public void run() {
                    multipleDeclareDeployingNodeLost(depNodeURLs, description);
            })) {
            } else {
                // there isn't any race regarding node registration
                throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
        } catch (IllegalThreadStateException e) {
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
            logger.debug("Destroying the process: " + p);
        try {
        } catch (Exception e) {
            logger.trace("An exception occurred while monitoring a child process", e);
    // if we exit because of a timeout
    if (this.anyTimedOut(depNodeURLs)) {
        // we remove it
        // we destroy the process
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring a child process.");
        throw new RMException("Several exceptions occurred while monitoring a child process.");
Also used : Throwables.getStackTraceAsString( IOException( RMException(org.ow2.proactive.resourcemanager.exception.RMException) IOException( RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 58 with NodeSource

use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.

the class SSHInfrastructure method startNodeImpl.

 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on the remote host using SSH and registers it manually in the
 * node source.
 * @param hostTracker the host on which the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of the URLs of the deploying or lost nodes created
 * @throws RMException
 *             if the acquisition failed
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs) throws RMException {
    String fs = getTargetOSObj().fs;
    CommandLineBuilder clb = super.getDefaultCommandLineBuilder(getTargetOSObj());
    // we take care of spaces in java path
    // we set the rm.home prop
    // we set the java security policy file
    StringBuilder sb = new StringBuilder();
    final boolean containsSpace = schedulingPath.contains(" ");
    String securitycmd = CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine();
    if (!this.javaOptions.contains(securitycmd)) {
        if (containsSpace) {
        if (containsSpace) {
        sb.append(" ");
    // we set the log4j configuration file
    String log4jcmd = CentralPAPropertyRepository.LOG4J.getCmdLine();
    if (!this.javaOptions.contains(log4jcmd)) {
        if (containsSpace) {
        // log4j only understands urls
        if (!schedulingPath.startsWith("/")) {
            sb.append("/" + schedulingPath.replace("\\", "/"));
        } else {
            sb.append(schedulingPath.replace("\\", "/"));
        if (containsSpace) {
        sb.append(" ");
    // we add extra java/PA configuration
    // afterwards, node's name
    // generate the node name
    // current rmcore shortID should be added to ensure uniqueness
    final String nodeName = nodeNameBuilder.generateNodeName(hostTracker);
    // finally, the credential's value
    String credString = null;
    try {
        credString = new String(getCredentials().getBase64());
    } catch (KeyException e1) {
        throw new RMException("Could not get base64 credentials", e1);
    // add an expected node. every unexpected node will be discarded
    String cmdLine;
    String obfuscatedCmdLine;
    try {
        cmdLine = clb.buildCommandLine(true);
        obfuscatedCmdLine = clb.buildCommandLine(false);
    } catch (IOException e2) {
        throw new RMException("Cannot build the " + RMNodeStarter.class.getSimpleName() + "'s command line.", e2);
    // escape the double quotes in the command so that it can be run through ssh
    if (cmdLine.contains("\"")) {
        cmdLine = cmdLine.replaceAll("\"", "\\\\\"");
    // we create the new deploying nodes before the ssh command is run
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, obfuscatedCmdLine, "Deploying nodes on host " + hostTracker.getResolvedAddress(), super.nodeTimeOut));
    Process p = null;
    try {
        p = Utils.runSSHCommand(hostTracker.getResolvedAddress(), cmdLine, sshOptions);
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "Cannot run command: " + cmdLine + ", with ssh options: " + sshOptions + " -\n The following exception occutred:\n " + getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + cmdLine + ", with ssh options: " + sshOptions, e1);
    String lf = System.lineSeparator();
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("SSH subprocess at " + hostTracker.getResolvedAddress().getHostName() + " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node process has exited normally whereas it shouldn't.");
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "SSH command failed to launch node on host " + hostTracker.getResolvedAddress().getHostName() + lf + "   >Error code: " + exitCode + lf + "   >Errput: " + pErrPut + "   >Output: " + pOutPut;
            if (super.checkAllNodesAreAcquiredAndDo(createdNodeNames, null, new Runnable() {

                public void run() {
                    SSHInfrastructure.this.multipleDeclareDeployingNodeLost(depNodeURLs, description);
            })) {
            } else {
                // there isn't any race regarding node registration
                throw new RMException("SSH Node " + nodeName + " is not expected anymore because of an error.");
        } catch (IllegalThreadStateException e) {
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
        try {
        } catch (Exception e) {
            logger.trace("An exception occurred while monitoring ssh subprocess", e);
    // if we exit because of a timeout
    if (anyTimedOut(depNodeURLs)) {
        // we remove it
        // we destroy the process
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring ssh subprocess.");
        throw new RMException("Several exceptions occurred while monitoring ssh subprocess.");
Also used : Throwables.getStackTraceAsString( CommandLineBuilder(org.ow2.proactive.resourcemanager.utils.CommandLineBuilder) IOException( KeyException( RMException(org.ow2.proactive.resourcemanager.exception.RMException) KeyException( IOException( RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 59 with NodeSource

use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.

the class SSHInfrastructureV2 method startNodeImpl.

 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on the remote host using SSH and registers it manually in the
 * node source.
 * @param hostTracker the host on which the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of the URLs of the deploying or lost nodes created
 * @throws RMException
 *             if the acquisition failed
public void startNodeImpl(final HostTracker hostTracker, final int nbNodes, final List<String> depNodeURLs) throws RMException {
    String fs = getTargetOSObj().fs;
    // we set the java security policy file
    ArrayList<String> sb = new ArrayList<>();
    final boolean containsSpace = schedulingPath.contains(" ");
    if (containsSpace) {
        sb.add("-Dproactive.home=\"" + schedulingPath + "\"");
    } else {
        sb.add("-Dproactive.home=" + schedulingPath);
    String securitycmd = CentralPAPropertyRepository.JAVA_SECURITY_POLICY.getCmdLine();
    if (!this.javaOptions.contains(securitycmd)) {
        if (containsSpace) {
            securitycmd += "\"";
        securitycmd += this.schedulingPath + fs + "config" + fs;
        securitycmd += "";
        if (containsSpace) {
            securitycmd += "\"";
    // we set the log4j configuration file
    String log4jcmd = CentralPAPropertyRepository.LOG4J.getCmdLine();
    if (!this.javaOptions.contains(log4jcmd)) {
        // log4j only understands urls
        if (containsSpace) {
            log4jcmd += "\"";
        log4jcmd += "file:";
        if (!this.schedulingPath.startsWith("/")) {
            log4jcmd += "/";
        log4jcmd += this.schedulingPath.replace("\\", "/");
        log4jcmd += "/config/log/";
        if (containsSpace) {
            log4jcmd += "\"";
    // we add extra java/PA configuration
    if (this.javaOptions != null && !this.javaOptions.trim().isEmpty()) {
    CommandLineBuilder clb = super.getDefaultCommandLineBuilder(getTargetOSObj());
    final boolean deployNodesInDetachedMode = PAResourceManagerProperties.RM_NODES_RECOVERY.getValueAsBoolean() || PAResourceManagerProperties.RM_PRESERVE_NODES_ON_SHUTDOWN.getValueAsBoolean();
    if (deployNodesInDetachedMode) {
        // if we do not want to kill the nodes when the RM exits or
        // restarts, then we should launch the nodes in background and
        // ignore the RM termination signal
    final String nodeName = nodeNameBuilder.generateNodeName(hostTracker);
    // finally, the credential's value
    String credString;
    try {
        Client currentClient = super.nodeSource.getAdministrator();
        credString = new String(currentClient.getCredentials().getBase64());
    } catch (KeyException e) {
        throw new RMException("Could not get base64 credentials", e);
    // add an expected node. every unexpected node will be discarded
    String cmdLine;
    String obfuscatedCmdLine;
    try {
        cmdLine = clb.buildCommandLine(true);
        obfuscatedCmdLine = clb.buildCommandLine(false);
    } catch (IOException e) {
        throw new RMException("Cannot build the " + RMNodeStarter.class.getSimpleName() + "'s command line.", e);
    // escape the double quotes in the command so that it can be run through ssh
    if (cmdLine.contains("\"")) {
        cmdLine = cmdLine.replaceAll("\"", "\\\\\"");
    final String finalCmdLine = cmdLine;
    // The final addDeployingNode() method will initiate a timeout that
    // will declare node as lost and set the description of the failure
    // with a simplistic message, since there is no way to override this
    // mechanism we consider only 90% of timeout to set custom description
    // in case of failure and still allow global timeout
    final int shorterTimeout = Math.round((90 * super.nodeTimeOut) / 100);
    JSch jsch = new JSch();
    final String msg = "deploy on " + hostTracker.getResolvedAddress();
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, obfuscatedCmdLine, msg, super.nodeTimeOut));
    Session session;
    try {
        // Create ssh session to the hostname
        session = jsch.getSession(this.sshUsername, hostTracker.getResolvedAddress().getHostName(), this.sshPort);
        if (this.sshPassword == null) {
            jsch.addIdentity(this.sshUsername, this.sshPrivateKey, null, null);
        } else {
    } catch (JSchException e) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
        throw new RMException("unable to " + msg, e);
    }"Executing SSH command: '" + finalCmdLine + "'");
    ScheduledExecutorService deployService = Executors.newSingleThreadScheduledExecutor();
    try {
        // Create ssh channel to run the cmd
        ByteArrayOutputStream baos = new ByteArrayOutputStream(DEFAULT_OUTPUT_BUFFER_LENGTH);
        ChannelExec channel;
        try {
            channel = (ChannelExec) session.openChannel("exec");
        } catch (JSchException e) {
            multipleDeclareDeployingNodeLost(depNodeURLs, "unable to " + msg + "\n" + getStackTraceAsString(e));
            throw new RMException("unable to " + msg, e);
        final ChannelExec chan = channel;
        Future<Void> deployResult = deployService.submit(new Callable<Void>() {

            public Void call() throws Exception {
                while (!shutDown.get() && !checkAllNodesAreAcquiredAndDo(createdNodeNames, null, null)) {
                    if (anyTimedOut(depNodeURLs)) {
                        throw new IllegalStateException("The upper infrastructure has issued a timeout");
                    // processes live completely independently
                    if (!deployNodesInDetachedMode && chan.getExitStatus() != PROCESS_STILL_RUNNING_VALUE) {
                        throw new IllegalStateException("The jvm process of the node has exited prematurely");
                    try {
                    } catch (InterruptedException e) {
                        // we know the cause of this
                        return null;
                    // interruption just exit
                // Victory
                return null;
        try {
            deployResult.get(shorterTimeout, TimeUnit.MILLISECONDS);
        } catch (ExecutionException e) {
            declareLostAndThrow("Unable to " + msg + " due to " + e.getCause(), depNodeURLs, channel, baos, e);
        } catch (InterruptedException e) {
            declareLostAndThrow("Unable to " + msg + " due to an interruption", depNodeURLs, channel, baos, e);
        } catch (TimeoutException e) {
            declareLostAndThrow("Unable to " + msg + " due to timeout", depNodeURLs, channel, baos, e);
        } finally {
    } finally {
Also used : JSchException(com.jcraft.jsch.JSchException) ArrayList(java.util.ArrayList) Throwables.getStackTraceAsString( CommandLineBuilder(org.ow2.proactive.resourcemanager.utils.CommandLineBuilder) JSch(com.jcraft.jsch.JSch) RMException(org.ow2.proactive.resourcemanager.exception.RMException) Client(org.ow2.proactive.resourcemanager.authentication.Client) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) IOException( ByteArrayOutputStream( KeyException( ChannelExec(com.jcraft.jsch.ChannelExec) KeyException( TimeoutException(java.util.concurrent.TimeoutException) IOException( RMException(org.ow2.proactive.resourcemanager.exception.RMException) ExecutionException(java.util.concurrent.ExecutionException) JSchException(com.jcraft.jsch.JSchException) Session(com.jcraft.jsch.Session)

Example 60 with NodeSource

use of org.ow2.proactive.resourcemanager.nodesource.NodeSource in project scheduling by ow2-proactive.

the class RMRest method deployNodeSource.

 * Start the nodes acquisition of the node source
 * @param sessionId a valid session id
 * @param nodeSourceName the name of the node source to start
 * @return the result of the action, possibly containing the error message
 * @throws NotConnectedException
public NSState deployNodeSource(@HeaderParam("sessionid") String sessionId, @FormParam("nodeSourceName") String nodeSourceName) throws NotConnectedException {
    ResourceManager rm = checkAccess(sessionId);
    NSState nsState = new NSState();
    try {
    } catch (RuntimeException ex) {
    return nsState;
Also used : ResourceManager(org.ow2.proactive.resourcemanager.frontend.ResourceManager) NSState(org.ow2.proactive.resourcemanager.common.NSState) Path( Produces( PUT(


NodeSource (org.ow2.proactive.resourcemanager.nodesource.NodeSource)21 Test (org.junit.Test)17 RMNode (org.ow2.proactive.resourcemanager.rmnode.RMNode)13 BooleanWrapper (org.objectweb.proactive.core.util.wrapper.BooleanWrapper)11 RMDeployingNode (org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode)11 Client (org.ow2.proactive.resourcemanager.authentication.Client)9 Node (org.objectweb.proactive.core.node.Node)8 ResourceManager (org.ow2.proactive.resourcemanager.frontend.ResourceManager)7 ArrayList (java.util.ArrayList)6 RMNodeSourceEvent (org.ow2.proactive.resourcemanager.common.event.RMNodeSourceEvent)6 Permission ( NodeSourceData (org.ow2.proactive.resourcemanager.db.NodeSourceData)5 RMException (org.ow2.proactive.resourcemanager.exception.RMException)5 Throwables.getStackTraceAsString ( IOException ( HashMap (java.util.HashMap)4 HashSet (java.util.HashSet)4 Path ( Produces ( Matchers.anyString (org.mockito.Matchers.anyString)4