Search in sources :

Example 16 with RMException

use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.

the class RMTestUser method connect.

/**
 * Connects the given test user to the Resource Manager located at {@code rmUrl}.
 * <p>
 * Any previous connection is dropped first. The RM proxy (an active object wrapping
 * a monitoring event receiver) is created only when none exists yet.
 *
 * @param user the test user whose username and password are used to log in
 * @param rmUrl the url of the Resource Manager to connect to
 */
public void connect(TestUsers user, String rmUrl) throws RMException, KeyException, LoginException, ActiveObjectCreationException, NodeException {
    this.connectedUserName = user.username;
    this.connectedUserPassword = user.password;
    this.rmUrl = rmUrl;

    disconnectFromRM();

    if (rmProxy == null) {
        // lazily build the proxy: the handler collects events received by the active receiver
        monitorsHandler = new RMMonitorsHandler();
        rmProxy = PAActiveObject.turnActive(new RMMonitorEventReceiver(monitorsHandler));
        RMTHelper.log("RM Proxy initialized : " + PAActiveObject.getUrl(rmProxy));
    }

    RMTHelper.log("Connecting user " + connectedUserName + " to the Resource Manager at " + rmUrl);

    String login = CredData.parseLogin(connectedUserName);
    String domain = CredData.parseDomain(connectedUserName);
    CredData userCredentials = new CredData(login, domain, connectedUserPassword);

    // block until init has completed so that the calling thread never works
    // with a proxy that is still being initialized
    PAFuture.waitFor(rmProxy.init(rmUrl, userCredentials));
}
Also used : CredData(org.ow2.proactive.authentication.crypto.CredData) RMMonitorEventReceiver(functionaltests.monitor.RMMonitorEventReceiver) RMMonitorsHandler(functionaltests.monitor.RMMonitorsHandler)

Example 17 with RMException

use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.

the class NodeSource method removeNode.

/**
 * Removes the node from the node source.
 * <p>
 * A node that is currently registered in this node source is also removed from the
 * topology manager and from the infrastructure manager. A node that is only known
 * as a down node is simply dropped from the down nodes. When the node source is
 * flagged for shutdown and no registered node remains, the node source services
 * are shut down.
 *
 * @param nodeUrl the url of the node to be released
 * @param initiator the client handed over to the node source shutdown when this
 *                  removal triggers it
 * @return a wrapper holding {@code true} when the node was known to this node source
 *         (registered or down), {@code false} when it does not belong to it
 */
public BooleanWrapper removeNode(String nodeUrl, Client initiator) {
    // node could have fallen between remove request and the confirm
    if (this.nodes.containsKey(nodeUrl)) {
        logger.info("[" + name + "] removing node : " + nodeUrl);
        Node node = nodes.remove(nodeUrl);
        RMCore.topologyManager.removeNode(node);
        try {
            infrastructureManager.internalRemoveNode(node);
        } catch (RMException e) {
            // an RMException does not necessarily wrap another exception: fall back
            // to its own message rather than dereferencing a null cause
            Throwable cause = e.getCause();
            logger.error(cause != null ? cause.getMessage() : e.getMessage(), e);
        }
    } else {
        Node downNode = downNodes.remove(nodeUrl);
        if (downNode != null) {
            logger.info("[" + name + "] removing down node : " + nodeUrl);
        } else {
            logger.error("[" + name + "] removing node : " + nodeUrl + " which not belongs to this node source");
            return new BooleanWrapper(false);
        }
    }
    if (this.toShutdown && this.nodes.size() == 0) {
        // last registered node is gone while a shutdown is pending: finish the shutdown
        this.shutdownNodeSourceServices(initiator);
    }
    return new BooleanWrapper(true);
}
Also used : BooleanWrapper(org.objectweb.proactive.core.util.wrapper.BooleanWrapper) RMDeployingNode(org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode) RMNode(org.ow2.proactive.resourcemanager.rmnode.RMNode) Node(org.objectweb.proactive.core.node.Node) AbstractRMNode(org.ow2.proactive.resourcemanager.rmnode.AbstractRMNode) RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 18 with RMException

use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.

the class NodeSource method detectedPingedDownNode.

/**
 * Marks a node as down. The node is taken out of the node source node set and kept
 * in the down nodes; it remains in the rmcore nodes list until the user decides to
 * remove it or the node source is shut down.
 * <p>
 * The request is discarded when the node source is shutting down.
 *
 * @param nodeName the name of the node detected as down
 * @param nodeUrl the url of the node detected as down
 */
public void detectedPingedDownNode(String nodeName, String nodeUrl) {
    if (toShutdown) {
        logger.warn("[" + name + "] detectedPingedDownNode request discarded because node source is shutting down");
        return;
    }

    logger.warn("[" + name + "] Detected down node: " + nodeUrl);

    Node lostNode = nodes.remove(nodeUrl);
    if (lostNode == null) {
        // the node is not registered here: only its name and url are available
        try {
            infrastructureManager.internalNotifyDownNode(nodeName, nodeUrl, null);
        } catch (RMException notifyFailure) {
            logger.error("New empty node " + nodeUrl + " could not be created to handle down node", notifyFailure);
        }
    } else {
        // keep track of the node as down, then propagate to topology and infrastructure
        downNodes.put(nodeUrl, lostNode);
        try {
            RMCore.topologyManager.removeNode(lostNode);
            infrastructureManager.internalNotifyDownNode(nodeName, nodeUrl, lostNode);
        } catch (RMException notifyFailure) {
            logger.error("Error while removing down node: " + nodeUrl, notifyFailure);
        }
    }

    rmcore.setDownNode(nodeUrl);
}
Also used : RMDeployingNode(org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode) RMNode(org.ow2.proactive.resourcemanager.rmnode.RMNode) Node(org.objectweb.proactive.core.node.Node) AbstractRMNode(org.ow2.proactive.resourcemanager.rmnode.AbstractRMNode) RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 19 with RMException

use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.

the class AutoUpdateInfrastructure method startNodeImpl.

/**
 * Internal node acquisition method
 * <p>
 * Starts a PA runtime on remote host using a custom script, register it
 * manually in the nodesource.
 * <p>
 * The configured command is filled with node specific and system properties, run
 * through {@code bash -c}, and then monitored once per second until one of the
 * following happens: the node is acquired (the process is then killed), the
 * process exits (treated as a failure), one of the deploying nodes times out, or
 * too many exceptions occurred while waiting.
 * <p>
 * If the node source administrator credentials cannot be read, the error is
 * logged and the method returns without starting anything.
 *
 * @param hostTracker The host on which one the node will be started
 * @param nbNodes number of nodes to deploy
 * @param depNodeURLs list of deploying or lost nodes urls created
 * @throws org.ow2.proactive.resourcemanager.exception.RMException
 *             acquisition failed
 */
protected void startNodeImpl(HostTracker hostTracker, int nbNodes, final List<String> depNodeURLs) throws RMException {
    final String nodeName = this.nodeSource.getName() + "-" + ProActiveCounter.getUniqID();
    final String credentials;
    try {
        credentials = new String(nodeSource.getAdministrator().getCredentials().getBase64());
    } catch (KeyException e) {
        // log the cause as well, otherwise the credential problem cannot be diagnosed
        logger.error("Invalid credentials", e);
        return;
    }
    // properties substituted into the user supplied command
    Properties localProperties = new Properties();
    localProperties.put(NODE_NAME, nodeName);
    localProperties.put(HOST_NAME, hostTracker.getResolvedAddress().getHostName());
    localProperties.put(NODESOURCE_CREDENTIALS, credentials);
    localProperties.put(NODESOURCE_NAME, nodeSource.getName());
    // NOTE(review): this stores an Integer; Properties.getProperty ignores non-String
    // values - confirm that replaceProperties reads it in a way that copes with that
    localProperties.put(NB_NODES, nbNodes);
    String filledCommand = replaceProperties(command, localProperties);
    filledCommand = replaceProperties(filledCommand, System.getProperties());
    final List<String> createdNodeNames = RMNodeStarter.getWorkersNodeNames(nodeName, nbNodes);
    depNodeURLs.addAll(addMultipleDeployingNodes(createdNodeNames, filledCommand, "Deploying node on host " + hostTracker.getResolvedAddress(), this.nodeTimeOut));
    addTimeouts(depNodeURLs);
    Process p;
    try {
        logger.debug("Deploying node: " + nodeName);
        logger.debug("Launching the command: " + filledCommand);
        p = Runtime.getRuntime().exec(new String[] { "bash", "-c", filledCommand });
    } catch (IOException e1) {
        multipleDeclareDeployingNodeLost(depNodeURLs, "Cannot run command: " + filledCommand + " - \n The following exception occurred: " + getStackTraceAsString(e1));
        throw new RMException("Cannot run command: " + filledCommand, e1);
    }
    String lf = System.lineSeparator();
    // number of exceptions tolerated while sleeping between two checks
    int circuitBreakerThreshold = 5;
    while (!anyTimedOut(depNodeURLs) && circuitBreakerThreshold > 0) {
        try {
            // exitValue() throws IllegalThreadStateException while the process is still
            // running, so reaching the next line means the script has already exited
            int exitCode = p.exitValue();
            if (exitCode != 0) {
                logger.error("Child process at " + hostTracker.getResolvedAddress().getHostName() + " exited abnormally (" + exitCode + ").");
            } else {
                logger.error("Launching node script has exited normally whereas it shouldn't.");
            }
            String pOutPut = Utils.extractProcessOutput(p);
            String pErrPut = Utils.extractProcessErrput(p);
            final String description = "Script failed to launch a node on host " + hostTracker.getResolvedAddress().getHostName() + lf + "   >Error code: " + exitCode + lf + "   >Errput: " + pErrPut + "   >Output: " + pOutPut;
            logger.error(description);
            if (super.checkNodeIsAcquiredAndDo(nodeName, null, new Runnable() {

                public void run() {
                    multipleDeclareDeployingNodeLost(depNodeURLs, description);
                }
            })) {
                return;
            } else {
                // there isn't any race regarding node registration
                throw new RMException("A node " + nodeName + " is not expected anymore because of an error.");
            }
        } catch (IllegalThreadStateException e) {
            // expected while the launching script is still alive
            logger.trace("IllegalThreadStateException while waiting for " + nodeName + " registration");
        }
        if (super.checkNodeIsAcquiredAndDo(nodeName, null, null)) {
            // registration is ok, we destroy the process
            logger.debug("Destroying the process: " + p);
            try {
                ProcessTree.get().get(p).kill();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
            return;
        }
        try {
            Thread.sleep(1000);
        } catch (Exception e) {
            // NOTE(review): an InterruptedException lands here too and the interrupt
            // status is not restored - confirm this is the intended behavior
            circuitBreakerThreshold--;
            logger.trace("An exception occurred while monitoring a child process", e);
        }
    }
    // if we exit because of a timeout
    if (anyTimedOut(depNodeURLs)) {
        // we remove it
        removeTimeouts(depNodeURLs);
        // we destroy the process
        p.destroy();
        throw new RMException("Deploying Node " + nodeName + " not expected any more");
    }
    if (circuitBreakerThreshold <= 0) {
        logger.error("Circuit breaker threshold reached while monitoring a child process.");
        throw new RMException("Several exceptions occurred while monitoring a child process.");
    }
}
Also used : Throwables.getStackTraceAsString(com.google.common.base.Throwables.getStackTraceAsString) IOException(java.io.IOException) Properties(java.util.Properties) KeyException(java.security.KeyException) RMException(org.ow2.proactive.resourcemanager.exception.RMException) KeyException(java.security.KeyException) IOException(java.io.IOException) RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Example 20 with RMException

use of org.ow2.proactive.resourcemanager.exception.RMException in project scheduling by ow2-proactive.

the class BatchJobInfrastructure method deleteJob.

/**
 * Issues the {@link #getDeleteJobCommand()} command over ssh on the configured
 * server for the given jobID, then polls once per second for the command to exit.
 *
 * @param jobID
 *            the jobID string to delete
 * @throws RMException
 *             if the ssh command cannot be started, if it exits with a non-zero
 *             code, or if it has not exited once the node timeout has elapsed
 */
private void deleteJob(String jobID) throws RMException {
    final String deleteCmd = getDeleteJobCommand();
    final String fullCommand = deleteCmd + " " + jobID;

    final Process deletion;
    try {
        deletion = Utils.runSSHCommand(InetAddress.getByName(this.serverName), fullCommand, this.sshOptions);
    } catch (Exception sshFailure) {
        final String message = "Cannot ssh " + this.serverName + " to issue " + deleteCmd + " command. job with jobID: " + jobID + " won't be deleted.";
        logger.warn(message, sshFailure);
        throw new RMException(message, sshFailure);
    }

    final long startedAt = System.currentTimeMillis();
    for (;;) {
        try {
            final int status = deletion.exitValue();
            if (status == 0) {
                logger.debug("Job " + jobID + " deleted.");
                return;
            }
            final String message = "Cannot delete job " + jobID + ". " + deleteCmd + " command returned != 0 -> " + status;
            logger.error(message);
            throw new RMException(message);
        } catch (IllegalThreadStateException stillRunning) {
            // the command has not exited yet: trace it rather than swallowing it
            logger.trace("waiting for " + deleteCmd + " exit code.", stillRunning);
        }

        try {
            Thread.sleep(1000);
        } catch (InterruptedException interrupted) {
            // the sleep was interrupted: trace it rather than swallowing it
            logger.trace("sleep interrupted while waiting for " + deleteCmd + " to exit.", interrupted);
        }

        final long elapsed = System.currentTimeMillis() - startedAt;
        if (elapsed >= nodeTimeOut) {
            final String message = "Cannot delete job " + jobID + ". " + deleteCmd + " command timed out.";
            logger.error(message);
            throw new RMException(message);
        }
    }
}
Also used : Throwables.getStackTraceAsString(com.google.common.base.Throwables.getStackTraceAsString) KeyException(java.security.KeyException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) RMException(org.ow2.proactive.resourcemanager.exception.RMException) RMException(org.ow2.proactive.resourcemanager.exception.RMException)

Aggregations

RMException (org.ow2.proactive.resourcemanager.exception.RMException)19 Throwables.getStackTraceAsString (com.google.common.base.Throwables.getStackTraceAsString)9 IOException (java.io.IOException)8 Node (org.objectweb.proactive.core.node.Node)8 KeyException (java.security.KeyException)6 RMNode (org.ow2.proactive.resourcemanager.rmnode.RMNode)6 RMDeployingNode (org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode)5 CredData (org.ow2.proactive.authentication.crypto.CredData)4 UnknownHostException (java.net.UnknownHostException)3 Test (org.junit.Test)3 Credentials (org.ow2.proactive.authentication.crypto.Credentials)3 AbstractRMNode (org.ow2.proactive.resourcemanager.rmnode.AbstractRMNode)3 CommandLineBuilder (org.ow2.proactive.resourcemanager.utils.CommandLineBuilder)3 Permission (java.security.Permission)2 ArrayList (java.util.ArrayList)2 POST (javax.ws.rs.POST)2 Path (javax.ws.rs.Path)2 Produces (javax.ws.rs.Produces)2 BooleanWrapper (org.objectweb.proactive.core.util.wrapper.BooleanWrapper)2 RMProxyUserInterface (org.ow2.proactive.resourcemanager.common.util.RMProxyUserInterface)2