Search in sources :

Example 36 with Scheduler

use of org.ow2.proactive.scheduler.common.Scheduler in project scheduling by ow2-proactive.

the class NodeSource method buildRMNodeAfterRecovery.

/**
 * Re-creates the {@link RMNode} wrapper of a node that could be looked up
 * again once the RM has been recovered. Nothing is configured on the node
 * here, because it is supposed to be recovered from the database and is
 * therefore configured already.
 *
 * @param node the node that was looked up again after the recovery
 * @param rmNodeData the recovered data used to populate the RMNode
 * @return the expected RMNode; it is additionally marked busy for the
 *         recorded owner when the recovered state is {@code NodeState.BUSY}
 */
private RMNode buildRMNodeAfterRecovery(Node node, RMNodeData rmNodeData) {
    RMNodeImpl recoveredNode = new RMNodeImpl(node,
                                              stub,
                                              rmNodeData.getName(),
                                              rmNodeData.getNodeUrl(),
                                              rmNodeData.getProvider(),
                                              rmNodeData.getHostname(),
                                              rmNodeData.getJmxUrls(),
                                              rmNodeData.getJvmName(),
                                              rmNodeData.getUserPermission(),
                                              rmNodeData.getState());

    boolean foundBusy = rmNodeData.getState().equals(NodeState.BUSY);
    if (!foundBusy) {
        // nothing more to restore for a node that was not busy
        return recoveredNode;
    }

    // a busy node must get its owner back so that it is not handed out as free
    logger.info("Node " + rmNodeData.getName() + " was found busy after scheduler recovery with owner " +
                rmNodeData.getOwner());
    recoveredNode.setBusy(rmNodeData.getOwner());
    return recoveredNode;
}
Also used : RMNodeImpl(org.ow2.proactive.resourcemanager.rmnode.RMNodeImpl)

Example 37 with Scheduler

use of org.ow2.proactive.scheduler.common.Scheduler in project scheduling by ow2-proactive.

the class ScriptExecutor method executeScripts.

/**
 * Runs the selection scripts on {@code rmnode} one after another and
 * processes their results.
 * <p>
 * Scripts that the selection manager reports as already passed are skipped.
 * After the loop the manager is notified that script execution finished for
 * this node URL, and the node is cleaned.
 *
 * @return the underlying node if every executed script selected it;
 *         {@code null} if the node did not match, if a script timed out,
 *         or if cleaning the node failed (in that last case {@code null} is
 *         returned even when a script exception was recorded)
 * @throws ScriptException if a script result reported an error or an
 *         exception was raised while executing a script, and the node
 *         could be cleaned afterwards
 */
private Node executeScripts() {
    boolean selectionScriptSpecified = selectionScriptList != null && selectionScriptList.size() > 0;
    boolean nodeMatch = true;
    // remembered here and only thrown after the node has been cleaned
    ScriptException exception = null;
    if (selectionScriptSpecified) {
        // execute the selection scripts sequentially on this node
        for (SelectionScript script : selectionScriptList) {
            if (manager.isPassed(script, criteria.getBindings(), rmnode)) {
                // already executed static script
                logger.debug(rmnode.getNodeURL() + " : " + script.hashCode() + " skipping script execution");
                continue;
            }
            logger.info(rmnode.getNodeURL() + " : " + script.hashCode() + " executing");
            try {
                ScriptResult<Boolean> scriptResult = rmnode.executeScript(script, criteria.getBindings());
                // processing the results
                if (!MOP.isReifiedObject(scriptResult) && scriptResult.getException() != null) {
                    // could not create script execution handler
                    // probably the node is down
                    logger.warn(rmnode.getNodeURL() + " : " + script.hashCode() + " exception", scriptResult.getException());
                    logger.warn(rmnode.getNodeURL() + " : pinging the node");
                    rmnode.getNodeSource().pingNode(rmnode.getNode());
                    nodeMatch = false;
                    break;
                } else {
                    try {
                        // wait for the script result, bounded by the configured selection script timeout
                        PAFuture.waitFor(scriptResult, PAResourceManagerProperties.RM_SELECT_SCRIPT_TIMEOUT.getValueAsLong());
                    } catch (ProActiveTimeoutException e) {
                        logger.warn("Timeout on " + rmnode.getNodeURL());
                        // do not produce an exception here
                        nodeMatch = false;
                        break;
                    }
                    // display the script result and output in the scheduler logs
                    // NOTE(review): the null checks below look redundant, scriptResult was
                    // already used above - confirm before removing them
                    if (scriptResult != null && logger.isInfoEnabled()) {
                        logger.info(rmnode.getNodeURL() + " : " + script.hashCode() + " result " + scriptResult.getResult());
                        if (scriptResult.getOutput() != null && scriptResult.getOutput().length() > 0) {
                            logger.info(rmnode.getNodeURL() + " : " + script.hashCode() + " output\n" + scriptResult.getOutput());
                        }
                    }
                    if (scriptResult != null && scriptResult.errorOccured()) {
                        // record the error; the result is still handed to the manager below
                        nodeMatch = false;
                        exception = new ScriptException(scriptResult.getException());
                        logger.warn(rmnode.getNodeURL() + " : exception during the script execution", scriptResult.getException());
                    }
                    // hand the script result over to the selection manager;
                    // a false return value means the node is not selected
                    if (!manager.processScriptResult(script, criteria.getBindings(), scriptResult, rmnode)) {
                        nodeMatch = false;
                        break;
                    }
                }
            } catch (Exception ex) {
                // proactive or network exception occurred when script was executed
                logger.warn(rmnode.getNodeURL() + " : " + script.hashCode() + " exception", ex);
                nodeMatch = false;
                exception = new ScriptException(ex);
                break;
            }
        }
    }
    // notify the manager that script execution is over for this node,
    // even when no selection script was specified
    manager.scriptExecutionFinished(rmnode.getNodeURL());
    if (selectionScriptSpecified && logger.isDebugEnabled()) {
        if (nodeMatch) {
            logger.debug(rmnode.getNodeURL() + " : selected");
        } else {
            logger.debug(rmnode.getNodeURL() + " : not selected");
        }
    }
    // cleaning the node
    try {
        rmnode.clean();
    } catch (Throwable t) {
        logger.warn(rmnode.getNodeURL() + " : exception in cleaning", t);
        logger.warn(rmnode.getNodeURL() + " : pinging the node");
        try {
            // 'pingNode' call can fail with exception if NodeSource was destroyed
            rmnode.getNodeSource().pingNode(rmnode.getNode());
        } catch (Throwable pingError) {
            logger.warn(rmnode.getNodeURL() + " : nodeSource " + rmnode.getNodeSourceName() + " seems to be removed ", pingError);
        }
        // a node that cannot be cleaned is never returned; this also drops any recorded script exception
        return null;
    }
    // the node was cleaned successfully: now surface the script failure, if any
    if (exception != null) {
        throw exception;
    }
    if (nodeMatch) {
        return rmnode.getNode();
    } else {
        return null;
    }
}
Also used : ProActiveTimeoutException(org.objectweb.proactive.core.ProActiveTimeoutException) ScriptException(org.ow2.proactive.scripting.ScriptException) SelectionScript(org.ow2.proactive.scripting.SelectionScript) ScriptException(org.ow2.proactive.scripting.ScriptException) ProActiveTimeoutException(org.objectweb.proactive.core.ProActiveTimeoutException)

Example 38 with Scheduler

use of org.ow2.proactive.scheduler.common.Scheduler in project scheduling by ow2-proactive.

the class TestSSHInfrastructureV2 method testSSHInfrastructureV2.

/**
 * Creates a node source backed by {@code SSHInfrastructureV2} with a
 * {@code StaticPolicy}, waits for its creation with {@code NB_NODES} nodes
 * and checks that the RM then reports {@code NB_NODES} nodes in total and
 * as free.
 */
@Test
public void testSSHInfrastructureV2() throws Exception {
    nsname = "testSSHInfra";
    resourceManager = this.rmHelper.getResourceManager();

    RMTHelper.log("Test - Create SSH infrastructure on ssh://localhost on port " + this.port);

    String infrastructureType = SSHInfrastructureV2.class.getName();
    String policyType = StaticPolicy.class.getName();
    resourceManager.createNodeSource(nsname,
                                     infrastructureType,
                                     infraParams,
                                     policyType,
                                     policyParameters,
                                     NODES_NOT_RECOVERABLE);
    this.rmHelper.waitForNodeSourceCreation(nsname, NB_NODES, this.rmHelper.getMonitorsHandler());

    RMTHelper.log("Checking scheduler state after node source creation");
    RMState stateAfterCreation = resourceManager.getState();
    // every deployed node must be known to the RM and still be free
    assertEquals(NB_NODES, stateAfterCreation.getTotalNodesNumber());
    assertEquals(NB_NODES, stateAfterCreation.getFreeNodesNumber());
}
Also used : StaticPolicy(org.ow2.proactive.resourcemanager.nodesource.policy.StaticPolicy) SSHInfrastructureV2(org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2) RMState(org.ow2.proactive.resourcemanager.common.RMState) Test(org.junit.Test) RMFunctionalTest(functionaltests.utils.RMFunctionalTest)

Example 39 with Scheduler

use of org.ow2.proactive.scheduler.common.Scheduler in project scheduling by ow2-proactive.

the class TestSSHInfrastructureV2RestartDownNodesPolicy method testSSHInfrastructureV2WithRestartDownNodes.

/**
 * Creates an {@code SSHInfrastructureV2} node source governed by a
 * {@code RestartDownNodesPolicy}, kills the runtime of the first acquired
 * node, waits for the nodes to be reported down and removed, and then for
 * node events signalling that they were added again. Finally checks that
 * the RM reports {@code NB_NODES} nodes in total and {@code NB_NODES}
 * alive nodes.
 */
@Test
public void testSSHInfrastructureV2WithRestartDownNodes() throws Exception {
    final int busyEventTimeout = 60000;
    final int downEventTimeout = 120000;

    nsname = "testSSHInfraRestart";
    resourceManager = this.rmHelper.getResourceManager();

    RMTHelper.log("Test - Create SSH infrastructure with RestartDownNodes policy on ssh://localhost on port " + TestSSHInfrastructureV2.port);
    resourceManager.createNodeSource(nsname,
                                     SSHInfrastructureV2.class.getName(),
                                     TestSSHInfrastructureV2.infraParams,
                                     RestartDownNodesPolicy.class.getName(),
                                     TestSSHInfrastructureV2.policyParameters,
                                     NODES_NOT_RECOVERABLE);

    RMMonitorsHandler eventsHandler = this.rmHelper.getMonitorsHandler();
    this.rmHelper.waitForNodeSourceCreation(nsname, NB_NODES, eventsHandler);

    RMState rmState = resourceManager.getState();
    assertEquals(NB_NODES, rmState.getTotalNodesNumber());
    assertEquals(NB_NODES, rmState.getFreeNodesNumber());

    // acquire all the nodes; the test cannot go on if some of them are missing
    NodeSet acquiredNodes = resourceManager.getNodes(new Criteria(NB_NODES));
    if (acquiredNodes.size() != NB_NODES) {
        String failure = "Illegal state : the infrastructure could not deploy nodes or they died immediately. Ending test";
        RMTHelper.log(failure);
        throw new RuntimeException(failure);
    }

    // consume the state change event triggered for each acquired node
    for (Node acquired : acquiredNodes) {
        String acquiredUrl = acquired.getNodeInformation().getURL();
        rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, acquiredUrl, busyEventTimeout, eventsHandler);
    }

    String nodeUrl = acquiredNodes.get(0).getNodeInformation().getURL();
    RMTHelper.log("Killing nodes");
    // Nodes will be redeployed only if we kill the whole runtime
    rmHelper.killRuntime(nodeUrl);

    RMTHelper.log("Wait for down nodes detection by the rm");
    for (Node killed : acquiredNodes) {
        String killedUrl = killed.getNodeInformation().getURL();
        RMNodeEvent downEvent = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                                          killedUrl,
                                                          downEventTimeout,
                                                          eventsHandler);
        assertEquals(NodeState.DOWN, downEvent.getNodeState());
    }
    // the removal events are awaited only once every node was seen down
    for (Node killed : acquiredNodes) {
        String killedUrl = killed.getNodeInformation().getURL();
        rmHelper.waitForNodeEvent(RMEventType.NODE_REMOVED, killedUrl, downEventTimeout, eventsHandler);
    }

    RMTHelper.log("Dumping events not consumed yet");
    eventsHandler.dumpEvents();

    RMTHelper.log("Wait for nodes restart by the policy");
    rmHelper.waitForAnyMultipleNodeEvent(RMEventType.NODE_ADDED, NB_NODES, eventsHandler);
    for (int restarted = 0; restarted < NB_NODES; restarted++) {
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_REMOVED, eventsHandler);
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_ADDED, eventsHandler);
        rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, eventsHandler);
    }

    RMTHelper.log("Final checks on the scheduler state");
    acquiredNodes = resourceManager.getNodes(new Criteria(NB_NODES));
    for (Node restartedNode : acquiredNodes) {
        System.out.println("NODE::" + restartedNode.getNodeInformation().getURL());
    }
    rmState = resourceManager.getState();
    assertEquals(NB_NODES, rmState.getTotalNodesNumber());
    // check amount of all nodes that are not down
    assertEquals(NB_NODES, rmState.getTotalAliveNodesNumber());
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) RestartDownNodesPolicy(org.ow2.proactive.resourcemanager.nodesource.policy.RestartDownNodesPolicy) Node(org.objectweb.proactive.core.node.Node) SSHInfrastructureV2(org.ow2.proactive.resourcemanager.nodesource.infrastructure.SSHInfrastructureV2) Criteria(org.ow2.proactive.utils.Criteria) RMState(org.ow2.proactive.resourcemanager.common.RMState) RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent) RMMonitorsHandler(functionaltests.monitor.RMMonitorsHandler) Test(org.junit.Test) RMFunctionalTest(functionaltests.utils.RMFunctionalTest)

Example 40 with Scheduler

use of org.ow2.proactive.scheduler.common.Scheduler in project scheduling by ow2-proactive.

the class FreezeCommand method execute.

/**
 * Asks the scheduler REST interface to freeze the scheduler for the
 * current session, pushes the boolean outcome on the result stack and
 * prints a matching message. Any exception raised while doing so is
 * passed to {@code handleError}.
 *
 * @param currentContext the context providing the REST client and the session id
 * @throws CLIException declared by the command contract
 */
@Override
public void execute(ApplicationContext currentContext) throws CLIException {
    SchedulerRestInterface restScheduler = currentContext.getRestClient().getScheduler();
    try {
        boolean frozen = restScheduler.freezeScheduler(currentContext.getSessionId());
        resultStack(currentContext).push(frozen);
        String outcome = frozen ? "Scheduler successfully frozen." : "Cannot freeze scheduler.";
        writeLine(currentContext, outcome);
    } catch (Exception e) {
        handleError("Error occurred while trying to freeze the scheduler:", e, currentContext);
    }
}
Also used : SchedulerRestInterface(org.ow2.proactive_grid_cloud_portal.common.SchedulerRestInterface) CLIException(org.ow2.proactive_grid_cloud_portal.cli.CLIException)

Aggregations

Scheduler (org.ow2.proactive.scheduler.common.Scheduler)97 JobId (org.ow2.proactive.scheduler.common.job.JobId)51 PermissionException (org.ow2.proactive.scheduler.common.exception.PermissionException)49 NotConnectedException (org.ow2.proactive.scheduler.common.exception.NotConnectedException)46 Path (javax.ws.rs.Path)45 Produces (javax.ws.rs.Produces)43 NotConnectedRestException (org.ow2.proactive_grid_cloud_portal.scheduler.exception.NotConnectedRestException)42 Test (org.junit.Test)39 PermissionRestException (org.ow2.proactive_grid_cloud_portal.scheduler.exception.PermissionRestException)38 UnknownJobException (org.ow2.proactive.scheduler.common.exception.UnknownJobException)36 File (java.io.File)34 GET (javax.ws.rs.GET)34 TaskResult (org.ow2.proactive.scheduler.common.task.TaskResult)31 SchedulerRestInterface (org.ow2.proactive_grid_cloud_portal.common.SchedulerRestInterface)31 CLIException (org.ow2.proactive_grid_cloud_portal.cli.CLIException)30 JobState (org.ow2.proactive.scheduler.common.job.JobState)29 UnknownJobRestException (org.ow2.proactive_grid_cloud_portal.scheduler.exception.UnknownJobRestException)28 GZIP (org.jboss.resteasy.annotations.GZIP)23 KeyException (java.security.KeyException)20 CredData (org.ow2.proactive.authentication.crypto.CredData)19