Search in sources :

Example 16 with RMNodeEvent

use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.

the class TestTaskRestartOnNodeFailure method testTaskKillNode.

/**
 * Runs one "kill the node while the task is running" scenario.
 *
 * <p>Steps: start a node, submit a job whose task is gated by {@code fileLock}, wait until a
 * node is busy and the task is running, kill the node, start a replacement node, release the
 * lock, then wait for the job to finish and check that the replacement node went BUSY and
 * then FREE before verifying the job result.
 *
 * @param fileLock lock whose path is handed to the job; unlocked once the replacement node is up
 * @param waitBeforeKill when {@code true}, sleep 5 seconds before killing the node
 * @throws Exception if any helper call fails or times out
 */
private void testTaskKillNode(FileLock fileLock, boolean waitBeforeKill) throws Exception {
    Path lockFile = fileLock.lock();
    TestNode nodeToKill = startNode();

    log("Submit job");
    final JobId jobId = schedulerHelper.submitJob(createJob(lockFile.toString()));

    log("Wait when node becomes busy");
    // Consume state-change events until one of them reports a busy node.
    while (true) {
        RMNodeEvent stateChange = schedulerHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, TIMEOUT);
        if (stateChange.getNodeState().equals(NodeState.BUSY)) {
            break;
        }
    }

    log("Wait when task starts");
    schedulerHelper.waitForEventTaskRunning(jobId, "Test task");

    // Two cases are exercised (they existed at the time the test was written):
    //  - waiting some time before the kill: the node failure is detected by the pinger thread;
    //  - killing immediately: the failure is detected by the thread calling TaskLauncher.doTask.
    if (waitBeforeKill) {
        log("Wait some time");
        Thread.sleep(5000);
    }

    String killedNodeUrl = nodeToKill.getNode().getNodeInformation().getURL();
    log("Stop task node process (node " + killedNodeUrl + ")");
    nodeToKill.kill();

    TestNode newNode = startNode();

    log("Let task finish");
    fileLock.unlock();

    log("Wait when job finish");
    schedulerHelper.waitForEventJobFinished(jobId, TIMEOUT);

    // The replacement node is expected to report BUSY and then FREE, in that order.
    String replacementUrl = newNode.getNode().getNodeInformation().getURL();
    RMNodeEvent busyEvent = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, replacementUrl, TIMEOUT);
    assertEquals(NodeState.BUSY, busyEvent.getNodeState());
    RMNodeEvent freeEvent = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, replacementUrl, TIMEOUT);
    assertEquals(NodeState.FREE, freeEvent.getNodeState());

    log("Check job result");
    checkJobResult(schedulerHelper.getSchedulerInterface(), jobId);

    // Clean up the replacement node: unregister it from the resource manager, then stop it.
    schedulerHelper.getResourceManager().removeNode(newNode.getNodeURL(), true);
    newNode.kill();
}
Also used : Path(java.nio.file.Path) TestNode(functionaltests.utils.TestNode) RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent) JobId(org.ow2.proactive.scheduler.common.job.JobId)

Example 17 with RMNodeEvent

use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.

the class TestTaskRestartOnNodeFailure method startNode.

/**
 * Creates a new test node, registers it with the resource manager and waits until it is
 * reported as added and FREE.
 *
 * <p>The created node is also stored in the {@code testNode} field.
 *
 * @return the newly started node
 * @throws Exception if node creation fails or an expected event does not arrive within TIMEOUT
 */
private TestNode startNode() throws Exception {
    final int index = startedNodesCounter++;
    // The log label uses "node-N" while the node itself is named "nodeN".
    log("Start new node: node-" + index);

    testNode = schedulerHelper.createNode("node" + index);
    final String url = testNode.getNode().getNodeInformation().getURL();

    schedulerHelper.getResourceManager().addNode(url);
    schedulerHelper.waitForNodeEvent(RMEventType.NODE_ADDED, url, TIMEOUT);

    // The first state change after the add is expected to be the transition to FREE.
    NodeState stateAfterAdd = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, url, TIMEOUT)
                                             .getNodeState();
    assertEquals(NodeState.FREE, stateAfterAdd);
    return testNode;
}
Also used : RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent)

Example 18 with RMNodeEvent

use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.

the class TestLocalInfrastructureRestartDownNodesPolicy method testRestartDownNodesPolicy.

/**
 * Checks that a node whose runtime is killed is detected as DOWN and that a node is added
 * again afterwards, leaving the expected number of nodes alive.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testRestartDownNodesPolicy() throws Exception {
    nodeSourceName = "Node_source_1";
    RMTHelper.log("Test 1 - restart down nodes policy");
    createNodeSourceWithNodes(nodeSourceName, new Object[] { "ALL", "ALL", "10000" });

    // Initial state: every deployed node is present and free.
    RMState initialState = resourceManager.getState();
    assertEquals(defaultDescriptorNodesNb, initialState.getTotalNodesNumber());
    assertEquals(defaultDescriptorNodesNb, initialState.getFreeNodesNumber());

    // Acquire all nodes and consume one state-change event per acquired node.
    NodeSet ns = resourceManager.getNodes(new Criteria(defaultDescriptorNodesNb));
    for (Node n : ns) {
        rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, n.getNodeInformation().getURL());
    }

    String nodeUrl = ns.get(0).getNodeInformation().getURL();
    // Nodes will be redeployed only if we kill the whole runtime
    rmHelper.killRuntime(nodeUrl);
    RMNodeEvent ev = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, nodeUrl);
    assertEquals(NodeState.DOWN, ev.getNodeState());

    // one node is down - the policy should detect it and redeploy
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_ADDED);
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);

    // Fetch the state again: the object read at the start of the test predates the kill and
    // the redeployment (presumably it is a snapshot - confirm against RMState), so asserting
    // on it would not verify anything about the redeployed node.
    RMState stateAfterRedeploy = resourceManager.getState();
    assertEquals(defaultDescriptorNodesNb, stateAfterRedeploy.getTotalNodesNumber());
    assertEquals(defaultDescriptorNodesNb, stateAfterRedeploy.getTotalAliveNodesNumber());
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) Node(org.objectweb.proactive.core.node.Node) Criteria(org.ow2.proactive.utils.Criteria) RMState(org.ow2.proactive.resourcemanager.common.RMState) RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent) Test(org.junit.Test) RMFunctionalTest(functionaltests.utils.RMFunctionalTest)

Example 19 with RMNodeEvent

use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.

the class TestAdminAddingNodes method testAddNodes.

/**
 * Exercises adding and removing nodes on a dedicated node source, including re-adding a node
 * under the same URL after the previous node was killed in various states (down, free, busy,
 * to-be-removed), and adding an already registered node a second time.
 *
 * <p>All {@code assertEquals} calls use the (expected, actual) argument order so that failure
 * messages report the values the right way round.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testAddNodes() throws Exception {
    final String NS_NAME = "TestAdminAddingNodes";
    int pingFrequency = 6000;
    ResourceManager resourceManager = rmHelper.getResourceManager();
    resourceManager.createNodeSource(NS_NAME, DefaultInfrastructureManager.class.getName(), null, StaticPolicy.class.getName(), null, NODES_NOT_RECOVERABLE);
    rmHelper.waitForNodeSourceEvent(RMEventType.NODESOURCE_CREATED, NS_NAME);
    resourceManager.setNodeSourcePingFrequency(pingFrequency, NS_NAME);

    log("Test 1 : add a node");
    String node1Name = "node1";
    TestNode testNode1 = RMTHelper.createNode(node1Name);
    testNodes.add(testNode1);
    String node1URL = testNode1.getNodeURL();
    resourceManager.addNode(node1URL, NS_NAME);
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node1URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 2 : remove an already deployed node");
    // preemptive removal is useless for this case, because node is free
    resourceManager.removeNode(node1URL, false);
    rmHelper.waitForNodeEvent(RMEventType.NODE_REMOVED, node1URL);
    assertEquals(0, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getTotalAliveNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());

    log("Test 3 : add a node, kill this node, node is detected down, and add a node that has the same URL");
    // Test seems to have a race condition at this step
    String node2Name = "node2";
    TestNode testNode2 = RMTHelper.createNode(node2Name);
    testNodes.add(testNode2);
    String node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    // wait the node added event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());
    testNode2.kill();
    testNodes.remove(testNode2);
    // wait the node down event
    RMNodeEvent evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.DOWN, evt.getNodeState());
    // the down node is still counted in the total, but is neither free nor alive
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    assertEquals(0, resourceManager.getState().getTotalAliveNodesNumber());
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    // wait the node added event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());

    log("Test 4 : add a node, keep this node free, kill this node, and add a node that has the same URL");
    // put a large ping frequency in order to avoid down nodes detection
    resourceManager.setNodeSourcePingFrequency(Integer.MAX_VALUE, NS_NAME);
    // wait the end of last ping sequence
    Thread.sleep(pingFrequency * 2);
    // node2 is free, kill the node
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 5 : add a node, put this node busy, kill this node, and add a node that has the same URL");
    // put the node to busy state
    NodeSet nodes = resourceManager.getAtMostNodes(1, null);
    PAFuture.waitFor(nodes);
    // wait the node busy event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    // node2 is busy, kill the node
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 6 : add a node, put this node toRelease, kill this node, and add a node that has the same URL");
    // put the node to busy state
    nodes = resourceManager.getAtMostNodes(1, null);
    PAFuture.waitFor(nodes);
    // wait the node busy event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    // put the node in to Release state
    resourceManager.removeNode(node2URL, false);
    // wait the node to release event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.TO_BE_REMOVED, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 7");
    // add the same node twice and check that RM will not kill the node. If it does
    // second attempt will fail
    BooleanWrapper result = resourceManager.addNode(node2URL, NS_NAME);
    assertFalse(result.getBooleanValue());
    try {
        // no NODE_ADDED event may arrive for the rejected add; the wait has to time out
        rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL, 8000);
        fail("Should timeout");
    } catch (ProActiveTimeoutException expected) {
        // expected
    }
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) ProActiveTimeoutException(org.objectweb.proactive.core.ProActiveTimeoutException) BooleanWrapper(org.objectweb.proactive.core.util.wrapper.BooleanWrapper) DefaultInfrastructureManager(org.ow2.proactive.resourcemanager.nodesource.infrastructure.DefaultInfrastructureManager) StaticPolicy(org.ow2.proactive.resourcemanager.nodesource.policy.StaticPolicy) TestNode(functionaltests.utils.TestNode) ResourceManager(org.ow2.proactive.resourcemanager.frontend.ResourceManager) RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent) URI(java.net.URI) Test(org.junit.Test) RMFunctionalTest(functionaltests.utils.RMFunctionalTest)

Example 20 with RMNodeEvent

use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.

the class TestConcurrentUsers method testConcurrency.

/**
 * Checks how the resource manager behaves when several clients interact with the same node:
 * a second user trying to release a node it does not hold, releasing a node twice, a client
 * process that goes away while holding a node, and another user disconnecting.
 *
 * <p>Assertions use the (expected, actual) argument order and {@code assertEquals} instead of
 * {@code assertTrue(a == b)} so that failures report the offending values.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testConcurrency() throws Exception {
    ResourceManager resourceManager = rmHelper.getResourceManager();
    String nsName = "TestConcurrentUsers";
    String node1Name = "node1";
    testNode = rmHelper.createNode(node1Name);
    String node1URL = testNode.getNode().getNodeInformation().getURL();
    resourceManager.createNodeSource(nsName, DefaultInfrastructureManager.class.getName(), null, StaticPolicy.class.getName(), null, NODES_NOT_RECOVERABLE);
    rmHelper.waitForNodeSourceEvent(RMEventType.NODESOURCE_CREATED, nsName);
    resourceManager.addNode(node1URL, nsName);
    // waiting for node adding event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node1URL);
    // waiting for the node to be free
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 1 - releasing of the foreign node");
    // acquiring a node
    final NodeSet ns = resourceManager.getAtMostNodes(1, null);
    // waiting for node busy event
    RMNodeEvent evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node1URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, ns.size());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    Thread t = new Thread() {

        @Override
        public void run() {
            try {
                Credentials cred = Credentials.createCredentials(new CredData(CredData.parseLogin("user"), CredData.parseDomain("user"), "pwd"), TestConcurrentUsers.this.rmHelper.getRMAuth().getPublicKey());
                ResourceManager rm2 = TestConcurrentUsers.this.rmHelper.getRMAuth().login(cred);
                rm2.releaseNode(ns.get(0)).getBooleanValue();
                // NOTE(review): this AssertionError is raised on the spawned thread; it is not
                // an Exception, so the catch below does not see it, and nothing in this method
                // hands it back to the test thread after join(). The state assertions that
                // follow the join are what actually guard this scenario.
                fail("Should not be able to release foreign node");
            } catch (Exception e) {
                log(e.getMessage());
            }
        }
    };
    t.start();
    t.join();
    // the node must still be held by the first user
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    resourceManager.releaseNodes(ns);
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node1URL);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 2 - releasing node twice");
    resourceManager.releaseNodes(ns);
    // to make sure everything has been processed
    Thread.sleep(1000);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 3 - client crash detection");
    // run GetAllNodes in a separate JVM; the node is expected to go busy and later free again
    JVMProcessImpl nodeProcess = new JVMProcessImpl(new org.objectweb.proactive.core.process.AbstractExternalProcess.StandardOutputMessageLogger());
    nodeProcess.setJvmOptions(Collections.singletonList(PAResourceManagerProperties.RM_HOME.getCmdLine() + PAResourceManagerProperties.RM_HOME.getValueAsString()));
    nodeProcess.setClassname(GetAllNodes.class.getName());
    nodeProcess.startProcess();
    // node busy event
    evt = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    // client does not exist anymore
    log("Client does not exist anymore. Waiting for client crash detection.");
    // waiting for node free event
    evt = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 4 - disconnecting");
    NodeSet ns2 = resourceManager.getAtMostNodes(1, null);
    RMNodeEvent event = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, 10000);
    assertEquals(NodeState.BUSY, event.getNodeState());
    PAFuture.waitFor(ns2);
    log("Number of found nodes " + ns2.size());
    assertEquals(1, ns2.size());
    t = new Thread() {

        @Override
        public void run() {
            try {
                RMAuthentication auth = rmHelper.getRMAuth();
                Credentials cred = Credentials.createCredentials(new CredData(TestUsers.TEST.username, TestUsers.TEST.password), auth.getPublicKey());
                ResourceManager rm = auth.login(cred);
                rm.disconnect().getBooleanValue();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };
    t.start();
    t.join();
    try {
        // another user logging in and disconnecting must not change the state of our node
        event = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, 10000);
        fail("Unexpected event: " + event);
    } catch (ProActiveTimeoutException expected) {
        // expected: no node state change arrived within the timeout
    }
}
Also used : NodeSet(org.ow2.proactive.utils.NodeSet) DefaultInfrastructureManager(org.ow2.proactive.resourcemanager.nodesource.infrastructure.DefaultInfrastructureManager) StaticPolicy(org.ow2.proactive.resourcemanager.nodesource.policy.StaticPolicy) CredData(org.ow2.proactive.authentication.crypto.CredData) JVMProcessImpl(org.objectweb.proactive.core.process.JVMProcessImpl) ResourceManager(org.ow2.proactive.resourcemanager.frontend.ResourceManager) ProActiveTimeoutException(org.objectweb.proactive.core.ProActiveTimeoutException) ProActiveTimeoutException(org.objectweb.proactive.core.ProActiveTimeoutException) RMAuthentication(org.ow2.proactive.resourcemanager.authentication.RMAuthentication) RMNodeEvent(org.ow2.proactive.resourcemanager.common.event.RMNodeEvent) Credentials(org.ow2.proactive.authentication.crypto.Credentials) Test(org.junit.Test) RMFunctionalTest(functionaltests.utils.RMFunctionalTest)

Aggregations

RMNodeEvent (org.ow2.proactive.resourcemanager.common.event.RMNodeEvent)27 Test (org.junit.Test)14 RMFunctionalTest (functionaltests.utils.RMFunctionalTest)13 NodeSet (org.ow2.proactive.utils.NodeSet)13 ResourceManager (org.ow2.proactive.resourcemanager.frontend.ResourceManager)10 File (java.io.File)8 TestNode (functionaltests.utils.TestNode)7 Node (org.objectweb.proactive.core.node.Node)7 HashMap (java.util.HashMap)6 RMDeployingNode (org.ow2.proactive.resourcemanager.rmnode.RMDeployingNode)6 SelectionScript (org.ow2.proactive.scripting.SelectionScript)6 StaticPolicy (org.ow2.proactive.resourcemanager.nodesource.policy.StaticPolicy)5 NodeState (org.ow2.proactive.resourcemanager.common.NodeState)4 RMState (org.ow2.proactive.resourcemanager.common.RMState)4 Criteria (org.ow2.proactive.utils.Criteria)4 ArrayList (java.util.ArrayList)3 ProActiveTimeoutException (org.objectweb.proactive.core.ProActiveTimeoutException)3 DefaultInfrastructureManager (org.ow2.proactive.resourcemanager.nodesource.infrastructure.DefaultInfrastructureManager)3 URI (java.net.URI)2 HashSet (java.util.HashSet)2