use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.
the class TestTaskRestartOnNodeFailure method testTaskKillNode.
/**
 * Submits a job, kills the node that was started for it once the task is running, starts a
 * replacement node and checks that the job still finishes and yields a valid result.
 *
 * @param fileLock       lock that is acquired up front; its path is passed to the job and it
 *                       is released after the replacement node is up (presumably the task
 *                       blocks on it - confirm against createJob)
 * @param waitBeforeKill when {@code true}, sleeps 5 seconds between "task running" and the kill
 * @throws Exception if any helper call fails or times out
 */
private void testTaskKillNode(FileLock fileLock, boolean waitBeforeKill) throws Exception {
    Path lockFile = fileLock.lock();
    TestNode nodeToKill = startNode();

    log("Submit job");
    final JobId jobId = schedulerHelper.submitJob(createJob(lockFile.toString()));

    log("Wait when node becomes busy");
    // Consume state-change events until one of them reports BUSY.
    while (true) {
        RMNodeEvent stateChange = schedulerHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, TIMEOUT);
        if (stateChange.getNodeState().equals(NodeState.BUSY)) {
            break;
        }
    }

    log("Wait when task starts");
    schedulerHelper.waitForEventTaskRunning(jobId, "Test task");

    /*
     * Two cases are exercised (they existed at the time the test was written):
     * - waiting some time before killing the node: the failure is detected by the pinger thread;
     * - killing the node immediately: the failure is detected by the thread calling
     *   TaskLauncher.doTask.
     */
    if (waitBeforeKill) {
        log("Wait some time");
        Thread.sleep(5000);
    }

    log("Stop task node process (node " + nodeToKill.getNode().getNodeInformation().getURL() + ")");
    nodeToKill.kill();

    TestNode replacement = startNode();

    log("Let task finish");
    fileLock.unlock();

    log("Wait when job finish");
    schedulerHelper.waitForEventJobFinished(jobId, TIMEOUT);

    // The replacement node is expected to go BUSY and then back to FREE.
    String replacementUrl = replacement.getNode().getNodeInformation().getURL();
    RMNodeEvent busyEvent = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                                             replacementUrl,
                                                             TIMEOUT);
    assertEquals(NodeState.BUSY, busyEvent.getNodeState());
    RMNodeEvent freeEvent = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED,
                                                             replacementUrl,
                                                             TIMEOUT);
    assertEquals(NodeState.FREE, freeEvent.getNodeState());

    log("Check job result");
    checkJobResult(schedulerHelper.getSchedulerInterface(), jobId);

    // Clean up the replacement node: unregister it, then stop its process.
    schedulerHelper.getResourceManager().removeNode(replacement.getNodeURL(), true);
    replacement.kill();
}
use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.
the class TestTaskRestartOnNodeFailure method startNode.
/**
 * Creates a test node named after the running node counter, adds it to the resource manager
 * and waits for its NODE_ADDED event followed by a state change that must report FREE.
 * The created node is also stored in the {@code testNode} field.
 *
 * @return the newly started node
 * @throws Exception if node creation fails or an expected event does not arrive within TIMEOUT
 */
private TestNode startNode() throws Exception {
    final int index = startedNodesCounter++;
    log("Start new node: node-" + index);

    testNode = schedulerHelper.createNode("node" + index);
    final TestNode started = testNode;
    final String url = started.getNode().getNodeInformation().getURL();

    schedulerHelper.getResourceManager().addNode(url);
    schedulerHelper.waitForNodeEvent(RMEventType.NODE_ADDED, url, TIMEOUT);

    RMNodeEvent stateChange = schedulerHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, url, TIMEOUT);
    assertEquals(NodeState.FREE, stateChange.getNodeState());

    return started;
}
use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.
the class TestLocalInfrastructureRestartDownNodesPolicy method testRestartDownNodesPolicy.
/**
 * Creates a node source with policy arguments {"ALL", "ALL", "10000"}, reserves all of its
 * nodes, kills the runtime hosting the first one and checks that the node is reported DOWN
 * and that the node source gets back to its full number of alive nodes.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testRestartDownNodesPolicy() throws Exception {
    nodeSourceName = "Node_source_1";
    RMTHelper.log("Test 1 - restart down nodes policy");
    createNodeSourceWithNodes(nodeSourceName, new Object[] { "ALL", "ALL", "10000" });

    RMState stateTest1 = resourceManager.getState();
    assertEquals(defaultDescriptorNodesNb, stateTest1.getTotalNodesNumber());
    assertEquals(defaultDescriptorNodesNb, stateTest1.getFreeNodesNumber());

    // Reserve every node and consume one state-change event per reserved node.
    NodeSet ns = resourceManager.getNodes(new Criteria(defaultDescriptorNodesNb));
    for (Node n : ns) {
        rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, n.getNodeInformation().getURL());
    }

    String nodeUrl = ns.get(0).getNodeInformation().getURL();
    // Nodes will be redeployed only if we kill the whole runtime
    rmHelper.killRuntime(nodeUrl);
    RMNodeEvent ev = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, nodeUrl);
    assertEquals(NodeState.DOWN, ev.getNodeState());

    // one node is down - the policy should detect it and redeploy
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_ADDED);
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);

    // stateTest1 was fetched before the runtime was killed, so asserting on it again cannot
    // observe the redeployment. Re-read the state instead. The two events consumed above do
    // not guarantee that every node is back yet, so poll for a bounded time before asserting.
    // NOTE(review): the 60 s bound is a test-side choice, not a documented RM guarantee.
    final long deadline = System.currentTimeMillis() + 60000;
    RMState stateAfterRestart = resourceManager.getState();
    while ((stateAfterRestart.getTotalNodesNumber() != defaultDescriptorNodesNb ||
            stateAfterRestart.getTotalAliveNodesNumber() != defaultDescriptorNodesNb) &&
           System.currentTimeMillis() < deadline) {
        Thread.sleep(500);
        stateAfterRestart = resourceManager.getState();
    }
    assertEquals(defaultDescriptorNodesNb, stateAfterRestart.getTotalNodesNumber());
    assertEquals(defaultDescriptorNodesNb, stateAfterRestart.getTotalAliveNodesNumber());
}
use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.
the class TestAdminAddingNodes method testAddNodes.
/**
 * Exercises adding and removing nodes on a dedicated node source: plain add/remove, then
 * replacing a killed node by a new node with the same URL while the old one is down, free,
 * busy and to-be-removed, and finally adding the same node twice.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testAddNodes() throws Exception {
    final String NS_NAME = "TestAdminAddingNodes";
    int pingFrequency = 6000;
    ResourceManager resourceManager = rmHelper.getResourceManager();
    resourceManager.createNodeSource(NS_NAME,
                                     DefaultInfrastructureManager.class.getName(),
                                     null,
                                     StaticPolicy.class.getName(),
                                     null,
                                     NODES_NOT_RECOVERABLE);
    rmHelper.waitForNodeSourceEvent(RMEventType.NODESOURCE_CREATED, NS_NAME);
    resourceManager.setNodeSourcePingFrequency(pingFrequency, NS_NAME);

    log("Test 1 : add a node");
    String node1Name = "node1";
    TestNode testNode1 = RMTHelper.createNode(node1Name);
    testNodes.add(testNode1);
    String node1URL = testNode1.getNodeURL();
    resourceManager.addNode(node1URL, NS_NAME);
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node1URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 2 : remove an already deployed node");
    // preemptive removal is useless for this case, because node is free
    resourceManager.removeNode(node1URL, false);
    rmHelper.waitForNodeEvent(RMEventType.NODE_REMOVED, node1URL);
    assertEquals(0, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getTotalAliveNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());

    log("Test 3 : add a node, kill this node, node is detected down, and add a node that has the same URL");
    // Test seems to have a race condition at this step
    String node2Name = "node2";
    TestNode testNode2 = RMTHelper.createNode(node2Name);
    testNodes.add(testNode2);
    String node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    // wait the node added event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());
    testNode2.kill();
    testNodes.remove(testNode2);
    // wait the node down event
    RMNodeEvent evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.DOWN, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    assertEquals(0, resourceManager.getState().getTotalAliveNodesNumber());
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    // wait the node added event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());
    assertEquals(1, resourceManager.getState().getTotalAliveNodesNumber());

    log("Test 4 : add a node, keep this node free, kill this node, and add a node that has the same URL");
    // put a large ping frequency in order to avoid down nodes detection
    resourceManager.setNodeSourcePingFrequency(Integer.MAX_VALUE, NS_NAME);
    // wait the end of last ping sequence
    Thread.sleep(pingFrequency * 2);
    // node2 is free, kill the node
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 5 : add a node, put this node busy, kill this node, and add a node that has the same URL");
    // put the node to busy state
    NodeSet nodes = resourceManager.getAtMostNodes(1, null);
    PAFuture.waitFor(nodes);
    // wait the node busy event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    // node2 is busy, kill the node
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 6 : add a node, put this node toRelease, kill this node, and add a node that has the same URL");
    // put the node to busy state
    nodes = resourceManager.getAtMostNodes(1, null);
    PAFuture.waitFor(nodes);
    // wait the node busy event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    // put the node in to Release state
    resourceManager.removeNode(node2URL, false);
    // wait the node to release event
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.TO_BE_REMOVED, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());
    testNode2.kill();
    testNodes.remove(testNode2);
    // create another node with the same URL, and add it to Resource manager
    testNode2 = RMTHelper.createNode(node2Name, new URI(node2URL).getPort());
    testNodes.add(testNode2);
    node2URL = testNode2.getNodeURL();
    resourceManager.addNode(node2URL, NS_NAME);
    NodeFactory.getNode(node2URL);
    // wait the node added event, node added is configuring
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL);
    // wait for the node to be in free state
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node2URL);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 7");
    // add the same node twice and check that RM will not kill the node. If it does
    // second attempt will fail
    BooleanWrapper result = resourceManager.addNode(node2URL, NS_NAME);
    assertFalse(result.getBooleanValue());
    try {
        rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node2URL, 8000);
        fail("Should timeout");
    } catch (ProActiveTimeoutException expected) {
        // expected: no NODE_ADDED event may be emitted for the rejected duplicate
    }
}
use of org.ow2.proactive.resourcemanager.common.event.RMNodeEvent in project scheduling by ow2-proactive.
the class TestConcurrentUsers method testConcurrency.
/**
 * Checks node ownership rules between concurrent resource manager clients: a second user must
 * not be able to release a node it does not hold, releasing twice is harmless, nodes held by
 * a client process that goes away are freed again, and disconnecting another session does not
 * release the nodes held by this one.
 *
 * @throws Exception if a helper call fails or an expected event does not arrive
 */
@Test
public void testConcurrency() throws Exception {
    ResourceManager resourceManager = rmHelper.getResourceManager();
    String nsName = "TestConcurrentUsers";
    String node1Name = "node1";
    testNode = rmHelper.createNode(node1Name);
    String node1URL = testNode.getNode().getNodeInformation().getURL();
    resourceManager.createNodeSource(nsName,
                                     DefaultInfrastructureManager.class.getName(),
                                     null,
                                     StaticPolicy.class.getName(),
                                     null,
                                     NODES_NOT_RECOVERABLE);
    rmHelper.waitForNodeSourceEvent(RMEventType.NODESOURCE_CREATED, nsName);
    resourceManager.addNode(node1URL, nsName);
    // waiting for node adding event
    rmHelper.waitForNodeEvent(RMEventType.NODE_ADDED, node1URL);
    // waiting for the node to be free
    rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 1 - releasing of the foreign node");
    // acquiring a node
    final NodeSet ns = resourceManager.getAtMostNodes(1, null);
    // waiting for node busy event
    RMNodeEvent evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node1URL);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, ns.size());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());

    // An AssertionError thrown inside the worker thread is not an Exception and never reaches
    // the JUnit thread, so the outcome is recorded here and asserted after join(). The array
    // is only written by the worker and read after join(), which makes the write visible.
    final boolean[] foreignReleaseSucceeded = { false };
    Thread t = new Thread() {
        @Override
        public void run() {
            try {
                Credentials cred = Credentials.createCredentials(new CredData(CredData.parseLogin("user"),
                                                                              CredData.parseDomain("user"),
                                                                              "pwd"),
                                                                 TestConcurrentUsers.this.rmHelper.getRMAuth()
                                                                                                  .getPublicKey());
                ResourceManager rm2 = TestConcurrentUsers.this.rmHelper.getRMAuth().login(cred);
                rm2.releaseNode(ns.get(0)).getBooleanValue();
                // Reaching this line means the call went through without being rejected.
                foreignReleaseSucceeded[0] = true;
            } catch (Exception e) {
                // Any exception (login or release rejected) is the expected outcome.
                log(e.getMessage());
            }
        }
    };
    t.start();
    t.join();
    Assert.assertFalse("Should not be able to release foreign node", foreignReleaseSucceeded[0]);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());

    resourceManager.releaseNodes(ns);
    evt = rmHelper.waitForNodeEvent(RMEventType.NODE_STATE_CHANGED, node1URL);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 2 - releasing node twice");
    resourceManager.releaseNodes(ns);
    // to make sure everything has been processed
    Thread.sleep(1000);
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 3 - client crash detection");
    // Run the GetAllNodes client in a separate JVM that receives the RM home as a JVM option.
    JVMProcessImpl nodeProcess = new JVMProcessImpl(new org.objectweb.proactive.core.process.AbstractExternalProcess.StandardOutputMessageLogger());
    nodeProcess.setJvmOptions(Collections.singletonList(PAResourceManagerProperties.RM_HOME.getCmdLine() +
                                                        PAResourceManagerProperties.RM_HOME.getValueAsString()));
    nodeProcess.setClassname(GetAllNodes.class.getName());
    nodeProcess.startProcess();

    // node busy event
    evt = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(NodeState.BUSY, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(0, resourceManager.getState().getFreeNodesNumber());

    // client does not exist anymore
    log("Client does not exist anymore. Waiting for client crash detection.");
    // waiting for node free event
    evt = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED);
    assertEquals(NodeState.FREE, evt.getNodeState());
    assertEquals(1, resourceManager.getState().getTotalNodesNumber());
    assertEquals(1, resourceManager.getState().getFreeNodesNumber());

    log("Test 4 - disconnecting");
    NodeSet ns2 = resourceManager.getAtMostNodes(1, null);
    RMNodeEvent event = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, 10000);
    assertEquals(NodeState.BUSY, event.getNodeState());
    PAFuture.waitFor(ns2);
    log("Number of found nodes " + ns2.size());
    assertEquals(1, ns2.size());

    // Log in a second time with the test user's credentials and disconnect that session.
    t = new Thread() {
        @Override
        public void run() {
            try {
                RMAuthentication auth = rmHelper.getRMAuth();
                Credentials cred = Credentials.createCredentials(new CredData(TestUsers.TEST.username,
                                                                              TestUsers.TEST.password),
                                                                 auth.getPublicKey());
                ResourceManager rm = auth.login(cred);
                rm.disconnect().getBooleanValue();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };
    t.start();
    t.join();

    try {
        event = rmHelper.waitForAnyNodeEvent(RMEventType.NODE_STATE_CHANGED, 10000);
        fail("Unexpected event: " + event);
    } catch (ProActiveTimeoutException expected) {
        // expected: the disconnect above must not trigger any node state change
    }
}
Aggregations