Usage example of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in the Apache Hadoop project, from class TestOpportunisticContainerAllocatorAMService, method testContainerPromoteAfterContainerStart.
/**
 * Verifies promotion of an OPPORTUNISTIC container to GUARANTEED after the
 * container has started on its NodeManager:
 * <ol>
 *   <li>Allocate two OPPORTUNISTIC containers and start one on its NM.</li>
 *   <li>Request promotion; a second promotion request while the first is
 *       still outstanding must be rejected with UPDATE_OUTSTANDING_ERROR.</li>
 *   <li>After the NM heartbeats again, the promotion completes: the
 *       container becomes GUARANTEED, its version is bumped, it remains
 *       RUNNING, and queue metrics reflect the newly guaranteed
 *       resources.</li>
 * </ol>
 */
@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
  HashMap<NodeId, MockNM> nodes = new HashMap<>();
  MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
  nodes.put(nm1.getNodeId(), nm1);
  MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
  nodes.put(nm2.getNodeId(), nm2);
  nm1.registerNode();
  nm2.registerNode();
  OpportunisticContainerAllocatorAMService amservice =
      (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
  RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
  ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
  ResourceScheduler scheduler = rm.getResourceScheduler();
  RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
  RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
  nm1.nodeHeartbeat(true);
  nm2.nodeHeartbeat(true);
  // Mark both nodes as eligible for opportunistic containers.
  ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
  ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
  OpportunisticContainerContext ctxt =
      ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId)
          .getOpportunisticContainerContext();
  // Send add and update node events to AM Service.
  amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
  amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
  amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
  amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
  // All nodes 1 to 2 will be applicable for scheduling.
  nm1.nodeHeartbeat(true);
  nm2.nodeHeartbeat(true);
  Thread.sleep(1000);
  QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
  // Verify Metrics
  verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
  // Ask for two OPPORTUNISTIC containers of 1GB each.
  AllocateResponse allocateResponse = am1.allocate(
      Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*",
          Resources.createResource(1 * GB), 2, true, null,
          ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))),
      null);
  List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
  Assert.assertEquals(2, allocatedContainers.size());
  Container container = allocatedContainers.get(0);
  MockNM allocNode = nodes.get(container.getNodeId());
  // Start Container in NM
  allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(),
      ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
  Thread.sleep(200);
  // Verify that container is actually running wrt the RM..
  RMContainer rmContainer = ((CapacityScheduler) scheduler)
      .getApplicationAttempt(container.getId().getApplicationAttemptId())
      .getRMContainer(container.getId());
  Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  // Verify Metrics After OPP allocation (Nothing should change)
  verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
  am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0,
      container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null,
      ExecutionType.GUARANTEED)));
  // Verify Metrics After OPP allocation (Nothing should change again)
  verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
  // Send Promotion req again... this should result in update error
  allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(
      UpdateContainerRequest.newInstance(0, container.getId(),
          ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
  Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
  Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
  Assert.assertEquals("UPDATE_OUTSTANDING_ERROR",
      allocateResponse.getUpdateErrors().get(0).getReason());
  Assert.assertEquals(container.getId(),
      allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
  // Start Container in NM again so the pending promotion can complete.
  allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(),
      ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
  Thread.sleep(200);
  allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
  Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
  Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
  Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
  // Fix: JUnit's assertEquals takes (expected, actual); the original had
  // these two assertions with the arguments reversed, which produces
  // misleading failure messages.
  Assert.assertEquals(container.getId(), uc.getId());
  Assert.assertEquals(container.getVersion() + 1, uc.getVersion());
  // Verify that the Container is still in RUNNING state wrt RM..
  rmContainer = ((CapacityScheduler) scheduler)
      .getApplicationAttempt(uc.getId().getApplicationAttemptId())
      .getRMContainer(uc.getId());
  Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  // Verify Metrics After OPP allocation :
  // Allocated cores+mem should have increased, available should decrease
  verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
Usage example of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in the Apache Hadoop project, from class TestDecommissioningNodesWatcher, method testDecommissioningNodesWatcher.
/**
 * Exercises DecommissioningNodesWatcher: a node in DECOMMISSIONING state is
 * only reported READY once its running-container count has dropped to zero
 * and every application that ran on it has finished.
 */
@Test
public void testDecommissioningNodesWatcher() throws Exception {
  Configuration config = new Configuration();
  config.set(YarnConfiguration.RM_NODE_GRACEFUL_DECOMMISSION_TIMEOUT, "40");
  rm = new MockRM(config);
  rm.start();
  DecommissioningNodesWatcher nodesWatcher =
      new DecommissioningNodesWatcher(rm.getRMContext());
  MockNM nm = rm.registerNode("host1:1234", 10240);
  NodeId nodeId = nm.getNodeId();
  RMNode rmNode = rm.getRMContext().getRMNodes().get(nodeId);
  rm.waitForState(nodeId, NodeState.RUNNING);
  // A healthy RUNNING node is not ready for decommission.
  Assert.assertFalse(nodesWatcher.checkReadyToBeDecommissioned(nodeId));
  RMApp application = rm.submitApp(2000);
  MockAM appMaster = MockRM.launchAndRegisterAM(application, rm, nm);
  // Setup nm as DECOMMISSIONING for DecommissioningNodesWatcher.
  rm.sendNodeEvent(nm, RMNodeEventType.GRACEFUL_DECOMMISSION);
  rm.waitForState(nodeId, NodeState.DECOMMISSIONING);
  // Feed status updates with a shrinking running-container count.
  nodesWatcher.update(rmNode, createNodeStatus(nodeId, application, 12));
  nodesWatcher.update(rmNode, createNodeStatus(nodeId, application, 11));
  Assert.assertFalse(nodesWatcher.checkReadyToBeDecommissioned(nodeId));
  nodesWatcher.update(rmNode, createNodeStatus(nodeId, application, 1));
  Assert.assertEquals(DecommissioningNodeStatus.WAIT_CONTAINER,
      nodesWatcher.checkDecommissioningStatus(nodeId));
  // Zero containers left, but the app is still alive: wait for the app.
  nodesWatcher.update(rmNode, createNodeStatus(nodeId, application, 0));
  Assert.assertEquals(DecommissioningNodeStatus.WAIT_APP,
      nodesWatcher.checkDecommissioningStatus(nodeId));
  // Finish the app; the node should now be READY for decommission.
  MockRM.finishAMAndVerifyAppState(application, rm, nm, appMaster);
  rm.waitForState(application.getApplicationId(), RMAppState.FINISHED);
  Assert.assertEquals(DecommissioningNodeStatus.READY,
      nodesWatcher.checkDecommissioningStatus(nodeId));
}
Usage example of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in the Apache Hadoop project, from class TestRMAdminService, method testModifyLabelsOnUnknownNodes.
/**
 * Replacing node labels with failOnUnknownNodes=true must be rejected only
 * for nodes the RM has never seen. Active nodes (exact or wildcard port) and
 * inactive nodes are accepted; an unknown node passes only when the strict
 * check is disabled.
 */
@Test
public void testModifyLabelsOnUnknownNodes() throws IOException, YarnException {
  // create RM and set it's ACTIVE, and set distributed node label
  // configuration to true
  rm = new MockRM();
  ((RMContextImpl) rm.getRMContext()).setHAServiceState(HAServiceState.ACTIVE);
  Map<NodeId, RMNode> activeNodes = rm.getRMContext().getRMNodes();
  activeNodes.put(NodeId.newInstance("host1", 1111),
      new RMNodeImpl(null, rm.getRMContext(), "host1", 0, 0, null, null, null));
  activeNodes.put(NodeId.newInstance("host2", 2222),
      new RMNodeImpl(null, rm.getRMContext(), "host2", 0, 0, null, null, null));
  activeNodes.put(NodeId.newInstance("host3", 3333),
      new RMNodeImpl(null, rm.getRMContext(), "host3", 0, 0, null, null, null));
  Map<NodeId, RMNode> inactiveNodes = rm.getRMContext().getInactiveRMNodes();
  inactiveNodes.put(NodeId.newInstance("host4", 4444),
      new RMNodeImpl(null, rm.getRMContext(), "host4", 0, 0, null, null, null));
  RMNodeLabelsManager labelsManager = rm.rmContext.getNodeLabelManager();
  // by default, distributed configuration for node label is disabled, this
  // should pass
  labelsManager.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));
  // Case 1: known active node with its exact port -> accepted.
  ReplaceLabelsOnNodeRequest knownNodeReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host1", 1111), (Set<String>) ImmutableSet.of("x")));
  knownNodeReq.setFailOnUnknownNodes(true);
  try {
    rm.adminService.replaceLabelsOnNode(knownNodeReq);
  } catch (Exception e) {
    fail("should not fail on known node");
  }
  // Case 2: known node addressed with the wildcard port -> accepted.
  ReplaceLabelsOnNodeRequest wildcardPortReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host1", 0), (Set<String>) ImmutableSet.of("x")));
  wildcardPortReq.setFailOnUnknownNodes(true);
  try {
    rm.adminService.replaceLabelsOnNode(wildcardPortReq);
  } catch (Exception e) {
    fail("should not fail on known node");
  }
  // Case 3: completely unknown node -> rejected.
  ReplaceLabelsOnNodeRequest unknownNodeReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host5", 0), (Set<String>) ImmutableSet.of("x")));
  unknownNodeReq.setFailOnUnknownNodes(true);
  try {
    rm.adminService.replaceLabelsOnNode(unknownNodeReq);
    fail("Should fail on unknown node");
  } catch (Exception e) {
    // expected
  }
  // Case 4: known host but a port that was never registered -> rejected.
  ReplaceLabelsOnNodeRequest wrongPortReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host2", 1111), (Set<String>) ImmutableSet.of("x")));
  wrongPortReq.setFailOnUnknownNodes(true);
  try {
    rm.adminService.replaceLabelsOnNode(wrongPortReq);
    fail("Should fail on node with wrong port");
  } catch (Exception e) {
    // expected
  }
  // Case 5: unknown node but the strict check is disabled -> accepted.
  ReplaceLabelsOnNodeRequest uncheckedReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host5", 0), (Set<String>) ImmutableSet.of("x")));
  uncheckedReq.setFailOnUnknownNodes(false);
  try {
    rm.adminService.replaceLabelsOnNode(uncheckedReq);
  } catch (Exception e) {
    fail("Should not fail on unknown node when " + "fail-on-unkown-nodes is set false");
  }
  // Case 6: inactive (decommissioned) node -> accepted.
  ReplaceLabelsOnNodeRequest inactiveNodeReq = ReplaceLabelsOnNodeRequest.newInstance(
      ImmutableMap.of(NodeId.newInstance("host4", 0), (Set<String>) ImmutableSet.of("x")));
  inactiveNodeReq.setFailOnUnknownNodes(true);
  try {
    rm.adminService.replaceLabelsOnNode(inactiveNodeReq);
  } catch (Exception e) {
    fail("should not fail on inactive node");
  }
  rm.close();
}
Usage example of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in the Apache Hadoop project, from class TestRMAdminService, method testRefreshNodesResourceWithFileSystemBasedConfigurationProvider.
/**
 * With a FileSystemBasedConfigurationProvider, refreshNodesResources must
 * update a registered node's total capability in the RM context to match the
 * values in the uploaded dynamic-resources.xml.
 */
@Test
public void testRefreshNodesResourceWithFileSystemBasedConfigurationProvider()
    throws IOException, YarnException {
  configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS,
      "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
  // upload default configurations
  uploadDefaultConfiguration();
  try {
    rm = new MockRM(configuration);
    rm.init(configuration);
    rm.start();
    rm.registerNode("h1:1234", 5120);
  } catch (Exception e) {
    fail("Should not get any exceptions");
  }
  NodeId nodeId = NodeId.fromString("h1:1234");
  RMNode rmNode = rm.getRMContext().getRMNodes().get(nodeId);
  Resource capability = rmNode.getTotalCapability();
  Assert.assertEquals("<memory:5120, vCores:5>", capability.toString());
  // Publish new per-node resource values via dynamic-resources.xml.
  DynamicResourceConfiguration dynamicConf = new DynamicResourceConfiguration();
  dynamicConf.set(PREFIX + NODES, "h1:1234");
  dynamicConf.set(PREFIX + "h1:1234.vcores", "4");
  dynamicConf.set(PREFIX + "h1:1234.memory", "4096");
  uploadConfiguration(dynamicConf, "dynamic-resources.xml");
  rm.adminService.refreshNodesResources(RefreshNodesResourcesRequest.newInstance());
  rm.drainEvents();
  // The RM-side view of the node should reflect the refreshed resources.
  RMNode refreshedNode = rm.getRMContext().getRMNodes().get(nodeId);
  Resource refreshedCapability = refreshedNode.getTotalCapability();
  Assert.assertEquals("<memory:4096, vCores:4>", refreshedCapability.toString());
}
Usage example of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in the Apache Hadoop project, from class TestRMAdminService, method testRefreshNodesResourceWithResourceReturnInHeartbeat.
/**
 * After refreshNodesResources, the next NM-RM heartbeat must carry the
 * refreshed resource values back to the NodeManager, and the RM context must
 * reflect them as well.
 */
@Test
public void testRefreshNodesResourceWithResourceReturnInHeartbeat()
    throws IOException, YarnException {
  configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS,
      "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
  // upload default configurations
  uploadDefaultConfiguration();
  MockNM nodeManager = null;
  try {
    rm = new MockRM(configuration);
    rm.init(configuration);
    rm.start();
    nodeManager = rm.registerNode("h1:1234", 2048, 2);
  } catch (Exception e) {
    fail("Should not get any exceptions");
  }
  NodeId nodeId = NodeId.fromString("h1:1234");
  RMNode rmNode = rm.getRMContext().getRMNodes().get(nodeId);
  Resource capability = rmNode.getTotalCapability();
  Assert.assertEquals("<memory:2048, vCores:2>", capability.toString());
  // Publish new per-node resource values via dynamic-resources.xml.
  DynamicResourceConfiguration dynamicConf = new DynamicResourceConfiguration();
  dynamicConf.set(PREFIX + NODES, "h1:1234");
  dynamicConf.set(PREFIX + "h1:1234.vcores", "4");
  dynamicConf.set(PREFIX + "h1:1234.memory", "4096");
  uploadConfiguration(dynamicConf, "dynamic-resources.xml");
  rm.adminService.refreshNodesResources(RefreshNodesResourcesRequest.newInstance());
  try {
    // NM-RM heartbeat, validate that this will get new resource back.
    nodeManager.nodeHeartbeat(true);
  } catch (Exception e) {
    fail("Should not get any exceptions");
  }
  // Both the RM-side view and the NM itself should see the new resources.
  RMNode refreshedNode = rm.getRMContext().getRMNodes().get(nodeId);
  Resource refreshedCapability = refreshedNode.getTotalCapability();
  Assert.assertEquals("<memory:4096, vCores:4>", refreshedCapability.toString());
  Assert.assertEquals(4096, nodeManager.getMemory());
  Assert.assertEquals(4, nodeManager.getvCores());
}
End of aggregated RMNode usage examples.