Search in sources:

Example 96 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerStart.

/**
 * Checks that an OPPORTUNISTIC container which has already been reported as
 * RUNNING can be promoted to GUARANTEED.
 *
 * <p>Flow: register two nodes, launch an AM, request two OPPORTUNISTIC
 * containers, report the first one as RUNNING, request its promotion, verify
 * that a duplicate promotion request is rejected with
 * {@code UPDATE_OUTSTANDING_ERROR}, and finally verify that the promoted
 * container comes back as GUARANTEED with its version incremented by one.
 * Queue metrics are checked after each step.
 *
 * @throws Exception if any interaction with the mock RM/NM/AM fails
 */
@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice =
        (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    // The AM container is launched on nm2.
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    // NOTE(review): ctxt is never read below; kept because it is unclear from
    // here whether this lookup is relied on as an implicit sanity check.
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler)
        .getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // All nodes 1 to 2 will be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Verify Metrics
    // (arguments presumably: available MB, available vcores, allocated MB,
    // allocated vcores, allocated containers -- confirm against verifyMetrics)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Ask for two 1 GB OPPORTUNISTIC containers on any host ("*").
    AllocateResponse allocateResponse = am1.allocate(
        Arrays.asList(ResourceRequest.newInstance(
            Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null,
            ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))),
        null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(
        Arrays.asList(ContainerStatus.newInstance(
            container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)),
        true);
    Thread.sleep(200);
    // Verify that container is actually running wrt the RM..
    RMContainer rmContainer = ((CapacityScheduler) scheduler)
        .getApplicationAttempt(container.getId().getApplicationAttemptId())
        .getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Request promotion of the running container to GUARANTEED.
    am1.sendContainerUpdateRequest(
        Arrays.asList(UpdateContainerRequest.newInstance(
            0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null,
            ExecutionType.GUARANTEED)));
    // Verify Metrics after the promotion request (Nothing should change again)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send Promotion req again... this should result in update error
    allocateResponse = am1.sendContainerUpdateRequest(
        Arrays.asList(UpdateContainerRequest.newInstance(
            0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null,
            ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("UPDATE_OUTSTANDING_ERROR",
        allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(),
        allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Heartbeat from the hosting node again with the container still RUNNING;
    // presumably this lets the scheduler act on the outstanding promotion.
    allocNode.nodeHeartbeat(
        Arrays.asList(ContainerStatus.newInstance(
            container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)),
        true);
    Thread.sleep(200);
    // An empty allocate call picks up the promoted container.
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
    // Expected value first, so a failure message reads the right way round.
    Assert.assertEquals(container.getId(), uc.getId());
    Assert.assertEquals(container.getVersion() + 1, uc.getVersion());
    // Verify that the Container is still in RUNNING state wrt RM..
    rmContainer = ((CapacityScheduler) scheduler)
        .getApplicationAttempt(uc.getId().getApplicationAttemptId())
        .getRMContainer(uc.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics after the promotion:
    // Allocated cores+mem should have increased, available should decrease
    verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 97 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestDecommissioningNodesWatcher method testDecommissioningNodesWatcher.

/**
 * Walks a single node through graceful decommissioning and checks the status
 * reported by {@code DecommissioningNodesWatcher} at each stage:
 * not ready while running, WAIT_CONTAINER while containers remain,
 * WAIT_APP once containers reach zero, and READY after the app finishes.
 *
 * @throws Exception if any interaction with the mock RM fails
 */
@Test
public void testDecommissioningNodesWatcher() throws Exception {
    Configuration yarnConf = new Configuration();
    yarnConf.set(YarnConfiguration.RM_NODE_GRACEFUL_DECOMMISSION_TIMEOUT, "40");
    rm = new MockRM(yarnConf);
    rm.start();

    DecommissioningNodesWatcher nodesWatcher =
        new DecommissioningNodesWatcher(rm.getRMContext());

    MockNM nodeManager = rm.registerNode("host1:1234", 10240);
    NodeId nodeId = nodeManager.getNodeId();
    RMNode rmNode = rm.getRMContext().getRMNodes().get(nodeId);
    rm.waitForState(nodeId, NodeState.RUNNING);
    // A node that is merely RUNNING must not be reported as ready.
    Assert.assertFalse(nodesWatcher.checkReadyToBeDecommissioned(nodeId));

    RMApp runningApp = rm.submitApp(2000);
    MockAM appMaster = MockRM.launchAndRegisterAM(runningApp, rm, nodeManager);

    // Move the node into DECOMMISSIONING so the watcher starts tracking it.
    rm.sendNodeEvent(nodeManager, RMNodeEventType.GRACEFUL_DECOMMISSION);
    rm.waitForState(nodeId, NodeState.DECOMMISSIONING);

    // Report a shrinking number of running containers, down to zero.
    for (int runningContainers : new int[] {12, 11}) {
        nodesWatcher.update(rmNode, createNodeStatus(nodeId, runningApp, runningContainers));
    }
    Assert.assertFalse(nodesWatcher.checkReadyToBeDecommissioned(nodeId));

    nodesWatcher.update(rmNode, createNodeStatus(nodeId, runningApp, 1));
    Assert.assertEquals(
        DecommissioningNodeStatus.WAIT_CONTAINER,
        nodesWatcher.checkDecommissioningStatus(nodeId));

    nodesWatcher.update(rmNode, createNodeStatus(nodeId, runningApp, 0));
    Assert.assertEquals(
        DecommissioningNodeStatus.WAIT_APP,
        nodesWatcher.checkDecommissioningStatus(nodeId));

    // Once the application is FINISHED the node must be reported as READY.
    MockRM.finishAMAndVerifyAppState(runningApp, rm, nodeManager, appMaster);
    rm.waitForState(runningApp.getApplicationId(), RMAppState.FINISHED);
    Assert.assertEquals(
        DecommissioningNodeStatus.READY,
        nodesWatcher.checkDecommissioningStatus(nodeId));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Test(org.junit.Test)

Example 98 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestRMAdminService method testModifyLabelsOnUnknownNodes.

/**
 * Verifies how {@code replaceLabelsOnNode} treats known, unknown, wrong-port
 * and inactive nodes depending on the request's fail-on-unknown-nodes flag.
 *
 * <p>Three active nodes (host1:1111, host2:2222, host3:3333) and one inactive
 * node (host4:4444) are placed directly into the RM context maps.
 *
 * @throws IOException declared by the admin/label APIs used here
 * @throws YarnException declared by the admin/label APIs used here
 */
@Test
public void testModifyLabelsOnUnknownNodes() throws IOException, YarnException {
    // Create the RM and mark it ACTIVE. Distributed node-label configuration
    // is left at its default.
    rm = new MockRM();
    ((RMContextImpl) rm.getRMContext()).setHAServiceState(HAServiceState.ACTIVE);
    Map<NodeId, RMNode> rmNodes = rm.getRMContext().getRMNodes();
    rmNodes.put(NodeId.newInstance("host1", 1111), new RMNodeImpl(null, rm.getRMContext(), "host1", 0, 0, null, null, null));
    rmNodes.put(NodeId.newInstance("host2", 2222), new RMNodeImpl(null, rm.getRMContext(), "host2", 0, 0, null, null, null));
    rmNodes.put(NodeId.newInstance("host3", 3333), new RMNodeImpl(null, rm.getRMContext(), "host3", 0, 0, null, null, null));
    Map<NodeId, RMNode> rmInactiveNodes = rm.getRMContext().getInactiveRMNodes();
    rmInactiveNodes.put(NodeId.newInstance("host4", 4444), new RMNodeImpl(null, rm.getRMContext(), "host4", 0, 0, null, null, null));
    RMNodeLabelsManager labelMgr = rm.rmContext.getNodeLabelManager();
    // by default, distributed configuration for node label is disabled, this
    // should pass
    labelMgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y"));

    // replace known node
    ReplaceLabelsOnNodeRequest request1 = newReplaceWithLabelXRequest("host1", 1111, true);
    try {
        rm.adminService.replaceLabelsOnNode(request1);
    } catch (Exception ex) {
        // Same failure message as before, but keep the root cause attached.
        throw new AssertionError("should not fail on known node", ex);
    }

    // replace known node with wildcard port
    ReplaceLabelsOnNodeRequest request2 = newReplaceWithLabelXRequest("host1", 0, true);
    try {
        rm.adminService.replaceLabelsOnNode(request2);
    } catch (Exception ex) {
        throw new AssertionError("should not fail on known node", ex);
    }

    // replace unknown node
    ReplaceLabelsOnNodeRequest request3 = newReplaceWithLabelXRequest("host5", 0, true);
    try {
        rm.adminService.replaceLabelsOnNode(request3);
        fail("Should fail on unknown node");
    } catch (Exception expected) {
        // Expected: host5 is not registered and the request asked to fail.
    }

    // replace known node but wrong port
    ReplaceLabelsOnNodeRequest request4 = newReplaceWithLabelXRequest("host2", 1111, true);
    try {
        rm.adminService.replaceLabelsOnNode(request4);
        fail("Should fail on node with wrong port");
    } catch (Exception expected) {
        // Expected: host2 is registered on port 2222, not 1111.
    }

    // replace non-exist node but not check
    ReplaceLabelsOnNodeRequest request5 = newReplaceWithLabelXRequest("host5", 0, false);
    try {
        rm.adminService.replaceLabelsOnNode(request5);
    } catch (Exception ex) {
        throw new AssertionError("Should not fail on unknown node when " + "fail-on-unkown-nodes is set false", ex);
    }

    // replace on inactive node
    ReplaceLabelsOnNodeRequest request6 = newReplaceWithLabelXRequest("host4", 0, true);
    try {
        rm.adminService.replaceLabelsOnNode(request6);
    } catch (Exception ex) {
        throw new AssertionError("should not fail on inactive node", ex);
    }
    rm.close();
}

/** Builds a request that replaces the labels of {@code host:port} with the single label "x". */
private static ReplaceLabelsOnNodeRequest newReplaceWithLabelXRequest(String host, int port, boolean failOnUnknownNodes) {
    Map<NodeId, Set<String>> labelsByNode = ImmutableMap.of(NodeId.newInstance(host, port), (Set<String>) ImmutableSet.of("x"));
    ReplaceLabelsOnNodeRequest request = ReplaceLabelsOnNodeRequest.newInstance(labelsByNode);
    request.setFailOnUnknownNodes(failOnUnknownNodes);
    return request;
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ReplaceLabelsOnNodeRequest(org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) NodeId(org.apache.hadoop.yarn.api.records.NodeId) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) RMNodeLabelsManager(org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager) Test(org.junit.Test)

Example 99 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestRMAdminService method testRefreshNodesResourceWithFileSystemBasedConfigurationProvider.

/**
 * Verifies that {@code refreshNodesResources} applies the per-node overrides
 * uploaded as {@code dynamic-resources.xml}: node h1:1234 registers with
 * {@code <memory:5120, vCores:5>} and must report
 * {@code <memory:4096, vCores:4>} after the refresh has been processed.
 *
 * @throws IOException declared by the configuration upload/admin calls
 * @throws YarnException declared by the admin call
 */
@Test
public void testRefreshNodesResourceWithFileSystemBasedConfigurationProvider() throws IOException, YarnException {
    configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
    //upload default configurations
    uploadDefaultConfiguration();
    try {
        rm = new MockRM(configuration);
        rm.init(configuration);
        rm.start();
        rm.registerNode("h1:1234", 5120);
    } catch (Exception ex) {
        // Same failure message as before, but keep the root cause attached so
        // a startup/registration problem can be diagnosed from the report.
        throw new AssertionError("Should not get any exceptions", ex);
    }
    NodeId nid = NodeId.fromString("h1:1234");
    RMNode ni = rm.getRMContext().getRMNodes().get(nid);
    Resource resource = ni.getTotalCapability();
    Assert.assertEquals("<memory:5120, vCores:5>", resource.toString());
    // Override the resources of h1:1234 through the dynamic resource config.
    DynamicResourceConfiguration drConf = new DynamicResourceConfiguration();
    drConf.set(PREFIX + NODES, "h1:1234");
    drConf.set(PREFIX + "h1:1234.vcores", "4");
    drConf.set(PREFIX + "h1:1234.memory", "4096");
    uploadConfiguration(drConf, "dynamic-resources.xml");
    rm.adminService.refreshNodesResources(RefreshNodesResourcesRequest.newInstance());
    // Drain pending RM events before reading the node's capability back.
    rm.drainEvents();
    RMNode niAfter = rm.getRMContext().getRMNodes().get(nid);
    Resource resourceAfter = niAfter.getTotalCapability();
    Assert.assertEquals("<memory:4096, vCores:4>", resourceAfter.toString());
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) DynamicResourceConfiguration(org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration) Test(org.junit.Test)

Example 100 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestRMAdminService method testRefreshNodesResourceWithResourceReturnInHeartbeat.

/**
 * Verifies that after {@code refreshNodesResources} the new node resources
 * are handed back to the NM on its next heartbeat: node h1:1234 registers
 * with {@code <memory:2048, vCores:2>}, and after the refresh plus one
 * heartbeat both the RM node and the mock NM must report 4096 MB / 4 vcores.
 *
 * @throws IOException declared by the configuration upload/admin calls
 * @throws YarnException declared by the admin call
 */
@Test
public void testRefreshNodesResourceWithResourceReturnInHeartbeat() throws IOException, YarnException {
    configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
    //upload default configurations
    uploadDefaultConfiguration();
    MockNM nm = null;
    try {
        rm = new MockRM(configuration);
        rm.init(configuration);
        rm.start();
        nm = rm.registerNode("h1:1234", 2048, 2);
    } catch (Exception ex) {
        // Same failure message as before, but keep the root cause attached.
        throw new AssertionError("Should not get any exceptions", ex);
    }
    NodeId nid = NodeId.fromString("h1:1234");
    RMNode ni = rm.getRMContext().getRMNodes().get(nid);
    Resource resource = ni.getTotalCapability();
    Assert.assertEquals("<memory:2048, vCores:2>", resource.toString());
    // Override the resources of h1:1234 through the dynamic resource config.
    DynamicResourceConfiguration drConf = new DynamicResourceConfiguration();
    drConf.set(PREFIX + NODES, "h1:1234");
    drConf.set(PREFIX + "h1:1234.vcores", "4");
    drConf.set(PREFIX + "h1:1234.memory", "4096");
    uploadConfiguration(drConf, "dynamic-resources.xml");
    rm.adminService.refreshNodesResources(RefreshNodesResourcesRequest.newInstance());
    try {
        // NM-RM heartbeat, validate that this will get new resource back.
        nm.nodeHeartbeat(true);
    } catch (Exception ex) {
        throw new AssertionError("Should not get any exceptions", ex);
    }
    RMNode niAfter = rm.getRMContext().getRMNodes().get(nid);
    Resource resourceAfter = niAfter.getTotalCapability();
    Assert.assertEquals("<memory:4096, vCores:4>", resourceAfter.toString());
    // The mock NM itself must also have picked up the new values.
    Assert.assertEquals(4096, nm.getMemory());
    Assert.assertEquals(4, nm.getvCores());
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) AccessControlException(org.apache.hadoop.security.AccessControlException) DynamicResourceConfiguration(org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration) Test(org.junit.Test)

Aggregations

RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)179 Test (org.junit.Test)143 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)102 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)93 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)63 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)55 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)46 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)44 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)37 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)35 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)30 NodeId (org.apache.hadoop.yarn.api.records.NodeId)28 FileWriter (java.io.FileWriter)24 PrintWriter (java.io.PrintWriter)24 Resource (org.apache.hadoop.yarn.api.records.Resource)24 NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)23 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)21 ArrayList (java.util.ArrayList)20 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)20 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)19