Search in sources :

Example 66 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestNodeLabelContainerAllocation method testQueueMetricsWithLabelsOnDefaultLabelNode.

@Test
public void testQueueMetricsWithLabelsOnDefaultLabelNode() throws Exception {
    /**
     * Test case: have a following queue structure:
     *
     * <pre>
     *            root
     *         /      \
     *        a        b
     *        (x)     (x)
     * </pre>
     *
     * a/b can access x, both of them has max-capacity-on-x = 50
     *
     * When doing non-exclusive allocation, app in a (or b) can use 100% of x
     * resource.
     */
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(this.conf);
    // Define top-level queues
    csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
    csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);
    final String queueA = CapacitySchedulerConfiguration.ROOT + ".a";
    csConf.setCapacity(queueA, 25);
    csConf.setAccessibleNodeLabels(queueA, toSet("x"));
    csConf.setCapacityByLabel(queueA, "x", 50);
    csConf.setMaximumCapacityByLabel(queueA, "x", 50);
    final String queueB = CapacitySchedulerConfiguration.ROOT + ".b";
    csConf.setCapacity(queueB, 75);
    csConf.setAccessibleNodeLabels(queueB, toSet("x"));
    csConf.setCapacityByLabel(queueB, "x", 50);
    csConf.setMaximumCapacityByLabel(queueB, "x", 50);
    // set node -> label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
    // inject node label manager
    MockRM rm1 = new MockRM(csConf) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    // label = x
    MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
    // label = <no_label>
    MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
    // app1 -> a
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
    // app1 asks for 3 partition= containers
    am1.allocate("*", 1 * GB, 3, new ArrayList<ContainerId>());
    // NM1 do 50 heartbeats
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
    for (int i = 0; i < 50; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }
    // app1 gets all resource in partition=x (non-exclusive)
    Assert.assertEquals(3, schedulerNode1.getNumContainers());
    SchedulerNodeReport reportNm1 = rm1.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(3 * GB, reportNm1.getUsedResource().getMemorySize());
    Assert.assertEquals(7 * GB, reportNm1.getAvailableResource().getMemorySize());
    SchedulerNodeReport reportNm2 = rm1.getResourceScheduler().getNodeReport(nm2.getNodeId());
    Assert.assertEquals(1 * GB, reportNm2.getUsedResource().getMemorySize());
    Assert.assertEquals(9 * GB, reportNm2.getAvailableResource().getMemorySize());
    LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
    double delta = 0.0001;
    // 3GB is used from label x quota. 1.5 GB is remaining from default label.
    // 2GB is remaining from label x.
    assertEquals(3.5 * GB, leafQueue.getMetrics().getAvailableMB(), delta);
    assertEquals(4 * GB, leafQueue.getMetrics().getAllocatedMB());
    // app1 asks for 1 default partition container
    am1.allocate("*", 1 * GB, 5, new ArrayList<ContainerId>());
    // NM2 do couple of heartbeats
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
    SchedulerNode schedulerNode2 = cs.getSchedulerNode(nm2.getNodeId());
    cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // app1 gets all resource in default partition
    Assert.assertEquals(2, schedulerNode2.getNumContainers());
    // 3GB is used from label x quota. 2GB used from default label.
    // So total 2.5 GB is remaining.
    assertEquals(2.5 * GB, leafQueue.getMetrics().getAvailableMB(), delta);
    assertEquals(5 * GB, leafQueue.getMetrics().getAllocatedMB());
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) SchedulerNodeReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 67 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestNodeLabelContainerAllocation method testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity.

@Test(timeout = 60000)
public void testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity() throws Exception {
    /**
     * Test case: have a following queue structure:
     * 
     * <pre>
     *            root
     *         /      \
     *        a        b
     *        (x)     (x)
     * </pre>
     * 
     * a/b can access x, both of them has max-capacity-on-x = 50
     * 
     * When doing non-exclusive allocation, app in a (or b) can use 100% of x
     * resource.
     */
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(this.conf);
    // Define top-level queues
    csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
    csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);
    final String A = CapacitySchedulerConfiguration.ROOT + ".a";
    csConf.setCapacity(A, 50);
    csConf.setAccessibleNodeLabels(A, toSet("x"));
    csConf.setCapacityByLabel(A, "x", 50);
    csConf.setMaximumCapacityByLabel(A, "x", 50);
    csConf.setUserLimit(A, 200);
    final String B = CapacitySchedulerConfiguration.ROOT + ".b";
    csConf.setCapacity(B, 50);
    csConf.setAccessibleNodeLabels(B, toSet("x"));
    csConf.setCapacityByLabel(B, "x", 50);
    csConf.setMaximumCapacityByLabel(B, "x", 50);
    csConf.setUserLimit(B, 200);
    // set node -> label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
    // inject node label manager
    MockRM rm1 = new MockRM(csConf) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    // label = x
    MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
    // label = <empty>
    MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
    // app1 -> a
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
    // app1 asks for 10 partition= containers
    am1.allocate("*", 1 * GB, 10, new ArrayList<ContainerId>());
    // NM1 do 50 heartbeats
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    for (int i = 0; i < 50; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }
    // app1 gets all resource in partition=x
    Assert.assertEquals(10, schedulerNode1.getNumContainers());
    // check non-exclusive containers of LeafQueue is correctly updated
    LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
    Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("y"));
    Assert.assertEquals(10, leafQueue.getIgnoreExclusivityRMContainers().get("x").size());
    // completes all containers of app1, ignoreExclusivityRMContainers should be
    // updated as well.
    cs.handle(new AppAttemptRemovedSchedulerEvent(am1.getApplicationAttemptId(), RMAppAttemptState.FINISHED, false));
    Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("x"));
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) Test(org.junit.Test)

Example 68 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestNodeLabelContainerAllocation method testAMContainerAllocationWillAlwaysBeExclusive.

@Test
public void testAMContainerAllocationWillAlwaysBeExclusive() throws Exception {
    /**
     * Test case: Submit one application without partition, trying to allocate a
     * node has partition=x, it should fail to allocate since AM container will
     * always respect exclusivity for partitions
     */
    // set node -> label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false), NodeLabel.newInstance("y")));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
    // inject node label manager
    MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    String nodeIdStr = "h1:1234";
    // label = x
    MockNM nm1 = rm1.registerNode(nodeIdStr, 8 * GB);
    // launch an app to queue b1 (label = y), AM container should be launched in nm3
    RMApp app = rm1.submitApp(1 * GB, "app", "user", null, "b1");
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    // Heartbeat for many times, app1 should get nothing
    for (int i = 0; i < 50; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }
    Assert.assertTrue("Scheduler diagnostics should have reason for not assigning the node", app.getDiagnostics().toString().contains(CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_IGNORE_EXCLUSIVE_MODE));
    Assert.assertTrue("Scheduler diagnostics should have last processed node information", app.getDiagnostics().toString().contains(CSAMContainerLaunchDiagnosticsConstants.LAST_NODE_PROCESSED_MSG + nodeIdStr + " ( Partition : [x]"));
    Assert.assertEquals(0, cs.getSchedulerNode(nm1.getNodeId()).getNumContainers());
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 69 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestNodeLabelContainerAllocation method testPreferenceOfNeedyAppsTowardsNodePartitions.

@Test
public void testPreferenceOfNeedyAppsTowardsNodePartitions() throws Exception {
    /**
     * Test case: Submit two application to a queue (app1 first then app2), app1
     * asked for no-label, app2 asked for label=x, when node1 has label=x
     * doing heart beat, app2 will get allocation first, even if app2 submits later
     * than app1
     */
    // set node -> label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y", false)));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("y")));
    // inject node label manager
    MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    // label = y
    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
    // label = <empty>
    MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB);
    // launch an app to queue b1 (label = y), AM container should be launched in nm2
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
    // launch another app to queue b1 (label = y), AM container should be launched in nm2
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // request container and nm1 do heartbeat (nm2 has label=y), note that app1
    // request non-labeled container, and app2 request labeled container, app2
    // will get allocated first even if app1 submitted first.  
    am1.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>());
    am2.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>(), "y");
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
    // Do node heartbeats many times
    for (int i = 0; i < 50; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }
    // App2 will get preference to be allocated on node1, and node1 will be all
    // used by App2.
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
    FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
    // app1 get nothing in nm1 (partition=y)
    checkNumOfContainersInAnAppOnGivenNode(0, nm1.getNodeId(), schedulerApp1);
    checkNumOfContainersInAnAppOnGivenNode(9, nm2.getNodeId(), schedulerApp1);
    // app2 get all resource in nm1 (partition=y)
    checkNumOfContainersInAnAppOnGivenNode(8, nm1.getNodeId(), schedulerApp2);
    checkNumOfContainersInAnAppOnGivenNode(1, nm2.getNodeId(), schedulerApp2);
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 70 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestMiniYarnClusterNodeUtilization method verifySimulatedUtilization.

/**
   * Verify both the RMNode and SchedulerNode have been updated with the test
   * fixture utilization data.
   */
private void verifySimulatedUtilization() throws InterruptedException {
    ResourceManager rm = cluster.getResourceManager(0);
    RMContext rmContext = rm.getRMContext();
    ResourceUtilization containersUtilization = nodeStatus.getContainersUtilization();
    ResourceUtilization nodeUtilization = nodeStatus.getNodeUtilization();
    // We check if the nodeUtilization is up to date
    for (int i = 0; i < 100; i++) {
        for (RMNode ni : rmContext.getRMNodes().values()) {
            if (ni.getNodeUtilization() != null) {
                if (ni.getNodeUtilization().equals(nodeUtilization)) {
                    break;
                }
            }
        }
        Thread.sleep(100);
    }
    // Verify the data is readable from the RM and scheduler nodes
    for (RMNode ni : rmContext.getRMNodes().values()) {
        ResourceUtilization cu = ni.getAggregatedContainersUtilization();
        assertEquals("Containers Utillization not propagated to RMNode", containersUtilization, cu);
        ResourceUtilization nu = ni.getNodeUtilization();
        assertEquals("Node Utillization not propagated to RMNode", nodeUtilization, nu);
        SchedulerNode scheduler = rmContext.getScheduler().getSchedulerNode(ni.getNodeID());
        cu = scheduler.getAggregatedContainersUtilization();
        assertEquals("Containers Utillization not propagated to SchedulerNode", containersUtilization, cu);
        nu = scheduler.getNodeUtilization();
        assertEquals("Node Utillization not propagated to SchedulerNode", nodeUtilization, nu);
    }
}
Also used : RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) ResourceUtilization(org.apache.hadoop.yarn.api.records.ResourceUtilization) ResourceManager(org.apache.hadoop.yarn.server.resourcemanager.ResourceManager)

Aggregations

RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)179 Test (org.junit.Test)143 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)102 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)93 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)63 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)55 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)46 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)44 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)37 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)35 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)30 NodeId (org.apache.hadoop.yarn.api.records.NodeId)28 FileWriter (java.io.FileWriter)24 PrintWriter (java.io.PrintWriter)24 Resource (org.apache.hadoop.yarn.api.records.Resource)24 NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)23 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)21 ArrayList (java.util.ArrayList)20 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)20 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)19