Search in sources :

Example 16 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestFairScheduler method testReservationThresholdWithAssignMultiple.

@Test(timeout = 5000)
public void testReservationThresholdWithAssignMultiple() throws Exception {
    // set reservable-nodes to 0 which make reservation exceed
    conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0f);
    conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true);
    conf.setBoolean(FairSchedulerConfiguration.DYNAMIC_MAX_ASSIGN, false);
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // Add two node
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    RMNode node2 = MockNodes.newNodeInfo(2, Resources.createResource(4096, 4), 1, "127.0.0.2");
    NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
    scheduler.handle(nodeEvent2);
    //create one request and assign containers
    ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 10);
    scheduler.update();
    scheduler.handle(new NodeUpdateSchedulerEvent(node1));
    scheduler.update();
    scheduler.handle(new NodeUpdateSchedulerEvent(node2));
    // Verify capacity allocation
    assertEquals(8192, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
    // Verify number of reservations have decremented
    assertEquals(0, scheduler.getSchedulerApp(attId).getNumReservations(null, true));
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Test(org.junit.Test)

Example 17 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestFairScheduler method testBlacklistNodes.

@Test
public void testBlacklistNodes() throws Exception {
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    final int GB = 1024;
    String host = "127.0.0.1";
    RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(16 * GB, 16), 0, host);
    NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
    scheduler.handle(nodeEvent);
    ApplicationAttemptId appAttemptId = createSchedulingRequest(GB, "root.default", "user", 1);
    FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId);
    // Verify the blacklist can be updated independent of requesting containers
    scheduler.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(), Collections.<ContainerId>emptyList(), Collections.singletonList(host), null, NULL_UPDATE_REQUESTS);
    assertTrue(app.isPlaceBlacklisted(host));
    scheduler.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(), Collections.<ContainerId>emptyList(), null, Collections.singletonList(host), NULL_UPDATE_REQUESTS);
    assertFalse(scheduler.getSchedulerApp(appAttemptId).isPlaceBlacklisted(host));
    List<ResourceRequest> update = Arrays.asList(createResourceRequest(GB, node.getHostName(), 1, 0, true));
    // Verify a container does not actually get placed on the blacklisted host
    scheduler.allocate(appAttemptId, update, Collections.<ContainerId>emptyList(), Collections.singletonList(host), null, NULL_UPDATE_REQUESTS);
    assertTrue(app.isPlaceBlacklisted(host));
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Incorrect number of containers allocated", 0, app.getLiveContainers().size());
    // Verify a container gets placed on the empty blacklist
    scheduler.allocate(appAttemptId, update, Collections.<ContainerId>emptyList(), null, Collections.singletonList(host), NULL_UPDATE_REQUESTS);
    assertFalse(app.isPlaceBlacklisted(host));
    createSchedulingRequest(GB, "root.default", "user", 1);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Incorrect number of containers allocated", 1, app.getLiveContainers().size());
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 18 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestFairScheduler method testDecreaseQueueSettingOnTheFlyInternal.

private void testDecreaseQueueSettingOnTheFlyInternal(String allocBefore, String allocAfter) throws Exception {
    // Set max running apps
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println(allocBefore);
    out.close();
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // Add a node
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(8192, 8), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    // Request for app 1
    ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1", "user1", 1);
    scheduler.update();
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
    scheduler.handle(updateEvent);
    // App 1 should be running
    assertEquals(1, scheduler.getSchedulerApp(attId1).getLiveContainers().size());
    ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1", "user1", 1);
    scheduler.update();
    scheduler.handle(updateEvent);
    ApplicationAttemptId attId3 = createSchedulingRequest(1024, "queue1", "user1", 1);
    scheduler.update();
    scheduler.handle(updateEvent);
    ApplicationAttemptId attId4 = createSchedulingRequest(1024, "queue1", "user1", 1);
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 2 should be running
    assertEquals(1, scheduler.getSchedulerApp(attId2).getLiveContainers().size());
    // App 3 should be running
    assertEquals(1, scheduler.getSchedulerApp(attId3).getLiveContainers().size());
    // App 4 should not be running
    assertEquals(0, scheduler.getSchedulerApp(attId4).getLiveContainers().size());
    out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println(allocAfter);
    out.close();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 2 should still be running
    assertEquals(1, scheduler.getSchedulerApp(attId2).getLiveContainers().size());
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 3 should still be running
    assertEquals(1, scheduler.getSchedulerApp(attId3).getLiveContainers().size());
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 4 should not be running
    assertEquals(0, scheduler.getSchedulerApp(attId4).getLiveContainers().size());
    // Now remove app 1
    AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(attId1, RMAppAttemptState.FINISHED, false);
    scheduler.handle(appRemovedEvent1);
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 4 should not be running
    assertEquals(0, scheduler.getSchedulerApp(attId4).getLiveContainers().size());
    // Now remove app 2
    appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
    scheduler.handle(appRemovedEvent1);
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 4 should not be running
    assertEquals(0, scheduler.getSchedulerApp(attId4).getLiveContainers().size());
    // Now remove app 3
    appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(attId3, RMAppAttemptState.FINISHED, false);
    scheduler.handle(appRemovedEvent1);
    scheduler.update();
    scheduler.handle(updateEvent);
    // App 4 should be running now
    assertEquals(1, scheduler.getSchedulerApp(attId4).getLiveContainers().size());
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) FileWriter(java.io.FileWriter) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) PrintWriter(java.io.PrintWriter)

Example 19 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestFairScheduler method testDRFHierarchicalQueues.

@Test
public void testDRFHierarchicalQueues() throws Exception {
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    RMNode node = MockNodes.newNodeInfo(1, BuilderUtils.newResource(12288, 12), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
    scheduler.handle(nodeEvent);
    ApplicationAttemptId appAttId1 = createSchedulingRequest(3074, 1, "queue1.subqueue1", "user1", 2);
    // so that start times will be different
    Thread.sleep(3);
    FSAppAttempt app1 = scheduler.getSchedulerApp(appAttId1);
    ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 3, "queue1.subqueue1", "user1", 2);
    // so that start times will be different
    Thread.sleep(3);
    FSAppAttempt app2 = scheduler.getSchedulerApp(appAttId2);
    ApplicationAttemptId appAttId3 = createSchedulingRequest(2048, 2, "queue1.subqueue2", "user1", 2);
    // so that start times will be different
    Thread.sleep(3);
    FSAppAttempt app3 = scheduler.getSchedulerApp(appAttId3);
    ApplicationAttemptId appAttId4 = createSchedulingRequest(1024, 2, "queue2", "user1", 2);
    // so that start times will be different
    Thread.sleep(3);
    FSAppAttempt app4 = scheduler.getSchedulerApp(appAttId4);
    DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy();
    drfPolicy.initialize(scheduler.getContext());
    scheduler.getQueueManager().getQueue("root").setPolicy(drfPolicy);
    scheduler.getQueueManager().getQueue("queue1").setPolicy(drfPolicy);
    scheduler.getQueueManager().getQueue("queue1.subqueue1").setPolicy(drfPolicy);
    scheduler.update();
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
    scheduler.handle(updateEvent);
    // app1 gets first container because it asked first
    Assert.assertEquals(1, app1.getLiveContainers().size());
    scheduler.handle(updateEvent);
    // app4 gets second container because it's on queue2
    Assert.assertEquals(1, app4.getLiveContainers().size());
    scheduler.handle(updateEvent);
    // app4 gets another container because queue2's dominant share of memory
    // is still less than queue1's of cpu
    Assert.assertEquals(2, app4.getLiveContainers().size());
    scheduler.handle(updateEvent);
    // app3 gets one because queue1 gets one and queue1.subqueue2 is behind
    // queue1.subqueue1
    Assert.assertEquals(1, app3.getLiveContainers().size());
    scheduler.handle(updateEvent);
    // app4 would get another one, but it doesn't have any requests
    // queue1.subqueue2 is still using less than queue1.subqueue1, so it
    // gets another
    Assert.assertEquals(2, app3.getLiveContainers().size());
    // queue1.subqueue1 is behind again, so it gets one, which it gives to app2
    scheduler.handle(updateEvent);
    Assert.assertEquals(1, app2.getLiveContainers().size());
    // at this point, we've used all our CPU up, so nobody else should get a container
    scheduler.handle(updateEvent);
    Assert.assertEquals(1, app1.getLiveContainers().size());
    Assert.assertEquals(1, app2.getLiveContainers().size());
    Assert.assertEquals(2, app3.getLiveContainers().size());
    Assert.assertEquals(2, app4.getLiveContainers().size());
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) DominantResourceFairnessPolicy(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy) Test(org.junit.Test)

Example 20 with RMNode

use of org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode in project hadoop by apache.

the class TestFairScheduler method testReservationMetrics.

@Test
public void testReservationMetrics() throws IOException {
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    QueueMetrics metrics = scheduler.getRootQueueMetrics();
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent);
    ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1);
    createApplicationWithAMResource(appAttemptId, "default", "user1", null);
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
    scheduler.update();
    scheduler.handle(updateEvent);
    createSchedulingRequestExistingApplication(1024, 1, 1, appAttemptId);
    scheduler.update();
    scheduler.handle(updateEvent);
    // no reservation yet
    assertEquals(0, metrics.getReservedContainers());
    assertEquals(0, metrics.getReservedMB());
    assertEquals(0, metrics.getReservedVirtualCores());
    // create reservation of {4096, 4}
    createSchedulingRequestExistingApplication(4096, 4, 1, appAttemptId);
    scheduler.update();
    scheduler.handle(updateEvent);
    // reservation created
    assertEquals(1, metrics.getReservedContainers());
    assertEquals(4096, metrics.getReservedMB());
    assertEquals(4, metrics.getReservedVirtualCores());
    // remove AppAttempt
    AppAttemptRemovedSchedulerEvent attRemoveEvent = new AppAttemptRemovedSchedulerEvent(appAttemptId, RMAppAttemptState.KILLED, false);
    scheduler.handle(attRemoveEvent);
    // The reservation metrics should be subtracted
    assertEquals(0, metrics.getReservedContainers());
    assertEquals(0, metrics.getReservedMB());
    assertEquals(0, metrics.getReservedVirtualCores());
}
Also used : QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Test(org.junit.Test)

Aggregations

RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)186 Test (org.junit.Test)143 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)103 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)94 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)63 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)55 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)46 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)44 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)37 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)35 NodeId (org.apache.hadoop.yarn.api.records.NodeId)30 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)30 Resource (org.apache.hadoop.yarn.api.records.Resource)28 FileWriter (java.io.FileWriter)24 PrintWriter (java.io.PrintWriter)24 NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)23 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)22 ArrayList (java.util.ArrayList)20 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)20 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)19