Search in sources :

Example 1 with NodeRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testNodeRemovalDuringAllocate.

/**
 * Verifies that allocate requests are still handled after a node has been
 * removed from the scheduler while the AM service has only been sent add and
 * update events for it, and that the application's opportunistic container
 * context ends up tracking a single node.
 */
@Test(timeout = 60000)
public void testNodeRemovalDuringAllocate() throws Exception {
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // Both node 1 and node 2 will be applicable for scheduling.
    // Poll with allocate calls (at most 10 attempts, 50 ms apart) until the
    // context reports both nodes.
    for (int i = 0; i < 10; i++) {
        am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        if (ctxt.getNodeMap().size() == 2) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(2, ctxt.getNodeMap().size());
    // Remove node from scheduler but not from AM Service.
    scheduler.handle(new NodeRemovedSchedulerEvent(rmNode1));
    // After removal of node 1, only 1 node will be applicable for scheduling.
    for (int i = 0; i < 10; i++) {
        try {
            am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        } catch (Exception e) {
            // Fail with the same message as before, but keep the original
            // exception as the cause so the failure report shows what broke.
            throw new AssertionError("Allocate request should be handled on node removal", e);
        }
        if (ctxt.getNodeMap().size() == 1) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(1, ctxt.getNodeMap().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Example 2 with NodeRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent in project hadoop by apache.

the class TestFairScheduler method testRemoveNodeUpdatesRootQueueMetrics.

/**
 * Checks that the root queue metrics' available memory and virtual cores
 * follow a node being added and then removed, and that calling
 * {@code scheduler.update()} in either state leaves those values unchanged.
 */
@Test
public void testRemoveNodeUpdatesRootQueueMetrics() throws IOException {
    final int nodeMemoryMb = 1024;
    final int nodeVcores = 4;

    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());

    // No node has been added by this test yet.
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

    // Add the node: its full capacity should show up as available.
    RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(nodeMemoryMb, nodeVcores), 1, "127.0.0.1");
    scheduler.handle(new NodeAddedSchedulerEvent(node));
    assertEquals(nodeMemoryMb, scheduler.getRootQueueMetrics().getAvailableMB());
    assertEquals(nodeVcores, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

    // An update pass must not alter the metrics.
    scheduler.update();
    assertEquals(nodeMemoryMb, scheduler.getRootQueueMetrics().getAvailableMB());
    assertEquals(nodeVcores, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

    // Remove the node: availability should drop back to zero.
    scheduler.handle(new NodeRemovedSchedulerEvent(node));
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

    // Again, an update pass must not alter the metrics.
    scheduler.update();
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
    assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) Test(org.junit.Test)

Example 3 with NodeRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent in project hadoop by apache.

the class TestContinuousScheduling method testWithNodeRemoved.

/**
 * Verifies that a manually invoked continuous scheduling attempt completes
 * without throwing after one of two added nodes has been removed from the
 * scheduler.
 */
@Test
public void testWithNodeRemoved() throws Exception {
    // Disable continuous scheduling, will invoke continuous
    // scheduling once manually
    scheduler = new FairScheduler();
    conf = super.createConfiguration();
    resourceManager = new MockRM(conf);
    // TODO: This test should really be using MockRM. For now starting stuff
    // that is needed at a bare minimum.
    ((AsyncDispatcher) resourceManager.getRMContext().getDispatcher()).start();
    resourceManager.getRMContext().getStateStore().start();
    // to initialize the master key
    resourceManager.getRMContext().getContainerTokenSecretManager().rollMasterKey();
    scheduler.setRMContext(resourceManager.getRMContext());
    Assert.assertFalse("Continuous scheduling should be disabled.", scheduler.isContinuousSchedulingEnabled());
    scheduler.init(conf);
    scheduler.start();
    // Add two nodes
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 2, "127.0.0.2");
    NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
    scheduler.handle(nodeEvent2);
    Assert.assertEquals("We should have two alive nodes.", 2, scheduler.getNumClusterNodes());
    // Remove one node
    NodeRemovedSchedulerEvent removeNode1 = new NodeRemovedSchedulerEvent(node1);
    scheduler.handle(removeNode1);
    Assert.assertEquals("We should only have one alive node.", 1, scheduler.getNumClusterNodes());
    // Invoke the continuous scheduling once
    try {
        scheduler.continuousSchedulingAttempt();
    } catch (Exception e) {
        // Same failure message as before, with the original exception attached
        // as the cause so its stack trace is not lost.
        throw new AssertionError("Exception happened when doing continuous scheduling. " + e, e);
    }
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) Test(org.junit.Test)

Example 4 with NodeRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent in project hadoop by apache.

the class TestFairScheduler method testSteadyFairShareWithReloadAndNodeAddRemove.

/**
 * Verifies that the steady fair shares of leaf queues are recomputed when a
 * node is added, when the allocation file is rewritten and the scheduler is
 * reinitialized, and when the node is removed again.
 */
@Test
public void testSteadyFairShareWithReloadAndNodeAddRemove() throws Exception {
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
    // try-with-resources closes the writer even if a write throws.
    try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
        out.println("<?xml version=\"1.0\"?>");
        out.println("<allocations>");
        out.println("<defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>");
        out.println("<queue name=\"root\">");
        out.println("  <schedulingPolicy>drf</schedulingPolicy>");
        out.println("  <queue name=\"child1\">");
        out.println("    <weight>1</weight>");
        out.println("  </queue>");
        out.println("  <queue name=\"child2\">");
        out.println("    <weight>1</weight>");
        out.println("  </queue>");
        out.println("</queue>");
        out.println("</allocations>");
    }
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // The steady fair share for all queues should be 0
    QueueManager queueManager = scheduler.getQueueManager();
    assertEquals(0, queueManager.getLeafQueue("child1", false).getSteadyFairShare().getMemorySize());
    assertEquals(0, queueManager.getLeafQueue("child2", false).getSteadyFairShare().getMemorySize());
    // Add one node
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(6144), 1, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    assertEquals(6144, scheduler.getClusterResource().getMemorySize());
    // The steady fair shares for all queues should be updated
    assertEquals(2048, queueManager.getLeafQueue("child1", false).getSteadyFairShare().getMemorySize());
    assertEquals(2048, queueManager.getLeafQueue("child2", false).getSteadyFairShare().getMemorySize());
    // Reload the allocation configuration file: child2's weight changes to 2
    // and a new queue child3 with weight 2 is added.
    try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
        out.println("<?xml version=\"1.0\"?>");
        out.println("<allocations>");
        out.println("<defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>");
        out.println("<queue name=\"root\">");
        out.println("  <schedulingPolicy>drf</schedulingPolicy>");
        out.println("  <queue name=\"child1\">");
        out.println("    <weight>1</weight>");
        out.println("  </queue>");
        out.println("  <queue name=\"child2\">");
        out.println("    <weight>2</weight>");
        out.println("  </queue>");
        out.println("  <queue name=\"child3\">");
        out.println("    <weight>2</weight>");
        out.println("  </queue>");
        out.println("</queue>");
        out.println("</allocations>");
    }
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // The steady fair shares for all queues should be updated
    assertEquals(1024, queueManager.getLeafQueue("child1", false).getSteadyFairShare().getMemorySize());
    assertEquals(2048, queueManager.getLeafQueue("child2", false).getSteadyFairShare().getMemorySize());
    assertEquals(2048, queueManager.getLeafQueue("child3", false).getSteadyFairShare().getMemorySize());
    // Remove the node; steady fair shares should go back to 0
    NodeRemovedSchedulerEvent nodeEvent2 = new NodeRemovedSchedulerEvent(node1);
    scheduler.handle(nodeEvent2);
    assertEquals(0, scheduler.getClusterResource().getMemorySize());
    assertEquals(0, queueManager.getLeafQueue("child1", false).getSteadyFairShare().getMemorySize());
    assertEquals(0, queueManager.getLeafQueue("child2", false).getSteadyFairShare().getMemorySize());
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) FileWriter(java.io.FileWriter) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 5 with NodeRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent in project hadoop by apache.

the class TestFairScheduler method testAggregateCapacityTracking.

/**
 * Checks that the scheduler's reported cluster memory is the sum of the
 * memory of the nodes currently added: it grows with each node added and
 * shrinks when a node is removed.
 */
@Test
public void testAggregateCapacityTracking() throws Exception {
    final int firstNodeMb = 1024;
    final int secondNodeMb = 512;

    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());

    // First node: cluster memory equals that node's memory.
    RMNode firstNode = MockNodes.newNodeInfo(1, Resources.createResource(firstNodeMb), 1, "127.0.0.1");
    scheduler.handle(new NodeAddedSchedulerEvent(firstNode));
    assertEquals(firstNodeMb, scheduler.getClusterResource().getMemorySize());

    // Second node: cluster memory is the sum of both nodes.
    RMNode secondNode = MockNodes.newNodeInfo(1, Resources.createResource(secondNodeMb), 2, "127.0.0.2");
    scheduler.handle(new NodeAddedSchedulerEvent(secondNode));
    assertEquals(firstNodeMb + secondNodeMb, scheduler.getClusterResource().getMemorySize());

    // Drop the first node: only the second node's memory remains.
    scheduler.handle(new NodeRemovedSchedulerEvent(firstNode));
    assertEquals(secondNodeMb, scheduler.getClusterResource().getMemorySize());
}
Also used : RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) Test(org.junit.Test)

Aggregations

NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)17 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)14 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)13 Test (org.junit.Test)12 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)5 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)3 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)3 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)3 AppAttemptRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent)3 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException)2 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)2 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)2 RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext)2 NodeResourceUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent)2 FileWriter (java.io.FileWriter)1 IOException (java.io.IOException)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1