Search in sources:

Example 6 with AppAttemptRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.

In the class TestFairScheduler, method testDoubleRemoval.

/**
 * Removes the same application attempt twice and checks that the second
 * removal leaves the already-stopped attempt's queue information intact.
 */
@Test
public void testDoubleRemoval() throws Exception {
    // Used both as the user name and as the requested queue name.
    final String userName = "user1";
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    final ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1);

    // The placement rule will put the app into the user based queue, but the
    // queue passed in here still has to exist.
    scheduler.handle(new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), userName, userName));
    scheduler.handle(new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1), false));

    // Hold on to the scheduler-side attempt so it can be inspected after removal.
    final FSAppAttempt schedulerApp = scheduler.getSchedulerApp(appAttemptId);
    final AppAttemptRemovedSchedulerEvent removal =
        new AppAttemptRemovedSchedulerEvent(createAppAttemptId(1, 1), RMAppAttemptState.FINISHED, false);

    // Before removal: the attempt is queued and running.
    List<ApplicationAttemptId> queuedAttempts = scheduler.getAppsInQueue(userName);
    assertNotNull("Queue missing", queuedAttempts);
    assertTrue("Attempt should be in the queue", queuedAttempts.contains(appAttemptId));
    assertFalse("Attempt is stopped", schedulerApp.isStopped());

    // First removal: the attempt leaves the queue and is stopped.
    scheduler.handle(removal);
    queuedAttempts = scheduler.getAppsInQueue(userName);
    assertFalse("Attempt should not be in the queue", queuedAttempts.contains(appAttemptId));
    assertTrue("Attempt should have been stopped", schedulerApp.isStopped());

    // Second removal of the stopped attempt: nothing should change.
    scheduler.handle(removal);

    // The attempt must still report its original queue.
    final String queueName = schedulerApp.getQueue().getName();
    assertTrue("Attempt queue has changed", queueName.endsWith(userName));
}
Also used : AppAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AppAttemptAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent) Test(org.junit.Test)

Example 7 with AppAttemptRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.

In the class TestLeafQueue, method testAppAttemptMetrics.

/**
 * Checks the leaf queue's application metrics (submitted, pending, failed,
 * completed) while two attempts of the same application are added and
 * removed: attempt 1 through scheduler events, attempt 2 by submitting it
 * directly to the leaf queue.
 */
@Test
public void testAppAttemptMetrics() throws Exception {
    // Stub the leaf queue looked up under key B. Note that the local variable
    // is nevertheless named 'a'.
    LeafQueue a = stubLeafQueue((LeafQueue) queues.get(B));
    // Users
    final String user_0 = "user_0";
    // Submit the application and its first attempt through scheduler events
    final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 1);
    AppAddedSchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appAttemptId_0.getApplicationId(), a.getQueueName(), user_0);
    cs.handle(addAppEvent);
    AppAttemptAddedSchedulerEvent addAttemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId_0, false);
    cs.handle(addAttemptEvent);
    // Remove the first attempt in FAILED state
    AppAttemptRemovedSchedulerEvent event = new AppAttemptRemovedSchedulerEvent(appAttemptId_0, RMAppAttemptState.FAILED, false);
    cs.handle(event);
    // A failed attempt alone must not leave the app pending or count it as failed
    assertEquals(0, a.getMetrics().getAppsPending());
    assertEquals(0, a.getMetrics().getAppsFailed());
    // Attempt the same application again
    final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(0, 2);
    FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null, spyRMContext);
    app_1.setAMResource(Resource.newInstance(100, 1));
    // same user
    a.submitApplicationAttempt(app_1, user_0);
    assertEquals(1, a.getMetrics().getAppsSubmitted());
    assertEquals(1, a.getMetrics().getAppsPending());
    assertEquals(1, a.getUser(user_0).getActiveApplications());
    // The queue's used-AM-resource metrics must match the AM resource set on app_1
    assertEquals(app_1.getAMResource().getMemorySize(), a.getMetrics().getUsedAMResourceMB());
    assertEquals(app_1.getAMResource().getVirtualCores(), a.getMetrics().getUsedAMResourceVCores());
    // NOTE(review): this removal event is built from appAttemptId_0 (the first
    // attempt) rather than appAttemptId_1 -- confirm that this is intended.
    event = new AppAttemptRemovedSchedulerEvent(appAttemptId_0, RMAppAttemptState.FINISHED, false);
    cs.handle(event);
    // Finish the application itself
    AppRemovedSchedulerEvent rEvent = new AppRemovedSchedulerEvent(appAttemptId_0.getApplicationId(), RMAppState.FINISHED);
    cs.handle(rEvent);
    // The application is counted once as submitted and once as completed
    assertEquals(1, a.getMetrics().getAppsSubmitted());
    assertEquals(0, a.getMetrics().getAppsPending());
    assertEquals(0, a.getMetrics().getAppsFailed());
    assertEquals(1, a.getMetrics().getAppsCompleted());
    // The per-user metrics must also show a single submission
    QueueMetrics userMetrics = a.getMetrics().getUserMetrics(user_0);
    assertEquals(1, userMetrics.getAppsSubmitted());
}
Also used : AppRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) AppAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AppAttemptAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent) Test(org.junit.Test)

Example 8 with AppAttemptRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.

In the class TestNodeLabelContainerAllocation, method testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity.

/**
 * Verifies that, for a non-exclusive node label, an application in a queue
 * whose max capacity on that label is 50 can still be given all ten containers
 * on the labelled node, and that the queue's ignore-exclusivity container
 * bookkeeping is cleared once the application attempt is removed.
 */
@Test(timeout = 60000)
public void testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity() throws Exception {
    /*
     * Test case: we have the following queue structure:
     *
     * <pre>
     *            root
     *         /      \
     *        a        b
     *        (x)     (x)
     * </pre>
     *
     * a/b can access x, and both of them have max-capacity-on-x = 50.
     *
     * When doing non-exclusive allocation, an app in a (or b) can use 100% of
     * the x resource.
     */
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(this.conf);
    // Define top-level queues
    csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
    csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);
    final String A = CapacitySchedulerConfiguration.ROOT + ".a";
    csConf.setCapacity(A, 50);
    csConf.setAccessibleNodeLabels(A, toSet("x"));
    csConf.setCapacityByLabel(A, "x", 50);
    csConf.setMaximumCapacityByLabel(A, "x", 50);
    csConf.setUserLimit(A, 200);
    final String B = CapacitySchedulerConfiguration.ROOT + ".b";
    csConf.setCapacity(B, 50);
    csConf.setAccessibleNodeLabels(B, toSet("x"));
    csConf.setCapacityByLabel(B, "x", 50);
    csConf.setMaximumCapacityByLabel(B, "x", 50);
    csConf.setUserLimit(B, 200);
    // set node -> label; "x" is registered as a non-exclusive label
    mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
    mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
    // inject node label manager
    MockRM rm1 = new MockRM(csConf) {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    // Close the RM in a finally block so that a failing assertion (or the
    // test timeout) does not leak a started resource manager.
    try {
        rm1.getRMContext().setNodeLabelManager(mgr);
        rm1.start();
        // label = x
        MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
        // label = <empty>
        MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
        // app1 -> a
        RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
        // app1 asks for 10 containers of 1 GB each without naming a partition
        am1.allocate("*", 1 * GB, 10, new ArrayList<ContainerId>());
        // NM1 sends 51 node updates: one up front, then 50 in the loop
        CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
        RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
        SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        for (int i = 0; i < 50; i++) {
            cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        }
        // app1 gets all resource in partition=x
        Assert.assertEquals(10, schedulerNode1.getNumContainers());
        // check non-exclusive containers of LeafQueue is correctly updated
        LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
        Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("y"));
        Assert.assertEquals(10, leafQueue.getIgnoreExclusivityRMContainers().get("x").size());
        // completes all containers of app1, ignoreExclusivityRMContainers should be
        // updated as well.
        cs.handle(new AppAttemptRemovedSchedulerEvent(am1.getApplicationAttemptId(), RMAppAttemptState.FINISHED, false));
        Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("x"));
    } finally {
        rm1.close();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) Test(org.junit.Test)

Example 9 with AppAttemptRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.

In the class TestFairScheduler, method testQueueMaxAMShareDefault.

/**
 * Checks AM admission in queues that rely on the default maxAMShare, a queue
 * that sets maxAMShare explicitly (queue2, 0.4) and a queue capped by
 * maxResources (queue3). The test writes an allocation file with five queues,
 * adds a single node, and then submits four applications whose AM requests
 * are expected to be accepted (app1) or rejected (app2, app3, app4).
 */
@Test
public void testQueueMaxAMShareDefault() throws Exception {
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 6);
    // Write the allocation file: queue1, queue4 and queue5 have no settings,
    // queue2 sets maxAMShare, queue3 sets maxResources.
    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"queue1\">");
    out.println("</queue>");
    out.println("<queue name=\"queue2\">");
    out.println("<maxAMShare>0.4</maxAMShare>");
    out.println("</queue>");
    out.println("<queue name=\"queue3\">");
    out.println("<maxResources>10240 mb 4 vcores</maxResources>");
    out.println("</queue>");
    out.println("<queue name=\"queue4\">");
    out.println("</queue>");
    out.println("<queue name=\"queue5\">");
    out.println("</queue>");
    out.println("<defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>");
    out.println("</allocations>");
    out.close();
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // Add a single node created with Resources.createResource(8192, 10)
    RMNode node = MockNodes.newNodeInfo(1, Resources.createResource(8192, 10), 0, "127.0.0.1");
    NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
    scheduler.handle(nodeEvent);
    scheduler.update();
    // With no applications submitted yet, every queue's fair share must be 0
    FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
    assertEquals("Queue queue1's fair share should be 0", 0, queue1.getFairShare().getMemorySize());
    FSLeafQueue queue2 = scheduler.getQueueManager().getLeafQueue("queue2", true);
    assertEquals("Queue queue2's fair share should be 0", 0, queue2.getFairShare().getMemorySize());
    FSLeafQueue queue3 = scheduler.getQueueManager().getLeafQueue("queue3", true);
    assertEquals("Queue queue3's fair share should be 0", 0, queue3.getFairShare().getMemorySize());
    FSLeafQueue queue4 = scheduler.getQueueManager().getLeafQueue("queue4", true);
    assertEquals("Queue queue4's fair share should be 0", 0, queue4.getFairShare().getMemorySize());
    FSLeafQueue queue5 = scheduler.getQueueManager().getLeafQueue("queue5", true);
    assertEquals("Queue queue5's fair share should be 0", 0, queue5.getFairShare().getMemorySize());
    // Submit one 1024 MB request to each of queue3, queue4 and queue5
    List<String> queues = Arrays.asList("root.queue3", "root.queue4", "root.queue5");
    for (String queue : queues) {
        createSchedulingRequest(1 * 1024, queue, "user1");
        scheduler.update();
        scheduler.handle(updateEvent);
    }
    // AM resource used by every application below
    Resource amResource1 = Resource.newInstance(1024, 1);
    int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();
    // The fair share is 2048 MB, and the default maxAMShare is 0.5f,
    // so the AM is accepted.
    ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
    createApplicationWithAMResource(attId1, "queue1", "test1", amResource1);
    createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
    FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Application1's AM requests 1024 MB memory", 1024, app1.getAMResource().getMemorySize());
    assertEquals("Application1's AM should be running", 1, app1.getLiveContainers().size());
    assertEquals("Queue1's AM resource usage should be 1024 MB memory", 1024, queue1.getAmResourceUsage().getMemorySize());
    // Now the fair share is 1639 MB, and the maxAMShare is 0.4f,
    // so the AM is not accepted.
    ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
    createApplicationWithAMResource(attId2, "queue2", "test1", amResource1);
    createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
    FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Application2's AM resource shouldn't be updated", 0, app2.getAMResource().getMemorySize());
    assertEquals("Application2's AM should not be running", 0, app2.getLiveContainers().size());
    assertEquals("Queue2's AM resource usage should be 0 MB memory", 0, queue2.getAmResourceUsage().getMemorySize());
    // Remove app2's attempt
    AppAttemptRemovedSchedulerEvent appRemovedEvent2 = new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
    scheduler.handle(appRemovedEvent2);
    scheduler.update();
    // AM3 can pass the fair share check, but it asks for 6 vcores, which would
    // take all available vcores, so AM3 is not accepted.
    ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
    createApplicationWithAMResource(attId3, "queue3", "test1", amResource1);
    createSchedulingRequestExistingApplication(1024, 6, amPriority, attId3);
    FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Application3's AM resource shouldn't be updated", 0, app3.getAMResource().getMemorySize());
    assertEquals("Application3's AM should not be running", 0, app3.getLiveContainers().size());
    assertEquals("Queue3's AM resource usage should be 0 MB memory", 0, queue3.getAmResourceUsage().getMemorySize());
    // AM4 can pass the fair share check and it doesn't take all available
    // vcores, but it needs 5 vcores, which is more than queue3's
    // maxResources (4 vcores). So AM4 is not accepted.
    ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
    createApplicationWithAMResource(attId4, "queue3", "test1", amResource1);
    createSchedulingRequestExistingApplication(1024, 5, amPriority, attId4);
    FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Application4's AM resource shouldn't be updated", 0, app4.getAMResource().getMemorySize());
    assertEquals("Application4's AM should not be running", 0, app4.getLiveContainers().size());
    assertEquals("Queue3's AM resource usage should be 0 MB memory", 0, queue3.getAmResourceUsage().getMemorySize());
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) FileWriter(java.io.FileWriter) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Example 10 with AppAttemptRemovedSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.

In the class TestFairSchedulerFairShare, method testFairShareResetsToZeroWhenAppsComplete.

/**
 * Runs one app in each of two sibling queues under parentA, then completes
 * the app in childA1 and checks that childA1's fair share falls to zero while
 * childA2's rises to the whole node.
 */
@Test
public void testFairShareResetsToZeroWhenAppsComplete() throws IOException {
    final int totalCapacity = 16 * 1024;
    createClusterWithQueuesAndOneNode(totalCapacity, "fair");

    // Start one app in childA1 and one in childA2, both children of parentA.
    final ApplicationAttemptId firstApp = createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
    createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
    scheduler.update();

    // Both active child queues should hold roughly half of the node each.
    for (int child = 1; child < 3; child++) {
        final FSLeafQueue childQueue = scheduler.getQueueManager().getLeafQueue("root.parentA.childA" + child, false);
        final double sharePercent = (double) childQueue.getFairShare().getMemorySize() / totalCapacity * 100;
        assertEquals(50, sharePercent, .9);
    }

    // Complete the app in childA1. With no apps left, childA1's fair share
    // should reset to zero, and childA2 -- now the only active queue --
    // should grow to 100%.
    scheduler.handle(new AppAttemptRemovedSchedulerEvent(firstApp, RMAppAttemptState.FINISHED, false));
    scheduler.update();

    final double childA1Percent = (double) scheduler.getQueueManager()
        .getLeafQueue("root.parentA.childA1", false).getFairShare().getMemorySize() / totalCapacity * 100;
    assertEquals(0, childA1Percent, 0);

    final double childA2Percent = (double) scheduler.getQueueManager()
        .getLeafQueue("root.parentA.childA2", false).getFairShare().getMemorySize() / totalCapacity * 100;
    assertEquals(100, childA2Percent, 0.1);

    verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), totalCapacity);
}
Also used : AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Test(org.junit.Test)

Aggregations

AppAttemptRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent)26 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)19 Test (org.junit.Test)19 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)13 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)11 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)11 AppAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent)9 AppAttemptAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent)9 FileWriter (java.io.FileWriter)7 PrintWriter (java.io.PrintWriter)7 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)6 Resource (org.apache.hadoop.yarn.api.records.Resource)5 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)5 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)5 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)4 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)4 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)4 NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)4 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)3 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)3