Search in sources :

Example 51 with NodeUpdateSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.

the class TestContinuousScheduling method testBasic.

@Test(timeout = 60000)
public void testBasic() throws InterruptedException {
    // Add one node
    String host = "127.0.0.1";
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, host);
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    NodeUpdateSchedulerEvent nodeUpdateEvent = new NodeUpdateSchedulerEvent(node1);
    scheduler.handle(nodeUpdateEvent);
    ApplicationAttemptId appAttemptId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++);
    createMockRMApp(appAttemptId);
    scheduler.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", false);
    scheduler.addApplicationAttempt(appAttemptId, false, false);
    List<ResourceRequest> ask = new ArrayList<>();
    ask.add(createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true));
    scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null, NULL_UPDATE_REQUESTS);
    FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId);
    triggerSchedulingAttempt();
    checkAppConsumption(app, Resources.createResource(1024, 1));
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 52 with NodeUpdateSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.

the class TestFairScheduler method testReservationWithMultiplePriorities.

/**
   * Reserve at a lower priority and verify the lower priority request gets
   * allocated
   */
@Test(timeout = 5000)
public void testReservationWithMultiplePriorities() throws IOException {
    scheduler.init(conf);
    scheduler.start();
    scheduler.reinitialize(conf, resourceManager.getRMContext());
    // Add a node
    RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2));
    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
    scheduler.handle(nodeEvent1);
    NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
    // Create first app and take up half resources so the second app that asks
    // for the entire node won't have enough.
    FSAppAttempt app1 = scheduler.getSchedulerApp(createSchedulingRequest(1024, 1, "queue", "user", 1));
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Basic allocation failed", 1, app1.getLiveContainers().size());
    // Create another app and reserve at a lower priority first
    ApplicationAttemptId attId = createSchedulingRequest(2048, 2, "queue1", "user1", 1, 2);
    FSAppAttempt app2 = scheduler.getSchedulerApp(attId);
    scheduler.update();
    scheduler.handle(updateEvent);
    assertEquals("Reservation at lower priority failed", 1, app2.getReservedContainers().size());
    // Request container on the second app at a higher priority
    createSchedulingRequestExistingApplication(2048, 2, 1, attId);
    // Complete the first container so we can trigger allocation for app2
    ContainerId containerId = app1.getLiveContainers().iterator().next().getContainerId();
    scheduler.allocate(app1.getApplicationAttemptId(), new ArrayList<>(), Arrays.asList(containerId), null, null, NULL_UPDATE_REQUESTS);
    // Trigger allocation for app2
    scheduler.handle(updateEvent);
    // Reserved container (at lower priority) should be run
    Collection<RMContainer> liveContainers = app2.getLiveContainers();
    assertEquals("Allocation post completion failed", 1, liveContainers.size());
    assertEquals("High prio container allocated against low prio reservation", 2, liveContainers.iterator().next().getContainer().getPriority().getPriority());
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Test(org.junit.Test)

Example 53 with NodeUpdateSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.

the class TestCapacitySchedulerLazyPreemption method testPreemptionPolicyCleanupKillableContainersWhenNoPreemptionNeeded.

/*
   * Ignore this test now because it could be a premature optimization
   */
@Ignore
@Test(timeout = 60000)
public void testPreemptionPolicyCleanupKillableContainersWhenNoPreemptionNeeded() throws Exception {
    /**
     * Test case:
     * <pre>
     *             Root
     *            /  |  \
     *           a   b   c
     *          10   20  70
     * </pre>
     * Submit applications to two queues, one uses more than the other, so
     * preemption will happen.
     *
     * Check:
     * 1) Containers will be marked to killable
     * 2) Cancel resource request
     * 3) Killable containers will be cancelled from policy and scheduler
     */
    MockRM rm1 = new MockRM(conf);
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    // launch an app to queue, AM container should be launched in nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
    // Do allocation 6 times for node1
    for (int i = 0; i < 6; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }
    // App1 should have 7 containers now, and no available resource for cluster
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
    Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
    // Submit app2 to queue-c and asks for a 1G container for AM
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
    // NM1 has available resource = 0G
    Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
    am2.allocate("*", 3 * GB, 1, new ArrayList<ContainerId>());
    // Get edit policy and do one update
    ProportionalCapacityPreemptionPolicy editPolicy = (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm1);
    // Call edit schedule twice, and check if 3 container from app1 marked
    // to be "killable"
    editPolicy.editSchedule();
    editPolicy.editSchedule();
    PreemptionManager pm = cs.getPreemptionManager();
    waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 3);
    // Change reqeust from 3G to 2G, now we can preempt one less container. (3->2)
    am2.allocate("*", 2 * GB, 1, new ArrayList<ContainerId>());
    editPolicy.editSchedule();
    Assert.assertEquals(0, editPolicy.getToPreemptContainers().size());
    waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 2);
    // Call editSchedule once more to make sure still nothing happens
    editPolicy.editSchedule();
    Assert.assertEquals(0, editPolicy.getToPreemptContainers().size());
    waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 2);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ProportionalCapacityPreemptionPolicy(org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) PreemptionManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 54 with NodeUpdateSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.

the class TestCapacitySchedulerLazyPreemption method testPreemptionConsidersHardNodeLocality.

@Test(timeout = 60000)
public void testPreemptionConsidersHardNodeLocality() throws Exception {
    /**
     * Test case: same as testSimplePreemption steps 1-3.
     *
     * Step 4: app2 asks for 1G container with hard locality specified, and
     *         asked host is not existed
     * Confirm system doesn't preempt any container.
     */
    MockRM rm1 = new MockRM(conf);
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    MockNM nm1 = rm1.registerNode("h1:1234", 4 * GB);
    MockNM nm2 = rm1.registerNode("h2:1234", 4 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
    // launch an app to queue, AM container should be launched in nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
    // Do allocation 3 times for node1/node2
    for (int i = 0; i < 3; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }
    for (int i = 0; i < 3; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }
    // App1 should have 7 containers now, and no available resource for cluster
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
    Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
    // Submit app2 to queue-c and asks for a 1G container for AM
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // NM1/NM2 has available resource = 0G
    Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
    Assert.assertEquals(0 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
    // AM asks for a 1 * GB container for h3 with hard locality,
    // h3 doesn't exist in the cluster
    am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(1 * GB), 1, true), ResourceRequest.newInstance(Priority.newInstance(1), "h3", Resources.createResource(1 * GB), 1, false), ResourceRequest.newInstance(Priority.newInstance(1), "/default-rack", Resources.createResource(1 * GB), 1, false)), null);
    // Get edit policy and do one update
    SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
    // Call edit schedule twice, and check if one container from app1 marked
    // to be "killable"
    editPolicy.editSchedule();
    editPolicy.editSchedule();
    PreemptionManager pm = cs.getPreemptionManager();
    Map<ContainerId, RMContainer> killableContainers = waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 1);
    Assert.assertEquals(killableContainers.entrySet().iterator().next().getKey().getApplicationAttemptId(), am1.getApplicationAttemptId());
    // Call CS.handle once to see if container preempted
    cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
    // App1 has 7 containers, and app2 has 1 containers (no container preempted)
    Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
    Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
    // Do allocation again, nothing will be preempted
    cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // App1 has 7 containers, and app2 has 1 containers (no container allocated)
    Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
    Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulingEditPolicy(org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) PreemptionManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager) Test(org.junit.Test)

Example 55 with NodeUpdateSchedulerEvent

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.

the class TestCapacitySchedulerSurgicalPreemption method testSimpleSurgicalPreemption.

@Test(timeout = 60000)
public void testSimpleSurgicalPreemption() throws Exception {
    /**
     * Test case: Submit two application (app1/app2) to different queues, queue
     * structure:
     *
     * <pre>
     *             Root
     *            /  |  \
     *           a   b   c
     *          10   20  70
     * </pre>
     *
     * 1) Two nodes (n1/n2) in the cluster, each of them has 20G.
     *
     * 2) app1 submit to queue-a first, it asked 32 * 1G containers
     * We will allocate 16 on n1 and 16 on n2.
     *
     * 3) app2 submit to queue-c, ask for one 1G container (for AM)
     *
     * 4) app2 asks for another 6G container, it will be reserved on n1
     *
     * Now: we have:
     * n1: 17 from app1, 1 from app2, and 1 reserved from app2
     * n2: 16 from app1.
     *
     * After preemption, we should expect:
     * Preempt 4 containers from app1 on n1.
     */
    MockRM rm1 = new MockRM(conf);
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB);
    MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
    // launch an app to queue, AM container should be launched in nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    am1.allocate("*", 1 * GB, 32, new ArrayList<ContainerId>());
    // Do allocation for node1/node2
    for (int i = 0; i < 32; i++) {
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }
    // App1 should have 33 containers now
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
    Assert.assertEquals(33, schedulerApp1.getLiveContainers().size());
    // 17 from n1 and 16 from n2
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), am1.getApplicationAttemptId(), 17);
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 16);
    // Submit app2 to queue-c and asks for a 1G container for AM
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
    // NM1/NM2 has available resource = 2G/4G
    Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
    Assert.assertEquals(4 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
    // AM asks for a 1 * GB container
    am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(6 * GB), 1)), null);
    // Call allocation once on n1, we should expect the container reserved on n1
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
    // Get edit policy and do one update
    SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
    // Call edit schedule twice, and check if 4 containers from app1 at n1 killed
    editPolicy.editSchedule();
    editPolicy.editSchedule();
    waitNumberOfLiveContainersFromApp(schedulerApp1, 29);
    // 13 from n1 (4 preempted) and 16 from n2
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), am1.getApplicationAttemptId(), 13);
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 16);
    rm1.close();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulingEditPolicy(org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Aggregations

NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)105 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)98 Test (org.junit.Test)93 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)61 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)53 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)42 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)40 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)38 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)36 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)35 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)31 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)17 FileWriter (java.io.FileWriter)16 PrintWriter (java.io.PrintWriter)16 AppAttemptRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent)15 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)12 NodeRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent)12 ArrayList (java.util.ArrayList)11 Resource (org.apache.hadoop.yarn.api.records.Resource)10 AppAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent)10