use of org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testPreemptionConsidersNodeLocalityDelay.
@Test(timeout = 60000)
public void testPreemptionConsidersNodeLocalityDelay() throws Exception {
/**
* Test case: same as testSimplePreemption steps 1-3.
*
* Step 4: app2 asks for 1G container with locality specified, so it needs
* to wait for missed-opportunity before get scheduled.
* Check if system waits missed-opportunity before finish killable container
*/
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 4 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 4 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
// Do allocation 3 times for node1/node2
for (int i = 0; i < 3; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App1 should have 7 containers now, and no available resource for cluster
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
// Submit app2 to queue-c and asks for a 1G container for AM
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// NM1/NM2 has available resource = 0G
Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertEquals(0 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
// AM asks for a 1 * GB container with unknown host and unknown rack
am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(1 * GB), 1), ResourceRequest.newInstance(Priority.newInstance(1), "unknownhost", Resources.createResource(1 * GB), 1), ResourceRequest.newInstance(Priority.newInstance(1), "/default-rack", Resources.createResource(1 * GB), 1)), null);
// Get edit policy and do one update
SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
// Call edit schedule twice, and check if one container from app1 marked
// to be "killable"
editPolicy.editSchedule();
editPolicy.editSchedule();
PreemptionManager pm = cs.getPreemptionManager();
Map<ContainerId, RMContainer> killableContainers = waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 1);
Assert.assertEquals(killableContainers.entrySet().iterator().next().getKey().getApplicationAttemptId(), am1.getApplicationAttemptId());
// Call CS.handle once to see if container preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// App1 has 7 containers, and app2 has 1 containers (no container preempted)
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
// Do allocation again, one container will be preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
// App1 has 6 containers, and app2 has 2 containers (new container allocated)
Assert.assertEquals(6, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(2, schedulerApp2.getLiveContainers().size());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testPreemptionConsidersUserLimit.
@Test(timeout = 60000)
public void testPreemptionConsidersUserLimit() throws Exception {
/**
* Test case: Submit two application (app1/app2) to different queues, queue
* structure:
*
* <pre>
* Root
* / | \
* a b c
* 10 20 70
* </pre>
*
* Queue-c's user-limit-factor = 0.1, so single user cannot allocate >1 containers in queue-c
*
* 1) Two nodes in the cluster, each of them has 4G.
*
* 2) app1 submit to queue-a first, it asked 7 * 1G containers, so there's no
* more resource available.
*
* 3) app2 submit to queue-c, ask for one 1G container (for AM)
*
* Now the cluster is fulfilled.
*
* 4) app2 asks for another 1G container, system will preempt one container
* from app1, and app2 will receive the preempted container
*/
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
csConf.setUserLimitFactor(CapacitySchedulerConfiguration.ROOT + ".c", 0.1f);
MockRM rm1 = new MockRM(csConf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 4 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 4 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
// Do allocation 3 times for node1/node2
for (int i = 0; i < 3; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App1 should have 7 containers now, and no available resource for cluster
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
// Submit app2 to queue-c and asks for a 1G container for AM
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// NM1/NM2 has available resource = 0G
Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertEquals(0 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
// AM asks for a 1 * GB container
am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(1 * GB), 1)), null);
// Get edit policy and do one update
SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
// Call edit schedule twice, and check if no container from app1 marked
// to be "killable"
editPolicy.editSchedule();
editPolicy.editSchedule();
// No preemption happens
PreemptionManager pm = cs.getPreemptionManager();
Map<ContainerId, RMContainer> killableContainers = waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 0);
Assert.assertEquals(0, killableContainers.size());
// Call CS.handle once to see if container preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// App1 has 7 containers, and app2 has 1 containers (nothing preempted)
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testSimplePreemption.
@Test(timeout = 60000)
public void testSimplePreemption() throws Exception {
/**
* Test case: Submit two application (app1/app2) to different queues, queue
* structure:
*
* <pre>
* Root
* / | \
* a b c
* 10 20 70
* </pre>
*
* 1) Two nodes in the cluster, each of them has 4G.
*
* 2) app1 submit to queue-a first, it asked 7 * 1G containers, so there's no
* more resource available.
*
* 3) app2 submit to queue-c, ask for one 1G container (for AM)
*
* Now the cluster is fulfilled.
*
* 4) app2 asks for another 1G container, system will preempt one container
* from app1, and app2 will receive the preempted container
*/
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 4 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 4 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 1 * GB, 7, new ArrayList<ContainerId>());
// Do allocation 3 times for node1/node2
for (int i = 0; i < 3; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App1 should have 7 containers now, and no available resource for cluster
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
// Submit app2 to queue-c and asks for a 1G container for AM
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// NM1/NM2 has available resource = 0G
Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertEquals(0 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
// AM asks for a 1 * GB container
am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(1 * GB), 1)), null);
// Get edit policy and do one update
SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
// Call edit schedule twice, and check if one container from app1 marked
// to be "killable"
editPolicy.editSchedule();
editPolicy.editSchedule();
PreemptionManager pm = cs.getPreemptionManager();
Map<ContainerId, RMContainer> killableContainers = waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 1);
Assert.assertEquals(1, killableContainers.size());
Assert.assertEquals(killableContainers.entrySet().iterator().next().getKey().getApplicationAttemptId(), am1.getApplicationAttemptId());
// Call CS.handle once to see if container preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// App1 has 6 containers, and app2 has 2 containers
Assert.assertEquals(6, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(2, schedulerApp2.getLiveContainers().size());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testPreemptionConsidersHardNodeLocality.
@Test(timeout = 60000)
public void testPreemptionConsidersHardNodeLocality() throws Exception {
/**
* Test case: same as testSimplePreemption steps 1-3.
*
* Step 4: app2 asks for 1G container with hard locality specified, and
* asked host is not existed
* Confirm system doesn't preempt any container.
*/
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 4 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 4 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());
// Do allocation 3 times for node1/node2
for (int i = 0; i < 3; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
}
for (int i = 0; i < 3; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App1 should have 7 containers now, and no available resource for cluster
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
// Submit app2 to queue-c and asks for a 1G container for AM
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// NM1/NM2 has available resource = 0G
Assert.assertEquals(0 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertEquals(0 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
// AM asks for a 1 * GB container for h3 with hard locality,
// h3 doesn't exist in the cluster
am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(1 * GB), 1, true), ResourceRequest.newInstance(Priority.newInstance(1), "h3", Resources.createResource(1 * GB), 1, false), ResourceRequest.newInstance(Priority.newInstance(1), "/default-rack", Resources.createResource(1 * GB), 1, false)), null);
// Get edit policy and do one update
SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
// Call edit schedule twice, and check if one container from app1 marked
// to be "killable"
editPolicy.editSchedule();
editPolicy.editSchedule();
PreemptionManager pm = cs.getPreemptionManager();
Map<ContainerId, RMContainer> killableContainers = waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 1);
Assert.assertEquals(killableContainers.entrySet().iterator().next().getKey().getApplicationAttemptId(), am1.getApplicationAttemptId());
// Call CS.handle once to see if container preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// App1 has 7 containers, and app2 has 1 containers (no container preempted)
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
// Do allocation again, nothing will be preempted
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
// App1 has 7 containers, and app2 has 1 containers (no container allocated)
Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy in project hadoop by apache.
the class TestCapacitySchedulerSurgicalPreemption method testSimpleSurgicalPreemption.
@Test(timeout = 60000)
public void testSimpleSurgicalPreemption() throws Exception {
/**
* Test case: Submit two application (app1/app2) to different queues, queue
* structure:
*
* <pre>
* Root
* / | \
* a b c
* 10 20 70
* </pre>
*
* 1) Two nodes (n1/n2) in the cluster, each of them has 20G.
*
* 2) app1 submit to queue-a first, it asked 32 * 1G containers
* We will allocate 16 on n1 and 16 on n2.
*
* 3) app2 submit to queue-c, ask for one 1G container (for AM)
*
* 4) app2 asks for another 6G container, it will be reserved on n1
*
* Now: we have:
* n1: 17 from app1, 1 from app2, and 1 reserved from app2
* n2: 16 from app1.
*
* After preemption, we should expect:
* Preempt 4 containers from app1 on n1.
*/
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 1 * GB, 32, new ArrayList<ContainerId>());
// Do allocation for node1/node2
for (int i = 0; i < 32; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App1 should have 33 containers now
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(33, schedulerApp1.getLiveContainers().size());
// 17 from n1 and 16 from n2
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), am1.getApplicationAttemptId(), 17);
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 16);
// Submit app2 to queue-c and asks for a 1G container for AM
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
// NM1/NM2 has available resource = 2G/4G
Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertEquals(4 * GB, cs.getNode(nm2.getNodeId()).getUnallocatedResource().getMemorySize());
// AM asks for a 1 * GB container
am2.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), ResourceRequest.ANY, Resources.createResource(6 * GB), 1)), null);
// Call allocation once on n1, we should expect the container reserved on n1
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
// Get edit policy and do one update
SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);
// Call edit schedule twice, and check if 4 containers from app1 at n1 killed
editPolicy.editSchedule();
editPolicy.editSchedule();
waitNumberOfLiveContainersFromApp(schedulerApp1, 29);
// 13 from n1 (4 preempted) and 16 from n2
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), am1.getApplicationAttemptId(), 13);
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 16);
rm1.close();
}
Aggregations