use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testPreemptionPolicyShouldRespectAlreadyMarkedKillableContainers.
/**
 * Verifies that the preemption policy respects containers that are already
 * marked killable.
 *
 * <p>Queue layout used by the test:
 * <pre>
 *       Root
 *      / | \
 *     a  b  c
 *    10 20 70
 * </pre>
 * A single 8G node is registered. app1 (queue a) ends up with 7 x 1G
 * containers, app2 (queue c) gets the remaining 1G for its AM and then asks
 * for more, so containers of app1 have to be preempted.
 *
 * <p>Checks:
 * <ol>
 * <li>Resources of killable containers are excluded from the policy's
 * to-preempt selection (no duplicated container is added to the killable
 * list).</li>
 * <li>When more resources need to be preempted, new containers are selected
 * and the previously killable containers stay killable.</li>
 * </ol>
 *
 * @throws Exception if the mock cluster cannot be set up or driven
 */
@Test(timeout = 60000)
public void testPreemptionPolicyShouldRespectAlreadyMarkedKillableContainers() throws Exception {
  MockRM rm1 = new MockRM(conf);
  try {
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();
    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());

    // Launch app1 in queue a; its AM container is launched on nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    am1.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());

    // Do allocation 6 times for node1
    for (int i = 0; i < 6; i++) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }

    // App1 should have 7 containers now (AM + 6 requested)
    FiCaSchedulerApp schedulerApp1 =
        cs.getApplicationAttempt(am1.getApplicationAttemptId());
    Assert.assertEquals(7, schedulerApp1.getLiveContainers().size());

    // Submit app2 to queue-c; it asks for a 1G container for its AM
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);

    // NM1 has available resource = 0G
    Assert.assertEquals(0 * GB,
        cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
    am2.allocate("*", 1 * GB, 1, new ArrayList<ContainerId>());

    // Get edit policy
    ProportionalCapacityPreemptionPolicy editPolicy =
        (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm1);

    // Call edit schedule twice, and check that one container from app1 is
    // marked "killable"
    editPolicy.editSchedule();
    editPolicy.editSchedule();
    PreemptionManager pm = cs.getPreemptionManager();
    waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 1);

    // The killable container must not also show up as to-be-preempted
    Assert.assertEquals(0, editPolicy.getToPreemptContainers().size());

    // Run edit schedule again, confirm the status has not changed
    editPolicy.editSchedule();
    Assert.assertEquals(0, editPolicy.getToPreemptContainers().size());

    // Snapshot the current killable containers (copy, not a live view)
    Set<ContainerId> previousKillableContainers = new HashSet<>(
        pm.getKillableContainersMap("a", RMNodeLabelsManager.NO_LABEL).keySet());

    // Update the request of app2 from 1 to 2 containers, so one more
    // container needs to be preempted
    am2.allocate("*", 1 * GB, 2, new ArrayList<ContainerId>());

    // After one editSchedule() there should be 1 container in the to-preempt
    // map and 1 container in the killable map
    editPolicy.editSchedule();
    Assert.assertEquals(1, editPolicy.getToPreemptContainers().size());

    // After one more editSchedule() there should be 2 killable containers
    editPolicy.editSchedule();
    Assert.assertEquals(0, editPolicy.getToPreemptContainers().size());

    // The previously killable containers must still be part of the new set
    Map<ContainerId, RMContainer> killableContainers =
        waitKillableContainersSize(pm, "a", RMNodeLabelsManager.NO_LABEL, 2);
    Assert.assertTrue(Sets.difference(previousKillableContainers,
        killableContainers.keySet()).isEmpty());
  } finally {
    // The original test never released the RM; close it on every exit path,
    // including assertion failures.
    rm1.close();
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerLazyPreemption method testPreemptionConsidersUserLimit.
/**
 * Checks that no container is marked killable when the asking queue is
 * capped by its user limit.
 *
 * <p>Queue structure:
 * <pre>
 *       Root
 *      / | \
 *     a  b  c
 *    10 20 70
 * </pre>
 * Queue-c's user-limit-factor is set to 0.1, so a single user cannot allocate
 * more than one container in queue-c.
 *
 * <ol>
 * <li>Two nodes are registered, 4G each.</li>
 * <li>app1 is submitted to queue-a and ends up with 7 x 1G containers.</li>
 * <li>app2 is submitted to queue-c and takes the last 1G for its AM, so both
 * nodes are full.</li>
 * <li>app2 asks for another 1G container. The test expects that nothing from
 * app1 becomes killable and that app2 stays at one container.</li>
 * </ol>
 */
@Test(timeout = 60000)
public void testPreemptionConsidersUserLimit() throws Exception {
  CapacitySchedulerConfiguration limitedConf = new CapacitySchedulerConfiguration(conf);
  limitedConf.setUserLimitFactor(CapacitySchedulerConfiguration.ROOT + ".c", 0.1f);

  MockRM rm = new MockRM(limitedConf);
  rm.getRMContext().setNodeLabelManager(mgr);
  rm.start();

  MockNM firstNm = rm.registerNode("h1:1234", 4 * GB);
  MockNM secondNm = rm.registerNode("h2:1234", 4 * GB);
  CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
  RMNode firstRmNode = rm.getRMContext().getRMNodes().get(firstNm.getNodeId());
  RMNode secondRmNode = rm.getRMContext().getRMNodes().get(secondNm.getNodeId());

  // app1 goes to queue a; its AM is launched on the first node
  RMApp appInA = rm.submitApp(1 * GB, "app", "user", null, "a");
  MockAM amInA = MockRM.launchAndRegisterAM(appInA, rm, firstNm);
  amInA.allocate("*", 1 * GB, 6, new ArrayList<ContainerId>());

  // Three scheduling rounds, each one touching both nodes
  for (int round = 1; round <= 3; round++) {
    scheduler.handle(new NodeUpdateSchedulerEvent(firstRmNode));
    scheduler.handle(new NodeUpdateSchedulerEvent(secondRmNode));
  }

  // app1 now holds 7 containers (AM + 6)
  FiCaSchedulerApp attemptInA =
      scheduler.getApplicationAttempt(amInA.getApplicationAttemptId());
  Assert.assertEquals(7, attemptInA.getLiveContainers().size());

  // app2 goes to queue c; its 1G AM is launched on the second node
  RMApp appInC = rm.submitApp(1 * GB, "app", "user", null, "c");
  MockAM amInC = MockRM.launchAndRegisterAM(appInC, rm, secondNm);

  // Both nodes are fully allocated at this point
  Assert.assertEquals(0 * GB,
      scheduler.getNode(firstNm.getNodeId()).getUnallocatedResource().getMemorySize());
  Assert.assertEquals(0 * GB,
      scheduler.getNode(secondNm.getNodeId()).getUnallocatedResource().getMemorySize());

  // app2's AM asks for one more 1G container
  ResourceRequest extraAsk = ResourceRequest.newInstance(
      Priority.newInstance(1), ResourceRequest.ANY,
      Resources.createResource(1 * GB), 1);
  amInC.allocate(Arrays.asList(extraAsk), null);

  // Run the edit policy twice; nothing from app1 may become killable
  SchedulingEditPolicy policy = getSchedulingEditPolicy(rm);
  policy.editSchedule();
  policy.editSchedule();

  PreemptionManager preemptionManager = scheduler.getPreemptionManager();
  Map<ContainerId, RMContainer> killable = waitKillableContainersSize(
      preemptionManager, "a", RMNodeLabelsManager.NO_LABEL, 0);
  Assert.assertEquals(0, killable.size());

  // One more scheduling pass on the second node must not change anything
  scheduler.handle(new NodeUpdateSchedulerEvent(secondRmNode));
  FiCaSchedulerApp attemptInC =
      scheduler.getApplicationAttempt(amInC.getApplicationAttemptId());

  // app1 keeps its 7 containers, app2 still has only its AM
  Assert.assertEquals(7, attemptInA.getLiveContainers().size());
  Assert.assertEquals(1, attemptInC.getLiveContainers().size());

  rm.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerNodeLabelUpdate method testMoveApplicationWithLabel.
/**
 * Checks moving an application that has label-specific requests between
 * queues: the move to "a2" must be rejected with a {@code YarnException},
 * while the moves to "a3", "a4" and "b" must succeed.
 *
 * @throws Exception if the mock cluster cannot be set up or driven
 */
@Test(timeout = 300000)
public void testMoveApplicationWithLabel() throws Exception {
  // register cluster node labels
  mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y", "z"));
  // set mapping:
  // h1 -> x
  // h2 -> y
  // h4 -> z
  // (h3 gets no label)
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h2", 0), toSet("y")));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h4", 0), toSet("z")));

  // inject node label manager
  MockRM rm = new MockRM(getConfigurationWithSubQueueLabels(conf)) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  try {
    rm.getRMContext().getContainerTokenSecretManager().rollMasterKey();
    rm.getRMContext().getNMTokenSecretManager().rollMasterKey();
    rm.getRMContext().setNodeLabelManager(mgr);
    rm.start();
    MockNM nm1 = rm.registerNode("h1:1234", 4096 * 2);
    MockNM nm2 = rm.registerNode("h2:1234", 4096 * 2);
    MockNM nm3 = rm.registerNode("h3:1234", 4096 * 2);
    MockNM nm4 = rm.registerNode("h4:1234", 4096 * 2);

    // launch an app in queue a1 with its AM on h3, then request one
    // container for label x (waited for on h1) and one for label y
    // (waited for on h2)
    RMApp app1 = rm.submitApp(GB, "app", "user", null, "a1");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm3);
    am1.allocate("*", GB, 1, new ArrayList<ContainerId>(), "x");
    ContainerId container1 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm.waitForState(nm1, container1, RMContainerState.ALLOCATED, 10 * 1000);
    am1.allocate("*", GB, 1, new ArrayList<ContainerId>(), "y");
    ContainerId container2 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
    rm.waitForState(nm2, container2, RMContainerState.ALLOCATED, 10 * 1000);

    CapacityScheduler scheduler = ((CapacityScheduler) rm.getResourceScheduler());

    // Moving to a2 must be rejected. fail() raises an AssertionError, which
    // is not an Exception, so it is not swallowed by the catch below.
    try {
      scheduler.preValidateMoveApplication(app1.getApplicationId(), "a2");
      scheduler.moveApplication(app1.getApplicationId(), "a2");
      fail("Should throw exception since target queue doesnt have " + "required labels");
    } catch (Exception e) {
      Assert.assertTrue("Yarn Exception should be thrown", e instanceof YarnException);
      Assert.assertEquals("Specified queue=a2 can't satisfy " + "following apps label expressions =[x] accessible " + "node labels =[y]", e.getMessage());
    }

    // Moving to a3, a4 and b must work.
    try {
      scheduler.moveApplication(app1.getApplicationId(), "a3");
      scheduler.moveApplication(app1.getApplicationId(), "a4");
      // Check move to queue with accessible label ANY
      scheduler.moveApplication(app1.getApplicationId(), "b");
    } catch (Exception e) {
      // Same failure message as before, but keep the original exception as
      // the cause so the report shows why the move was rejected.
      throw new AssertionError(
          "Should not throw exception since target queue has " + "required labels", e);
    }
  } finally {
    // Stop the RM on every exit path, including assertion failures.
    rm.stop();
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerNodeLabelUpdate method testComplexResourceUsageWhenNodeUpdatesPartition.
/**
 * Variant of testResourceUsageWhenNodeUpdatesPartition with several
 * applications, several users and several 1G containers per node.
 *
 * <p>Layout before the label change, as driven by the requests below:
 * <pre>
 * h1 (label x)            h2 (no label)
 * ------------            -------------
 * app1 container id 2     app1 AM
 * app2: 2 containers      app1 container id 3
 *                         app2 AM
 * </pre>
 * h1 is then relabelled from x to z and the usage tracked for queue "a",
 * "root", both users and both app attempts is expected to move from x to z.
 */
@Test(timeout = 60000)
public void testComplexResourceUsageWhenNodeUpdatesPartition() throws Exception {
  // cluster labels x, y, z; only h1 is mapped (h1 -> x)
  mgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x", "y", "z"));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));

  // RM that hands out the shared node label manager
  MockRM mockRm = new MockRM(getConfigurationWithQueueLabels(conf)) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  mockRm.getRMContext().setNodeLabelManager(mgr);
  mockRm.start();

  MockNM labelledNm = mockRm.registerNode("h1:1234", 80000);
  MockNM plainNm = mockRm.registerNode("h2:1234", 80000);

  // app1 (user u1): AM on h2, container id 2 on h1, container id 3 on h2
  RMApp firstApp = mockRm.submitApp(GB, "app", "u1", null, "a");
  MockAM firstAm = MockRM.launchAndRegisterAM(firstApp, mockRm, plainNm);
  firstAm.allocate("*", GB, 1, new ArrayList<ContainerId>(), "x");
  ContainerId firstAppOnH1 =
      ContainerId.newContainerId(firstAm.getApplicationAttemptId(), 2);
  Assert.assertTrue(
      mockRm.waitForState(labelledNm, firstAppOnH1, RMContainerState.ALLOCATED));
  firstAm.allocate("*", GB, 1, new ArrayList<ContainerId>());
  ContainerId firstAppOnH2 =
      ContainerId.newContainerId(firstAm.getApplicationAttemptId(), 3);
  Assert.assertTrue(
      mockRm.waitForState(plainNm, firstAppOnH2, RMContainerState.ALLOCATED));

  // app2 (user u2): AM on h2, two containers requested for label x
  RMApp secondApp = mockRm.submitApp(GB, "app", "u2", null, "a");
  MockAM secondAm = MockRM.launchAndRegisterAM(secondApp, mockRm, plainNm);
  secondAm.allocate("*", GB, 2, new ArrayList<ContainerId>(), "x");
  ContainerId secondAppOnH1 =
      ContainerId.newContainerId(secondAm.getApplicationAttemptId(), 3);
  Assert.assertTrue(
      mockRm.waitForState(labelledNm, secondAppOnH1, RMContainerState.ALLOCATED));

  // queue a before the change: 3G on partition x, 3G on the default partition
  checkUsedResource(mockRm, "a", 3 * GB, "x");
  checkUsedResource(mockRm, "a", 3 * GB);

  CapacityScheduler capScheduler = (CapacityScheduler) mockRm.getResourceScheduler();
  FiCaSchedulerApp firstAttempt =
      capScheduler.getApplicationAttempt(firstAm.getApplicationAttemptId());
  FiCaSchedulerApp secondAttempt =
      capScheduler.getApplicationAttempt(secondAm.getApplicationAttemptId());

  // relabel h1 from x to z
  capScheduler.handle(new NodeLabelsUpdateSchedulerEvent(
      ImmutableMap.of(labelledNm.getNodeId(), toSet("z"))));

  // queue-level usage: everything that was on x is now on z
  checkUsedResource(mockRm, "a", 0, "x");
  checkUsedResource(mockRm, "a", 3 * GB, "z");
  checkUsedResource(mockRm, "a", 3 * GB);
  checkUsedResource(mockRm, "root", 0, "x");
  checkUsedResource(mockRm, "root", 3 * GB, "z");
  checkUsedResource(mockRm, "root", 3 * GB);

  // per-user usage inside queue a
  checkUserUsedResource(mockRm, "a", "u1", "x", 0 * GB);
  checkUserUsedResource(mockRm, "a", "u1", "z", 1 * GB);
  checkUserUsedResource(mockRm, "a", "u1", "", 2 * GB);
  checkUserUsedResource(mockRm, "a", "u2", "x", 0 * GB);
  checkUserUsedResource(mockRm, "a", "u2", "z", 2 * GB);
  checkUserUsedResource(mockRm, "a", "u2", "", 1 * GB);

  // per-attempt usage
  Assert.assertEquals(0,
      firstAttempt.getAppAttemptResourceUsage().getUsed("x").getMemorySize());
  Assert.assertEquals(1 * GB,
      firstAttempt.getAppAttemptResourceUsage().getUsed("z").getMemorySize());
  Assert.assertEquals(2 * GB,
      firstAttempt.getAppAttemptResourceUsage().getUsed("").getMemorySize());
  Assert.assertEquals(0,
      secondAttempt.getAppAttemptResourceUsage().getUsed("x").getMemorySize());
  Assert.assertEquals(2 * GB,
      secondAttempt.getAppAttemptResourceUsage().getUsed("z").getMemorySize());
  Assert.assertEquals(1 * GB,
      secondAttempt.getAppAttemptResourceUsage().getUsed("").getMemorySize());

  mockRm.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerSurgicalPreemption method testPriorityPreemptionRequiresMoveReservation.
/**
* Checks that, with move-reservation enabled for the priority-utilization
* ordering policy, a container reserved on a full node is re-reserved on
* another node and containers there are selected for preemption.
*/
@Test(timeout = 300000)
public void testPriorityPreemptionRequiresMoveReservation() throws Exception {
/**
* Test case: Submit two application (app1/app2) to different queues, queue
* structure:
*
* <pre>
* Root
* / | \
* a b c
* 10 20 70
* </pre>
*
* 1) 3 nodes in the cluster, 10G for each
*
* 2) app1 submit to queue-b first, it asked 2G each,
* it can get 2G on n1 (AM), 2 * 2G on n2
*
* 3) app2 submit to queue-c, with 2G AM container (allocated on n3)
* app2 requires 9G resource, which will be reserved on n3
*
* We should expect the reservation to be removed from n3 and re-created on
* n2, two containers to be selected for preemption, and the reserved
* container to be allocated once node updates for n2 are processed.
*/
// NOTE: these setters modify the shared conf field of the test class.
conf.setPUOrderingPolicyUnderUtilizedPreemptionEnabled(true);
// Delay (1000) that the Thread.sleep(1000) further down waits out.
conf.setPUOrderingPolicyUnderUtilizedPreemptionDelay(1000);
conf.setQueueOrderingPolicy(CapacitySchedulerConfiguration.ROOT, CapacitySchedulerConfiguration.QUEUE_PRIORITY_UTILIZATION_ORDERING_POLICY);
conf.setPUOrderingPolicyUnderUtilizedPreemptionMoveReservation(true);
// Queue c has higher priority than a/b
conf.setQueuePriority(CapacitySchedulerConfiguration.ROOT + ".c", 1);
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
MockNM nm3 = rm1.registerNode("h3:1234", 10 * GB);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm1.getRMContext().getRMNodes().get(nm3.getNodeId());
// launch app1 in queue b, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(2 * GB, "app", "user", null, "b");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
am1.allocate("*", 2 * GB, 2, new ArrayList<>());
// Do allocation for node2: two iterations with two node updates each
// (four updates in total) for the two requested containers
for (int i = 0; i < 2; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(3, schedulerApp1.getLiveContainers().size());
// 1 from n1 and 2 from n2
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()), am1.getApplicationAttemptId(), 1);
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()), am1.getApplicationAttemptId(), 2);
// Submit app2 to queue-c and asks for a 2G container for AM, on n3
RMApp app2 = rm1.submitApp(2 * GB, "app", "user", null, "c");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm3);
// Attempt id is built by hand with attempt number 1
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(ApplicationAttemptId.newInstance(app2.getApplicationId(), 1));
// Asks 1 * 9G container
am2.allocate("*", 9 * GB, 1, new ArrayList<>());
// Do allocation for node3 once
cs.handle(new NodeUpdateSchedulerEvent(rmNode3));
// Make sure container reserved on node3
Assert.assertNotNull(cs.getNode(rmNode3.getNodeID()).getReservedContainer());
// Call editSchedule immediately: nothing happens, the reservation stays on node3
ProportionalCapacityPreemptionPolicy editPolicy = (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm1);
editPolicy.editSchedule();
Assert.assertNotNull(cs.getNode(rmNode3.getNodeID()).getReservedContainer());
// Sleep for the configured delay (1000 ms, set at the top of the test);
// afterwards we should see the reserved container moved to n2
// (n1 occupied by AM)
Thread.sleep(1000);
editPolicy.editSchedule();
Assert.assertNull(cs.getNode(rmNode3.getNodeID()).getReservedContainer());
Assert.assertNotNull(cs.getNode(rmNode2.getNodeID()).getReservedContainer());
Assert.assertEquals(am2.getApplicationAttemptId(), cs.getNode(rmNode2.getNodeID()).getReservedContainer().getApplicationAttemptId());
// Do it again, we should see two containers marked to be preempted
editPolicy.editSchedule();
Assert.assertEquals(2, editPolicy.getToPreemptContainers().size());
// Call editSchedule again: selected containers are killed
editPolicy.editSchedule();
// Do allocation till reserved container allocated (app2 then has its AM
// plus one more container). The loop has no bound of its own; it relies on
// the @Test timeout if the allocation never happens.
while (schedulerApp2.getLiveContainers().size() < 2) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
Thread.sleep(200);
}
// presumably waits until app1 is down to a single live container -- helper
// is defined outside this snippet
waitNumberOfLiveContainersFromApp(schedulerApp1, 1);
rm1.close();
}
Aggregations