use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestCapacitySchedulerSurgicalPreemption method testPriorityPreemptionFromHighestPriorityQueueAndOldestContainer.
@Test(timeout = 600000)
public void testPriorityPreemptionFromHighestPriorityQueueAndOldestContainer() throws Exception {
/**
* Test case: Submit two application (app1/app2) to different queues, queue
* structure:
*
* <pre>
* Root
* / | \
* a b c
* 45 45 10
* </pre>
*
* Priority of queue_a = 1
* Priority of queue_b = 2
*
* 1) 5 nodes (n0-n4) in the cluster, each of them has 4G.
*
* 2) app1 submit to queue-c first (AM=1G), it asked 4 * 1G containers
* We will allocate 1 container on each of n0-n4. AM on n4.
*
* 3) app2 submit to queue-a, AM container=0.5G, allocated on n0
* Ask for 2 * 3.5G containers. (Reserved on n0/n1)
*
* 4) app2 submit to queue-b, AM container=0.5G, allocated on n2
* Ask for 2 * 3.5G containers. (Reserved on n2/n3)
*
* First we will preempt container on n2 since it is the oldest container of
* Highest priority queue (b)
*/
// Total preemption = 1G per round, which is 5% of cluster resource (20G)
conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND, 0.05f);
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
conf.setPUOrderingPolicyUnderUtilizedPreemptionEnabled(true);
conf.setPUOrderingPolicyUnderUtilizedPreemptionDelay(1000);
conf.setQueueOrderingPolicy(CapacitySchedulerConfiguration.ROOT, CapacitySchedulerConfiguration.QUEUE_PRIORITY_UTILIZATION_ORDERING_POLICY);
// A/B has higher priority
conf.setQueuePriority(CapacitySchedulerConfiguration.ROOT + ".a", 1);
conf.setQueuePriority(CapacitySchedulerConfiguration.ROOT + ".b", 2);
conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".a", 45f);
conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".b", 45f);
conf.setCapacity(CapacitySchedulerConfiguration.ROOT + ".c", 10f);
MockRM rm1 = new MockRM(conf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM[] mockNMs = new MockNM[5];
for (int i = 0; i < 5; i++) {
mockNMs[i] = rm1.registerNode("h" + i + ":1234", 4 * GB);
}
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode[] rmNodes = new RMNode[5];
for (int i = 0; i < 5; i++) {
rmNodes[i] = rm1.getRMContext().getRMNodes().get(mockNMs[i].getNodeId());
}
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "c");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, mockNMs[4]);
am1.allocate("*", 1 * GB, 4, new ArrayList<>());
// Do allocation for nm1-nm8
for (int i = 0; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
// App1 should have 5 containers now, one for each node
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
Assert.assertEquals(5, schedulerApp1.getLiveContainers().size());
for (int i = 0; i < 5; i++) {
waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNodes[i].getNodeID()), am1.getApplicationAttemptId(), 1);
}
// Submit app2 to queue-a and asks for a 0.5G container for AM (on n0)
RMApp app2 = rm1.submitApp(512, "app", "user", null, "a");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, mockNMs[0]);
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(ApplicationAttemptId.newInstance(app2.getApplicationId(), 1));
// Ask 2 * 3.5GB containers
am2.allocate("*", 3 * GB + 512, 2, new ArrayList<>());
// Do allocation for n0-n1
for (int i = 0; i < 2; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
// Check am2 reserved resource from nm0-nm1
for (int i = 0; i < 2; i++) {
Assert.assertNotNull("Should reserve on nm-" + i, cs.getNode(rmNodes[i].getNodeID()).getReservedContainer());
Assert.assertEquals(cs.getNode(rmNodes[i].getNodeID()).getReservedContainer().getQueueName(), "a");
}
// Submit app3 to queue-b and asks for a 0.5G container for AM (on n2)
RMApp app3 = rm1.submitApp(512, "app", "user", null, "b");
MockAM am3 = MockRM.launchAndRegisterAM(app3, rm1, mockNMs[2]);
FiCaSchedulerApp schedulerApp3 = cs.getApplicationAttempt(ApplicationAttemptId.newInstance(app3.getApplicationId(), 1));
// Ask 2 * 3.5GB containers
am3.allocate("*", 3 * GB + 512, 2, new ArrayList<>());
// Do allocation for n2-n3
for (int i = 2; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
// Check am2 reserved resource from nm2-nm3
for (int i = 2; i < 4; i++) {
Assert.assertNotNull("Should reserve on nm-" + i, cs.getNode(rmNodes[i].getNodeID()).getReservedContainer());
Assert.assertEquals(cs.getNode(rmNodes[i].getNodeID()).getReservedContainer().getQueueName(), "b");
}
// Sleep the timeout interval, we should be able to see 1 container selected
Thread.sleep(1000);
/* 1st container preempted is on n2 */
ProportionalCapacityPreemptionPolicy editPolicy = (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm1);
editPolicy.editSchedule();
// We should have one to-preempt container, on node[2]
Set<RMContainer> selectedToPreempt = editPolicy.getToPreemptContainers().keySet();
Assert.assertEquals(1, selectedToPreempt.size());
Assert.assertEquals(mockNMs[2].getNodeId(), selectedToPreempt.iterator().next().getAllocatedNode());
// Call editSchedule again: selected containers are killed
editPolicy.editSchedule();
waitNumberOfLiveContainersFromApp(schedulerApp1, 4);
// Make sure the container killed, then do allocation for all nms
for (int i = 0; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
waitNumberOfLiveContainersFromApp(schedulerApp1, 4);
waitNumberOfLiveContainersFromApp(schedulerApp2, 1);
waitNumberOfLiveContainersFromApp(schedulerApp3, 2);
/* 2nd container preempted is on n3 */
editPolicy.editSchedule();
// We should have one to-preempt container, on node[3]
selectedToPreempt = editPolicy.getToPreemptContainers().keySet();
Assert.assertEquals(1, selectedToPreempt.size());
Assert.assertEquals(mockNMs[3].getNodeId(), selectedToPreempt.iterator().next().getAllocatedNode());
// Call editSchedule again: selected containers are killed
editPolicy.editSchedule();
waitNumberOfLiveContainersFromApp(schedulerApp1, 3);
// Do allocation for all nms
for (int i = 0; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
waitNumberOfLiveContainersFromApp(schedulerApp1, 3);
waitNumberOfLiveContainersFromApp(schedulerApp2, 1);
waitNumberOfLiveContainersFromApp(schedulerApp3, 3);
/* 3rd container preempted is on n0 */
editPolicy.editSchedule();
// We should have one to-preempt container, on node[0]
selectedToPreempt = editPolicy.getToPreemptContainers().keySet();
Assert.assertEquals(1, selectedToPreempt.size());
Assert.assertEquals(mockNMs[0].getNodeId(), selectedToPreempt.iterator().next().getAllocatedNode());
// Call editSchedule again: selected containers are killed
editPolicy.editSchedule();
waitNumberOfLiveContainersFromApp(schedulerApp1, 2);
// Do allocation for all nms
for (int i = 0; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
waitNumberOfLiveContainersFromApp(schedulerApp1, 2);
waitNumberOfLiveContainersFromApp(schedulerApp2, 2);
waitNumberOfLiveContainersFromApp(schedulerApp3, 3);
/* 4th container preempted is on n1 */
editPolicy.editSchedule();
// We should have one to-preempt container, on node[0]
selectedToPreempt = editPolicy.getToPreemptContainers().keySet();
Assert.assertEquals(1, selectedToPreempt.size());
Assert.assertEquals(mockNMs[1].getNodeId(), selectedToPreempt.iterator().next().getAllocatedNode());
// Call editSchedule again: selected containers are killed
editPolicy.editSchedule();
waitNumberOfLiveContainersFromApp(schedulerApp1, 1);
// Do allocation for all nms
for (int i = 0; i < 4; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNodes[i]));
}
waitNumberOfLiveContainersFromApp(schedulerApp1, 1);
waitNumberOfLiveContainersFromApp(schedulerApp2, 3);
waitNumberOfLiveContainersFromApp(schedulerApp3, 3);
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestContainerAllocation method testPendingResourcesConsideringUserLimit.
@Test
public void testPendingResourcesConsideringUserLimit() throws Exception {
// Set maximum capacity of A to 10
CapacitySchedulerConfiguration newConf = new CapacitySchedulerConfiguration(conf);
newConf.setUserLimitFactor(CapacitySchedulerConfiguration.ROOT + ".default", 0.5f);
newConf.setMaximumAMResourcePercentPerPartition(CapacitySchedulerConfiguration.ROOT + ".default", "", 1.0f);
MockRM rm1 = new MockRM(newConf);
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB);
// launch an app to queue default, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(2 * GB, "app", "u1", null, "default");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// launch 2nd app to queue default, AM container should be launched in nm1
RMApp app2 = rm1.submitApp(4 * GB, "app", "u2", null, "default");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
// am1 asks 1 * 3G container
am1.allocate("*", 3 * GB, 1, null);
// am2 asks 4 * 5G container
am2.allocate("*", 5 * GB, 4, null);
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// Do node heartbeats one, we expect one container allocated reserved on nm1
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
// App1 will get 1 container reserved
Assert.assertEquals(1, schedulerApp1.getReservedContainers().size());
/*
* Note that the behavior of appAttemptResourceUsage is different from queue's
* For queue, used = actual-used + reserved
* For app, used = actual-used.
*
* TODO (wangda): Need to make behaviors of queue/app's resource usage
* consistent
*/
Assert.assertEquals(2 * GB, schedulerApp1.getAppAttemptResourceUsage().getUsed().getMemorySize());
Assert.assertEquals(3 * GB, schedulerApp1.getAppAttemptResourceUsage().getReserved().getMemorySize());
Assert.assertEquals(3 * GB, schedulerApp1.getAppAttemptResourceUsage().getPending().getMemorySize());
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
Assert.assertEquals(4 * GB, schedulerApp2.getAppAttemptResourceUsage().getUsed().getMemorySize());
Assert.assertEquals(0 * GB, schedulerApp2.getAppAttemptResourceUsage().getReserved().getMemorySize());
Assert.assertEquals(5 * 4 * GB, schedulerApp2.getAppAttemptResourceUsage().getPending().getMemorySize());
LeafQueue lq = (LeafQueue) cs.getQueue("default");
// UL = 8GB, so head room of u1 = 8GB - 2GB (AM) - 3GB (Reserved) = 3GB
// u2 = 8GB - 4GB = 4GB
// When not deduct reserved, total-pending = 3G (u1) + 4G (u2) = 7G
// deduct reserved, total-pending = 0G (u1) + 4G = 4G
Assert.assertEquals(7 * GB, lq.getTotalPendingResourcesConsideringUserLimit(Resources.createResource(20 * GB), "", false).getMemorySize());
Assert.assertEquals(4 * GB, lq.getTotalPendingResourcesConsideringUserLimit(Resources.createResource(20 * GB), "", true).getMemorySize());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestContainerAllocation method testNormalContainerAllocationWhenDNSUnavailable.
@Test
public void testNormalContainerAllocationWhenDNSUnavailable() throws Exception {
MockRM rm1 = new MockRM(conf);
rm1.start();
MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000);
RMApp app1 = rm1.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// request a container.
am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
// acquire the container.
SecurityUtilTestHelper.setTokenServiceUseIp(true);
List<Container> containers;
try {
containers = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
// not able to fetch the container;
Assert.assertEquals(0, containers.size());
} finally {
SecurityUtilTestHelper.setTokenServiceUseIp(false);
}
containers = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
// should be able to fetch the container;
Assert.assertEquals(1, containers.size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestContainerAllocation method testExcessReservationThanNodeManagerCapacity.
@Test(timeout = 60000)
public void testExcessReservationThanNodeManagerCapacity() throws Exception {
@SuppressWarnings("resource") MockRM rm = new MockRM(conf);
rm.start();
// Register node1
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4);
MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
// wait..
int waitCount = 20;
int size = rm.getRMContext().getRMNodes().size();
while ((size = rm.getRMContext().getRMNodes().size()) != 2 && waitCount-- > 0) {
LOG.info("Waiting for node managers to register : " + size);
Thread.sleep(100);
}
Assert.assertEquals(2, rm.getRMContext().getRMNodes().size());
// Submit an application
RMApp app1 = rm.submitApp(128);
// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
LOG.info("sending container requests ");
am1.addRequests(new String[] { "*" }, 2 * GB, 1, 1);
// send the request
AllocateResponse alloc1Response = am1.schedule();
// kick the scheduler
nm1.nodeHeartbeat(true);
int waitCounter = 20;
LOG.info("heartbeating nm1");
while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
LOG.info("Waiting for containers to be created for app 1...");
Thread.sleep(500);
alloc1Response = am1.schedule();
}
LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());
// No container should be allocated.
// Internally it should not been reserved.
Assert.assertTrue(alloc1Response.getAllocatedContainers().size() == 0);
LOG.info("heartbeating nm2");
waitCounter = 20;
nm2.nodeHeartbeat(true);
while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
LOG.info("Waiting for containers to be created for app 1...");
Thread.sleep(500);
alloc1Response = am1.schedule();
}
LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());
Assert.assertTrue(alloc1Response.getAllocatedContainers().size() == 1);
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.MockAM in project hadoop by apache.
the class TestContainerAllocation method testAllocationForReservedContainer.
@Test(timeout = 60000)
public void testAllocationForReservedContainer() throws Exception {
/**
* Test case: Submit two application (app1/app2) to a queue. And there's one
* node with 8G resource in the cluster. App1 allocates a 6G container, Then
* app2 asks for a 4G container. App2's request will be reserved on the
* node.
*
* Before next node heartbeat, app1 container is completed/killed. So app1
* container which was reserved will be allocated.
*/
// inject node label manager
MockRM rm1 = new MockRM();
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB);
// launch an app to queue, AM container should be launched in nm1
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
// launch another app to queue, AM container should be launched in nm1
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "default");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);
am1.allocate("*", 4 * GB, 1, new ArrayList<ContainerId>());
am2.allocate("*", 4 * GB, 1, new ArrayList<ContainerId>());
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
LeafQueue leafQueue = (LeafQueue) cs.getQueue("default");
// Do node heartbeats 2 times
// First time will allocate container for app1, second time will reserve
// container for app2
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
// App2 will get preference to be allocated on node1, and node1 will be all
// used by App2.
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// Check if a 4G container allocated for app1, and nothing allocated for app2
Assert.assertEquals(2, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(1, schedulerApp2.getLiveContainers().size());
Assert.assertTrue(schedulerApp2.getReservedContainers().size() > 0);
// NM1 has available resource = 2G (8G - 2 * 1G - 4G)
Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
// Usage of queue = 4G + 2 * 1G + 4G (reserved)
Assert.assertEquals(10 * GB, cs.getRootQueue().getQueueResourceUsage().getUsed().getMemorySize());
Assert.assertEquals(4 * GB, cs.getRootQueue().getQueueResourceUsage().getReserved().getMemorySize());
Assert.assertEquals(4 * GB, leafQueue.getQueueResourceUsage().getReserved().getMemorySize());
// Mark one app1 container as killed/completed and re-kick RM
for (RMContainer container : schedulerApp1.getLiveContainers()) {
if (container.isAMContainer()) {
continue;
}
cs.markContainerForKillable(container);
}
// Cancel asks of app1 and re-kick RM
am1.allocate("*", 4 * GB, 0, new ArrayList<ContainerId>());
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
// Check 4G container cancelled for app1, and one container allocated for
// app2
Assert.assertEquals(1, schedulerApp1.getLiveContainers().size());
Assert.assertEquals(2, schedulerApp2.getLiveContainers().size());
Assert.assertFalse(schedulerApp2.getReservedContainers().size() > 0);
// NM1 has available resource = 2G (8G - 2 * 1G - 4G)
Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId()).getUnallocatedResource().getMemorySize());
Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer());
// Usage of queue = 4G + 2 * 1G
Assert.assertEquals(6 * GB, cs.getRootQueue().getQueueResourceUsage().getUsed().getMemorySize());
Assert.assertEquals(0 * GB, cs.getRootQueue().getQueueResourceUsage().getReserved().getMemorySize());
Assert.assertEquals(0 * GB, leafQueue.getQueueResourceUsage().getReserved().getMemorySize());
rm1.close();
}
Aggregations