use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testQueueMaxAMShareWithContainerReservation.
/**
 * Verifies how a queue's maxAMShare limit interacts with container
 * reservations. The test checks that a container gets reserved when the
 * queue is not over maxAMShare, that a reserved container gets unreserved
 * when the queue is over maxAMShare, that a container doesn't get reserved
 * when over maxAMShare, that a reserved container is turned into an
 * allocation, and that a superfluously reserved container gets unreserved.
 *
 * Scenario:
 * 1. Create three nodes: Node1 is 10G, Node2 is 10G and Node3 is 5G.
 * 2. APP1 allocated 1G on Node1 and APP2 allocated 1G on Node2.
 * 3. APP3 reserved 10G on Node1 and Node2.
 * 4. APP4 allocated 5G on Node3, which makes APP3 over maxAMShare.
 * 5. Remove APP1 to make Node1 have 10G available resource.
 * 6. APP3 unreserved its container on Node1 because it is over maxAMShare.
 * 7. APP5 allocated 1G on Node1 after APP3 unreserved its container.
 * 8. Remove APP3.
 * 9. APP6 failed to reserve a 10G container on Node1 due to AMShare limit.
 * 10. APP7 allocated 1G on Node1.
 * 11. Remove APP4 and APP5.
 * 12. APP6 reserved 10G on Node1 and Node2.
 * 13. APP8 failed to allocate a 1G container on Node1 and Node2 because
 *     APP6 reserved Node1 and Node2.
 * 14. Remove APP2.
 * 15. APP6 turned the 10G reservation into an allocation on Node2.
 * 16. APP6 unreserved its container on Node1, APP8 allocated 1G on Node1.
 *
 * @throws Exception if the allocation file cannot be written or the
 *     scheduler fails to (re)initialize
 */
@Test
public void testQueueMaxAMShareWithContainerReservation() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f);
  // Write the allocation file: queue1 gets a maxAMShare of 0.5.
  // try-with-resources guarantees the file handle is released on every
  // exit path before the scheduler reads the file.
  try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"queue1\">");
    out.println("<maxAMShare>0.5</maxAMShare>");
    out.println("</queue>");
    out.println("</allocations>");
  }

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // Node1 and node2: 10240 MB / 10 vcores each; node3: 5120 MB / 5 vcores.
  RMNode node1 = MockNodes.newNodeInfo(1,
      Resources.createResource(10240, 10), 1, "127.0.0.1");
  RMNode node2 = MockNodes.newNodeInfo(1,
      Resources.createResource(10240, 10), 2, "127.0.0.2");
  RMNode node3 = MockNodes.newNodeInfo(1,
      Resources.createResource(5120, 5), 3, "127.0.0.3");
  NodeAddedSchedulerEvent nodeE1 = new NodeAddedSchedulerEvent(node1);
  NodeUpdateSchedulerEvent updateE1 = new NodeUpdateSchedulerEvent(node1);
  NodeAddedSchedulerEvent nodeE2 = new NodeAddedSchedulerEvent(node2);
  NodeUpdateSchedulerEvent updateE2 = new NodeUpdateSchedulerEvent(node2);
  NodeAddedSchedulerEvent nodeE3 = new NodeAddedSchedulerEvent(node3);
  NodeUpdateSchedulerEvent updateE3 = new NodeUpdateSchedulerEvent(node3);
  scheduler.handle(nodeE1);
  scheduler.handle(nodeE2);
  scheduler.handle(nodeE3);
  scheduler.update();

  FSLeafQueue queue1 =
      scheduler.getQueueManager().getLeafQueue("queue1", true);

  // AM container sizes for the eight applications used below.
  Resource amResource1 = Resource.newInstance(1024, 1);
  Resource amResource2 = Resource.newInstance(1024, 1);
  Resource amResource3 = Resource.newInstance(10240, 1);
  Resource amResource4 = Resource.newInstance(5120, 1);
  Resource amResource5 = Resource.newInstance(1024, 1);
  Resource amResource6 = Resource.newInstance(10240, 1);
  Resource amResource7 = Resource.newInstance(1024, 1);
  Resource amResource8 = Resource.newInstance(1024, 1);
  int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

  ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
  createApplicationWithAMResource(attId1, "queue1", "user1", amResource1);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
  FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
  scheduler.update();
  // Allocate app1's AM container on node1.
  scheduler.handle(updateE1);
  assertEquals("Application1's AM requests 1024 MB memory",
      1024, app1.getAMResource().getMemorySize());
  assertEquals("Application1's AM should be running",
      1, app1.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 1024 MB memory",
      1024, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
  createApplicationWithAMResource(attId2, "queue1", "user1", amResource2);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
  FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
  scheduler.update();
  // Allocate app2's AM container on node2.
  scheduler.handle(updateE2);
  assertEquals("Application2's AM requests 1024 MB memory",
      1024, app2.getAMResource().getMemorySize());
  assertEquals("Application2's AM should be running",
      1, app2.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
  createApplicationWithAMResource(attId3, "queue1", "user1", amResource3);
  createSchedulingRequestExistingApplication(10240, 1, amPriority, attId3);
  FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
  scheduler.update();
  // app3 reserves a container on node1 because node1's available resource
  // is less than app3's AM container resource.
  scheduler.handle(updateE1);
  // Similarly app3 reserves a container on node2.
  scheduler.handle(updateE2);
  assertEquals("Application3's AM resource shouldn't be updated",
      0, app3.getAMResource().getMemorySize());
  assertEquals("Application3's AM should not be running",
      0, app3.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
  createApplicationWithAMResource(attId4, "queue1", "user1", amResource4);
  createSchedulingRequestExistingApplication(5120, 1, amPriority, attId4);
  FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
  scheduler.update();
  // app4 can't allocate its AM container on node1 because
  // app3 already reserved its container on node1.
  scheduler.handle(updateE1);
  assertEquals("Application4's AM resource shouldn't be updated",
      0, app4.getAMResource().getMemorySize());
  assertEquals("Application4's AM should not be running",
      0, app4.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  // Allocate app4's AM container on node3.
  scheduler.handle(updateE3);
  assertEquals("Application4's AM requests 5120 MB memory",
      5120, app4.getAMResource().getMemorySize());
  assertEquals("Application4's AM should be running",
      1, app4.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 7168 MB memory",
      7168, queue1.getAmResourceUsage().getMemorySize());

  AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
      new AppAttemptRemovedSchedulerEvent(attId1,
          RMAppAttemptState.FINISHED, false);
  // Release app1's AM container on node1.
  scheduler.handle(appRemovedEvent1);
  assertEquals("Queue1's AM resource usage should be 6144 MB memory",
      6144, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId5 = createAppAttemptId(5, 1);
  createApplicationWithAMResource(attId5, "queue1", "user1", amResource5);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId5);
  FSAppAttempt app5 = scheduler.getSchedulerApp(attId5);
  scheduler.update();
  // app5 can allocate its AM container on node1 after
  // app3 unreserves its container on node1 due to
  // exceeding the queue MaxAMShare limit.
  scheduler.handle(updateE1);
  assertEquals("Application5's AM requests 1024 MB memory",
      1024, app5.getAMResource().getMemorySize());
  assertEquals("Application5's AM should be running",
      1, app5.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 7168 MB memory",
      7168, queue1.getAmResourceUsage().getMemorySize());

  AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
      new AppAttemptRemovedSchedulerEvent(attId3,
          RMAppAttemptState.FINISHED, false);
  // Remove app3. Its AM never ran, so the queue's AM usage is unchanged.
  scheduler.handle(appRemovedEvent3);
  assertEquals("Queue1's AM resource usage should be 7168 MB memory",
      7168, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId6 = createAppAttemptId(6, 1);
  createApplicationWithAMResource(attId6, "queue1", "user1", amResource6);
  createSchedulingRequestExistingApplication(10240, 1, amPriority, attId6);
  FSAppAttempt app6 = scheduler.getSchedulerApp(attId6);
  scheduler.update();
  // app6 can't reserve a container on node1 because
  // it exceeds the queue MaxAMShare limit.
  scheduler.handle(updateE1);
  assertEquals("Application6's AM resource shouldn't be updated",
      0, app6.getAMResource().getMemorySize());
  assertEquals("Application6's AM should not be running",
      0, app6.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 7168 MB memory",
      7168, queue1.getAmResourceUsage().getMemorySize());

  ApplicationAttemptId attId7 = createAppAttemptId(7, 1);
  createApplicationWithAMResource(attId7, "queue1", "user1", amResource7);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId7);
  FSAppAttempt app7 = scheduler.getSchedulerApp(attId7);
  scheduler.update();
  // Allocate app7's AM container on node1 to prove
  // app6 didn't reserve a container on node1.
  scheduler.handle(updateE1);
  assertEquals("Application7's AM requests 1024 MB memory",
      1024, app7.getAMResource().getMemorySize());
  assertEquals("Application7's AM should be running",
      1, app7.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 8192 MB memory",
      8192, queue1.getAmResourceUsage().getMemorySize());

  AppAttemptRemovedSchedulerEvent appRemovedEvent4 =
      new AppAttemptRemovedSchedulerEvent(attId4,
          RMAppAttemptState.FINISHED, false);
  // Release app4's AM container on node3.
  scheduler.handle(appRemovedEvent4);
  assertEquals("Queue1's AM resource usage should be 3072 MB memory",
      3072, queue1.getAmResourceUsage().getMemorySize());

  AppAttemptRemovedSchedulerEvent appRemovedEvent5 =
      new AppAttemptRemovedSchedulerEvent(attId5,
          RMAppAttemptState.FINISHED, false);
  // Release app5's AM container on node1.
  scheduler.handle(appRemovedEvent5);
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  // app6 reserves a container on node1 because node1's available resource
  // is less than app6's AM container resource and
  // app6 is not over the AMShare limit.
  scheduler.handle(updateE1);
  // Similarly app6 reserves a container on node2.
  scheduler.handle(updateE2);

  ApplicationAttemptId attId8 = createAppAttemptId(8, 1);
  createApplicationWithAMResource(attId8, "queue1", "user1", amResource8);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId8);
  FSAppAttempt app8 = scheduler.getSchedulerApp(attId8);
  scheduler.update();
  // app8 can't allocate a container on node1 because
  // app6 already reserved a container on node1.
  scheduler.handle(updateE1);
  assertEquals("Application8's AM resource shouldn't be updated",
      0, app8.getAMResource().getMemorySize());
  assertEquals("Application8's AM should not be running",
      0, app8.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  // app8 can't allocate a container on node2 because
  // app6 already reserved a container on node2.
  scheduler.handle(updateE2);
  assertEquals("Application8's AM resource shouldn't be updated",
      0, app8.getAMResource().getMemorySize());
  assertEquals("Application8's AM should not be running",
      0, app8.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
      new AppAttemptRemovedSchedulerEvent(attId2,
          RMAppAttemptState.FINISHED, false);
  // Release app2's AM container on node2.
  scheduler.handle(appRemovedEvent2);
  assertEquals("Queue1's AM resource usage should be 1024 MB memory",
      1024, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  // app6 turns the reservation into an allocation on node2.
  scheduler.handle(updateE2);
  assertEquals("Application6's AM requests 10240 MB memory",
      10240, app6.getAMResource().getMemorySize());
  assertEquals("Application6's AM should be running",
      1, app6.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 11264 MB memory",
      11264, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  // app6 unreserves its container on node1 because
  // it already got a container on node2.
  // Now app8 can allocate its AM container on node1.
  scheduler.handle(updateE1);
  assertEquals("Application8's AM requests 1024 MB memory",
      1024, app8.getAMResource().getMemorySize());
  assertEquals("Application8's AM should be running",
      1, app8.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 12288 MB memory",
      12288, queue1.getAmResourceUsage().getMemorySize());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testIncreaseQueueSettingOnTheFlyInternal.
/**
 * Shared driver for tests that raise a queue setting while applications are
 * already queued. It writes {@code allocBefore} to the allocation file,
 * submits four 1024 MB applications to queue1 on a single 8192 MB / 8 vcore
 * node and asserts that only app 1 is running. It then rewrites the
 * allocation file with {@code allocAfter}, reinitializes the scheduler and
 * asserts that apps 2 and 3 start while app 4 still does not. Finally it
 * removes app 1 and asserts that app 4 starts.
 *
 * @param allocBefore full allocation-file content to start the scheduler with
 * @param allocAfter full allocation-file content applied on the fly
 * @throws Exception if the allocation file cannot be written or the
 *     scheduler fails to (re)initialize
 */
private void testIncreaseQueueSettingOnTheFlyInternal(String allocBefore, String allocAfter) throws Exception {
  // Write the initial allocation file. try-with-resources guarantees the
  // file handle is released before the scheduler loads the file.
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
    out.println(allocBefore);
  }

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // Add a node
  RMNode node1 = MockNodes.newNodeInfo(1,
      Resources.createResource(8192, 8), 1, "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
  scheduler.handle(nodeEvent1);

  // Request for app 1
  ApplicationAttemptId attId1 =
      createSchedulingRequest(1024, "queue1", "user1", 1);
  scheduler.update();
  NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
  scheduler.handle(updateEvent);

  // App 1 should be running
  assertEquals(1,
      scheduler.getSchedulerApp(attId1).getLiveContainers().size());

  // Requests for apps 2, 3 and 4, each followed by a scheduling pass
  ApplicationAttemptId attId2 =
      createSchedulingRequest(1024, "queue1", "user1", 1);
  scheduler.update();
  scheduler.handle(updateEvent);

  ApplicationAttemptId attId3 =
      createSchedulingRequest(1024, "queue1", "user1", 1);
  scheduler.update();
  scheduler.handle(updateEvent);

  ApplicationAttemptId attId4 =
      createSchedulingRequest(1024, "queue1", "user1", 1);
  scheduler.update();
  scheduler.handle(updateEvent);

  // App 2 should not be running
  assertEquals(0,
      scheduler.getSchedulerApp(attId2).getLiveContainers().size());
  // App 3 should not be running
  assertEquals(0,
      scheduler.getSchedulerApp(attId3).getLiveContainers().size());
  // App 4 should not be running
  assertEquals(0,
      scheduler.getSchedulerApp(attId4).getLiveContainers().size());

  // Replace the allocation file with the new settings and reload it.
  try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
    out.println(allocAfter);
  }
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  scheduler.update();
  scheduler.handle(updateEvent);

  // App 2 should be running
  assertEquals(1,
      scheduler.getSchedulerApp(attId2).getLiveContainers().size());

  scheduler.update();
  scheduler.handle(updateEvent);

  // App 3 should be running
  assertEquals(1,
      scheduler.getSchedulerApp(attId3).getLiveContainers().size());

  scheduler.update();
  scheduler.handle(updateEvent);

  // App 4 should not be running
  assertEquals(0,
      scheduler.getSchedulerApp(attId4).getLiveContainers().size());

  // Now remove app 1
  AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
      new AppAttemptRemovedSchedulerEvent(attId1,
          RMAppAttemptState.FINISHED, false);
  scheduler.handle(appRemovedEvent1);
  scheduler.update();
  scheduler.handle(updateEvent);

  // App 4 should be running
  assertEquals(1,
      scheduler.getSchedulerApp(attId4).getLiveContainers().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testMaxRunningAppsHierarchicalQueues.
/**
 * Exercises maxRunningApps limits in a queue hierarchy: parent queue1 is
 * capped at 3 running apps and its child sub3 at 1, while sub1 and sub2
 * carry no limit of their own. The test submits five apps 10 seconds apart
 * (on a controlled clock), then removes apps and checks which waiting app
 * becomes runnable and that queue accounting follows.
 *
 * @throws Exception if the allocation file cannot be written or the
 *     scheduler fails to (re)initialize
 */
@Test
public void testMaxRunningAppsHierarchicalQueues() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  ControlledClock clock = new ControlledClock();
  scheduler.setClock(clock);

  // try-with-resources guarantees the file handle is released before the
  // scheduler loads the allocation file.
  try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"queue1\">");
    out.println(" <maxRunningApps>3</maxRunningApps>");
    out.println(" <queue name=\"sub1\"></queue>");
    out.println(" <queue name=\"sub2\"></queue>");
    out.println(" <queue name=\"sub3\">");
    out.println(" <maxRunningApps>1</maxRunningApps>");
    out.println(" </queue>");
    out.println("</queue>");
    out.println("</allocations>");
  }

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // exceeds no limits
  ApplicationAttemptId attId1 =
      createSchedulingRequest(1024, "queue1.sub1", "user1");
  verifyAppRunnable(attId1, true);
  verifyQueueNumRunnable("queue1.sub1", 1, 0);
  clock.tickSec(10);
  // exceeds no limits
  ApplicationAttemptId attId2 =
      createSchedulingRequest(1024, "queue1.sub3", "user1");
  verifyAppRunnable(attId2, true);
  verifyQueueNumRunnable("queue1.sub3", 1, 0);
  clock.tickSec(10);
  // exceeds no limits
  ApplicationAttemptId attId3 =
      createSchedulingRequest(1024, "queue1.sub2", "user1");
  verifyAppRunnable(attId3, true);
  verifyQueueNumRunnable("queue1.sub2", 1, 0);
  clock.tickSec(10);
  // exceeds queue1 limit
  ApplicationAttemptId attId4 =
      createSchedulingRequest(1024, "queue1.sub2", "user1");
  verifyAppRunnable(attId4, false);
  verifyQueueNumRunnable("queue1.sub2", 1, 1);
  clock.tickSec(10);
  // exceeds sub3 limit
  ApplicationAttemptId attId5 =
      createSchedulingRequest(1024, "queue1.sub3", "user1");
  verifyAppRunnable(attId5, false);
  verifyQueueNumRunnable("queue1.sub3", 1, 1);
  clock.tickSec(10);

  // Even though the app was removed from sub3, the app from sub2 gets to go
  // because it came in first
  AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
      new AppAttemptRemovedSchedulerEvent(attId2,
          RMAppAttemptState.FINISHED, false);
  scheduler.handle(appRemovedEvent1);
  verifyAppRunnable(attId4, true);
  verifyQueueNumRunnable("queue1.sub2", 2, 0);
  verifyAppRunnable(attId5, false);
  verifyQueueNumRunnable("queue1.sub3", 0, 1);

  // Now test removal of a non-runnable app
  AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
      new AppAttemptRemovedSchedulerEvent(attId5,
          RMAppAttemptState.KILLED, true);
  scheduler.handle(appRemovedEvent2);
  assertEquals(0,
      scheduler.maxRunningEnforcer.usersNonRunnableApps.get("user1").size());
  // verify app gone in queue accounting
  verifyQueueNumRunnable("queue1.sub3", 0, 0);

  // verify it doesn't become runnable when there would be space for it
  AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
      new AppAttemptRemovedSchedulerEvent(attId4,
          RMAppAttemptState.FINISHED, true);
  scheduler.handle(appRemovedEvent3);
  verifyQueueNumRunnable("queue1.sub2", 1, 0);
  verifyQueueNumRunnable("queue1.sub3", 0, 0);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testQueueMaxAMShare.
/**
 * Verifies enforcement of a queue's maxAMShare limit. queue1 is configured
 * with a maxAMShare of 0.2 on a single 20480 MB / 20 vcore node. The test
 * checks that AM containers are allocated only while the queue's AM
 * resource usage stays within the limit, that non-AM containers are not
 * blocked by the limit, that removing applications frees AM share for
 * waiting applications, and that an AM resource of 1860 MB is handled
 * (the "amResource normalization" case).
 *
 * @throws Exception if the allocation file cannot be written or the
 *     scheduler fails to (re)initialize
 */
@Test
public void testQueueMaxAMShare() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);

  // try-with-resources guarantees the file handle is released before the
  // scheduler loads the allocation file.
  try (PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE))) {
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"queue1\">");
    out.println("<maxAMShare>0.2</maxAMShare>");
    out.println("</queue>");
    out.println("</allocations>");
  }

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  RMNode node = MockNodes.newNodeInfo(1,
      Resources.createResource(20480, 20), 0, "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node);
  NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node);
  scheduler.handle(nodeEvent);
  scheduler.update();

  FSLeafQueue queue1 =
      scheduler.getQueueManager().getLeafQueue("queue1", true);
  assertEquals("Queue queue1's fair share should be 0",
      0, queue1.getFairShare().getMemorySize());

  // Submit a request to root.default as well.
  // NOTE(review): presumably this makes queue1 share the cluster with
  // another active queue, which determines its AM limit - confirm.
  createSchedulingRequest(1 * 1024, "root.default", "user1");
  scheduler.update();
  scheduler.handle(updateEvent);

  Resource amResource1 = Resource.newInstance(1024, 1);
  Resource amResource2 = Resource.newInstance(2048, 2);
  Resource amResource3 = Resource.newInstance(1860, 2);
  int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

  // Exceeds no limits
  ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
  createApplicationWithAMResource(attId1, "queue1", "user1", amResource1);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
  FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application1's AM requests 1024 MB memory",
      1024, app1.getAMResource().getMemorySize());
  assertEquals("Application1's AM should be running",
      1, app1.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 1024 MB memory",
      1024, queue1.getAmResourceUsage().getMemorySize());

  // Exceeds no limits
  ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
  createApplicationWithAMResource(attId2, "queue1", "user1", amResource1);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
  FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application2's AM requests 1024 MB memory",
      1024, app2.getAMResource().getMemorySize());
  assertEquals("Application2's AM should be running",
      1, app2.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Exceeds queue limit
  ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
  createApplicationWithAMResource(attId3, "queue1", "user1", amResource1);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId3);
  FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application3's AM resource shouldn't be updated",
      0, app3.getAMResource().getMemorySize());
  assertEquals("Application3's AM should not be running",
      0, app3.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Still can run non-AM container
  createSchedulingRequestExistingApplication(1024, 1, attId1);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application1 should have two running containers",
      2, app1.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Remove app1, app3's AM should become running
  AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
      new AppAttemptRemovedSchedulerEvent(attId1,
          RMAppAttemptState.FINISHED, false);
  scheduler.update();
  scheduler.handle(appRemovedEvent1);
  scheduler.handle(updateEvent);
  assertEquals("Application1's AM should be finished",
      0, app1.getLiveContainers().size());
  assertEquals("Finished application usage should be none",
      Resources.none(), app1.getResourceUsage());
  assertEquals("Application3's AM should be running",
      1, app3.getLiveContainers().size());
  assertEquals("Application3's AM requests 1024 MB memory",
      1024, app3.getAMResource().getMemorySize());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Exceeds queue limit
  ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
  createApplicationWithAMResource(attId4, "queue1", "user1", amResource2);
  createSchedulingRequestExistingApplication(2048, 2, amPriority, attId4);
  FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application4's AM resource shouldn't be updated",
      0, app4.getAMResource().getMemorySize());
  assertEquals("Application4's AM should not be running",
      0, app4.getLiveContainers().size());
  // app4 never ran, so the message names the app instead of calling it
  // "finished".
  assertEquals("Application4's usage should be none",
      Resources.none(), app4.getResourceUsage());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Exceeds queue limit
  ApplicationAttemptId attId5 = createAppAttemptId(5, 1);
  createApplicationWithAMResource(attId5, "queue1", "user1", amResource2);
  createSchedulingRequestExistingApplication(2048, 2, amPriority, attId5);
  FSAppAttempt app5 = scheduler.getSchedulerApp(attId5);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application5's AM resource shouldn't be updated",
      0, app5.getAMResource().getMemorySize());
  assertEquals("Application5's AM should not be running",
      0, app5.getLiveContainers().size());
  assertEquals("Application5's usage should be none",
      Resources.none(), app5.getResourceUsage());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Remove un-running app doesn't affect others
  AppAttemptRemovedSchedulerEvent appRemovedEvent4 =
      new AppAttemptRemovedSchedulerEvent(attId4,
          RMAppAttemptState.KILLED, false);
  scheduler.handle(appRemovedEvent4);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application5's AM should not be running",
      0, app5.getLiveContainers().size());
  assertEquals("Application5's usage should be none",
      Resources.none(), app5.getResourceUsage());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Remove app2 and app3, app5's AM should become running
  AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
      new AppAttemptRemovedSchedulerEvent(attId2,
          RMAppAttemptState.FINISHED, false);
  AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
      new AppAttemptRemovedSchedulerEvent(attId3,
          RMAppAttemptState.FINISHED, false);
  scheduler.handle(appRemovedEvent2);
  scheduler.handle(appRemovedEvent3);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application2's AM should be finished",
      0, app2.getLiveContainers().size());
  assertEquals("Finished application usage should be none",
      Resources.none(), app2.getResourceUsage());
  assertEquals("Application3's AM should be finished",
      0, app3.getLiveContainers().size());
  assertEquals("Finished application usage should be none",
      Resources.none(), app3.getResourceUsage());
  assertEquals("Application5's AM should be running",
      1, app5.getLiveContainers().size());
  assertEquals("Application5's AM requests 2048 MB memory",
      2048, app5.getAMResource().getMemorySize());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // request non-AM container for app5
  createSchedulingRequestExistingApplication(1024, 1, attId5);
  assertEquals("Application5's AM should have 1 container",
      1, app5.getLiveContainers().size());
  // complete AM container before non-AM container is allocated.
  // spark application hit this situation.
  RMContainer amContainer5 =
      (RMContainer) app5.getLiveContainers().toArray()[0];
  ContainerExpiredSchedulerEvent containerExpired =
      new ContainerExpiredSchedulerEvent(amContainer5.getContainerId());
  scheduler.handle(containerExpired);
  assertEquals("Application5's AM should have 0 container",
      0, app5.getLiveContainers().size());
  // app5 is still registered here; only its AM container has expired.
  assertEquals("Application5's usage should be none",
      Resources.none(), app5.getResourceUsage());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());
  scheduler.update();
  scheduler.handle(updateEvent);
  // non-AM container should be allocated
  // check non-AM container allocation is not rejected
  // due to queue MaxAMShare limitation.
  assertEquals("Application5 should have 1 container",
      1, app5.getLiveContainers().size());
  // check non-AM container allocation won't affect queue AmResourceUsage
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Check amResource normalization
  ApplicationAttemptId attId6 = createAppAttemptId(6, 1);
  createApplicationWithAMResource(attId6, "queue1", "user1", amResource3);
  createSchedulingRequestExistingApplication(1860, 2, amPriority, attId6);
  FSAppAttempt app6 = scheduler.getSchedulerApp(attId6);
  scheduler.update();
  scheduler.handle(updateEvent);
  assertEquals("Application6's AM should not be running",
      0, app6.getLiveContainers().size());
  assertEquals("Application6's usage should be none",
      Resources.none(), app6.getResourceUsage());
  assertEquals("Application6's AM resource shouldn't be updated",
      0, app6.getAMResource().getMemorySize());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, queue1.getAmResourceUsage().getMemorySize());

  // Remove all apps
  AppAttemptRemovedSchedulerEvent appRemovedEvent5 =
      new AppAttemptRemovedSchedulerEvent(attId5,
          RMAppAttemptState.FINISHED, false);
  AppAttemptRemovedSchedulerEvent appRemovedEvent6 =
      new AppAttemptRemovedSchedulerEvent(attId6,
          RMAppAttemptState.FINISHED, false);
  scheduler.handle(appRemovedEvent5);
  scheduler.handle(appRemovedEvent6);
  scheduler.update();
  assertEquals("Queue1's AM resource usage should be 0",
      0, queue1.getAmResourceUsage().getMemorySize());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in project hadoop by apache.
the class TestSchedulerPlanFollowerBase method testPlanFollower.
/**
 * Runs a plan follower against an in-memory plan holding three reservations
 * and, at mocked clock times 0, 3, 10, 11, 12 and 16, checks which
 * reservation queues exist, the capacity values expected for them, and
 * where the single submitted application is counted.
 *
 * @param isMove forwarded to the {@code InMemoryPlan} constructor; when
 *     true the test expects the application to show up in the default
 *     reservation queue once r1's queue is gone, when false it expects the
 *     application to stay out of the default queue and removes the attempt
 *     explicitly
 * @throws PlanningException declared by the plan operations used here
 * @throws InterruptedException declared by the original signature
 * @throws AccessControlException declared by the original signature
 */
protected void testPlanFollower(boolean isMove) throws PlanningException, InterruptedException, AccessControlException {
  // Build the plan under test.
  plan = new InMemoryPlan(scheduler.getRootQueueMetrics(), policy, mAgent,
      scheduler.getClusterResource(), 1L, res,
      scheduler.getMinimumResourceCapability(), maxAlloc, "dedicated", null,
      isMove, context);

  // Populate the plan with three reservations: r1 and r2 use a flat
  // allocation, r3 ramps up and back down.
  final long now = System.currentTimeMillis();
  final int[] flat = { 10, 10, 10, 10, 10 };
  final int[] ramp = { 0, 10, 20, 10, 0 };

  ReservationId r1 = ReservationId.newInstance(now, 1);
  ReservationDefinition definition =
      ReservationSystemTestUtil.createSimpleReservationDefinition(
          0, flat.length + 1, flat.length);
  String planBeforeR1 = plan.toString();
  InMemoryReservationAllocation allocR1 =
      new InMemoryReservationAllocation(r1, definition, "u3", "dedicated",
          0, flat.length,
          ReservationSystemTestUtil.generateAllocation(0L, 1L, flat),
          res, minAlloc);
  assertTrue(planBeforeR1, plan.addReservation(allocR1, false));

  ReservationId r2 = ReservationId.newInstance(now, 2);
  String planBeforeR2 = plan.toString();
  InMemoryReservationAllocation allocR2 =
      new InMemoryReservationAllocation(r2, definition, "u3", "dedicated",
          3, 3 + flat.length,
          ReservationSystemTestUtil.generateAllocation(3L, 1L, flat),
          res, minAlloc);
  assertTrue(planBeforeR2, plan.addReservation(allocR2, false));

  ReservationId r3 = ReservationId.newInstance(now, 3);
  String planBeforeR3 = plan.toString();
  InMemoryReservationAllocation allocR3 =
      new InMemoryReservationAllocation(r3, definition, "u4", "dedicated",
          10, 10 + ramp.length,
          ReservationSystemTestUtil.generateAllocation(10L, 1L, ramp),
          res, minAlloc);
  assertTrue(planBeforeR3, plan.addReservation(allocR3, false));

  AbstractSchedulerPlanFollower follower = createPlanFollower();

  // Clock at 0: first follower pass.
  when(mClock.getTime()).thenReturn(0L);
  follower.run();

  Queue reservationQueue = getReservationQueue(r1.toString());
  assertReservationQueueExists(r1);

  // Submit one application, with a single attempt, to r1's queue.
  String submitter = "test-user";
  ApplicationId appId = ApplicationId.newInstance(0, 1);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 0);
  scheduler.handle(new AppAddedSchedulerEvent(
      appId, reservationQueue.getQueueName(), submitter));
  scheduler.handle(new AppAttemptAddedSchedulerEvent(attemptId, false));

  // The default reservation queue starts out without applications.
  Queue defaultQueue = getDefaultQueue();
  Assert.assertEquals(0, getNumberOfApplications(defaultQueue));

  assertReservationQueueExists(r1, 0.1, 0.1);
  Assert.assertEquals(1, getNumberOfApplications(reservationQueue));
  assertReservationQueueDoesNotExist(r2);
  assertReservationQueueDoesNotExist(r3);

  // Clock at 3.
  when(mClock.getTime()).thenReturn(3L);
  follower.run();

  Assert.assertEquals(0, getNumberOfApplications(defaultQueue));
  assertReservationQueueExists(r1, 0.1, 0.1);
  Assert.assertEquals(1, getNumberOfApplications(reservationQueue));
  assertReservationQueueExists(r2, 0.1, 0.1);
  assertReservationQueueDoesNotExist(r3);

  // Clock at 10.
  when(mClock.getTime()).thenReturn(10L);
  follower.run();

  reservationQueue = getReservationQueue(r1.toString());
  if (!isMove) {
    // The application is not moved to the default queue; r1's queue is
    // still present, so remove the attempt explicitly as KILLED.
    Assert.assertEquals(0, getNumberOfApplications(defaultQueue));
    assertNotNull(reservationQueue);
    scheduler.handle(new AppAttemptRemovedSchedulerEvent(
        attemptId, RMAppAttemptState.KILLED, false));
  } else {
    // The application should now be in the default reservation queue and
    // r1's queue should be gone.
    Assert.assertEquals(1, getNumberOfApplications(defaultQueue));
    assertNull(reservationQueue);
  }
  assertReservationQueueDoesNotExist(r2);
  assertReservationQueueExists(r3, 0, 1.0);

  // Clock at 11.
  when(mClock.getTime()).thenReturn(11L);
  follower.run();

  // One application in the default queue when moved, none otherwise.
  Assert.assertEquals(isMove ? 1 : 0, getNumberOfApplications(defaultQueue));
  assertReservationQueueDoesNotExist(r1);
  assertReservationQueueDoesNotExist(r2);
  assertReservationQueueExists(r3, 0.1, 0.1);

  // Clock at 12.
  when(mClock.getTime()).thenReturn(12L);
  follower.run();

  assertReservationQueueDoesNotExist(r1);
  assertReservationQueueDoesNotExist(r2);
  assertReservationQueueExists(r3, 0.2, 0.2);

  // Clock at 16: no reservation queue should remain.
  when(mClock.getTime()).thenReturn(16L);
  follower.run();

  assertReservationQueueDoesNotExist(r1);
  assertReservationQueueDoesNotExist(r2);
  assertReservationQueueDoesNotExist(r3);
  verifyCapacity(defaultQueue);
}
Aggregations