Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in the Apache Hadoop project.
Class TestFairScheduler, method testDoubleRemoval.
/**
 * Verifies that handling the same attempt-removed event twice is harmless:
 * the first removal takes the attempt out of its queue and stops it, and the
 * second one leaves the attempt's queue information untouched.
 */
@Test
public void testDoubleRemoval() throws Exception {
  // Used both as the user name and as the queue name in this test.
  String testUser = "user1";

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1);

  // The placement rule will put the app into the user based queue, but the
  // queue that is passed in must exist.
  scheduler.handle(
      new AppAddedSchedulerEvent(
          appAttemptId.getApplicationId(), testUser, testUser));
  scheduler.handle(
      new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1), false));

  // Keep a reference to the scheduler-side attempt so its state can be
  // inspected after it has been removed.
  FSAppAttempt schedulerAttempt = scheduler.getSchedulerApp(appAttemptId);
  AppAttemptRemovedSchedulerEvent removal =
      new AppAttemptRemovedSchedulerEvent(
          createAppAttemptId(1, 1), RMAppAttemptState.FINISHED, false);

  // Precondition: the attempt is queued and still running.
  List<ApplicationAttemptId> queuedAttempts =
      scheduler.getAppsInQueue(testUser);
  assertNotNull("Queue missing", queuedAttempts);
  assertTrue("Attempt should be in the queue",
      queuedAttempts.contains(appAttemptId));
  assertFalse("Attempt is stopped", schedulerAttempt.isStopped());

  // First removal: the attempt leaves the queue and is stopped.
  scheduler.handle(removal);
  queuedAttempts = scheduler.getAppsInQueue(testUser);
  assertFalse("Attempt should not be in the queue",
      queuedAttempts.contains(appAttemptId));
  assertTrue("Attempt should have been stopped",
      schedulerAttempt.isStopped());

  // Second removal of the already stopped attempt: nothing should happen.
  scheduler.handle(removal);

  // The attempt must still report the queue it was originally placed in.
  assertTrue("Attempt queue has changed",
      schedulerAttempt.getQueue().getName().endsWith(testUser));
}
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in the Apache Hadoop project.
Class TestLeafQueue, method testAppAttemptMetrics.
/**
 * Checks the application counters of a leaf queue's metrics across two
 * attempts of the same application: a first attempt that is removed as
 * FAILED, and a second attempt submitted directly to the queue, followed by
 * removal of the attempt and of the application as FINISHED.
 */
@Test
public void testAppAttemptMetrics() throws Exception {
  // Queue under test; note that it is looked up with the key B.
  LeafQueue a = stubLeafQueue((LeafQueue) queues.get(B));

  // The single user submitting every attempt.
  final String user_0 = "user_0";

  // First attempt: add the app and its attempt through the scheduler, then
  // remove the attempt as FAILED.
  final ApplicationAttemptId firstAttemptId =
      TestUtils.getMockApplicationAttemptId(0, 1);
  cs.handle(
      new AppAddedSchedulerEvent(
          firstAttemptId.getApplicationId(), a.getQueueName(), user_0));
  cs.handle(new AppAttemptAddedSchedulerEvent(firstAttemptId, false));

  AppAttemptRemovedSchedulerEvent attemptRemoved =
      new AppAttemptRemovedSchedulerEvent(
          firstAttemptId, RMAppAttemptState.FAILED, false);
  cs.handle(attemptRemoved);

  assertEquals(0, a.getMetrics().getAppsPending());
  assertEquals(0, a.getMetrics().getAppsFailed());

  // Second attempt of the same application, submitted straight to the queue
  // by the same user.
  final ApplicationAttemptId secondAttemptId =
      TestUtils.getMockApplicationAttemptId(0, 2);
  FiCaSchedulerApp secondAttempt =
      new FiCaSchedulerApp(secondAttemptId, user_0, a, null, spyRMContext);
  secondAttempt.setAMResource(Resource.newInstance(100, 1));
  a.submitApplicationAttempt(secondAttempt, user_0);

  assertEquals(1, a.getMetrics().getAppsSubmitted());
  assertEquals(1, a.getMetrics().getAppsPending());
  assertEquals(1, a.getUser(user_0).getActiveApplications());
  assertEquals(secondAttempt.getAMResource().getMemorySize(),
      a.getMetrics().getUsedAMResourceMB());
  assertEquals(secondAttempt.getAMResource().getVirtualCores(),
      a.getMetrics().getUsedAMResourceVCores());

  // Remove the attempt (by the first attempt's id) and then the whole
  // application, both as FINISHED.
  attemptRemoved =
      new AppAttemptRemovedSchedulerEvent(
          firstAttemptId, RMAppAttemptState.FINISHED, false);
  cs.handle(attemptRemoved);
  cs.handle(
      new AppRemovedSchedulerEvent(
          firstAttemptId.getApplicationId(), RMAppState.FINISHED));

  assertEquals(1, a.getMetrics().getAppsSubmitted());
  assertEquals(0, a.getMetrics().getAppsPending());
  assertEquals(0, a.getMetrics().getAppsFailed());
  assertEquals(1, a.getMetrics().getAppsCompleted());

  // The per-user metrics must have counted the submission as well.
  QueueMetrics perUserMetrics = a.getMetrics().getUserMetrics(user_0);
  assertEquals(1, perUserMetrics.getAppsSubmitted());
}
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in the Apache Hadoop project.
Class TestNodeLabelContainerAllocation, method testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity.
@Test(timeout = 60000)
public void testQueueMaxCapacitiesWillNotBeHonoredWhenNotRespectingExclusivity() throws Exception {
  /**
   * Test case: have a following queue structure:
   *
   * <pre>
   *            root
   *           /    \
   *          a      b
   *         (x)    (x)
   * </pre>
   *
   * a/b can access x, both of them have max-capacity-on-x = 50
   *
   * When doing non-exclusive allocation, app in a (or b) can use 100% of x
   * resource.
   */
  CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(this.conf);

  // Define top-level queues
  csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
  csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);

  final String A = CapacitySchedulerConfiguration.ROOT + ".a";
  csConf.setCapacity(A, 50);
  csConf.setAccessibleNodeLabels(A, toSet("x"));
  csConf.setCapacityByLabel(A, "x", 50);
  csConf.setMaximumCapacityByLabel(A, "x", 50);
  csConf.setUserLimit(A, 200);

  final String B = CapacitySchedulerConfiguration.ROOT + ".b";
  csConf.setCapacity(B, 50);
  csConf.setAccessibleNodeLabels(B, toSet("x"));
  csConf.setCapacityByLabel(B, "x", 50);
  csConf.setMaximumCapacityByLabel(B, "x", 50);
  csConf.setUserLimit(B, 200);

  // set node -> label; label x is registered with its second argument set
  // to false and attached to host h1
  mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));

  // inject node label manager
  MockRM rm1 = new MockRM(csConf) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  rm1.getRMContext().setNodeLabelManager(mgr);
  rm1.start();

  // Everything after start() runs inside try/finally so that the started RM
  // is closed even when an assertion fails or an exception is thrown;
  // otherwise it would be left running for the rest of the test run.
  try {
    // label = x
    MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
    // label = <empty>
    MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);

    // app1 -> a, with its AM launched on the unlabeled node
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);

    // app1 asks for 10 partition= containers
    am1.allocate("*", 1 * GB, 10, new ArrayList<ContainerId>());

    // Drive NM1 heartbeats: one initial node update followed by 50 more
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    for (int i = 0; i < 50; i++) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    }

    // app1 gets all resource in partition=x
    Assert.assertEquals(10, schedulerNode1.getNumContainers());

    // check non-exclusive containers of LeafQueue is correctly updated
    LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
    Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("y"));
    Assert.assertEquals(10, leafQueue.getIgnoreExclusivityRMContainers().get("x").size());

    // completes all containers of app1, ignoreExclusivityRMContainers should be
    // updated as well.
    cs.handle(new AppAttemptRemovedSchedulerEvent(am1.getApplicationAttemptId(), RMAppAttemptState.FINISHED, false));
    Assert.assertFalse(leafQueue.getIgnoreExclusivityRMContainers().containsKey("x"));
  } finally {
    rm1.close();
  }
}
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in the Apache Hadoop project.
Class TestFairScheduler, method testQueueMaxAMShareDefault.
/**
 * Exercises AM admission against queue limits: one AM is accepted in a queue
 * using the default maxAMShare, and three further AMs are rejected (explicit
 * maxAMShare of 0.4, a request for all available vcores, and a request above
 * the queue's maxResources).
 */
@Test
public void testQueueMaxAMShareDefault() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 6);

  // Allocation file: five queues, of which queue2 sets maxAMShare and
  // queue3 sets maxResources; the default queue policy is "fair".
  PrintWriter allocWriter = new PrintWriter(new FileWriter(ALLOC_FILE));
  allocWriter.println("<?xml version=\"1.0\"?>");
  allocWriter.println("<allocations>");
  allocWriter.println("<queue name=\"queue1\">");
  allocWriter.println("</queue>");
  allocWriter.println("<queue name=\"queue2\">");
  allocWriter.println("<maxAMShare>0.4</maxAMShare>");
  allocWriter.println("</queue>");
  allocWriter.println("<queue name=\"queue3\">");
  allocWriter.println("<maxResources>10240 mb 4 vcores</maxResources>");
  allocWriter.println("</queue>");
  allocWriter.println("<queue name=\"queue4\">");
  allocWriter.println("</queue>");
  allocWriter.println("<queue name=\"queue5\">");
  allocWriter.println("</queue>");
  allocWriter.println("<defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>");
  allocWriter.println("</allocations>");
  allocWriter.close();

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // A single node created with an 8192 / 10 resource.
  RMNode clusterNode =
      MockNodes.newNodeInfo(1, Resources.createResource(8192, 10), 0, "127.0.0.1");
  NodeAddedSchedulerEvent nodeAdded = new NodeAddedSchedulerEvent(clusterNode);
  NodeUpdateSchedulerEvent heartbeat = new NodeUpdateSchedulerEvent(clusterNode);
  scheduler.handle(nodeAdded);
  scheduler.update();

  // Before any request is made, every queue's fair share must be zero.
  FSLeafQueue queue1 =
      scheduler.getQueueManager().getLeafQueue("queue1", true);
  assertEquals("Queue queue1's fair share should be 0", 0,
      queue1.getFairShare().getMemorySize());
  FSLeafQueue queue2 =
      scheduler.getQueueManager().getLeafQueue("queue2", true);
  assertEquals("Queue queue2's fair share should be 0", 0,
      queue2.getFairShare().getMemorySize());
  FSLeafQueue queue3 =
      scheduler.getQueueManager().getLeafQueue("queue3", true);
  assertEquals("Queue queue3's fair share should be 0", 0,
      queue3.getFairShare().getMemorySize());
  FSLeafQueue queue4 =
      scheduler.getQueueManager().getLeafQueue("queue4", true);
  assertEquals("Queue queue4's fair share should be 0", 0,
      queue4.getFairShare().getMemorySize());
  FSLeafQueue queue5 =
      scheduler.getQueueManager().getLeafQueue("queue5", true);
  assertEquals("Queue queue5's fair share should be 0", 0,
      queue5.getFairShare().getMemorySize());

  // Submit one 1024 MB request in each of queue3, queue4 and queue5.
  List<String> busyQueues =
      Arrays.asList("root.queue3", "root.queue4", "root.queue5");
  for (String queueName : busyQueues) {
    createSchedulingRequest(1024, queueName, "user1");
    scheduler.update();
    scheduler.handle(heartbeat);
  }

  Resource amResource = Resource.newInstance(1024, 1);
  int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

  // AM1 in queue1: the fair share is 2048 MB and the default maxAMShare is
  // 0.5f, so the AM is accepted.
  ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
  createApplicationWithAMResource(attId1, "queue1", "test1", amResource);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
  FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
  scheduler.update();
  scheduler.handle(heartbeat);
  assertEquals("Application1's AM requests 1024 MB memory", 1024,
      app1.getAMResource().getMemorySize());
  assertEquals("Application1's AM should be running", 1,
      app1.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 1024 MB memory", 1024,
      queue1.getAmResourceUsage().getMemorySize());

  // AM2 in queue2: the fair share is now 1639 MB and maxAMShare is 0.4f,
  // so the AM is not accepted.
  ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
  createApplicationWithAMResource(attId2, "queue2", "test1", amResource);
  createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
  FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
  scheduler.update();
  scheduler.handle(heartbeat);
  assertEquals("Application2's AM resource shouldn't be updated", 0,
      app2.getAMResource().getMemorySize());
  assertEquals("Application2's AM should not be running", 0,
      app2.getLiveContainers().size());
  assertEquals("Queue2's AM resource usage should be 0 MB memory", 0,
      queue2.getAmResourceUsage().getMemorySize());

  // Take app2's attempt out of the scheduler again.
  AppAttemptRemovedSchedulerEvent app2Removed =
      new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
  scheduler.handle(app2Removed);
  scheduler.update();

  // AM3 in queue3: it passes the fair share check but asks for all
  // available vcores, so the AM is not accepted.
  ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
  createApplicationWithAMResource(attId3, "queue3", "test1", amResource);
  createSchedulingRequestExistingApplication(1024, 6, amPriority, attId3);
  FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
  scheduler.update();
  scheduler.handle(heartbeat);
  assertEquals("Application3's AM resource shouldn't be updated", 0,
      app3.getAMResource().getMemorySize());
  assertEquals("Application3's AM should not be running", 0,
      app3.getLiveContainers().size());
  assertEquals("Queue3's AM resource usage should be 0 MB memory", 0,
      queue3.getAmResourceUsage().getMemorySize());

  // AM4 in queue3: it passes the fair share check and does not take all
  // available vcores, but its 5 vcores exceed the queue's maxResources
  // (4 vcores), so the AM is not accepted.
  ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
  createApplicationWithAMResource(attId4, "queue3", "test1", amResource);
  createSchedulingRequestExistingApplication(1024, 5, amPriority, attId4);
  FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
  scheduler.update();
  scheduler.handle(heartbeat);
  assertEquals("Application4's AM resource shouldn't be updated", 0,
      app4.getAMResource().getMemorySize());
  assertEquals("Application4's AM should not be running", 0,
      app4.getLiveContainers().size());
  assertEquals("Queue3's AM resource usage should be 0 MB memory", 0,
      queue3.getAmResourceUsage().getMemorySize());
}
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent in the Apache Hadoop project.
Class TestFairSchedulerFairShare, method testFairShareResetsToZeroWhenAppsComplete.
/**
 * Verifies that once the only attempt in a leaf queue is removed, that
 * queue's fair share drops to zero and the sibling queue that still has an
 * app takes the whole share.
 */
@Test
public void testFairShareResetsToZeroWhenAppsComplete() throws IOException {
  int nodeCapacity = 16 * 1024;
  createClusterWithQueuesAndOneNode(nodeCapacity, "fair");

  // One request in each of the two child queues under parentA.
  ApplicationAttemptId childA1Attempt =
      createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
  createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
  scheduler.update();

  // With both queues holding a request, each is expected to have about 50%
  // of the node capacity as its fair share.
  String[] childQueues = {"root.parentA.childA1", "root.parentA.childA2"};
  for (String queueName : childQueues) {
    double sharePercent =
        (double) scheduler.getQueueManager()
            .getLeafQueue(queueName, false)
            .getFairShare()
            .getMemorySize() / nodeCapacity * 100;
    assertEquals(50, sharePercent, .9);
  }

  // Let the app under childA1 complete. This should reset childA1's fair
  // share to zero, since the queue has no apps running, while childA2's
  // fair share grows to 100% because it is the only active queue.
  scheduler.handle(
      new AppAttemptRemovedSchedulerEvent(
          childA1Attempt, RMAppAttemptState.FINISHED, false));
  scheduler.update();

  double childA1Percent =
      (double) scheduler.getQueueManager()
          .getLeafQueue("root.parentA.childA1", false)
          .getFairShare()
          .getMemorySize() / nodeCapacity * 100;
  assertEquals(0, childA1Percent, 0);

  double childA2Percent =
      (double) scheduler.getQueueManager()
          .getLeafQueue("root.parentA.childA2", false)
          .getFairShare()
          .getMemorySize() / nodeCapacity * 100;
  assertEquals(100, childA2Percent, 0.1);

  verifySteadyFairShareMemory(
      scheduler.getQueueManager().getLeafQueues(), nodeCapacity);
}
Aggregations