use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestCapacityScheduler method nodeUpdate.
/**
 * Simulates a heartbeat from the given node manager by handing the scheduler
 * a {@link NodeUpdateSchedulerEvent} for the matching RM-side node.
 *
 * @param nm node manager whose node id is used to look up the RM node
 */
private void nodeUpdate(org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm) {
  // Resolve the RM's view of this node from the RM context.
  final RMNode rmNode =
      resourceManager.getRMContext().getRMNodes().get(nm.getNodeId());
  // Build the heartbeat event first, then dispatch it to the scheduler.
  final NodeUpdateSchedulerEvent heartbeat = new NodeUpdateSchedulerEvent(rmNode);
  resourceManager.getResourceScheduler().handle(heartbeat);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class FairScheduler method handle.
/**
 * Dispatches a scheduler event to the handler matching its declared type.
 *
 * <p>For every recognised type the event's runtime class is verified before
 * it is cast; event types without a case are only logged as errors.
 *
 * @param event the scheduler event to process
 * @throws RuntimeException if the event's runtime class does not match the
 *         class expected for its declared type
 */
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
    case NODE_ADDED: {
      if (!(event instanceof NodeAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent) event;
      addNode(added.getContainerReports(), added.getAddedRMNode());
      break;
    }
    case NODE_REMOVED: {
      if (!(event instanceof NodeRemovedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent) event;
      removeNode(removed.getRemovedRMNode());
      break;
    }
    case NODE_UPDATE: {
      if (!(event instanceof NodeUpdateSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      NodeUpdateSchedulerEvent heartbeat = (NodeUpdateSchedulerEvent) event;
      nodeUpdate(heartbeat.getRMNode());
      break;
    }
    case APP_ADDED: {
      if (!(event instanceof AppAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAddedSchedulerEvent appAdded = (AppAddedSchedulerEvent) event;
      String resolvedQueue = resolveReservationQueueName(
          appAdded.getQueue(),
          appAdded.getApplicationId(),
          appAdded.getReservationID(),
          appAdded.getIsAppRecovering());
      // The application is only added when a queue name was resolved.
      if (resolvedQueue != null) {
        addApplication(
            appAdded.getApplicationId(),
            resolvedQueue,
            appAdded.getUser(),
            appAdded.getIsAppRecovering());
      }
      break;
    }
    case APP_REMOVED: {
      if (!(event instanceof AppRemovedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppRemovedSchedulerEvent appRemoved = (AppRemovedSchedulerEvent) event;
      removeApplication(appRemoved.getApplicationID(), appRemoved.getFinalState());
      break;
    }
    case NODE_RESOURCE_UPDATE: {
      if (!(event instanceof NodeResourceUpdateSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      NodeResourceUpdateSchedulerEvent resourceChange =
          (NodeResourceUpdateSchedulerEvent) event;
      updateNodeResource(resourceChange.getRMNode(), resourceChange.getResourceOption());
      break;
    }
    case APP_ATTEMPT_ADDED: {
      if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAttemptAddedSchedulerEvent attemptAdded = (AppAttemptAddedSchedulerEvent) event;
      addApplicationAttempt(
          attemptAdded.getApplicationAttemptId(),
          attemptAdded.getTransferStateFromPreviousAttempt(),
          attemptAdded.getIsAttemptRecovering());
      break;
    }
    case APP_ATTEMPT_REMOVED: {
      if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      AppAttemptRemovedSchedulerEvent attemptRemoved =
          (AppAttemptRemovedSchedulerEvent) event;
      removeApplicationAttempt(
          attemptRemoved.getApplicationAttemptID(),
          attemptRemoved.getFinalAttemptState(),
          attemptRemoved.getKeepContainersAcrossAppAttempts());
      break;
    }
    case CONTAINER_EXPIRED: {
      if (!(event instanceof ContainerExpiredSchedulerEvent)) {
        throw new RuntimeException("Unexpected event type: " + event);
      }
      ContainerExpiredSchedulerEvent expired = (ContainerExpiredSchedulerEvent) event;
      ContainerId expiredId = expired.getContainerId();
      // Complete the container with an abnormal "expired" status.
      super.completedContainer(
          getRMContainer(expiredId),
          SchedulerUtils.createAbnormalContainerStatus(expiredId, SchedulerUtils.EXPIRED_CONTAINER),
          RMContainerEventType.EXPIRE);
      break;
    }
    default:
      // Event types without a case above are reported but otherwise ignored.
      LOG.error("Unknown event arrived at FairScheduler: " + event.toString());
      break;
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFifoScheduler method testHeadroom.
/**
 * Checks the headroom reported to two applications sharing one FIFO-scheduled
 * node: a 4 GB node is added, app 1 asks for a 1 GB container and app 2 for a
 * 2 GB container, and after one node update both applications are expected
 * to see 1 GB of headroom.
 *
 * <p>The MockRM is stopped in a {@code finally} block so that a failing
 * assertion or an exception does not leave the started RM running.
 *
 * @throws Exception if the RM cannot be started or a scheduler call fails
 */
@Test(timeout = 50000)
public void testHeadroom() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  rm.start();
  try {
    FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();

    // Add a node
    RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, "127.0.0.2");
    fs.handle(new NodeAddedSchedulerEvent(n1));

    // Add two applications, each with a single attempt
    ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1);
    ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId(appId1, 1);
    createMockRMApp(appAttemptId1, rm.getRMContext());
    SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");
    fs.handle(appEvent);
    SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
    fs.handle(attemptEvent);

    ApplicationId appId2 = BuilderUtils.newApplicationId(200, 2);
    ApplicationAttemptId appAttemptId2 = BuilderUtils.newApplicationAttemptId(appId2, 1);
    createMockRMApp(appAttemptId2, rm.getRMContext());
    SchedulerEvent appEvent2 = new AppAddedSchedulerEvent(appId2, "queue", "user");
    fs.handle(appEvent2);
    SchedulerEvent attemptEvent2 = new AppAttemptAddedSchedulerEvent(appAttemptId2, false);
    fs.handle(attemptEvent2);

    List<ContainerId> emptyId = new ArrayList<ContainerId>();
    List<ResourceRequest> emptyAsk = new ArrayList<ResourceRequest>();

    // Set up resource requests
    // Ask for a 1 GB container for app 1
    List<ResourceRequest> ask1 = new ArrayList<ResourceRequest>();
    ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1));
    fs.allocate(appAttemptId1, ask1, emptyId, null, null, NULL_UPDATE_REQUESTS);

    // Ask for a 2 GB container for app 2
    List<ResourceRequest> ask2 = new ArrayList<ResourceRequest>();
    ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(2 * GB, 1), 1));
    fs.allocate(appAttemptId2, ask2, emptyId, null, null, NULL_UPDATE_REQUESTS);

    // Trigger container assignment
    fs.handle(new NodeUpdateSchedulerEvent(n1));

    // Get the allocation for the applications and verify headroom:
    // 4 GB node - 1 GB (app 1) - 2 GB (app 2) leaves 1 GB for each to see.
    Allocation allocation1 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("Allocation headroom", 1 * GB, allocation1.getResourceLimit().getMemorySize());
    Allocation allocation2 = fs.allocate(appAttemptId2, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("Allocation headroom", 1 * GB, allocation2.getResourceLimit().getMemorySize());
  } finally {
    // Always shut the RM down, even when an assertion above fails.
    rm.stop();
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFSLeafQueue method test.
/**
 * Writes an allocation file declaring {@code queueA} and {@code queueB},
 * starts a MockRM using the fair scheduler, checks the default metrics of
 * every queue, and then submits one request per queue around a node update
 * before verifying the number of leaf queues.
 *
 * @throws Exception if the allocation file cannot be written or the RM
 *         fails to start
 */
@Test(timeout = 5000)
public void test() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
  PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
  try {
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"queueA\"></queue>");
    out.println("<queue name=\"queueB\"></queue>");
    out.println("</allocations>");
  } finally {
    // Release the file handle even if writing is interrupted by an error.
    out.close();
  }

  resourceManager = new MockRM(conf);
  resourceManager.start();
  scheduler = (FairScheduler) resourceManager.getResourceScheduler();

  // Expected value goes first so that failure messages read correctly.
  for (FSQueue queue : scheduler.getQueueManager().getQueues()) {
    assertEquals(Integer.MAX_VALUE, queue.getMetrics().getMaxApps());
    assertEquals(SchedulingPolicy.DEFAULT_POLICY.getName(), queue.getMetrics().getSchedulingPolicy());
  }

  // Add one big node (only care about aggregate capacity)
  RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4 * 1024, 4), 1, "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
  scheduler.handle(nodeEvent1);
  scheduler.update();

  // Queue A wants 3 * 1024. Node update gives this all to A
  createSchedulingRequest(3 * 1024, "queueA", "user1");
  scheduler.update();
  NodeUpdateSchedulerEvent nodeEvent2 = new NodeUpdateSchedulerEvent(node1);
  scheduler.handle(nodeEvent2);

  // Queue B arrives and wants 1 * 1024
  createSchedulingRequest(1 * 1024, "queueB", "user1");
  scheduler.update();

  // NOTE(review): three leaf queues are expected although only two are
  // declared; presumably the third is a default queue - confirm.
  Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues();
  assertEquals(3, queues.size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testQueueMaxAMShareWithContainerReservation.
/**
 * Verifies how container reservation interacts with a queue's maxAMShare:
 * a container gets reserved when the queue is not over maxAMShare,
 * a reserved container gets unreserved when the queue goes over maxAMShare,
 * a container doesn't get reserved when the queue is over maxAMShare,
 * a reserved container is turned into an allocation, and
 * a superfluously reserved container gets unreserved.
 *
 * The scenario uses a single queue ("queue1", maxAMShare 0.5) and runs as:
 * 1. Create three nodes: Node1 is 10G, Node2 is 10G and Node3 is 5G.
 * 2. APP1 is allocated 1G on Node1 and APP2 is allocated 1G on Node2.
 * 3. APP3 reserves 10G on Node1 and Node2.
 * 4. APP4 is allocated 5G on Node3, which puts APP3 over maxAMShare.
 * 5. Remove APP1 so that Node1 has 10G of available resource.
 * 6. APP3 unreserves its container on Node1 because it is over maxAMShare.
 * 7. APP5 is allocated 1G on Node1 after APP3 unreserved its container.
 * 8. Remove APP3.
 * 9. APP6 fails to reserve a 10G container on Node1 due to the AMShare limit.
 * 10. APP7 is allocated 1G on Node1.
 * 11. Remove APP4 and APP5.
 * 12. APP6 reserves 10G on Node1 and Node2.
 * 13. APP8 fails to allocate a 1G container on Node1 and Node2 because
 *     APP6 reserved Node1 and Node2.
 * 14. Remove APP2.
 * 15. APP6 turns the 10G reservation into an allocation on Node2.
 * 16. APP6 unreserves its container on Node1, APP8 is allocated 1G on Node1.
 *
 * @throws Exception if the allocation file cannot be written or the
 *         scheduler fails to (re)initialize
 */
@Test
public void testQueueMaxAMShareWithContainerReservation() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
// NOTE(review): RESERVABLE_NODES = 1 presumably allows reservations on
// every node - confirm against FairSchedulerConfiguration.
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 1f);
// Write an allocation file with a single queue capped at maxAMShare 0.5.
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");
out.println("<queue name=\"queue1\">");
out.println("<maxAMShare>0.5</maxAMShare>");
out.println("</queue>");
out.println("</allocations>");
out.close();
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Step 1: two 10G nodes and one 5G node, each with a reusable
// "added" event and a reusable "update" (heartbeat) event.
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(10240, 10), 1, "127.0.0.1");
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(10240, 10), 2, "127.0.0.2");
RMNode node3 = MockNodes.newNodeInfo(1, Resources.createResource(5120, 5), 3, "127.0.0.3");
NodeAddedSchedulerEvent nodeE1 = new NodeAddedSchedulerEvent(node1);
NodeUpdateSchedulerEvent updateE1 = new NodeUpdateSchedulerEvent(node1);
NodeAddedSchedulerEvent nodeE2 = new NodeAddedSchedulerEvent(node2);
NodeUpdateSchedulerEvent updateE2 = new NodeUpdateSchedulerEvent(node2);
NodeAddedSchedulerEvent nodeE3 = new NodeAddedSchedulerEvent(node3);
NodeUpdateSchedulerEvent updateE3 = new NodeUpdateSchedulerEvent(node3);
scheduler.handle(nodeE1);
scheduler.handle(nodeE2);
scheduler.handle(nodeE3);
scheduler.update();
FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
// AM container sizes for the eight applications used below.
Resource amResource1 = Resource.newInstance(1024, 1);
Resource amResource2 = Resource.newInstance(1024, 1);
Resource amResource3 = Resource.newInstance(10240, 1);
Resource amResource4 = Resource.newInstance(5120, 1);
Resource amResource5 = Resource.newInstance(1024, 1);
Resource amResource6 = Resource.newInstance(10240, 1);
Resource amResource7 = Resource.newInstance(1024, 1);
Resource amResource8 = Resource.newInstance(1024, 1);
int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();
// Step 2a: app1 requests a 1G AM container.
ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
createApplicationWithAMResource(attId1, "queue1", "user1", amResource1);
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
scheduler.update();
// Allocate app1's AM container on node1.
scheduler.handle(updateE1);
assertEquals("Application1's AM requests 1024 MB memory", 1024, app1.getAMResource().getMemorySize());
assertEquals("Application1's AM should be running", 1, app1.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 1024 MB memory", 1024, queue1.getAmResourceUsage().getMemorySize());
// Step 2b: app2 requests a 1G AM container.
ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
createApplicationWithAMResource(attId2, "queue1", "user1", amResource2);
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
scheduler.update();
// Allocate app2's AM container on node2.
scheduler.handle(updateE2);
assertEquals("Application2's AM requests 1024 MB memory", 1024, app2.getAMResource().getMemorySize());
assertEquals("Application2's AM should be running", 1, app2.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
// Step 3: app3 requests a 10G AM container, which fits on neither node.
ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
createApplicationWithAMResource(attId3, "queue1", "user1", amResource3);
createSchedulingRequestExistingApplication(10240, 1, amPriority, attId3);
FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
scheduler.update();
// app3 reserves a container on node1 because node1's available resource
// is less than app3's AM container resource.
scheduler.handle(updateE1);
// Similarly app3 reserves a container on node2.
scheduler.handle(updateE2);
assertEquals("Application3's AM resource shouldn't be updated", 0, app3.getAMResource().getMemorySize());
assertEquals("Application3's AM should not be running", 0, app3.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
// Step 4: app4 requests a 5G AM container.
ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
createApplicationWithAMResource(attId4, "queue1", "user1", amResource4);
createSchedulingRequestExistingApplication(5120, 1, amPriority, attId4);
FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
scheduler.update();
// app4 can't allocate its AM container on node1 because
// app3 already reserved its container on node1.
scheduler.handle(updateE1);
assertEquals("Application4's AM resource shouldn't be updated", 0, app4.getAMResource().getMemorySize());
assertEquals("Application4's AM should not be running", 0, app4.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
scheduler.update();
// Allocate app4's AM container on node3.
scheduler.handle(updateE3);
assertEquals("Application4's AM requests 5120 MB memory", 5120, app4.getAMResource().getMemorySize());
assertEquals("Application4's AM should be running", 1, app4.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 7168 MB memory", 7168, queue1.getAmResourceUsage().getMemorySize());
// Step 5: remove app1, dropping the queue's AM usage by its 1G.
AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(attId1, RMAppAttemptState.FINISHED, false);
// Release app1's AM container on node1.
scheduler.handle(appRemovedEvent1);
assertEquals("Queue1's AM resource usage should be 6144 MB memory", 6144, queue1.getAmResourceUsage().getMemorySize());
// Steps 6-7: app5 requests a 1G AM container.
ApplicationAttemptId attId5 = createAppAttemptId(5, 1);
createApplicationWithAMResource(attId5, "queue1", "user1", amResource5);
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId5);
FSAppAttempt app5 = scheduler.getSchedulerApp(attId5);
scheduler.update();
// app5 can allocate its AM container on node1 after
// app3 unreserves its container on node1 due to
// exceeding the queue's maxAMShare limit.
scheduler.handle(updateE1);
assertEquals("Application5's AM requests 1024 MB memory", 1024, app5.getAMResource().getMemorySize());
assertEquals("Application5's AM should be running", 1, app5.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 7168 MB memory", 7168, queue1.getAmResourceUsage().getMemorySize());
// Step 8: remove app3; it never ran an AM, so the queue's AM usage
// is expected to stay unchanged.
AppAttemptRemovedSchedulerEvent appRemovedEvent3 = new AppAttemptRemovedSchedulerEvent(attId3, RMAppAttemptState.FINISHED, false);
// Remove app3.
scheduler.handle(appRemovedEvent3);
assertEquals("Queue1's AM resource usage should be 7168 MB memory", 7168, queue1.getAmResourceUsage().getMemorySize());
// Step 9: app6 requests a 10G AM container.
ApplicationAttemptId attId6 = createAppAttemptId(6, 1);
createApplicationWithAMResource(attId6, "queue1", "user1", amResource6);
createSchedulingRequestExistingApplication(10240, 1, amPriority, attId6);
FSAppAttempt app6 = scheduler.getSchedulerApp(attId6);
scheduler.update();
// app6 can't reserve a container on node1 because
// it exceeds the queue's maxAMShare limit.
scheduler.handle(updateE1);
assertEquals("Application6's AM resource shouldn't be updated", 0, app6.getAMResource().getMemorySize());
assertEquals("Application6's AM should not be running", 0, app6.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 7168 MB memory", 7168, queue1.getAmResourceUsage().getMemorySize());
// Step 10: app7 requests a 1G AM container.
ApplicationAttemptId attId7 = createAppAttemptId(7, 1);
createApplicationWithAMResource(attId7, "queue1", "user1", amResource7);
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId7);
FSAppAttempt app7 = scheduler.getSchedulerApp(attId7);
scheduler.update();
// Allocate app7's AM container on node1 to prove
// app6 didn't reserve a container on node1.
scheduler.handle(updateE1);
assertEquals("Application7's AM requests 1024 MB memory", 1024, app7.getAMResource().getMemorySize());
assertEquals("Application7's AM should be running", 1, app7.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 8192 MB memory", 8192, queue1.getAmResourceUsage().getMemorySize());
// Step 11: remove app4 (5G) and app5 (1G).
AppAttemptRemovedSchedulerEvent appRemovedEvent4 = new AppAttemptRemovedSchedulerEvent(attId4, RMAppAttemptState.FINISHED, false);
// Release app4's AM container on node3.
scheduler.handle(appRemovedEvent4);
assertEquals("Queue1's AM resource usage should be 3072 MB memory", 3072, queue1.getAmResourceUsage().getMemorySize());
AppAttemptRemovedSchedulerEvent appRemovedEvent5 = new AppAttemptRemovedSchedulerEvent(attId5, RMAppAttemptState.FINISHED, false);
// Release app5's AM container on node1.
scheduler.handle(appRemovedEvent5);
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
scheduler.update();
// Step 12:
// app6 reserves a container on node1 because node1's available resource
// is less than app6's AM container resource and
// app6 is not over the AMShare limit.
scheduler.handle(updateE1);
// Similarly app6 reserves a container on node2.
scheduler.handle(updateE2);
// Step 13: app8 requests a 1G AM container.
ApplicationAttemptId attId8 = createAppAttemptId(8, 1);
createApplicationWithAMResource(attId8, "queue1", "user1", amResource8);
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId8);
FSAppAttempt app8 = scheduler.getSchedulerApp(attId8);
scheduler.update();
// app8 can't allocate a container on node1 because
// app6 already reserved a container on node1.
scheduler.handle(updateE1);
assertEquals("Application8's AM resource shouldn't be updated", 0, app8.getAMResource().getMemorySize());
assertEquals("Application8's AM should not be running", 0, app8.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
scheduler.update();
// app8 can't allocate a container on node2 because
// app6 already reserved a container on node2.
scheduler.handle(updateE2);
assertEquals("Application8's AM resource shouldn't be updated", 0, app8.getAMResource().getMemorySize());
assertEquals("Application8's AM should not be running", 0, app8.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 2048 MB memory", 2048, queue1.getAmResourceUsage().getMemorySize());
// Step 14: remove app2, dropping the queue's AM usage by its 1G.
AppAttemptRemovedSchedulerEvent appRemovedEvent2 = new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
// Release app2's AM container on node2.
scheduler.handle(appRemovedEvent2);
assertEquals("Queue1's AM resource usage should be 1024 MB memory", 1024, queue1.getAmResourceUsage().getMemorySize());
scheduler.update();
// Step 15:
// app6 turns the reservation into an allocation on node2.
scheduler.handle(updateE2);
assertEquals("Application6's AM requests 10240 MB memory", 10240, app6.getAMResource().getMemorySize());
assertEquals("Application6's AM should be running", 1, app6.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 11264 MB memory", 11264, queue1.getAmResourceUsage().getMemorySize());
scheduler.update();
// Step 16:
// app6 unreserves its container on node1 because
// it already got a container on node2.
// Now app8 can allocate its AM container on node1.
scheduler.handle(updateE1);
assertEquals("Application8's AM requests 1024 MB memory", 1024, app8.getAMResource().getMemorySize());
assertEquals("Application8's AM should be running", 1, app8.getLiveContainers().size());
assertEquals("Queue1's AM resource usage should be 12288 MB memory", 12288, queue1.getAmResourceUsage().getMemorySize());
}
Aggregations