use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testHostPortNodeName.
@Test(timeout = 30000)
public void testHostPortNodeName() throws Exception {
conf.setBoolean(YarnConfiguration.RM_SCHEDULER_INCLUDE_PORT_IN_NODE_NAME, true);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 1, "127.0.0.1", 1);
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 2, "127.0.0.1", 2);
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1", "user1", 0);
ResourceRequest nodeRequest = createResourceRequest(1024, node1.getNodeID().getHost() + ":" + node1.getNodeID().getPort(), 1, 1, true);
ResourceRequest rackRequest = createResourceRequest(1024, node1.getRackName(), 1, 1, false);
ResourceRequest anyRequest = createResourceRequest(1024, ResourceRequest.ANY, 1, 1, false);
createSchedulingRequestExistingApplication(nodeRequest, attId1);
createSchedulingRequestExistingApplication(rackRequest, attId1);
createSchedulingRequestExistingApplication(anyRequest, attId1);
scheduler.update();
NodeUpdateSchedulerEvent node1UpdateEvent = new NodeUpdateSchedulerEvent(node1);
NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2);
// no matter how many heartbeats, node2 should never get a container
FSAppAttempt app = scheduler.getSchedulerApp(attId1);
for (int i = 0; i < 10; i++) {
scheduler.handle(node2UpdateEvent);
assertEquals(0, app.getLiveContainers().size());
assertEquals(0, app.getReservedContainers().size());
}
// then node1 should get the container
scheduler.handle(node1UpdateEvent);
assertEquals(1, app.getLiveContainers().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testOffSwitchAppReservationThreshold.
@Test(timeout = 5000)
public void testOffSwitchAppReservationThreshold() throws Exception {
conf.setFloat(FairSchedulerConfiguration.RESERVABLE_NODES, 0.50f);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add three node
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(3072), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(3072), 1, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
RMNode node3 = MockNodes.newNodeInfo(1, Resources.createResource(3072), 1, "127.0.0.3");
NodeAddedSchedulerEvent nodeEvent3 = new NodeAddedSchedulerEvent(node3);
scheduler.handle(nodeEvent3);
// Ensure capacity on all nodes are allocated
createSchedulingRequest(2048, "queue1", "user1", 1);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node1));
createSchedulingRequest(2048, "queue1", "user1", 1);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node2));
createSchedulingRequest(2048, "queue1", "user1", 1);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node3));
// Verify capacity allocation
assertEquals(6144, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
// Create new app with a resource request that can be satisfied by any
// node but would be
ApplicationAttemptId attId = createSchedulingRequest(2048, "queue1", "user1", 1);
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node1));
assertEquals(1, scheduler.getSchedulerApp(attId).getNumReservations(null, true));
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node2));
assertEquals(2, scheduler.getSchedulerApp(attId).getNumReservations(null, true));
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node3));
// No new reservations should happen since it exceeds threshold
assertEquals(2, scheduler.getSchedulerApp(attId).getNumReservations(null, true));
// Add 1 more node
RMNode node4 = MockNodes.newNodeInfo(1, Resources.createResource(3072), 1, "127.0.0.4");
NodeAddedSchedulerEvent nodeEvent4 = new NodeAddedSchedulerEvent(node4);
scheduler.handle(nodeEvent4);
// New node satisfies resource request
scheduler.update();
scheduler.handle(new NodeUpdateSchedulerEvent(node4));
assertEquals(8192, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
scheduler.handle(new NodeUpdateSchedulerEvent(node1));
scheduler.handle(new NodeUpdateSchedulerEvent(node2));
scheduler.handle(new NodeUpdateSchedulerEvent(node3));
scheduler.update();
// Verify number of reservations have decremented
assertEquals(0, scheduler.getSchedulerApp(attId).getNumReservations(null, true));
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testReservationThresholdGatesReservations.
@Test
public void testReservationThresholdGatesReservations() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");
out.println("<defaultQueueSchedulingPolicy>drf" + "</defaultQueueSchedulingPolicy>");
out.println("</allocations>");
out.close();
// Set threshold to 2 * 1024 ==> 2048 MB & 2 * 1 ==> 2 vcores (test will
// use vcores)
conf.setFloat(FairSchedulerConfiguration.RM_SCHEDULER_RESERVATION_THRESHOLD_INCREMENT_MULTIPLE, 2f);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add a node
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
// Queue 1 requests full capacity of node
createSchedulingRequest(4096, 4, "queue1", "user1", 1, 1);
scheduler.update();
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
scheduler.handle(updateEvent);
// Make sure queue 1 is allocated app capacity
assertEquals(4096, scheduler.getQueueManager().getQueue("queue1").getResourceUsage().getMemorySize());
// Now queue 2 requests below threshold
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue2", "user1", 1);
scheduler.update();
scheduler.handle(updateEvent);
// Make sure queue 2 has no reservation
assertEquals(0, scheduler.getQueueManager().getQueue("queue2").getResourceUsage().getMemorySize());
assertEquals(0, scheduler.getSchedulerApp(attId).getReservedContainers().size());
// Now queue requests CPU above threshold
createSchedulingRequestExistingApplication(1024, 3, 1, attId);
scheduler.update();
scheduler.handle(updateEvent);
// Make sure queue 2 is waiting with a reservation
assertEquals(0, scheduler.getQueueManager().getQueue("queue2").getResourceUsage().getMemorySize());
assertEquals(3, scheduler.getSchedulerApp(attId).getCurrentReservation().getVirtualCores());
// Now another node checks in with capacity
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 4), 2, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
scheduler.handle(updateEvent2);
// Make sure this goes to queue 2
assertEquals(3, scheduler.getQueueManager().getQueue("queue2").getResourceUsage().getVirtualCores());
// The old reservation should still be there...
assertEquals(3, scheduler.getSchedulerApp(attId).getCurrentReservation().getVirtualCores());
// ... but it should disappear when we update the first node.
scheduler.handle(updateEvent);
assertEquals(0, scheduler.getSchedulerApp(attId).getCurrentReservation().getVirtualCores());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testReservationsStrictLocality.
/**
* Strict locality requests shouldn't reserve resources on another node.
*/
@Test
public void testReservationsStrictLocality() throws IOException {
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
// Add two nodes
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 1));
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024, 1));
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
// Submit application without container requests
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 0);
FSAppAttempt app = scheduler.getSchedulerApp(attId);
// Request a container on node2
ResourceRequest nodeRequest = createResourceRequest(1024, node2.getHostName(), 1, 1, true);
ResourceRequest rackRequest = createResourceRequest(1024, "rack1", 1, 1, false);
ResourceRequest anyRequest = createResourceRequest(1024, ResourceRequest.ANY, 1, 1, false);
createSchedulingRequestExistingApplication(nodeRequest, attId);
createSchedulingRequestExistingApplication(rackRequest, attId);
createSchedulingRequestExistingApplication(anyRequest, attId);
scheduler.update();
// Heartbeat from node1. App shouldn't get an allocation or reservation
NodeUpdateSchedulerEvent nodeUpdateEvent = new NodeUpdateSchedulerEvent(node1);
scheduler.handle(nodeUpdateEvent);
assertEquals("App assigned a container on the wrong node", 0, app.getLiveContainers().size());
scheduler.handle(nodeUpdateEvent);
assertEquals("App reserved a container on the wrong node", 0, app.getReservedContainers().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerComplete.
@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
nm1.registerNode();
nm2.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
// All nodes 1 to 2 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
// Start Container in NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
Thread.sleep(200);
// Verify that container is actually running wrt the RM..
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
// Container Completed in the NM
allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0)), true);
Thread.sleep(200);
// Verify that container has been removed..
rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
Assert.assertNull(rmContainer);
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
// Send Promotion req... this should result in update error
// Since the container doesn't exist anymore..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(1, allocateResponse.getCompletedContainersStatuses().size());
Assert.assertEquals(container.getId(), allocateResponse.getCompletedContainersStatuses().get(0).getContainerId());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INVALID_CONTAINER_ID", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
}
Aggregations