use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testReservationMetrics.
@Test
public void testReservationMetrics() throws IOException {
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
QueueMetrics metrics = scheduler.getRootQueueMetrics();
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(4096, 4), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent);
ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1);
createApplicationWithAMResource(appAttemptId, "default", "user1", null);
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
scheduler.update();
scheduler.handle(updateEvent);
createSchedulingRequestExistingApplication(1024, 1, 1, appAttemptId);
scheduler.update();
scheduler.handle(updateEvent);
// no reservation yet
assertEquals(0, metrics.getReservedContainers());
assertEquals(0, metrics.getReservedMB());
assertEquals(0, metrics.getReservedVirtualCores());
// create reservation of {4096, 4}
createSchedulingRequestExistingApplication(4096, 4, 1, appAttemptId);
scheduler.update();
scheduler.handle(updateEvent);
// reservation created
assertEquals(1, metrics.getReservedContainers());
assertEquals(4096, metrics.getReservedMB());
assertEquals(4, metrics.getReservedVirtualCores());
// remove AppAttempt
AppAttemptRemovedSchedulerEvent attRemoveEvent = new AppAttemptRemovedSchedulerEvent(appAttemptId, RMAppAttemptState.KILLED, false);
scheduler.handle(attRemoveEvent);
// The reservation metrics should be subtracted
assertEquals(0, metrics.getReservedContainers());
assertEquals(0, metrics.getReservedMB());
assertEquals(0, metrics.getReservedVirtualCores());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent in project hadoop by apache.
the class TestFairScheduler method testStrictLocality.
@Test
public void testStrictLocality() throws IOException {
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(conf, resourceManager.getRMContext());
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
scheduler.handle(nodeEvent1);
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 2, "127.0.0.2");
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent2);
ApplicationAttemptId attId1 = createSchedulingRequest(1024, "queue1", "user1", 0);
ResourceRequest nodeRequest = createResourceRequest(1024, node1.getHostName(), 1, 1, true);
ResourceRequest rackRequest = createResourceRequest(1024, node1.getRackName(), 1, 1, false);
ResourceRequest anyRequest = createResourceRequest(1024, ResourceRequest.ANY, 1, 1, false);
createSchedulingRequestExistingApplication(nodeRequest, attId1);
createSchedulingRequestExistingApplication(rackRequest, attId1);
createSchedulingRequestExistingApplication(anyRequest, attId1);
scheduler.update();
NodeUpdateSchedulerEvent node1UpdateEvent = new NodeUpdateSchedulerEvent(node1);
NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2);
// no matter how many heartbeats, node2 should never get a container
FSAppAttempt app = scheduler.getSchedulerApp(attId1);
for (int i = 0; i < 10; i++) {
scheduler.handle(node2UpdateEvent);
assertEquals(0, app.getLiveContainers().size());
assertEquals(0, app.getReservedContainers().size());
}
// then node1 should get the container
scheduler.handle(node1UpdateEvent);
assertEquals(1, app.getLiveContainers().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent in project hadoop by apache.
the class TestFifoScheduler method testBlackListNodes.
@Test(timeout = 50000)
public void testBlackListNodes() throws Exception {
Configuration conf = new Configuration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class);
MockRM rm = new MockRM(conf);
rm.start();
FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
int rack_num_0 = 0;
int rack_num_1 = 1;
// Add 4 nodes in 2 racks
// host_0_0 in rack0
String host_0_0 = "127.0.0.1";
RMNode n1 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_0);
fs.handle(new NodeAddedSchedulerEvent(n1));
// host_0_1 in rack0
String host_0_1 = "127.0.0.2";
RMNode n2 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_1);
fs.handle(new NodeAddedSchedulerEvent(n2));
// host_1_0 in rack1
String host_1_0 = "127.0.0.3";
RMNode n3 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_0);
fs.handle(new NodeAddedSchedulerEvent(n3));
// host_1_1 in rack1
String host_1_1 = "127.0.0.4";
RMNode n4 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_1);
fs.handle(new NodeAddedSchedulerEvent(n4));
// Add one application
ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1);
ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId(appId1, 1);
createMockRMApp(appAttemptId1, rm.getRMContext());
SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");
fs.handle(appEvent);
SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
fs.handle(attemptEvent);
List<ContainerId> emptyId = new ArrayList<ContainerId>();
List<ResourceRequest> emptyAsk = new ArrayList<ResourceRequest>();
// Allow rack-locality for rack_1, but blacklist host_1_0
// Set up resource requests
// Ask for a 1 GB container for app 1
List<ResourceRequest> ask1 = new ArrayList<ResourceRequest>();
ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), "rack1", BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
fs.allocate(appAttemptId1, ask1, emptyId, Collections.singletonList(host_1_0), null, NULL_UPDATE_REQUESTS);
// Trigger container assignment
fs.handle(new NodeUpdateSchedulerEvent(n3));
// Get the allocation for the application and verify no allocation on
// blacklist node
Allocation allocation1 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation1", 0, allocation1.getContainers().size());
// verify host_1_1 can get allocated as not in blacklist
fs.handle(new NodeUpdateSchedulerEvent(n4));
Allocation allocation2 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation2", 1, allocation2.getContainers().size());
List<Container> containerList = allocation2.getContainers();
for (Container container : containerList) {
Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
}
// Ask for a 1 GB container again for app 1
List<ResourceRequest> ask2 = new ArrayList<ResourceRequest>();
// this time, rack0 is also in blacklist, so only host_1_1 is available to
// be assigned
ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1));
fs.allocate(appAttemptId1, ask2, emptyId, Collections.singletonList("rack0"), null, NULL_UPDATE_REQUESTS);
// verify n1 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n1));
Allocation allocation3 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation3", 0, allocation3.getContainers().size());
// verify n2 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n2));
Allocation allocation4 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation4", 0, allocation4.getContainers().size());
// verify n3 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n3));
Allocation allocation5 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation5", 0, allocation5.getContainers().size());
fs.handle(new NodeUpdateSchedulerEvent(n4));
Allocation allocation6 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation6", 1, allocation6.getContainers().size());
containerList = allocation6.getContainers();
for (Container container : containerList) {
Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
}
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent in project hadoop by apache.
the class TestFifoScheduler method testNodeLocalAssignment.
@Test(timeout = 2000)
public void testNodeLocalAssignment() throws Exception {
AsyncDispatcher dispatcher = new InlineDispatcher();
Configuration conf = new Configuration();
RMContainerTokenSecretManager containerTokenSecretManager = new RMContainerTokenSecretManager(conf);
containerTokenSecretManager.rollMasterKey();
NMTokenSecretManagerInRM nmTokenSecretManager = new NMTokenSecretManagerInRM(conf);
nmTokenSecretManager.rollMasterKey();
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
FifoScheduler scheduler = new FifoScheduler();
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, null, containerTokenSecretManager, nmTokenSecretManager, null, scheduler);
rmContext.setSystemMetricsPublisher(mock(SystemMetricsPublisher.class));
rmContext.setRMApplicationHistoryWriter(mock(RMApplicationHistoryWriter.class));
((RMContextImpl) rmContext).setYarnConfiguration(new YarnConfiguration());
scheduler.setRMContext(rmContext);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(new Configuration(), rmContext);
RMNode node0 = MockNodes.newNodeInfo(1, Resources.createResource(1024 * 64), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
scheduler.handle(nodeEvent1);
int _appId = 1;
int _appAttemptId = 1;
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, _appAttemptId);
createMockRMApp(appAttemptId, rmContext);
AppAddedSchedulerEvent appEvent = new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", "user1");
scheduler.handle(appEvent);
AppAttemptAddedSchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId, false);
scheduler.handle(attemptEvent);
int memory = 64;
int nConts = 3;
int priority = 20;
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ResourceRequest nodeLocal = createResourceRequest(memory, node0.getHostName(), priority, nConts);
ResourceRequest rackLocal = createResourceRequest(memory, node0.getRackName(), priority, nConts);
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, nConts);
ask.add(nodeLocal);
ask.add(rackLocal);
ask.add(any);
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null, NULL_UPDATE_REQUESTS);
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
// Before the node update event, there are 3 local requests outstanding
Assert.assertEquals(3, nodeLocal.getNumContainers());
scheduler.handle(node0Update);
// After the node update event, check that there are no more local requests
// outstanding
Assert.assertEquals(0, nodeLocal.getNumContainers());
//Also check that the containers were scheduled
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
Assert.assertEquals(3, info.getLiveContainers().size());
scheduler.stop();
}
Aggregations