use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestNodeLabelContainerAllocation method testAMContainerAllocationWillAlwaysBeExclusive.
@Test
public void testAMContainerAllocationWillAlwaysBeExclusive() throws Exception {
/**
* Test case: Submit one application without partition, trying to allocate a
* node has partition=x, it should fail to allocate since AM container will
* always respect exclusivity for partitions
*/
// set node -> label
mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false), NodeLabel.newInstance("y")));
mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
// inject node label manager
MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {
@Override
public RMNodeLabelsManager createNodeLabelManager() {
return mgr;
}
};
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
String nodeIdStr = "h1:1234";
// label = x
MockNM nm1 = rm1.registerNode(nodeIdStr, 8 * GB);
// launch an app to queue b1 (label = y), AM container should be launched in nm3
RMApp app = rm1.submitApp(1 * GB, "app", "user", null, "b1");
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
// Heartbeat for many times, app1 should get nothing
for (int i = 0; i < 50; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
}
Assert.assertTrue("Scheduler diagnostics should have reason for not assigning the node", app.getDiagnostics().toString().contains(CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_IGNORE_EXCLUSIVE_MODE));
Assert.assertTrue("Scheduler diagnostics should have last processed node information", app.getDiagnostics().toString().contains(CSAMContainerLaunchDiagnosticsConstants.LAST_NODE_PROCESSED_MSG + nodeIdStr + " ( Partition : [x]"));
Assert.assertEquals(0, cs.getSchedulerNode(nm1.getNodeId()).getNumContainers());
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestNodeLabelContainerAllocation method testPreferenceOfNeedyAppsTowardsNodePartitions.
@Test
public void testPreferenceOfNeedyAppsTowardsNodePartitions() throws Exception {
/**
* Test case: Submit two application to a queue (app1 first then app2), app1
* asked for no-label, app2 asked for label=x, when node1 has label=x
* doing heart beat, app2 will get allocation first, even if app2 submits later
* than app1
*/
// set node -> label
mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x"), NodeLabel.newInstance("y", false)));
mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("y")));
// inject node label manager
MockRM rm1 = new MockRM(TestUtils.getConfigurationWithQueueLabels(conf)) {
@Override
public RMNodeLabelsManager createNodeLabelManager() {
return mgr;
}
};
rm1.getRMContext().setNodeLabelManager(mgr);
rm1.start();
// label = y
MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB);
// label = <empty>
MockNM nm2 = rm1.registerNode("h2:1234", 100 * GB);
// launch an app to queue b1 (label = y), AM container should be launched in nm2
RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
// launch another app to queue b1 (label = y), AM container should be launched in nm2
RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b1");
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
// request container and nm1 do heartbeat (nm2 has label=y), note that app1
// request non-labeled container, and app2 request labeled container, app2
// will get allocated first even if app1 submitted first.
am1.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>());
am2.allocate("*", 1 * GB, 8, new ArrayList<ContainerId>(), "y");
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
// Do node heartbeats many times
for (int i = 0; i < 50; i++) {
cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
}
// App2 will get preference to be allocated on node1, and node1 will be all
// used by App2.
FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(am1.getApplicationAttemptId());
FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(am2.getApplicationAttemptId());
// app1 get nothing in nm1 (partition=y)
checkNumOfContainersInAnAppOnGivenNode(0, nm1.getNodeId(), schedulerApp1);
checkNumOfContainersInAnAppOnGivenNode(9, nm2.getNodeId(), schedulerApp1);
// app2 get all resource in nm1 (partition=y)
checkNumOfContainersInAnAppOnGivenNode(8, nm1.getNodeId(), schedulerApp2);
checkNumOfContainersInAnAppOnGivenNode(1, nm2.getNodeId(), schedulerApp2);
rm1.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFifoScheduler method testBlackListNodes.
@Test(timeout = 50000)
public void testBlackListNodes() throws Exception {
Configuration conf = new Configuration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class);
MockRM rm = new MockRM(conf);
rm.start();
FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
int rack_num_0 = 0;
int rack_num_1 = 1;
// Add 4 nodes in 2 racks
// host_0_0 in rack0
String host_0_0 = "127.0.0.1";
RMNode n1 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_0);
fs.handle(new NodeAddedSchedulerEvent(n1));
// host_0_1 in rack0
String host_0_1 = "127.0.0.2";
RMNode n2 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_1);
fs.handle(new NodeAddedSchedulerEvent(n2));
// host_1_0 in rack1
String host_1_0 = "127.0.0.3";
RMNode n3 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_0);
fs.handle(new NodeAddedSchedulerEvent(n3));
// host_1_1 in rack1
String host_1_1 = "127.0.0.4";
RMNode n4 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_1);
fs.handle(new NodeAddedSchedulerEvent(n4));
// Add one application
ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1);
ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId(appId1, 1);
createMockRMApp(appAttemptId1, rm.getRMContext());
SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");
fs.handle(appEvent);
SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
fs.handle(attemptEvent);
List<ContainerId> emptyId = new ArrayList<ContainerId>();
List<ResourceRequest> emptyAsk = new ArrayList<ResourceRequest>();
// Allow rack-locality for rack_1, but blacklist host_1_0
// Set up resource requests
// Ask for a 1 GB container for app 1
List<ResourceRequest> ask1 = new ArrayList<ResourceRequest>();
ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), "rack1", BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
fs.allocate(appAttemptId1, ask1, emptyId, Collections.singletonList(host_1_0), null, NULL_UPDATE_REQUESTS);
// Trigger container assignment
fs.handle(new NodeUpdateSchedulerEvent(n3));
// Get the allocation for the application and verify no allocation on
// blacklist node
Allocation allocation1 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation1", 0, allocation1.getContainers().size());
// verify host_1_1 can get allocated as not in blacklist
fs.handle(new NodeUpdateSchedulerEvent(n4));
Allocation allocation2 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation2", 1, allocation2.getContainers().size());
List<Container> containerList = allocation2.getContainers();
for (Container container : containerList) {
Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
}
// Ask for a 1 GB container again for app 1
List<ResourceRequest> ask2 = new ArrayList<ResourceRequest>();
// this time, rack0 is also in blacklist, so only host_1_1 is available to
// be assigned
ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1));
fs.allocate(appAttemptId1, ask2, emptyId, Collections.singletonList("rack0"), null, NULL_UPDATE_REQUESTS);
// verify n1 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n1));
Allocation allocation3 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation3", 0, allocation3.getContainers().size());
// verify n2 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n2));
Allocation allocation4 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation4", 0, allocation4.getContainers().size());
// verify n3 is not qualified to be allocated
fs.handle(new NodeUpdateSchedulerEvent(n3));
Allocation allocation5 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation5", 0, allocation5.getContainers().size());
fs.handle(new NodeUpdateSchedulerEvent(n4));
Allocation allocation6 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
Assert.assertEquals("allocation6", 1, allocation6.getContainers().size());
containerList = allocation6.getContainers();
for (Container container : containerList) {
Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
}
rm.stop();
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent in project hadoop by apache.
the class TestFifoScheduler method testNodeLocalAssignment.
@Test(timeout = 2000)
public void testNodeLocalAssignment() throws Exception {
AsyncDispatcher dispatcher = new InlineDispatcher();
Configuration conf = new Configuration();
RMContainerTokenSecretManager containerTokenSecretManager = new RMContainerTokenSecretManager(conf);
containerTokenSecretManager.rollMasterKey();
NMTokenSecretManagerInRM nmTokenSecretManager = new NMTokenSecretManagerInRM(conf);
nmTokenSecretManager.rollMasterKey();
RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
FifoScheduler scheduler = new FifoScheduler();
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, null, containerTokenSecretManager, nmTokenSecretManager, null, scheduler);
rmContext.setSystemMetricsPublisher(mock(SystemMetricsPublisher.class));
rmContext.setRMApplicationHistoryWriter(mock(RMApplicationHistoryWriter.class));
((RMContextImpl) rmContext).setYarnConfiguration(new YarnConfiguration());
scheduler.setRMContext(rmContext);
scheduler.init(conf);
scheduler.start();
scheduler.reinitialize(new Configuration(), rmContext);
RMNode node0 = MockNodes.newNodeInfo(1, Resources.createResource(1024 * 64), 1, "127.0.0.1");
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
scheduler.handle(nodeEvent1);
int _appId = 1;
int _appAttemptId = 1;
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, _appAttemptId);
createMockRMApp(appAttemptId, rmContext);
AppAddedSchedulerEvent appEvent = new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", "user1");
scheduler.handle(appEvent);
AppAttemptAddedSchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId, false);
scheduler.handle(attemptEvent);
int memory = 64;
int nConts = 3;
int priority = 20;
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ResourceRequest nodeLocal = createResourceRequest(memory, node0.getHostName(), priority, nConts);
ResourceRequest rackLocal = createResourceRequest(memory, node0.getRackName(), priority, nConts);
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, nConts);
ask.add(nodeLocal);
ask.add(rackLocal);
ask.add(any);
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null, NULL_UPDATE_REQUESTS);
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
// Before the node update event, there are 3 local requests outstanding
Assert.assertEquals(3, nodeLocal.getNumContainers());
scheduler.handle(node0Update);
// After the node update event, check that there are no more local requests
// outstanding
Assert.assertEquals(0, nodeLocal.getNumContainers());
//Also check that the containers were scheduled
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
Assert.assertEquals(3, info.getLiveContainers().size());
scheduler.stop();
}
Aggregations