use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
the class RMNMInfo method getLiveNodeManagers.
/**
 * Implements getLiveNodeManagers()
 *
 * @return JSON formatted string containing statuses of all node managers
 */
@Override // RMNMInfoBeans
public String getLiveNodeManagers() {
  Collection<RMNode> nodes = this.rmContext.getRMNodes().values();
  List<InfoMap> nodesInfo = new ArrayList<InfoMap>();
  for (final RMNode ni : nodes) {
    SchedulerNodeReport report = scheduler.getNodeReport(ni.getNodeID());
    InfoMap info = new InfoMap();
    info.put("HostName", ni.getHostName());
    info.put("Rack", ni.getRackName());
    info.put("State", ni.getState().toString());
    info.put("NodeId", ni.getNodeID());
    info.put("NodeHTTPAddress", ni.getHttpAddress());
    info.put("LastHealthUpdate", ni.getLastHealthReportTime());
    info.put("HealthReport", ni.getHealthReport());
    info.put("NodeManagerVersion", ni.getNodeManagerVersion());
    if (report != null) {
      info.put("NumContainers", report.getNumContainers());
      info.put("UsedMemoryMB", report.getUsedResource().getMemorySize());
      info.put("AvailableMemoryMB",
          report.getAvailableResource().getMemorySize());
    }
    nodesInfo.add(info);
  }
  return JSON.toString(nodesInfo);
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
the class TestNodeLabelContainerAllocation method testQueueMetricsWithLabelsOnDefaultLabelNode.
@Test
public void testQueueMetricsWithLabelsOnDefaultLabelNode() throws Exception {
  /**
   * Test case: have the following queue structure:
   *
   * <pre>
   *           root
   *          /    \
   *         a      b
   *        (x)    (x)
   * </pre>
   *
   * a/b can access x, and both of them have max-capacity-on-x = 50.
   *
   * When doing non-exclusive allocation, an app in a (or b) can use 100% of
   * the x resource.
   */
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration(this.conf);
  // Define top-level queues
  csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
  csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);
  final String queueA = CapacitySchedulerConfiguration.ROOT + ".a";
  csConf.setCapacity(queueA, 25);
  csConf.setAccessibleNodeLabels(queueA, toSet("x"));
  csConf.setCapacityByLabel(queueA, "x", 50);
  csConf.setMaximumCapacityByLabel(queueA, "x", 50);
  final String queueB = CapacitySchedulerConfiguration.ROOT + ".b";
  csConf.setCapacity(queueB, 75);
  csConf.setAccessibleNodeLabels(queueB, toSet("x"));
  csConf.setCapacityByLabel(queueB, "x", 50);
  csConf.setMaximumCapacityByLabel(queueB, "x", 50);
  // set node -> label
  mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
  // inject node label manager
  MockRM rm1 = new MockRM(csConf) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  rm1.getRMContext().setNodeLabelManager(mgr);
  rm1.start();
  // label = x
  MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
  // label = <no_label>
  MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
  // app1 -> a
  RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm2);
  // app1 asks for 3 containers in the default partition (partition="")
  am1.allocate("*", 1 * GB, 3, new ArrayList<ContainerId>());
  // NM1 sends 50 heartbeats
  CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
  RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
  SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
  for (int i = 0; i < 50; i++) {
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
  }
  // app1 gets all 3 containers on partition=x (non-exclusive allocation)
  Assert.assertEquals(3, schedulerNode1.getNumContainers());
  SchedulerNodeReport reportNm1 =
      rm1.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(3 * GB, reportNm1.getUsedResource().getMemorySize());
  Assert.assertEquals(7 * GB, reportNm1.getAvailableResource().getMemorySize());
  SchedulerNodeReport reportNm2 =
      rm1.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(1 * GB, reportNm2.getUsedResource().getMemorySize());
  Assert.assertEquals(9 * GB, reportNm2.getAvailableResource().getMemorySize());
  LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
  double delta = 0.0001;
  // 3 GB is used from the label x quota, leaving 2 GB on x;
  // 1.5 GB remains from the default label, so 3.5 GB is available in total.
  assertEquals(3.5 * GB, leafQueue.getMetrics().getAvailableMB(), delta);
  assertEquals(4 * GB, leafQueue.getMetrics().getAllocatedMB());
  // app1 asks for 5 more 1 GB containers in the default partition
  am1.allocate("*", 1 * GB, 5, new ArrayList<ContainerId>());
  // NM2 sends a heartbeat
  RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());
  SchedulerNode schedulerNode2 = cs.getSchedulerNode(nm2.getNodeId());
  cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
  // one more container is allocated to app1 in the default partition
  Assert.assertEquals(2, schedulerNode2.getNumContainers());
  // 3 GB is used from the label x quota and 2 GB from the default label,
  // so 2.5 GB remains in total.
  assertEquals(2.5 * GB, leafQueue.getMetrics().getAvailableMB(), delta);
  assertEquals(5 * GB, leafQueue.getMetrics().getAllocatedMB());
  rm1.close();
}
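The two availableMB assertions above are plain bookkeeping against queue a's guaranteed capacity in each partition. A worked sketch of that arithmetic, restating only what the test configures (one 10 GB NM per partition, queue a at 25% of the default partition and 50% of x):

// All values in MB; GB is 1024 as in the test harness.
long GB = 1024;
long aDefaultGuaranteed = (long) (0.25 * 10 * GB); // 2.5 GB on the default partition
long aGuaranteedOnX = (long) (0.50 * 10 * GB);     // 5 GB on partition x

// First round: 1 GB AM on the default partition, 3 GB allocated on x.
long available1 = (aDefaultGuaranteed - 1 * GB) + (aGuaranteedOnX - 3 * GB);
// = 1.5 GB + 2 GB = 3.5 GB, matching the first assertion.

// Second round: one more 1 GB container on the default partition.
long available2 = (aDefaultGuaranteed - 2 * GB) + (aGuaranteedOnX - 3 * GB);
// = 0.5 GB + 2 GB = 2.5 GB, matching the second assertion.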
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
the class TestFifoScheduler method testFifoScheduling.
@Test(timeout = 60000)
public void testFifoScheduling() throws Exception {
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.DEBUG);
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
  MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
  RMApp app1 = rm.submitApp(2048);
  // kick the scheduling: 2 GB given to AM1, 4 GB remaining on nm1
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  RMApp app2 = rm.submitApp(2048);
  // kick the scheduling: 2 GB given to AM2, 2 GB remaining on nm2
  nm2.nodeHeartbeat(true);
  RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
  MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
  am2.registerAppAttempt();
  SchedulerNodeReport report_nm2 =
      rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
  // add request for containers
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, GB, 1, 1);
  // send the request
  AllocateResponse alloc1Response = am1.schedule();
  // add request for containers
  am2.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 0, 1);
  // send the request
  AllocateResponse alloc2Response = am2.schedule();
  // kick the scheduler: 1 GB and 3 GB given to AM1 and AM2, 0 remaining on nm1
  nm1.nodeHeartbeat(true);
  while (alloc1Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 1...");
    Thread.sleep(1000);
    alloc1Response = am1.schedule();
  }
  while (alloc2Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 2...");
    Thread.sleep(1000);
    alloc2Response = am2.schedule();
  }
  // kick the scheduler: nothing is allocated, 2 GB remaining on nm2
  nm2.nodeHeartbeat(true);
  List<Container> allocated1 = alloc1Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated1.size());
  Assert.assertEquals(1 * GB, allocated1.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
  List<Container> allocated2 = alloc2Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated2.size());
  Assert.assertEquals(3 * GB, allocated2.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm2.getAvailableResource().getMemorySize());
  Assert.assertEquals(6 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
  Container c1 = allocated1.get(0);
  Assert.assertEquals(GB, c1.getResource().getMemorySize());
  ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
      c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
  nm1.containerStatus(containerStatus);
  int waitCount = 0;
  while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
    LOG.info("Waiting for containers to be finished for app 1... Tried "
        + waitCount + " times already..");
    Thread.sleep(1000);
  }
  Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
  Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(5 * GB, report_nm1.getUsedResource().getMemorySize());
  rm.stop();
}
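The closing assertions follow from nm1's 6 GB capacity: 2 GB (AM1) + 1 GB (app 1's container) + 3 GB (app 2's container) fills it completely, and completing the 1 GB container drops usage to 5 GB. A sketch of that bookkeeping:

long GB = 1024;
long nm1Used = 2 * GB + 1 * GB + 3 * GB; // 6 GB used, 0 GB available on nm1
long afterComplete = nm1Used - 1 * GB;   // c1 (1 GB) completes -> 5 GB used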
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
the class TestFifoScheduler method testResourceOverCommit.
@Test(timeout = 60000)
public void testResourceOverCommit() throws Exception {
  int waitCount;
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
  RMApp app1 = rm.submitApp(2048);
  // kick the scheduling: 2 GB given to AM1, 2 GB remaining on nm1
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // check node report: 2 GB used and 2 GB available
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
  // add request for containers
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
  // send the request
  AllocateResponse alloc1Response = am1.schedule();
  // kick the scheduler: 2 GB given to AM1, 0 remaining
  nm1.nodeHeartbeat(true);
  while (alloc1Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 1...");
    Thread.sleep(1000);
    alloc1Response = am1.schedule();
  }
  List<Container> allocated1 = alloc1Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated1.size());
  Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // check node report: 4 GB used and 0 GB available
  Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  // check that the container is assigned 2 GB
  Container c1 = allocated1.get(0);
  Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
  // update the node resource to 2 GB, so the node is over-committed
  Map<NodeId, ResourceOption> nodeResourceMap =
      new HashMap<NodeId, ResourceOption>();
  nodeResourceMap.put(nm1.getNodeId(),
      ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
  UpdateNodeResourceRequest request =
      UpdateNodeResourceRequest.newInstance(nodeResourceMap);
  rm.getAdminService().updateNodeResource(request);
  waitCount = 0;
  while (waitCount++ != 20) {
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    if (null != report_nm1
        && report_nm1.getAvailableResource().getMemorySize() != 0) {
      break;
    }
    LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried "
        + waitCount + " times already..");
    Thread.sleep(1000);
  }
  // Now the used resource is still 4 GB and the available resource is
  // negative.
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
  // Check that the container can complete successfully in case of resource
  // over-commitment.
  ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
      c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
  nm1.containerStatus(containerStatus);
  waitCount = 0;
  while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
    LOG.info("Waiting for containers to be finished for app 1... Tried "
        + waitCount + " times already..");
    Thread.sleep(100);
  }
  Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
  Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  // As the container returns 2 GB, the available resource rises back to 0.
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  rm.stop();
}
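Throughout this test the FIFO scheduler keeps used + available equal to the node's current total, which is why shrinking the node to 2 GB while 4 GB is allocated drives the available memory to -2 GB rather than clamping at zero. A sketch of that invariant using the real Resources utility; report_nm1 is the variable from the test above:

import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;

// used + available should track the node's current total:
//   before the update: 4 GB + 0 GB    == 4 GB
//   after the update:  4 GB + (-2 GB) == 2 GB
Resource total = Resources.add(report_nm1.getUsedResource(),
    report_nm1.getAvailableResource());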
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
the class TestCapacityScheduler method testAppReservationWithDominantResourceCalculator.
// Test to ensure that we don't carry out reservation on nodes
// that have no CPU available when using the DominantResourceCalculator
@Test(timeout = 30000)
public void testAppReservationWithDominantResourceCalculator() throws Exception {
  CapacitySchedulerConfiguration csconf = new CapacitySchedulerConfiguration();
  csconf.setResourceComparator(DominantResourceCalculator.class);
  YarnConfiguration conf = new YarnConfiguration(csconf);
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
      ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  rm.start();
  // nm1 has 10 GB but only a single vcore, which the AM will consume
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10 * GB, 1);
  // register extra nodes to bump up the cluster resource
  MockNM nm2 = rm.registerNode("127.0.0.1:1235", 10 * GB, 4);
  rm.registerNode("127.0.0.1:1236", 10 * GB, 4);
  RMApp app1 = rm.submitApp(1024);
  // kick the scheduling
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 =
      rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // check node report
  Assert.assertEquals(1 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(9 * GB, report_nm1.getAvailableResource().getMemorySize());
  // add request for containers
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 1 * GB, 1, 1);
  // send the request
  am1.schedule();
  // kick the scheduler; no container reservation should happen because
  // nm1 has no vcores left
  nm1.nodeHeartbeat(true);
  Thread.sleep(1000);
  AllocateResponse allocResponse = am1.schedule();
  ApplicationResourceUsageReport report = rm.getResourceScheduler()
      .getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(0, allocResponse.getAllocatedContainers().size());
  Assert.assertEquals(0, report.getNumReservedContainers());
  // the container should get allocated on nm2 instead
  nm2.nodeHeartbeat(true);
  while (allocResponse.getAllocatedContainers().size() == 0) {
    Thread.sleep(100);
    allocResponse = am1.schedule();
  }
  report = rm.getResourceScheduler()
      .getAppResourceUsageReport(attempt1.getAppAttemptId());
  Assert.assertEquals(1, allocResponse.getAllocatedContainers().size());
  Assert.assertEquals(0, report.getNumReservedContainers());
  rm.stop();
}
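For context on why no reservation happens on nm1: the DominantResourceCalculator sizes a resource by its dominant share of the cluster (the larger of its memory fraction and its vcore fraction) instead of by memory alone, so a node with memory left but zero free vcores cannot satisfy, and should not reserve for, a request that needs a vcore. A standalone sketch of the comparison idea — an illustration, not Hadoop's actual implementation:

public class DominantShareSketch {
  // The dominant share is the larger of the memory and vcore fractions of
  // the cluster total.
  static double dominantShare(long memMB, long vcores,
      long clusterMemMB, long clusterVcores) {
    return Math.max((double) memMB / clusterMemMB,
        (double) vcores / clusterVcores);
  }

  public static void main(String[] args) {
    // Cluster from the test above: 3 NMs, 30 GB and 1 + 4 + 4 = 9 vcores.
    long clusterMem = 30 * 1024, clusterVcores = 9;
    // nm1 after the AM starts: 9 GB of memory free but 0 vcores free, so a
    // 1 GB / 1 vcore request can never fit there, and the scheduler moves
    // on to nm2 instead of reserving.
    System.out.println(dominantShare(9 * 1024, 0, clusterMem, clusterVcores));
  }
}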