Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
The class TestCapacityScheduler, method testResourceOverCommit.
@Test
public void testResourceOverCommit() throws Exception {
  int waitCount;
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  rm.start();
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
  RMApp app1 = rm.submitApp(2048);
  // kick the scheduling: 2 GB given to AM1, remaining 2 GB on nm1
  nm1.nodeHeartbeat(true);
  RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
  MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
  am1.registerAppAttempt();
  SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // check node report: 2 GB used and 2 GB available
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
  // add a request for containers
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
  // send the request
  AllocateResponse alloc1Response = am1.schedule();
  // kick the scheduler: 2 GB given to app1's container, 0 GB remaining
  nm1.nodeHeartbeat(true);
  while (alloc1Response.getAllocatedContainers().size() < 1) {
    LOG.info("Waiting for containers to be created for app 1...");
    Thread.sleep(100);
    alloc1Response = am1.schedule();
  }
  List<Container> allocated1 = alloc1Response.getAllocatedContainers();
  Assert.assertEquals(1, allocated1.size());
  Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
  Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  // check node report: 4 GB used and 0 GB available
  Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  // check that the container is assigned 2 GB
  Container c1 = allocated1.get(0);
  Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
  // update the node resource to 2 GB, so the node becomes over-committed
  Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
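  // The second argument to ResourceOption.newInstance is the over-commit
  // timeout in milliseconds; -1 means the over-committed resources are never
  // forcibly reclaimed.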
  nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
  UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
  AdminService as = rm.getAdminService();
  as.updateNodeResource(request);
  waitCount = 0;
  while (waitCount++ != 20) {
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    if (report_nm1.getAvailableResource().getMemorySize() != 0) {
      break;
    }
    LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
    Thread.sleep(1000);
  }
  // Now the used resource is still 4 GB and the available resource is negative.
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
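  // The arithmetic behind the assertions above: the node total was lowered to
  // 2 GB while 4 GB remains allocated, so available = 2 GB - 4 GB = -2 GB.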
  // Check that the container can complete successfully under over-commitment.
  ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
  nm1.containerStatus(containerStatus);
  waitCount = 0;
  while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
    LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
    Thread.sleep(100);
  }
  Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
  Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
  // As the container gave 2 GB back, the available resource is 0 again.
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  // Verify that no NPE is triggered in schedule() after the resource update.
  am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 1, 1);
  alloc1Response = am1.schedule();
  Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
  int times = 0;
  // retry up to 10 times, since scheduling is an asynchronous process
  while (alloc1Response.getAllocatedContainers().size() < 1 && times++ < 10) {
    LOG.info("Waiting for containers to be allocated for app 1... Tried " + times + " times already..");
    Thread.sleep(100);
  }
  Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
  rm.stop();
}
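The invariant the assertions above exercise is simply available = total - used, with no floor at zero. Below is a minimal standalone sketch of that bookkeeping; the class and method names are ours for illustration, not the scheduler's implementation.

// Illustrative model of per-node accounting under over-commit.
final class NodeAccounting {

  private long totalMB; // node capacity as last registered or admin-updated
  private long usedMB;  // sum of memory held by allocated containers

  NodeAccounting(long totalMB) {
    this.totalMB = totalMB;
  }

  void allocate(long mb) {
    usedMB += mb;
  }

  void release(long mb) {
    usedMB -= mb;
  }

  // An admin resize changes the total but leaves current usage untouched.
  void updateTotal(long mb) {
    totalMB = mb;
  }

  // Can go negative once the node shrinks below current usage,
  // matching the -2 GB assertion in the test above.
  long availableMB() {
    return totalMB - usedMB;
  }

  public static void main(String[] args) {
    NodeAccounting node = new NodeAccounting(4096);
    node.allocate(2048);                     // AM container
    node.allocate(2048);                     // task container
    node.updateTotal(2048);                  // admin shrinks the node
    System.out.println(node.availableMB());  // -2048
    node.release(2048);                      // task container completes
    System.out.println(node.availableMB());  // 0
  }
}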
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
The class TestApplicationPriority, method testPriorityWithPendingApplications.
@Test
public void testPriorityWithPendingApplications() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
  // set the max application priority to 10
  conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, 10);
  MockRM rm = new MockRM(conf);
  rm.start();
  Priority appPriority1 = Priority.newInstance(5);
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 8 * GB);
  RMApp app1 = rm.submitApp(1 * GB, appPriority1);
  // kick the scheduler: 1 GB given to AM1, remaining 7 GB on nm1
  MockAM am1 = MockRM.launchAM(app1, rm, nm1);
  am1.registerAppAttempt();
  // kick the scheduler: 7 containers will be allocated for app1
  List<Container> allocated1 = am1.allocateAndWaitForContainers("127.0.0.1", 7, 1 * GB, nm1);
  Assert.assertEquals(7, allocated1.size());
  Assert.assertEquals(1 * GB, allocated1.get(0).getResource().getMemorySize());
  // check node report: 8 GB used (1 AM and 7 containers) and 0 GB available
  SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(8 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  // submit the second app, App2, with priority 7
  Priority appPriority2 = Priority.newInstance(7);
  RMApp app2 = rm.submitApp(1 * GB, appPriority2);
  // submit the third app, App3, with priority 8
  Priority appPriority3 = Priority.newInstance(8);
  RMApp app3 = rm.submitApp(1 * GB, appPriority3);
  // submit the fourth app, App4, with priority 6
  Priority appPriority4 = Priority.newInstance(6);
  RMApp app4 = rm.submitApp(1 * GB, appPriority4);
  // Only one app can run at a time because the AM resource limit restricts it.
  // Kill app1; if app3 (the highest priority among the rest) becomes active,
  // priority ordering is working for pendingApplications.
  rm.killApp(app1.getApplicationId());
  rm.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
  // kick the scheduler: app3 (highest among pending) gets the freed space
  MockAM am3 = MockRM.launchAM(app3, rm, nm1);
  am3.registerAppAttempt();
  // check node report: 1 GB used and 7 GB available
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(1 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(7 * GB, report_nm1.getAvailableResource().getMemorySize());
  rm.stop();
}
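The activation order checked here (app3 at priority 8 beating app2 at 7 and app4 at 6) is plain highest-priority-first ordering over the pending set. A self-contained sketch of that ordering follows; PendingApp is a made-up type for illustration, and the CapacityScheduler's real pending structure also applies tie-breaking (e.g. by submission order), which this sketch omits.

import java.util.Comparator;
import java.util.PriorityQueue;

final class PriorityOrderSketch {

  // Stand-in for a pending application: id plus cluster-level priority.
  record PendingApp(String id, int priority) {}

  public static void main(String[] args) {
    PriorityQueue<PendingApp> pending =
        new PriorityQueue<>(Comparator.comparingInt(PendingApp::priority).reversed());
    pending.add(new PendingApp("app2", 7));
    pending.add(new PendingApp("app3", 8));
    pending.add(new PendingApp("app4", 6));
    // The highest-priority app activates first, as the test expects for app3.
    System.out.println(pending.poll().id()); // app3
  }
}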
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
The class TestRMWebServicesNodes, method verifyNodeInfoGeneric.
public void verifyNodeInfoGeneric(RMNode node, String state, String rack, String id,
    String nodeHostName, String nodeHTTPAddress, long lastHealthUpdate,
    String healthReport, int numContainers, long usedMemoryMB, long availMemoryMB,
    long usedVirtualCores, long availVirtualCores, String version,
    int nodePhysicalMemoryMB, int nodeVirtualMemoryMB, double nodeCPUUsage,
    int containersPhysicalMemoryMB, int containersVirtualMemoryMB,
    double containersCPUUsage, int numRunningOpportContainers, long usedMemoryOpportGB,
    int usedVirtualCoresOpport, int numQueuedContainers) throws JSONException, Exception {
  ResourceScheduler sched = rm.getResourceScheduler();
  SchedulerNodeReport report = sched.getNodeReport(node.getNodeID());
  OpportunisticContainersStatus opportunisticStatus = node.getOpportunisticContainersStatus();
  WebServicesTestUtils.checkStringMatch("state", node.getState().toString(), state);
  WebServicesTestUtils.checkStringMatch("rack", node.getRackName(), rack);
  WebServicesTestUtils.checkStringMatch("id", node.getNodeID().toString(), id);
  WebServicesTestUtils.checkStringMatch("nodeHostName", node.getNodeID().getHost(), nodeHostName);
  WebServicesTestUtils.checkStringMatch("healthReport", String.valueOf(node.getHealthReport()), healthReport);
  String expectedHttpAddress = node.getNodeID().getHost() + ":" + node.getHttpPort();
  WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", expectedHttpAddress, nodeHTTPAddress);
  WebServicesTestUtils.checkStringMatch("version", node.getNodeManagerVersion(), version);
  if (node.getNodeUtilization() != null) {
    ResourceUtilization nodeResource = ResourceUtilization.newInstance(
        nodePhysicalMemoryMB, nodeVirtualMemoryMB, (float) nodeCPUUsage);
    assertEquals("nodeResourceUtilization doesn't match", node.getNodeUtilization(), nodeResource);
  }
  if (node.getAggregatedContainersUtilization() != null) {
    ResourceUtilization containerResource = ResourceUtilization.newInstance(
        containersPhysicalMemoryMB, containersVirtualMemoryMB, (float) containersCPUUsage);
    assertEquals("containerResourceUtilization doesn't match", node.getAggregatedContainersUtilization(), containerResource);
  }
  long expectedHealthUpdate = node.getLastHealthReportTime();
  assertEquals("lastHealthUpdate doesn't match, got: " + lastHealthUpdate + " expected: " + expectedHealthUpdate, expectedHealthUpdate, lastHealthUpdate);
  if (report != null) {
    assertEquals("numContainers doesn't match: " + numContainers, report.getNumContainers(), numContainers);
    assertEquals("usedMemoryMB doesn't match: " + usedMemoryMB, report.getUsedResource().getMemorySize(), usedMemoryMB);
    assertEquals("availMemoryMB doesn't match: " + availMemoryMB, report.getAvailableResource().getMemorySize(), availMemoryMB);
    assertEquals("usedVirtualCores doesn't match: " + usedVirtualCores, report.getUsedResource().getVirtualCores(), usedVirtualCores);
    assertEquals("availVirtualCores doesn't match: " + availVirtualCores, report.getAvailableResource().getVirtualCores(), availVirtualCores);
  }
  if (opportunisticStatus != null) {
    assertEquals("numRunningOpportContainers doesn't match: " + numRunningOpportContainers,
        opportunisticStatus.getRunningOpportContainers(), numRunningOpportContainers);
    assertEquals("usedMemoryOpportGB doesn't match: " + usedMemoryOpportGB,
        opportunisticStatus.getOpportMemoryUsed(), usedMemoryOpportGB);
    assertEquals("usedVirtualCoresOpport doesn't match: " + usedVirtualCoresOpport,
        opportunisticStatus.getOpportCoresUsed(), usedVirtualCoresOpport);
    assertEquals("numQueuedContainers doesn't match: " + numQueuedContainers,
        opportunisticStatus.getQueuedOpportContainers(), numQueuedContainers);
  }
}
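Note the null guard around the report checks: as the test itself implies, getNodeReport can return null for a node the scheduler is not currently tracking. A small helper in the same spirit is sketched below; usedMemoryMB here is our name for illustration, not a Hadoop API.

// Assumed helper: default to 0 when the scheduler has no report for the node.
private static long usedMemoryMB(ResourceScheduler sched, NodeId nodeId) {
  SchedulerNodeReport report = sched.getNodeReport(nodeId);
  return report == null ? 0 : report.getUsedResource().getMemorySize();
}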
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
The class TestNodeLabelContainerAllocation, method testQueueMetricsWithLabels.
@Test
public void testQueueMetricsWithLabels() throws Exception {
  /**
   * Test case: given the following queue structure:
   *
   * <pre>
   *           root
   *          /    \
   *         a      b
   *        (x)    (x)
   * </pre>
   *
   * Both a and b can access x, and both have max-capacity-on-x = 50.
   *
   * When doing non-exclusive allocation, an app in a (or b) can use 100% of
   * the x resource.
   */
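  // Capacity math for this setup: partition x has 10 GB (nm1 below), and
  // capacity-by-label = max-capacity-by-label = 50 gives queue a both a
  // guarantee and a cap of 5 GB on x; this matches the 5 containers and
  // 5 GB used asserted after the scheduling loop.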
  CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(this.conf);
  // define the top-level queues
  csConf.setQueues(CapacitySchedulerConfiguration.ROOT, new String[] { "a", "b" });
  csConf.setCapacityByLabel(CapacitySchedulerConfiguration.ROOT, "x", 100);
  final String queueA = CapacitySchedulerConfiguration.ROOT + ".a";
  csConf.setCapacity(queueA, 25);
  csConf.setAccessibleNodeLabels(queueA, toSet("x"));
  csConf.setCapacityByLabel(queueA, "x", 50);
  csConf.setMaximumCapacityByLabel(queueA, "x", 50);
  final String queueB = CapacitySchedulerConfiguration.ROOT + ".b";
  csConf.setCapacity(queueB, 75);
  csConf.setAccessibleNodeLabels(queueB, toSet("x"));
  csConf.setCapacityByLabel(queueB, "x", 50);
  csConf.setMaximumCapacityByLabel(queueB, "x", 50);
  // map nodes to labels
  mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("x", false)));
  mgr.addToCluserNodeLabels(ImmutableSet.of(NodeLabel.newInstance("y", false)));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h1", 0), toSet("x")));
  mgr.addLabelsToNode(ImmutableMap.of(NodeId.newInstance("h2", 0), toSet("y")));
  // inject the node label manager
  MockRM rm1 = new MockRM(csConf) {

    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  rm1.getRMContext().setNodeLabelManager(mgr);
  rm1.start();
  // label = x
  MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
  // label = y
  MockNM nm2 = rm1.registerNode("h2:1234", 10 * GB);
  // app1 -> queue a
  RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a", "x");
  MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
  // app1 asks for 5 partition=x containers
  am1.allocate("*", 1 * GB, 5, new ArrayList<ContainerId>(), "x");
  // NM1 does 50 heartbeats
  CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
  RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
  SchedulerNode schedulerNode1 = cs.getSchedulerNode(nm1.getNodeId());
  for (int i = 0; i < 50; i++) {
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
  }
  // app1 gets all the partition=x resource its queue is allowed (5 GB)
  Assert.assertEquals(5, schedulerNode1.getNumContainers());
  SchedulerNodeReport reportNm1 = rm1.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(5 * GB, reportNm1.getUsedResource().getMemorySize());
  Assert.assertEquals(5 * GB, reportNm1.getAvailableResource().getMemorySize());
  SchedulerNodeReport reportNm2 = rm1.getResourceScheduler().getNodeReport(nm2.getNodeId());
  Assert.assertEquals(0 * GB, reportNm2.getUsedResource().getMemorySize());
  Assert.assertEquals(10 * GB, reportNm2.getAvailableResource().getMemorySize());
  LeafQueue leafQueue = (LeafQueue) cs.getQueue("a");
  assertEquals(0 * GB, leafQueue.getMetrics().getAvailableMB());
  assertEquals(5 * GB, leafQueue.getMetrics().getAllocatedMB());
  // kill all apps in queue a
  cs.killAllAppsInQueue("a");
  rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
  rm1.waitForAppRemovedFromScheduler(app1.getApplicationId());
  assertEquals(0 * GB, leafQueue.getMetrics().getUsedAMResourceMB());
  assertEquals(0, leafQueue.getMetrics().getUsedAMResourceVCores());
  rm1.close();
}
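The 5 GB used / 5 GB available split asserted above is just percentage math over the partition resource. Below is a standalone sketch of that calculation; the names are ours for illustration and this is not the CapacityScheduler's code.

final class LabelCapacitySketch {

  public static void main(String[] args) {
    long partitionXMB = 10 * 1024;     // nm1 registered with 10 GB under label x
    double capacityByLabel = 0.50;     // setCapacityByLabel(queueA, "x", 50)
    double maxCapacityByLabel = 0.50;  // setMaximumCapacityByLabel(queueA, "x", 50)

    long guaranteedMB = (long) (partitionXMB * capacityByLabel); // 5120
    long maxMB = (long) (partitionXMB * maxCapacityByLabel);     // 5120

    // Queue a is guaranteed and capped at 5 GB on x, so 5 GB stays available.
    System.out.println(guaranteedMB + " / " + maxMB);
  }
}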
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport in project hadoop by apache.
The class TestApplicationPriority, method testApplicationPriorityAllocation.
@Test
public void testApplicationPriorityAllocation() throws Exception {
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
  // set the max application priority to 10
  conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, 10);
  MockRM rm = new MockRM(conf);
  rm.start();
  Priority appPriority1 = Priority.newInstance(5);
  MockNM nm1 = rm.registerNode("127.0.0.1:1234", 16 * GB);
  RMApp app1 = rm.submitApp(1 * GB, appPriority1);
  // kick the scheduler: 1 GB given to AM1, remaining 15 GB on nm1
  MockAM am1 = MockRM.launchAM(app1, rm, nm1);
  am1.registerAppAttempt();
  // allocate 7 containers for app1
  List<Container> allocated1 = am1.allocateAndWaitForContainers("127.0.0.1", 7, 2 * GB, nm1);
  Assert.assertEquals(7, allocated1.size());
  Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
  // check node report: 15 GB used (1 AM and 7 containers) and 1 GB available
  SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(15 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(1 * GB, report_nm1.getAvailableResource().getMemorySize());
  // submit the second app, App2, with priority 8 (higher than App1)
  Priority appPriority2 = Priority.newInstance(8);
  RMApp app2 = rm.submitApp(1 * GB, appPriority2);
  // kick the scheduler: the 1 GB that was free is given to App2's AM
  MockAM am2 = MockRM.launchAM(app2, rm, nm1);
  am2.registerAppAttempt();
  // check node report: 16 GB used and 0 GB available
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(16 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  // get the scheduler
  CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
  // get the scheduler app attempt
  FiCaSchedulerApp schedulerAppAttempt = cs.getSchedulerApplications().get(app1.getApplicationId()).getCurrentAppAttempt();
  // kill 2 containers of App1 to free up some space
  int counter = 0;
  for (Container c : allocated1) {
    if (++counter > 2) {
      break;
    }
    cs.markContainerForKillable(schedulerAppAttempt.getRMContainer(c.getId()));
  }
  // check node report: 12 GB used and 4 GB available
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(12 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(4 * GB, report_nm1.getAvailableResource().getMemorySize());
  // send an updated request for App1
  am1.allocate("127.0.0.1", 2 * GB, 10, new ArrayList<ContainerId>());
  // kick the scheduler: since App2's priority is higher than App1's, App2
  // gets the remaining cluster space.
  List<Container> allocated2 = am2.allocateAndWaitForContainers("127.0.0.1", 2, 2 * GB, nm1);
  // App2 has 2 containers now.
  Assert.assertEquals(2, allocated2.size());
  // check node report: 16 GB used and 0 GB available
  report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
  Assert.assertEquals(16 * GB, report_nm1.getUsedResource().getMemorySize());
  Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  rm.stop();
}
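The counter loop that kills the first two containers is a recurring pattern in these tests. A generic form is sketched below using the same CapacityScheduler and FiCaSchedulerApp calls as the test; killFirstN is our name, not part of MockRM or the scheduler.

// Illustrative helper: mark the first n of an app's allocated containers killable.
private static void killFirstN(CapacityScheduler cs, FiCaSchedulerApp app,
    List<Container> allocated, int n) {
  int killed = 0;
  for (Container c : allocated) {
    if (killed++ >= n) {
      break;
    }
    cs.markContainerForKillable(app.getRMContainer(c.getId()));
  }
}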