use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestFifoScheduler method testResourceOverCommit.
/**
 * Checks scheduler behaviour when a node's capacity is reduced below what is
 * already allocated on it (resource over-commit).
 *
 * <p>Flow: register a 4 GB node, let the AM take 2 GB and one container take
 * another 2 GB, shrink the node to 2 GB through the admin service, then
 * verify that the node report shows 4 GB used / -2 GB available and that the
 * running container can still complete, bringing available memory back to 0.
 *
 * @throws Exception if any RM/NM/AM interaction fails or a wait is interrupted
 */
@Test(timeout = 60000)
public void testResourceOverCommit() throws Exception {
  int waitCount;
  MockRM rm = new MockRM(conf);
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    // Poll until the container shows up; the @Test timeout bounds this loop.
    while (alloc1Response.getAllocatedContainers().size() < 1) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(1000);
      alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    rm.getAdminService().updateNodeResource(request);
    // The update is observed asynchronously: poll (up to 20 tries) until the
    // available memory is no longer 0.
    waitCount = 0;
    while (waitCount++ != 20) {
      report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
      if (null != report_nm1 && report_nm1.getAvailableResource().getMemorySize() != 0) {
        break;
      }
      LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
      Thread.sleep(1000);
    }
    // Now, the used resource is still 4 GB, and available resource is minus
    // value.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Check container can complete successfully in case of resource
    // over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
      LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
      Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As container return 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
  } finally {
    // Stop the RM even when an assertion fails or the test times out, so it
    // is not left running for the tests that follow.
    rm.stop();
  }
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestRMWebApp method mockRMContext.
/**
 * Builds an {@link RMContext} backed by mock applications and mock nodes.
 *
 * <p>The returned context is an {@code RMContextImpl} created with all-null
 * constructor arguments whose {@code getRMApps()}, {@code getRMNodes()} and
 * {@code getInactiveRMNodes()} are overridden to return maps populated here.
 * A {@code NullRMNodeLabelsManager} and a fresh {@code YarnConfiguration}
 * are installed on it before it is returned.
 *
 * @param numApps number of mock applications requested from {@code MockAsm}
 * @param racks rack count passed to {@code MockNodes}
 * @param numNodes node count passed to {@code MockNodes}
 * @param mbsPerNode value handed to {@code newResource} for every node
 *        (presumably memory in MB, going by the name)
 * @return the populated mock context
 */
public static RMContext mockRMContext(int numApps, int racks, int numNodes, int mbsPerNode) {
  // Applications, keyed by application id.
  final ConcurrentMap<ApplicationId, RMApp> appsById = Maps.newConcurrentMap();
  for (RMApp mockApp : MockAsm.newApplications(numApps)) {
    appsById.put(mockApp.getApplicationId(), mockApp);
  }

  // Active nodes, keyed by node id. Created before the deactivated ones,
  // same as the order in which the factories were originally invoked.
  final ConcurrentMap<NodeId, RMNode> activeNodesById = Maps.newConcurrentMap();
  for (RMNode activeNode : MockNodes.newNodes(racks, numNodes, newResource(mbsPerNode))) {
    activeNodesById.put(activeNode.getNodeID(), activeNode);
  }

  // Deactivated nodes, keyed by node id.
  final ConcurrentMap<NodeId, RMNode> inactiveNodesById = Maps.newConcurrentMap();
  for (RMNode inactiveNode : MockNodes.deactivatedNodes(racks, numNodes, newResource(mbsPerNode))) {
    inactiveNodesById.put(inactiveNode.getNodeID(), inactiveNode);
  }

  RMContextImpl context = new RMContextImpl(null, null, null, null, null, null, null, null, null, null) {
    @Override
    public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
      return appsById;
    }

    @Override
    public ConcurrentMap<NodeId, RMNode> getInactiveRMNodes() {
      return inactiveNodesById;
    }

    @Override
    public ConcurrentMap<NodeId, RMNode> getRMNodes() {
      return activeNodesById;
    }
  };
  context.setNodeLabelManager(new NullRMNodeLabelsManager());
  context.setYarnConfiguration(new YarnConfiguration());
  return context;
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestRMAppLogAggregationStatus method testGetLogAggregationStatusForAppReport.
/**
 * Feeds per-node log aggregation reports into an application and checks the
 * application-level status returned by
 * {@code getLogAggregationStatusForAppReport()} after each round.
 */
@Test(timeout = 10000)
public void testGetLogAggregationStatusForAppReport() {
  YarnConfiguration yarnConf = new YarnConfiguration();

  // Log aggregation switched off: the app reports DISABLED.
  yarnConf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false);
  RMAppImpl app = (RMAppImpl) createRMApp(yarnConf);
  Assert.assertEquals(LogAggregationStatus.DISABLED, app.getLogAggregationStatusForAppReport());

  // Log aggregation switched on, but no node has reported yet: NOT_START.
  yarnConf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
  app = (RMAppImpl) createRMApp(yarnConf);
  Assert.assertEquals(LogAggregationStatus.NOT_START, app.getLogAggregationStatusForAppReport());

  NodeId node1 = NodeId.newInstance("localhost", 1111);
  NodeId node2 = NodeId.newInstance("localhost", 2222);
  NodeId node3 = NodeId.newInstance("localhost", 3333);
  NodeId node4 = NodeId.newInstance("localhost", 4444);

  // Every node reports NOT_START: the app stays NOT_START.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.NOT_START);
  Assert.assertEquals(LogAggregationStatus.NOT_START, app.getLogAggregationStatusForAppReport());

  // A mix of NOT_START, RUNNING and SUCCEEDED while the app is still
  // active: RUNNING.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.RUNNING);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.SUCCEEDED);
  Assert.assertEquals(LogAggregationStatus.RUNNING, app.getLogAggregationStatusForAppReport());

  // Move the app to a final state before checking terminal statuses.
  app.handle(new RMAppEvent(app.getApplicationId(), RMAppEventType.KILL));
  Assert.assertTrue(RMAppImpl.isAppInFinalState(app));

  // One node TIME_OUT, the rest SUCCEEDED: TIME_OUT.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.TIME_OUT);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.SUCCEEDED);
  Assert.assertEquals(LogAggregationStatus.TIME_OUT, app.getLogAggregationStatusForAppReport());

  // All nodes SUCCEEDED with the app in a final state: SUCCEEDED.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.SUCCEEDED);
  Assert.assertEquals(LogAggregationStatus.SUCCEEDED, app.getLogAggregationStatusForAppReport());

  // Start over with a fresh application.
  app = (RMAppImpl) createRMApp(yarnConf);

  // At least one node RUNNING: RUNNING.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.RUNNING);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.NOT_START);
  Assert.assertEquals(LogAggregationStatus.RUNNING, app.getLogAggregationStatusForAppReport());

  // At least one node RUNNING_WITH_FAILURE: RUNNING_WITH_FAILURE.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.RUNNING);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.RUNNING_WITH_FAILURE);
  Assert.assertEquals(LogAggregationStatus.RUNNING_WITH_FAILURE, app.getLogAggregationStatusForAppReport());

  // node4 previously reported RUNNING_WITH_FAILURE; a later RUNNING report
  // from it is expected not to change the app-level result.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.RUNNING);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.NOT_START);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.RUNNING);
  Assert.assertEquals(LogAggregationStatus.RUNNING_WITH_FAILURE, app.getLogAggregationStatusForAppReport());

  app.handle(new RMAppEvent(app.getApplicationId(), RMAppEventType.KILL));
  Assert.assertTrue(RMAppImpl.isAppInFinalState(app));

  // At least one node FAILED, the others SUCCEEDED or TIME_OUT, and the app
  // in a final state: FAILED.
  reportLogAggregationStatus(app, node1, LogAggregationStatus.SUCCEEDED);
  reportLogAggregationStatus(app, node2, LogAggregationStatus.TIME_OUT);
  reportLogAggregationStatus(app, node3, LogAggregationStatus.FAILED);
  reportLogAggregationStatus(app, node4, LogAggregationStatus.FAILED);
  Assert.assertEquals(LogAggregationStatus.FAILED, app.getLogAggregationStatusForAppReport());
}

/** Sends one log aggregation report with an empty diagnostic message for {@code node} to {@code app}. */
private static void reportLogAggregationStatus(RMAppImpl app, NodeId node, LogAggregationStatus status) {
  app.aggregateLogReport(node, LogAggregationReport.newInstance(app.getApplicationId(), status, ""));
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestCapacityScheduler method testResourceOverCommit.
/**
 * Checks CapacityScheduler behaviour when a node's capacity is reduced below
 * what is already allocated on it (resource over-commit).
 *
 * <p>Flow: register a 4 GB node, let the AM take 2 GB and one container take
 * another 2 GB, shrink the node to 2 GB through the admin service, then
 * verify that the node report shows 4 GB used / -2 GB available, that the
 * running container can still complete (available returns to 0), and that a
 * subsequent 3 GB request is not satisfied.
 *
 * @throws Exception if any RM/NM/AM interaction fails or a wait is interrupted
 */
@Test
public void testResourceOverCommit() throws Exception {
  int waitCount;
  Configuration conf = new Configuration();
  conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
  MockRM rm = new MockRM(conf);
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(100);
      alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    // rm is already a MockRM, so no cast is needed to reach the admin service.
    rm.getAdminService().updateNodeResource(request);
    // The update is observed asynchronously: poll (up to 20 tries) until the
    // available memory is no longer 0.
    waitCount = 0;
    while (waitCount++ != 20) {
      report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
      if (report_nm1.getAvailableResource().getMemorySize() != 0) {
        break;
      }
      LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
      Thread.sleep(1000);
    }
    // Now, the used resource is still 4 GB, and available resource is minus value.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Check container can complete successfully in case of resource over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
      LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
      Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As container return 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Verify no NPE is trigger in schedule after resource is updated.
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 1, 1);
    alloc1Response = am1.schedule();
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
    int times = 0;
    // try 10 times as scheduling is async process.
    while (alloc1Response.getAllocatedContainers().size() < 1 && times++ < 10) {
      LOG.info("Waiting for containers to be allocated for app 1... Tried " + times + " times already..");
      Thread.sleep(100);
      // Re-poll on every attempt; without this the response is never
      // refreshed and the assertion below re-checks the stale value.
      alloc1Response = am1.schedule();
    }
    Assert.assertEquals("Shouldn't have enough resource to allocate containers", 0, alloc1Response.getAllocatedContainers().size());
  } finally {
    // Stop the RM even when an assertion fails, so it is not left running
    // for the tests that follow.
    rm.stop();
  }
}
use of org.apache.hadoop.yarn.api.records.NodeId in project hadoop by apache.
the class TestRMWebServicesNodes method testNodesResourceUtilization.
/**
 * Pushes a node status carrying node-level and container-level
 * {@code ResourceUtilization} values into a running node, then fetches
 * {@code ws/v1/cluster/nodes} as JSON and hands the single returned node
 * entry to {@code verifyNodeInfo}.
 */
@Test
public void testNodesResourceUtilization() throws JSONException, Exception {
  WebResource webResource = resource();
  RMNode runningNode = getRunningRMNode("h1", 1234, 5120);
  NodeId runningNodeId = runningNode.getNodeID();
  RMNodeImpl nodeImpl = (RMNodeImpl) rm.getRMContext().getRMNodes().get(runningNodeId);

  // Build a status update that reports the node as healthy and carries the
  // utilization figures.
  NodeHealthStatus health = NodeHealthStatus.newInstance(true, "test health report", System.currentTimeMillis());
  ResourceUtilization nodeUtilization = ResourceUtilization.newInstance(4096, 0, (float) 10.5);
  ResourceUtilization containersUtilization = ResourceUtilization.newInstance(2048, 0, (float) 5.05);
  NodeStatus status = NodeStatus.newInstance(
      runningNodeId, 0, new ArrayList<ContainerStatus>(), null, health,
      containersUtilization, nodeUtilization, null);
  nodeImpl.handle(new RMNodeStatusEvent(runningNodeId, status, null));
  rm.waitForState(runningNodeId, NodeState.RUNNING);

  // Query the cluster nodes endpoint as JSON.
  WebResource nodesEndpoint = webResource.path("ws").path("v1").path("cluster").path("nodes");
  ClientResponse response = nodesEndpoint.accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
  assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8, response.getType().toString());

  // The payload is expected to be {"nodes": {"node": [ <one entry> ]}}.
  JSONObject payload = response.getEntity(JSONObject.class);
  assertEquals("incorrect number of elements", 1, payload.length());
  JSONObject nodesWrapper = payload.getJSONObject("nodes");
  assertEquals("incorrect number of elements", 1, nodesWrapper.length());
  JSONArray nodeEntries = nodesWrapper.getJSONArray("node");
  assertEquals("incorrect number of elements", 1, nodeEntries.length());

  // verify the resource utilization
  JSONObject nodeInfo = nodeEntries.getJSONObject(0);
  verifyNodeInfo(nodeInfo, runningNode);
}
Aggregations