use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
HashMap<NodeId, MockNM> nodes = new HashMap<>();
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm1.getNodeId(), nm1);
MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm2.getNodeId(), nm2);
MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
nodes.put(nm3.getNodeId(), nm3);
MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
nodes.put(nm4.getNodeId(), nm4);
nm1.registerNode();
nm2.registerNode();
nm3.registerNode();
nm4.registerNode();
OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
ResourceScheduler scheduler = rm.getResourceScheduler();
RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
// Send add and update node events to AM Service.
amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
// All nodes 1 - 4 will be applicable for scheduling.
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(1000);
QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
// Verify Metrics
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
Assert.assertEquals(2, allocatedContainers.size());
Container container = allocatedContainers.get(0);
MockNM allocNode = nodes.get(container.getNodeId());
MockNM sameHostDiffNode = null;
for (NodeId n : nodes.keySet()) {
if (n.getHost().equals(allocNode.getNodeId().getHost()) && n.getPort() != allocNode.getNodeId().getPort()) {
sameHostDiffNode = nodes.get(n);
}
}
// Verify Metrics After OPP allocation (Nothing should change)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
// Node on same host should not result in allocation
sameHostDiffNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
// Verify Metrics After OPP allocation (Nothing should change again)
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
// Send Promotion req again... this should result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Send Promotion req again with incorrect version...
// this should also result in update error
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
// Ensure after correct node heartbeats, we should get the allocation
allocNode.nodeHeartbeat(true);
Thread.sleep(200);
allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
// Verify Metrics After OPP allocation :
// Allocated cores+mem should have increased, available should decrease
verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
nm1.nodeHeartbeat(true);
nm2.nodeHeartbeat(true);
nm3.nodeHeartbeat(true);
nm4.nodeHeartbeat(true);
Thread.sleep(200);
// Verify that the container is still in ACQUIRED state wrt the RM.
RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
// Now demote the container back..
allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
// This should happen in the same heartbeat..
Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
Assert.assertEquals(uc.getId(), container.getId());
Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
// Verify Metrics After OPP allocation :
// Everything should have reverted to what it was
verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.
the class TestApplicationMasterService method testRMIdentifierOnContainerAllocation.
@Test(timeout = 3000000)
public void testRMIdentifierOnContainerAllocation() throws Exception {
MockRM rm = new MockRM(conf);
rm.start();
// Register node1
MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
// Submit an application
RMApp app1 = rm.submitApp(2048);
// kick the scheduling
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
am1.registerAppAttempt();
am1.addRequests(new String[] { "127.0.0.1" }, GB, 1, 1);
// send the request
AllocateResponse alloc1Response = am1.schedule();
// kick the scheduler
nm1.nodeHeartbeat(true);
while (alloc1Response.getAllocatedContainers().size() < 1) {
LOG.info("Waiting for containers to be created for app 1...");
sleep(1000);
alloc1Response = am1.schedule();
}
// assert RMIdentifer is set properly in allocated containers
Container allocatedContainer = alloc1Response.getAllocatedContainers().get(0);
ContainerTokenIdentifier tokenId = BuilderUtils.newContainerTokenIdentifier(allocatedContainer.getContainerToken());
Assert.assertEquals(MockRM.getClusterTimeStamp(), tokenId.getRMIdentifier());
rm.stop();
}
use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.
the class TestApplicationCleanup method testProcessingNMContainerStatusesOnNMRestart.
// The test verifies processing of NMContainerStatuses which are sent during
// NM registration.
// 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
// 2. AM sends ResourceRequest for 1 container with memory 2048MB.
// 3. Verify for number of container allocated by RM
// 4. Verify Memory Usage by cluster, it should be 3072. AM memory + requested
// memory. 1024 + 2048=3072
// 5. Re-register NM by sending completed container status
// 6. Verify for Memory Used, it should be 1024
// 7. Send AM heatbeat to RM. Allocated response should contain completed
// container.
@Test(timeout = 60000)
public void testProcessingNMContainerStatusesOnNMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// 1. Start the cluster-RM,NM,Submit app with 1024MB,Launch & register AM
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
int nmMemory = 8192;
int amMemory = 1024;
int containerMemory = 2048;
MockNM nm1 = new MockNM("127.0.0.1:1234", nmMemory, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app0 = rm1.submitApp(amMemory);
MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
// 2. AM sends ResourceRequest for 1 container with memory 2048MB.
int noOfContainers = 1;
List<Container> allocateContainers = am0.allocateAndWaitForContainers(noOfContainers, containerMemory, nm1);
// 3. Verify for number of container allocated by RM
Assert.assertEquals(noOfContainers, allocateContainers.size());
Container container = allocateContainers.get(0);
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING);
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), container.getId().getContainerId(), ContainerState.RUNNING);
rm1.waitForState(app0.getApplicationId(), RMAppState.RUNNING);
// 4. Verify Memory Usage by cluster, it should be 3072. AM memory +
// requested memory. 1024 + 2048=3072
ResourceScheduler rs = rm1.getRMContext().getScheduler();
long allocatedMB = rs.getRootQueueMetrics().getAllocatedMB();
Assert.assertEquals(amMemory + containerMemory, allocatedMB);
// 5. Re-register NM by sending completed container status
List<NMContainerStatus> nMContainerStatusForApp = createNMContainerStatusForApp(am0);
nm1.registerNode(nMContainerStatusForApp, Arrays.asList(app0.getApplicationId()));
waitForClusterMemory(nm1, rs, amMemory);
// 6. Verify for Memory Used, it should be 1024
Assert.assertEquals(amMemory, rs.getRootQueueMetrics().getAllocatedMB());
// 7. Send AM heatbeat to RM. Allocated response should contain completed
// container
AllocateRequest req = AllocateRequest.newInstance(0, 0F, new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>(), null);
AllocateResponse allocate = am0.allocate(req);
List<ContainerStatus> completedContainersStatuses = allocate.getCompletedContainersStatuses();
Assert.assertEquals(noOfContainers, completedContainersStatuses.size());
// Application clean up should happen Cluster memory used is 0
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
waitForClusterMemory(nm1, rs, 0);
rm1.stop();
}
use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.
the class RegularContainerAllocator method doAllocation.
ContainerAllocation doAllocation(ContainerAllocation allocationResult, FiCaSchedulerNode node, SchedulerRequestKey schedulerKey, RMContainer reservedContainer) {
// Create the container if necessary
Container container = getContainer(reservedContainer, node, allocationResult.getResourceToBeAllocated(), schedulerKey);
// something went wrong getting/creating the container
if (container == null) {
application.updateAppSkipNodeDiagnostics("Scheduling of container failed. ");
LOG.warn("Couldn't get container for allocation!");
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey.getPriority(), ActivityDiagnosticConstant.COULD_NOT_GET_CONTAINER, ActivityState.REJECTED);
return ContainerAllocation.APP_SKIPPED;
}
if (allocationResult.getAllocationState() == AllocationState.ALLOCATED) {
// When allocating container
allocationResult = handleNewContainerAllocation(allocationResult, node, schedulerKey, container);
} else {
// When reserving container
RMContainer updatedContainer = reservedContainer;
if (updatedContainer == null) {
updatedContainer = new RMContainerImpl(container, schedulerKey, application.getApplicationAttemptId(), node.getNodeID(), application.getAppSchedulingInfo().getUser(), rmContext);
}
allocationResult.updatedContainer = updatedContainer;
}
// reservedContainer != null, it's not the first time)
if (reservedContainer == null) {
// This helps apps with many off-cluster requests schedule faster.
if (allocationResult.containerNodeType != NodeType.OFF_SWITCH) {
if (LOG.isDebugEnabled()) {
LOG.debug("Resetting scheduling opportunities");
}
// RACK_LOCAL without delay.
if (allocationResult.containerNodeType == NodeType.NODE_LOCAL || application.getCSLeafQueue().getRackLocalityFullReset()) {
application.resetSchedulingOpportunities(schedulerKey);
}
}
// most likely allocated on non-labeled nodes first.
if (StringUtils.equals(node.getPartition(), RMNodeLabelsManager.NO_LABEL)) {
application.resetMissedNonPartitionedRequestSchedulingOpportunity(schedulerKey);
}
}
return allocationResult;
}
use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.
the class ContainerUpdateContext method matchContainerToOutstandingIncreaseReq.
/**
* Check if a new container is to be matched up against an outstanding
* Container increase request.
* @param node SchedulerNode.
* @param schedulerKey SchedulerRequestKey.
* @param rmContainer RMContainer.
* @return ContainerId.
*/
public ContainerId matchContainerToOutstandingIncreaseReq(SchedulerNode node, SchedulerRequestKey schedulerKey, RMContainer rmContainer) {
ContainerId retVal = null;
Container container = rmContainer.getContainer();
Map<Resource, Map<NodeId, Set<ContainerId>>> resourceMap = outstandingIncreases.get(schedulerKey);
if (resourceMap != null) {
Map<NodeId, Set<ContainerId>> locationMap = resourceMap.get(container.getResource());
if (locationMap != null) {
Set<ContainerId> containerIds = locationMap.get(container.getNodeId());
if (containerIds != null && !containerIds.isEmpty()) {
retVal = containerIds.iterator().next();
}
}
}
// We also need to add these requests back.. to be reallocated.
if (resourceMap != null && retVal == null) {
Map<SchedulerRequestKey, Map<String, ResourceRequest>> reqsToUpdate = new HashMap<>();
Map<String, ResourceRequest> resMap = createResourceRequests(rmContainer, node, schedulerKey, rmContainer.getContainer().getResource());
reqsToUpdate.put(schedulerKey, resMap);
appSchedulingInfo.addToPlacementSets(true, reqsToUpdate);
return UNDEFINED;
}
return retVal;
}
Aggregations