use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestIncreaseAllocationExpirer method testConsecutiveContainerIncreaseAllocationExpiration.
/**
 * Checks the roll-back path of the container increase allocation expirer when
 * two increase tokens are issued back to back for the same container and only
 * the first one is reported by the NM.
 */
@Test
public void testConsecutiveContainerIncreaseAllocationExpiration() throws Exception {
  /*
   * 1. Allocate 1 container: containerId2 (1G)
   * 2. Increase resource of containerId2: 1G -> 3G
   * 3. AM acquires the token
   * 4. Increase resource of containerId2 again: 3G -> 5G
   * 5. AM acquires the token
   * 6. AM uses the first token to increase the container in NM to 3G
   * 7. AM NEVER uses the second token
   * 8. Verify containerId2 eventually is allocated 3G after token expires
   * 9. Verify NM eventually uses 3G for containerId2
   */
  // Set the allocation expiration to 5 seconds
  conf.setLong(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, 5000);
  MockRM rm1 = new MockRM(conf);
  rm1.start();
  try {
    // Submit an application
    MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 20 * GB);
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.RUNNING);

    // AM requests a new 1G container
    am1.allocate("127.0.0.1", 1 * GB, 1, new ArrayList<ContainerId>());
    ContainerId containerId2 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
    // AM acquires the new container to start the container allocation expirer
    am1.allocate(null, null).getAllocatedContainers();
    // Report container status
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 2, ContainerState.RUNNING);
    // Wait until container status is RUNNING, and is removed from
    // allocation expirer
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);

    // am1 asks to change containerId2 from 1GB to 3GB (container version 0)
    am1.sendContainerResizingRequest(Collections.singletonList(
        UpdateContainerRequest.newInstance(0, containerId2,
            ContainerUpdateType.INCREASE_RESOURCE,
            Resources.createResource(3 * GB), null)));
    // Kick off scheduling and sleep for 1 second to
    // make sure the allocation is done
    nm1.nodeHeartbeat(true);
    Thread.sleep(1000);
    // Start container increase allocation expirer
    am1.allocate(null, null);
    // Remember the resource (3G) in order to report status
    Resource resource1 = Resources.clone(
        rm1.getResourceScheduler().getRMContainer(containerId2)
            .getAllocatedResource());

    // This should not work, since the container version is wrong:
    // the request still carries version 0 while the RM reports version 1
    AllocateResponse response = am1.sendContainerResizingRequest(
        Collections.singletonList(
            UpdateContainerRequest.newInstance(0, containerId2,
                ContainerUpdateType.INCREASE_RESOURCE,
                Resources.createResource(5 * GB), null)));
    List<UpdateContainerError> updateErrors = response.getUpdateErrors();
    Assert.assertEquals(1, updateErrors.size());
    Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR",
        updateErrors.get(0).getReason());
    Assert.assertEquals(1, updateErrors.get(0).getCurrentContainerVersion());

    // am1 asks to change containerId2 from 3GB to 5GB (container version 1)
    am1.sendContainerResizingRequest(Collections.singletonList(
        UpdateContainerRequest.newInstance(1, containerId2,
            ContainerUpdateType.INCREASE_RESOURCE,
            Resources.createResource(5 * GB), null)));
    // Kick off scheduling and sleep for 1 second to
    // make sure the allocation is done
    nm1.nodeHeartbeat(true);
    Thread.sleep(1000);
    // Reset container increase allocation expirer
    am1.allocate(null, null);

    // Verify current resource allocation in RM: 1G (AM) + 5G (containerId2)
    checkUsedResource(rm1, "default", 6 * GB, null);
    FiCaSchedulerApp app =
        TestUtils.getFiCaSchedulerApp(rm1, app1.getApplicationId());
    Assert.assertEquals(6 * GB,
        app.getAppAttemptResourceUsage().getUsed().getMemorySize());
    // Verify available resource is now reduced to 14GB
    verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 14 * GB);

    // Use the first token (3G)
    nm1.containerIncreaseStatus(getContainer(rm1, containerId2, resource1));
    // Wait long enough for the second token (5G) to expire, and verify that
    // the roll back action is completed as expected
    Thread.sleep(10000);
    am1.allocate(null, null);
    Thread.sleep(2000);

    // Verify container size is rolled back to 3G
    Assert.assertEquals(3 * GB,
        rm1.getResourceScheduler().getRMContainer(containerId2)
            .getAllocatedResource().getMemorySize());
    // Verify total resource usage is 4G
    checkUsedResource(rm1, "default", 4 * GB, null);
    Assert.assertEquals(4 * GB,
        app.getAppAttemptResourceUsage().getUsed().getMemorySize());
    // Verify available resource is rolled back to 16GB
    verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 16 * GB);

    // Verify NM receives the decrease message (3G)
    List<Container> containersToDecrease =
        nm1.nodeHeartbeat(true).getContainersToDecrease();
    Assert.assertEquals(1, containersToDecrease.size());
    Assert.assertEquals(3 * GB,
        containersToDecrease.get(0).getResource().getMemorySize());
  } finally {
    // Stop the RM even when an assertion or call above throws, so a failing
    // run does not leave the started MockRM behind.
    rm1.stop();
  }
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestIncreaseAllocationExpirer method testDecreaseAfterIncreaseWithAllocationExpiration.
/**
 * Checks how decrease requests interact with pending (unused) increase tokens
 * and with the container increase allocation expirer.
 */
@Test
public void testDecreaseAfterIncreaseWithAllocationExpiration() throws Exception {
  /*
   * 1. Allocate three containers: containerId2, containerId3, containerId4
   * 2. Increase resource of containerId2: 3G -> 6G
   * 3. Increase resource of containerId3: 3G -> 6G
   * 4. Increase resource of containerId4: 3G -> 6G
   * 5. Do NOT use the increase tokens for containerId2 and containerId3
   * 6. Decrease containerId2: 6G -> 2G (i.e., below last confirmed resource)
   * 7. Decrease containerId3: 6G -> 4G (i.e., above last confirmed resource)
   * 8. Decrease containerId4: 6G -> 4G (i.e., above last confirmed resource)
   * 9. Use token for containerId4 to increase containerId4 on NM to 6G
   * 10. Verify containerId2 eventually uses 2G (removed from expirer)
   * 11. Verify containerId3 eventually uses 3G (increase token expires)
   * 12. Verify containerId4 eventually uses 4G (removed from expirer)
   * 13. Verify NM eventually uses 3G for containerId3, 4G for containerId4
   */
  // Set the allocation expiration to 5 seconds
  conf.setLong(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, 5000);
  MockRM rm1 = new MockRM(conf);
  rm1.start();
  try {
    // Submit an application
    MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 20 * GB);
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.RUNNING);

    // AM requests three new containers of 3G each
    am1.allocate("127.0.0.1", 3 * GB, 3, new ArrayList<ContainerId>());
    ContainerId containerId2 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
    ContainerId containerId3 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
    rm1.waitForState(nm1, containerId3, RMContainerState.ALLOCATED);
    ContainerId containerId4 =
        ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
    rm1.waitForState(nm1, containerId4, RMContainerState.ALLOCATED);

    // AM acquires tokens to start container allocation expirer
    List<Container> containers =
        am1.allocate(null, null).getAllocatedContainers();
    Assert.assertEquals(3, containers.size());
    Assert.assertNotNull(containers.get(0).getContainerToken());
    Assert.assertNotNull(containers.get(1).getContainerToken());
    Assert.assertNotNull(containers.get(2).getContainerToken());

    // Report container status
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 2, ContainerState.RUNNING);
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 3, ContainerState.RUNNING);
    nm1.nodeHeartbeat(
        app1.getCurrentAppAttempt().getAppAttemptId(), 4, ContainerState.RUNNING);
    // Wait until container status becomes RUNNING
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
    rm1.waitForState(nm1, containerId4, RMContainerState.RUNNING);

    // am1 asks to increase containerId2, containerId3 and containerId4
    // from 3GB to 6GB
    List<UpdateContainerRequest> increaseRequests = new ArrayList<>();
    increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId2,
        ContainerUpdateType.INCREASE_RESOURCE,
        Resources.createResource(6 * GB), null));
    increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId3,
        ContainerUpdateType.INCREASE_RESOURCE,
        Resources.createResource(6 * GB), null));
    increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId4,
        ContainerUpdateType.INCREASE_RESOURCE,
        Resources.createResource(6 * GB), null));
    am1.sendContainerResizingRequest(increaseRequests);
    nm1.nodeHeartbeat(true);
    Thread.sleep(1000);
    // Start container increase allocation expirer
    am1.allocate(null, null);

    // Decrease containers (requests carry container version 1, i.e. the
    // version after the increase above)
    List<UpdateContainerRequest> decreaseRequests = new ArrayList<>();
    decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId2,
        ContainerUpdateType.DECREASE_RESOURCE,
        Resources.createResource(2 * GB), null));
    decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId3,
        ContainerUpdateType.DECREASE_RESOURCE,
        Resources.createResource(4 * GB), null));
    decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId4,
        ContainerUpdateType.DECREASE_RESOURCE,
        Resources.createResource(4 * GB), null));
    AllocateResponse response =
        am1.sendContainerResizingRequest(decreaseRequests);
    // Verify containers are decreased in scheduler
    Assert.assertEquals(3, response.getUpdatedContainers().size());

    // Use the token for containerId4 on NM (6G). This should set the last
    // confirmed resource to 4G, and cancel the allocation expirer
    nm1.containerIncreaseStatus(
        getContainer(rm1, containerId4, Resources.createResource(6 * GB)));
    // Wait for the containerId3 increase token to expire
    // (the expiry interval configured above is 5 seconds)
    Thread.sleep(10000);
    am1.allocate(null, null);

    Assert.assertEquals(2 * GB,
        rm1.getResourceScheduler().getRMContainer(containerId2)
            .getAllocatedResource().getMemorySize());
    Assert.assertEquals(3 * GB,
        rm1.getResourceScheduler().getRMContainer(containerId3)
            .getAllocatedResource().getMemorySize());
    Assert.assertEquals(4 * GB,
        rm1.getResourceScheduler().getRMContainer(containerId4)
            .getAllocatedResource().getMemorySize());

    // Verify NM receives 2 decrease messages
    List<Container> containersToDecrease =
        nm1.nodeHeartbeat(true).getContainersToDecrease();
    Assert.assertEquals(2, containersToDecrease.size());
    // Sort the list to make sure containerId3 is the first
    Collections.sort(containersToDecrease);
    Assert.assertEquals(3 * GB,
        containersToDecrease.get(0).getResource().getMemorySize());
    Assert.assertEquals(4 * GB,
        containersToDecrease.get(1).getResource().getMemorySize());
  } finally {
    // Stop the RM even when an assertion or call above throws, so a failing
    // run does not leave the started MockRM behind.
    rm1.stop();
  }
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestContainerAllocation method testExcessReservationThanNodeManagerCapacity.
/**
 * Requests a 2GB container with two registered nodes (2GB and 3GB) and checks
 * that nothing is handed out after scheduling on nm1, while exactly one
 * container is handed out after scheduling on nm2.
 */
@Test(timeout = 60000)
public void testExcessReservationThanNodeManagerCapacity() throws Exception {
  @SuppressWarnings("resource") MockRM rm = new MockRM(conf);
  rm.start();
  try {
    // Register two nodes: nm1 with 2GB and nm2 with 3GB
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4);
    MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4);
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);

    // Poll (up to 20 x 100ms) until both nodes are visible in the RM context
    int waitCount = 20;
    int size;
    while ((size = rm.getRMContext().getRMNodes().size()) != 2
        && waitCount-- > 0) {
      LOG.info("Waiting for node managers to register : " + size);
      Thread.sleep(100);
    }
    Assert.assertEquals(2, rm.getRMContext().getRMNodes().size());

    // Submit an application
    RMApp app1 = rm.submitApp(128);
    // kick the scheduling
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();

    LOG.info("sending container requests ");
    am1.addRequests(new String[] { "*" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();

    // kick the scheduler on nm1 and poll (up to 20 x 500ms) for a container
    nm1.nodeHeartbeat(true);
    int waitCounter = 20;
    LOG.info("heartbeating nm1");
    while (alloc1Response.getAllocatedContainers().size() < 1
        && waitCounter-- > 0) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(500);
      alloc1Response = am1.schedule();
    }
    LOG.info("received container : "
        + alloc1Response.getAllocatedContainers().size());
    // No container should be allocated.
    // Internally it should not have been reserved either.
    Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size());

    // Now kick the scheduler on nm2 and poll again
    LOG.info("heartbeating nm2");
    waitCounter = 20;
    nm2.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1
        && waitCounter-- > 0) {
      LOG.info("Waiting for containers to be created for app 1...");
      Thread.sleep(500);
      alloc1Response = am1.schedule();
    }
    LOG.info("received container : "
        + alloc1Response.getAllocatedContainers().size());
    Assert.assertEquals(1, alloc1Response.getAllocatedContainers().size());
  } finally {
    // Stop the RM even when an assertion or call above throws, so a failing
    // run does not leave the started MockRM behind.
    rm.stop();
  }
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestContainerResizing method testOrderOfIncreaseContainerRequestAllocation.
/**
 * Checks the order in which pending container increase requests are served
 * when the node cannot satisfy all of them.
 */
@Test
public void testOrderOfIncreaseContainerRequestAllocation() throws Exception {
  /*
   * Multiple containers need to be increased. Check that containers are
   * increased in priority order; when the priority is the same, the
   * container with the smaller containerId is preferred.
   */
  MockRM rm1 = new MockRM() {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return mgr;
    }
  };
  rm1.start();
  try {
    MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
    // app1 -> "default" queue
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    FiCaSchedulerApp app =
        TestUtils.getFiCaSchedulerApp(rm1, app1.getApplicationId());
    ApplicationAttemptId attemptId = am1.getApplicationAttemptId();

    // Container 2, 3 (priority=3)
    allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2);
    // Container 4, 5 (priority=2)
    allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4);
    // Container 6, 7 (priority=4)
    allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6);

    // am1 asks to change its container[2-7] from 1G to 2G
    List<UpdateContainerRequest> increaseRequests = new ArrayList<>();
    for (int cId = 2; cId <= 7; cId++) {
      ContainerId containerId = ContainerId.newContainerId(attemptId, cId);
      increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId,
          ContainerUpdateType.INCREASE_RESOURCE,
          Resources.createResource(2 * GB), null));
    }
    am1.sendContainerResizingRequest(increaseRequests);

    // Six increases of 1G each are pending
    checkPendingResource(rm1, "default", 6 * GB, null);
    Assert.assertEquals(6 * GB,
        app.getAppAttemptResourceUsage().getPending().getMemorySize());

    // Get rmNode1
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());

    // assignContainer, container-4/5/2 increased (which has highest priority
    // OR earlier allocated)
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    AllocateResponse allocateResponse = am1.allocate(null, null);
    Assert.assertEquals(3, allocateResponse.getUpdatedContainers().size());
    verifyContainerIncreased(allocateResponse,
        ContainerId.newContainerId(attemptId, 4), 2 * GB);
    verifyContainerIncreased(allocateResponse,
        ContainerId.newContainerId(attemptId, 5), 2 * GB);
    verifyContainerIncreased(allocateResponse,
        ContainerId.newContainerId(attemptId, 2), 2 * GB);

    /* Check statuses after allocation */
    // There are still 3 pending increase requests
    checkPendingResource(rm1, "default", 3 * GB, null);
    Assert.assertEquals(3 * GB,
        app.getAppAttemptResourceUsage().getPending().getMemorySize());
    // Queue/user/application's usage will be updated; the expected 10G
    // equals the capacity the node was registered with
    checkUsedResource(rm1, "default", 10 * GB, null);
    Assert.assertEquals(10 * GB,
        ((LeafQueue) cs.getQueue("default")).getUser("user").getUsed()
            .getMemorySize());
    Assert.assertEquals(0 * GB,
        app.getAppAttemptResourceUsage().getReserved().getMemorySize());
    Assert.assertEquals(10 * GB,
        app.getAppAttemptResourceUsage().getUsed().getMemorySize());
  } finally {
    // Close the RM even when an assertion or call above throws, so a failing
    // run does not leave the started MockRM behind.
    rm1.close();
  }
}
use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.
the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerComplete.
/**
 * Allocates two OPPORTUNISTIC containers, runs one of them to completion on
 * its NM, and then asks to promote that finished container to GUARANTEED.
 * The promotion must be rejected with an {@code INVALID_CONTAINER_ID} update
 * error, and the queue metrics must stay unchanged throughout.
 */
@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
  // Two 4096MB node managers, looked up later by the node a container lands on
  HashMap<NodeId, MockNM> nmByNodeId = new HashMap<>();
  MockNM firstNm = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
  nmByNodeId.put(firstNm.getNodeId(), firstNm);
  MockNM secondNm = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
  nmByNodeId.put(secondNm.getNodeId(), secondNm);
  firstNm.registerNode();
  secondNm.registerNode();

  OpportunisticContainerAllocatorAMService oppAmService =
      (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();

  // Launch the application master on the second node
  RMApp submittedApp = rm.submitApp(1 * GB, "app", "user", null, "default");
  ApplicationAttemptId appAttemptId =
      submittedApp.getCurrentAppAttempt().getAppAttemptId();
  MockAM appMaster = MockRM.launchAndRegisterAM(submittedApp, rm, secondNm);

  ResourceScheduler resourceScheduler = rm.getResourceScheduler();
  RMNode firstRmNode = rm.getRMContext().getRMNodes().get(firstNm.getNodeId());
  RMNode secondRmNode = rm.getRMContext().getRMNodes().get(secondNm.getNodeId());
  firstNm.nodeHeartbeat(true);
  secondNm.nodeHeartbeat(true);
  ((RMNodeImpl) firstRmNode)
      .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
  ((RMNodeImpl) secondRmNode)
      .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));

  // From here on the scheduler is used through its CapacityScheduler view
  CapacityScheduler capScheduler = (CapacityScheduler) resourceScheduler;
  // The context is looked up but never read afterwards; the lookup itself is
  // retained from the original test.
  OpportunisticContainerContext unusedOppContext =
      capScheduler.getApplicationAttempt(appAttemptId)
          .getOpportunisticContainerContext();

  // Feed node-added and node-update events for both nodes to the AM service
  oppAmService.handle(new NodeAddedSchedulerEvent(firstRmNode));
  oppAmService.handle(new NodeAddedSchedulerEvent(secondRmNode));
  oppAmService.handle(new NodeUpdateSchedulerEvent(firstRmNode));
  oppAmService.handle(new NodeUpdateSchedulerEvent(secondRmNode));
  // Both nodes are now expected to be applicable for scheduling
  firstNm.nodeHeartbeat(true);
  secondNm.nodeHeartbeat(true);
  Thread.sleep(1000);

  QueueMetrics rootMetrics = capScheduler.getRootQueue().getMetrics();
  // Baseline metrics before any opportunistic allocation
  verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);

  // Ask for two 1GB OPPORTUNISTIC containers on any host
  ExecutionTypeRequest oppExecType =
      ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true);
  ResourceRequest oppRequest = ResourceRequest.newInstance(
      Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true,
      null, oppExecType);
  AllocateResponse amResponse =
      appMaster.allocate(Arrays.asList(oppRequest), null);
  List<Container> granted = amResponse.getAllocatedContainers();
  Assert.assertEquals(2, granted.size());
  Container target = granted.get(0);
  MockNM hostingNm = nmByNodeId.get(target.getNodeId());

  // The hosting NM reports the container as RUNNING
  ContainerStatus runningStatus = ContainerStatus.newInstance(
      target.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0);
  hostingNm.nodeHeartbeat(Arrays.asList(runningStatus), true);
  Thread.sleep(200);

  // The RM must now see the container as RUNNING
  RMContainer trackedContainer = capScheduler
      .getApplicationAttempt(target.getId().getApplicationAttemptId())
      .getRMContainer(target.getId());
  Assert.assertEquals(RMContainerState.RUNNING, trackedContainer.getState());

  // The hosting NM reports the container as COMPLETE
  ContainerStatus completeStatus = ContainerStatus.newInstance(
      target.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0);
  hostingNm.nodeHeartbeat(Arrays.asList(completeStatus), true);
  Thread.sleep(200);

  // The RM must no longer track the container
  trackedContainer = capScheduler
      .getApplicationAttempt(target.getId().getApplicationAttemptId())
      .getRMContainer(target.getId());
  Assert.assertNull(trackedContainer);

  // Metrics are expected to be the same as the baseline
  verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);

  // Request promotion of the finished container; since it no longer exists
  // this must come back as an update error
  UpdateContainerRequest promoteRequest = UpdateContainerRequest.newInstance(
      0, target.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null,
      ExecutionType.GUARANTEED);
  amResponse = appMaster.sendContainerUpdateRequest(Arrays.asList(promoteRequest));

  // The completed container is reported back exactly once ...
  Assert.assertEquals(1, amResponse.getCompletedContainersStatuses().size());
  Assert.assertEquals(target.getId(),
      amResponse.getCompletedContainersStatuses().get(0).getContainerId());
  // ... nothing was updated, and a single error names the offending request
  Assert.assertEquals(0, amResponse.getUpdatedContainers().size());
  Assert.assertEquals(1, amResponse.getUpdateErrors().size());
  Assert.assertEquals("INVALID_CONTAINER_ID",
      amResponse.getUpdateErrors().get(0).getReason());
  Assert.assertEquals(target.getId(),
      amResponse.getUpdateErrors().get(0).getUpdateContainerRequest()
          .getContainerId());

  // Metrics are still expected to match the baseline
  verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);
}
Aggregations