Search in sources :

Example 51 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestIncreaseAllocationExpirer method testConsecutiveContainerIncreaseAllocationExpiration.

/**
 * Issues two consecutive increase tokens for the same container, uses only
 * the first one on the NM, and verifies that the second increase is rolled
 * back after the allocation expiry interval has elapsed.
 */
@Test
public void testConsecutiveContainerIncreaseAllocationExpiration() throws Exception {
    /**
     * 1. Allocate 1 container: containerId2 (1G)
     * 2. Increase resource of containerId2: 1G -> 3G
     * 3. AM acquires the token
     * 4. Increase resource of containerId2 again: 3G -> 5G
     * 5. AM acquires the token
     * 6. AM uses the first token to increase the container in NM to 3G
     * 7. AM NEVER uses the second token
     * 8. Verify containerId2 eventually is allocated 3G after token expires
     * 9. Verify NM eventually uses 3G for containerId2
     */
    // Set the allocation expiration to 5 seconds
    conf.setLong(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, 5000);
    MockRM rm1 = new MockRM(conf);
    try {
        rm1.start();
        // Submit an application
        MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 20 * GB);
        RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.RUNNING);
        // AM requests a new container
        am1.allocate("127.0.0.1", 1 * GB, 1, new ArrayList<ContainerId>());
        ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
        rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
        // AM acquires the new container to start container allocation expirer
        am1.allocate(null, null).getAllocatedContainers();
        // Report container status
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 2, ContainerState.RUNNING);
        // Wait until container status is RUNNING, and is removed from
        // allocation expirer
        rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
        // am1 asks to change containerId2 from 1GB to 3GB
        am1.sendContainerResizingRequest(Collections.singletonList(UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(3 * GB), null)));
        // Kick off scheduling and sleep for 1 second to
        // make sure the allocation is done
        nm1.nodeHeartbeat(true);
        Thread.sleep(1000);
        // Start container increase allocation expirer
        am1.allocate(null, null);
        // Remember the resource (3G) in order to report status
        Resource resource1 = Resources.clone(rm1.getResourceScheduler().getRMContainer(containerId2).getAllocatedResource());
        // This should not work, since the container version is wrong
        // (version 0 is sent, but the error below reports current version 1)
        AllocateResponse response = am1.sendContainerResizingRequest(Collections.singletonList(UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(5 * GB), null)));
        List<UpdateContainerError> updateErrors = response.getUpdateErrors();
        Assert.assertEquals(1, updateErrors.size());
        Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", updateErrors.get(0).getReason());
        Assert.assertEquals(1, updateErrors.get(0).getCurrentContainerVersion());
        // am1 asks to change containerId2 from 3GB to 5GB
        am1.sendContainerResizingRequest(Collections.singletonList(UpdateContainerRequest.newInstance(1, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(5 * GB), null)));
        // Kick off scheduling and sleep for 1 second to
        // make sure the allocation is done
        nm1.nodeHeartbeat(true);
        Thread.sleep(1000);
        // Reset container increase allocation expirer
        am1.allocate(null, null);
        // Verify current resource allocation in RM
        checkUsedResource(rm1, "default", 6 * GB, null);
        FiCaSchedulerApp app = TestUtils.getFiCaSchedulerApp(rm1, app1.getApplicationId());
        Assert.assertEquals(6 * GB, app.getAppAttemptResourceUsage().getUsed().getMemorySize());
        // Verify available resource is now reduced to 14GB
        verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 14 * GB);
        // Use the first token (3G)
        nm1.containerIncreaseStatus(getContainer(rm1, containerId2, resource1));
        // Wait long enough for the second token (5G) to expire, and verify that
        // the roll back action is completed as expected
        Thread.sleep(10000);
        am1.allocate(null, null);
        Thread.sleep(2000);
        // Verify container size is rolled back to 3G
        Assert.assertEquals(3 * GB, rm1.getResourceScheduler().getRMContainer(containerId2).getAllocatedResource().getMemorySize());
        // Verify total resource usage is 4G
        checkUsedResource(rm1, "default", 4 * GB, null);
        Assert.assertEquals(4 * GB, app.getAppAttemptResourceUsage().getUsed().getMemorySize());
        // Verify available resource is rolled back to 16GB
        verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 16 * GB);
        // Verify NM receives the decrease message (3G)
        List<Container> containersToDecrease = nm1.nodeHeartbeat(true).getContainersToDecrease();
        Assert.assertEquals(1, containersToDecrease.size());
        Assert.assertEquals(3 * GB, containersToDecrease.get(0).getResource().getMemorySize());
    } finally {
        // Stop the RM even when an assertion or wait above throws, so the
        // mock RM is not left running after a failed test.
        rm1.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) Resource(org.apache.hadoop.yarn.api.records.Resource) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) UpdateContainerError(org.apache.hadoop.yarn.api.records.UpdateContainerError) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)

Example 52 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestIncreaseAllocationExpirer method testDecreaseAfterIncreaseWithAllocationExpiration.

/**
 * Increases three containers, then decreases each of them before the
 * increase tokens are all used, and verifies the final allocation of every
 * container as well as the decrease messages delivered to the NM.
 */
@Test
public void testDecreaseAfterIncreaseWithAllocationExpiration() throws Exception {
    /**
     * 1. Allocate three containers: containerId2, containerId3, containerId4
     * 2. Increase resource of containerId2: 3G -> 6G
     * 3. Increase resource of containerId3: 3G -> 6G
     * 4. Increase resource of containerId4: 3G -> 6G
     * 5. Do NOT use the increase tokens for containerId2 and containerId3
     * 6. Decrease containerId2: 6G -> 2G (i.e., below last confirmed resource)
     * 7. Decrease containerId3: 6G -> 4G (i.e., above last confirmed resource)
     * 8. Decrease containerId4: 6G -> 4G (i.e., above last confirmed resource)
     * 9. Use token for containerId4 to increase containerId4 on NM to 6G
     * 10. Verify containerId2 eventually uses 2G (removed from expirer)
     * 11. Verify containerId3 eventually uses 3G (increase token expires)
     * 12. Verify containerId4 eventually uses 4G (removed from expirer)
     * 13. Verify NM eventually uses 3G for containerId3, 4G for containerId4
     */
    // Set the allocation expiration to 5 seconds
    conf.setLong(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, 5000);
    MockRM rm1 = new MockRM(conf);
    try {
        rm1.start();
        // Submit an application
        MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 20 * GB);
        RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.RUNNING);
        // AM requests three new containers of 3GB each
        am1.allocate("127.0.0.1", 3 * GB, 3, new ArrayList<ContainerId>());
        ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
        rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED);
        ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
        rm1.waitForState(nm1, containerId3, RMContainerState.ALLOCATED);
        ContainerId containerId4 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
        rm1.waitForState(nm1, containerId4, RMContainerState.ALLOCATED);
        // AM acquires tokens to start container allocation expirer
        List<Container> containers = am1.allocate(null, null).getAllocatedContainers();
        Assert.assertEquals(3, containers.size());
        Assert.assertNotNull(containers.get(0).getContainerToken());
        Assert.assertNotNull(containers.get(1).getContainerToken());
        Assert.assertNotNull(containers.get(2).getContainerToken());
        // Report container status
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 2, ContainerState.RUNNING);
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 3, ContainerState.RUNNING);
        nm1.nodeHeartbeat(app1.getCurrentAppAttempt().getAppAttemptId(), 4, ContainerState.RUNNING);
        // Wait until container status becomes RUNNING
        rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
        rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
        rm1.waitForState(nm1, containerId4, RMContainerState.RUNNING);
        // am1 asks to increase containerId2, containerId3 and containerId4
        // from 3GB to 6GB
        List<UpdateContainerRequest> increaseRequests = new ArrayList<>();
        increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId2, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
        increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId3, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
        increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId4, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(6 * GB), null));
        am1.sendContainerResizingRequest(increaseRequests);
        // Kick off scheduling and give the allocation one second to finish
        nm1.nodeHeartbeat(true);
        Thread.sleep(1000);
        // Start container increase allocation expirer
        am1.allocate(null, null);
        // Decrease containers
        List<UpdateContainerRequest> decreaseRequests = new ArrayList<>();
        decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId2, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(2 * GB), null));
        decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId3, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(4 * GB), null));
        decreaseRequests.add(UpdateContainerRequest.newInstance(1, containerId4, ContainerUpdateType.DECREASE_RESOURCE, Resources.createResource(4 * GB), null));
        AllocateResponse response = am1.sendContainerResizingRequest(decreaseRequests);
        // Verify containers are decreased in scheduler
        Assert.assertEquals(3, response.getUpdatedContainers().size());
        // Use the token for containerId4 on NM (6G). This should set the last
        // confirmed resource to 4G, and cancel the allocation expirer
        nm1.containerIncreaseStatus(getContainer(rm1, containerId4, Resources.createResource(6 * GB)));
        // Wait for containerId3 token to expire
        Thread.sleep(10000);
        am1.allocate(null, null);
        Assert.assertEquals(2 * GB, rm1.getResourceScheduler().getRMContainer(containerId2).getAllocatedResource().getMemorySize());
        Assert.assertEquals(3 * GB, rm1.getResourceScheduler().getRMContainer(containerId3).getAllocatedResource().getMemorySize());
        Assert.assertEquals(4 * GB, rm1.getResourceScheduler().getRMContainer(containerId4).getAllocatedResource().getMemorySize());
        // Verify NM receives 2 decrease messages
        List<Container> containersToDecrease = nm1.nodeHeartbeat(true).getContainersToDecrease();
        Assert.assertEquals(2, containersToDecrease.size());
        // Sort the list to make sure containerId3 is the first
        Collections.sort(containersToDecrease);
        Assert.assertEquals(3 * GB, containersToDecrease.get(0).getResource().getMemorySize());
        Assert.assertEquals(4 * GB, containersToDecrease.get(1).getResource().getMemorySize());
    } finally {
        // Stop the RM even when an assertion or wait above throws, so the
        // mock RM is not left running after a failed test.
        rm1.stop();
    }
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ArrayList(java.util.ArrayList) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) Test(org.junit.Test)

Example 53 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestContainerAllocation method testExcessReservationThanNodeManagerCapacity.

/**
 * Registers a 2G node and a 3G node, requests one 2G container, and checks
 * that nothing is allocated while only nm1 heartbeats and that exactly one
 * container is allocated once nm2 heartbeats.
 */
@Test(timeout = 60000)
public void testExcessReservationThanNodeManagerCapacity() throws Exception {
    @SuppressWarnings("resource") MockRM rm = new MockRM(conf);
    try {
        rm.start();
        // Register two nodes: nm1 (2G) and nm2 (3G)
        MockNM nm1 = rm.registerNode("127.0.0.1:1234", 2 * GB, 4);
        MockNM nm2 = rm.registerNode("127.0.0.1:2234", 3 * GB, 4);
        nm1.nodeHeartbeat(true);
        nm2.nodeHeartbeat(true);
        // Poll (up to 20 x 100ms) until both nodes are known to the RM
        int waitCount = 20;
        int size;
        while ((size = rm.getRMContext().getRMNodes().size()) != 2 && waitCount-- > 0) {
            LOG.info("Waiting for node managers to register : " + size);
            Thread.sleep(100);
        }
        Assert.assertEquals(2, rm.getRMContext().getRMNodes().size());
        // Submit an application
        RMApp app1 = rm.submitApp(128);
        // kick the scheduling
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
        am1.registerAppAttempt();
        LOG.info("sending container requests ");
        am1.addRequests(new String[] { "*" }, 2 * GB, 1, 1);
        // send the request
        AllocateResponse alloc1Response = am1.schedule();
        // kick the scheduler
        nm1.nodeHeartbeat(true);
        int waitCounter = 20;
        LOG.info("heartbeating nm1");
        while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
            LOG.info("Waiting for containers to be created for app 1...");
            Thread.sleep(500);
            alloc1Response = am1.schedule();
        }
        LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());
        // No container should be allocated.
        // Internally it should not have been reserved either.
        Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size());
        LOG.info("heartbeating nm2");
        waitCounter = 20;
        nm2.nodeHeartbeat(true);
        while (alloc1Response.getAllocatedContainers().size() < 1 && waitCounter-- > 0) {
            LOG.info("Waiting for containers to be created for app 1...");
            Thread.sleep(500);
            alloc1Response = am1.schedule();
        }
        LOG.info("received container : " + alloc1Response.getAllocatedContainers().size());
        // After nm2 heartbeats, exactly one container is expected
        Assert.assertEquals(1, alloc1Response.getAllocatedContainers().size());
    } finally {
        // Stop the RM even when an assertion above fails, so the mock RM is
        // not left running after a failed test.
        rm.stop();
    }
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)

Example 54 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestContainerResizing method testOrderOfIncreaseContainerRequestAllocation.

/**
 * Sends increase requests for six containers spread over three priorities
 * on a 10G node and verifies which three are increased first, together with
 * the resulting pending and used resources.
 */
@Test
public void testOrderOfIncreaseContainerRequestAllocation() throws Exception {
    /**
     * There are multiple containers that need to be increased; check that
     * containers are increased in priority order and, when the priority is
     * the same, that the container with the smaller containerId is preferred.
     */
    MockRM rm1 = new MockRM() {

        @Override
        public RMNodeLabelsManager createNodeLabelManager() {
            return mgr;
        }
    };
    try {
        rm1.start();
        MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB);
        // app1 -> "default" queue
        RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default");
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
        FiCaSchedulerApp app = TestUtils.getFiCaSchedulerApp(rm1, app1.getApplicationId());
        ApplicationAttemptId attemptId = am1.getApplicationAttemptId();
        // Container 2, 3 (priority=3)
        allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2);
        // Container 4, 5 (priority=2)
        allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4);
        // Container 6, 7 (priority=4)
        allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6);
        // am1 asks to change its container[2-7] from 1G to 2G
        List<UpdateContainerRequest> increaseRequests = new ArrayList<>();
        for (int cId = 2; cId <= 7; cId++) {
            ContainerId containerId = ContainerId.newContainerId(attemptId, cId);
            increaseRequests.add(UpdateContainerRequest.newInstance(0, containerId, ContainerUpdateType.INCREASE_RESOURCE, Resources.createResource(2 * GB), null));
        }
        am1.sendContainerResizingRequest(increaseRequests);
        // Six increases of 1G each are pending
        checkPendingResource(rm1, "default", 6 * GB, null);
        Assert.assertEquals(6 * GB, app.getAppAttemptResourceUsage().getPending().getMemorySize());
        // Get rmNode1
        CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
        RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
        // assignContainer, container-4/5/2 increased (which has highest priority OR
        // earlier allocated)
        cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
        AllocateResponse allocateResponse = am1.allocate(null, null);
        Assert.assertEquals(3, allocateResponse.getUpdatedContainers().size());
        verifyContainerIncreased(allocateResponse, ContainerId.newContainerId(attemptId, 4), 2 * GB);
        verifyContainerIncreased(allocateResponse, ContainerId.newContainerId(attemptId, 5), 2 * GB);
        verifyContainerIncreased(allocateResponse, ContainerId.newContainerId(attemptId, 2), 2 * GB);
        /* Check statuses after allocation */
        // There are still 3 pending increase requests
        checkPendingResource(rm1, "default", 3 * GB, null);
        Assert.assertEquals(3 * GB, app.getAppAttemptResourceUsage().getPending().getMemorySize());
        // Queue/user/application's usage will be updated
        checkUsedResource(rm1, "default", 10 * GB, null);
        Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")).getUser("user").getUsed().getMemorySize());
        Assert.assertEquals(0 * GB, app.getAppAttemptResourceUsage().getReserved().getMemorySize());
        Assert.assertEquals(10 * GB, app.getAppAttemptResourceUsage().getUsed().getMemorySize());
    } finally {
        // Close the RM even when an assertion above fails, so the mock RM is
        // not left running after a failed test.
        rm1.close();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ArrayList(java.util.ArrayList) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) Test(org.junit.Test)

Example 55 with AllocateResponse

use of org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerComplete.

/**
 * Allocates opportunistic containers, lets one of them run and complete,
 * and then checks that a promotion request for the completed container is
 * answered with an INVALID_CONTAINER_ID update error while the root queue
 * metrics stay unchanged.
 */
@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
    // Two node managers, indexed by node id so that the node hosting an
    // allocated container can be looked up later.
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    HashMap<NodeId, MockNM> nodesById = new HashMap<>();
    nodesById.put(nm1.getNodeId(), nm1);
    nodesById.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();

    OpportunisticContainerAllocatorAMService amService = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
    MockAM am = MockRM.launchAndRegisterAM(app, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode firstRmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode secondRmNode = rm.getRMContext().getRMNodes().get(nm2.getNodeId());

    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) firstRmNode).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) secondRmNode).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));

    // The scheduler is used as a CapacityScheduler from here on.
    CapacityScheduler cs = (CapacityScheduler) scheduler;
    // The context is fetched but not read again in this test.
    OpportunisticContainerContext oppContext = cs.getApplicationAttempt(appAttemptId).getOpportunisticContainerContext();

    // Feed node-added and node-update events for both nodes to the AM service.
    amService.handle(new NodeAddedSchedulerEvent(firstRmNode));
    amService.handle(new NodeAddedSchedulerEvent(secondRmNode));
    amService.handle(new NodeUpdateSchedulerEvent(firstRmNode));
    amService.handle(new NodeUpdateSchedulerEvent(secondRmNode));

    // Both nodes should now be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);

    // Baseline metrics before any opportunistic allocation.
    QueueMetrics rootMetrics = cs.getRootQueue().getMetrics();
    verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);

    // Ask for two opportunistic 1G containers on any host.
    ResourceRequest oppRequest = ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true));
    AllocateResponse allocation = am.allocate(Arrays.asList(oppRequest), null);
    List<Container> granted = allocation.getAllocatedContainers();
    Assert.assertEquals(2, granted.size());
    Container oppContainer = granted.get(0);
    MockNM hostNode = nodesById.get(oppContainer.getNodeId());

    // Report the container as RUNNING from its node.
    ContainerStatus runningStatus = ContainerStatus.newInstance(oppContainer.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0);
    hostNode.nodeHeartbeat(Arrays.asList(runningStatus), true);
    Thread.sleep(200);

    // The RM should now see the container as RUNNING.
    RMContainer runningContainer = cs.getApplicationAttempt(oppContainer.getId().getApplicationAttemptId()).getRMContainer(oppContainer.getId());
    Assert.assertEquals(RMContainerState.RUNNING, runningContainer.getState());

    // Report the container as COMPLETE from its node.
    ContainerStatus completeStatus = ContainerStatus.newInstance(oppContainer.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0);
    hostNode.nodeHeartbeat(Arrays.asList(completeStatus), true);
    Thread.sleep(200);

    // The RM should no longer track the container.
    RMContainer removedContainer = cs.getApplicationAttempt(oppContainer.getId().getApplicationAttemptId()).getRMContainer(oppContainer.getId());
    Assert.assertNull(removedContainer);

    // Metrics after the opportunistic allocation: nothing should change.
    verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);

    // A promotion request must produce an update error, because the
    // container does not exist anymore.
    UpdateContainerRequest promoteRequest = UpdateContainerRequest.newInstance(0, oppContainer.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED);
    AllocateResponse promotion = am.sendContainerUpdateRequest(Arrays.asList(promoteRequest));
    Assert.assertEquals(1, promotion.getCompletedContainersStatuses().size());
    Assert.assertEquals(oppContainer.getId(), promotion.getCompletedContainersStatuses().get(0).getContainerId());
    Assert.assertEquals(0, promotion.getUpdatedContainers().size());
    Assert.assertEquals(1, promotion.getUpdateErrors().size());
    Assert.assertEquals("INVALID_CONTAINER_ID", promotion.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(oppContainer.getId(), promotion.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());

    // Metrics after the failed promotion: still unchanged.
    verifyMetrics(rootMetrics, 7168, 7, 1024, 1, 1);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Aggregations

AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)85 Test (org.junit.Test)54 Container (org.apache.hadoop.yarn.api.records.Container)44 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)38 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)31 ArrayList (java.util.ArrayList)24 AllocateRequest (org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest)24 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)19 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)19 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)18 ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)16 ContainerRequest (org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest)15 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)15 HashMap (java.util.HashMap)14 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)13 NMToken (org.apache.hadoop.yarn.api.records.NMToken)12 UpdatedContainer (org.apache.hadoop.yarn.api.records.UpdatedContainer)12 Configuration (org.apache.hadoop.conf.Configuration)11