Search in sources :

Example 1 with OpportunisticContainerContext

use of org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAndDemoteBeforeContainerStart.

/**
 * Exercises promotion of an OPPORTUNISTIC container to GUARANTEED, followed by
 * demotion back to OPPORTUNISTIC, while the RM still sees the container as
 * ACQUIRED (no node has reported it as running).
 *
 * <p>Flow: register four mock nodes (two ports on each of hosts h1 and h2),
 * allocate two OPPORTUNISTIC containers, request promotion of the first one,
 * check that a duplicate request and a wrong-version request are both
 * rejected, check that the promotion is only handed out after the container's
 * own node heartbeats (a heartbeat from the other node on the same host is not
 * enough), then demote the container and check that the queue metrics return
 * to their earlier values.
 *
 * @throws Exception if any interaction with the mock RM/NM/AM fails
 */
@Test(timeout = 600000)
public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
    // Two NodeManagers per host so a "same host, different node" heartbeat can be tested.
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm3.getNodeId(), nm3);
    MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
    nodes.put(nm4.getNodeId(), nm4);
    nm1.registerNode();
    nm2.registerNode();
    nm3.registerNode();
    nm4.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
    RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    nm3.nodeHeartbeat(true);
    nm4.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode3).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode4).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
    // All nodes 1 - 4 will be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    nm3.nodeHeartbeat(true);
    nm4.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Baseline metrics (only the AM container is accounted for at this point).
    verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Locate the other NodeManager that shares allocNode's host but uses a different port.
    MockNM sameHostDiffNode = null;
    for (MockNM candidate : nodes.values()) {
        NodeId candidateId = candidate.getNodeId();
        if (candidateId.getHost().equals(allocNode.getNodeId().getHost()) && candidateId.getPort() != allocNode.getNodeId().getPort()) {
            sameHostDiffNode = candidate;
        }
    }
    // Fail with a clear message instead of a bare NullPointerException further down.
    Assert.assertNotNull("No other node found on the same host as the allocated container", sameHostDiffNode);
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
    am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    // Node on same host should not result in allocation
    sameHostDiffNode.nodeHeartbeat(true);
    Thread.sleep(200);
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    // Verify metrics after the promotion request (nothing should change yet)
    verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
    // Send Promotion req again... this should result in update error
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Send Promotion req again with incorrect version...
    // this should also result in update error
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(1, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(0, allocateResponse.getUpdateErrors().get(0).getCurrentContainerVersion());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Ensure after correct node heartbeats, we should get the allocation
    allocNode.nodeHeartbeat(true);
    Thread.sleep(200);
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
    // Expected value goes first so failure messages read correctly.
    Assert.assertEquals(container.getId(), uc.getId());
    Assert.assertEquals(container.getVersion() + 1, uc.getVersion());
    // Verify metrics after promotion:
    // Allocated cores+mem should have increased, available should decrease
    verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    nm3.nodeHeartbeat(true);
    nm4.nodeHeartbeat(true);
    Thread.sleep(200);
    // Verify that the container is still in ACQUIRED state wrt the RM.
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
    Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
    // Now demote the container back..
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(), uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE, null, ExecutionType.OPPORTUNISTIC)));
    // This should happen in the same heartbeat..
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
    Assert.assertEquals(container.getId(), uc.getId());
    Assert.assertEquals(container.getVersion() + 2, uc.getVersion());
    // Verify metrics after demotion:
    // Everything should have reverted to what it was
    verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Example 2 with OpportunisticContainerContext

use of org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext in project hadoop by apache.

the class AbstractYarnScheduler method handleDecreaseRequests.

/**
 * Handles a batch of container update requests that either demote a
 * container's execution type or decrease its resources.
 *
 * <p>For each request:
 * <ul>
 *   <li>if the scheduler has no RMContainer for the id, a warning is logged
 *       and the request is skipped;</li>
 *   <li>if the attempt's update context refuses to register the request as an
 *       outstanding decrease, an {@code UPDATE_OUTSTANDING_ERROR} is recorded
 *       on the attempt;</li>
 *   <li>otherwise a replacement RMContainer is created and recorded on the
 *       attempt as newly demoted (for {@code DEMOTE_EXECUTION_TYPE}) or
 *       newly decreased (for any other update type).</li>
 * </ul>
 *
 * @param appAttempt       the application attempt that issued the requests
 * @param demotionRequests the demote / decrease requests to process
 */
private void handleDecreaseRequests(SchedulerApplicationAttempt appAttempt, List<UpdateContainerRequest> demotionRequests) {
    OpportunisticContainerContext opportunisticContext = appAttempt.getOpportunisticContainerContext();
    for (UpdateContainerRequest request : demotionRequests) {
        RMContainer current = rmContext.getScheduler().getRMContainer(request.getContainerId());
        if (current == null) {
            LOG.warn("Cannot demote/decrease non-existent (or completed) " + "Container [" + request.getContainerId() + "]");
            continue;
        }

        SchedulerNode hostNode = rmContext.getScheduler().getSchedulerNode(current.getContainer().getNodeId());
        boolean registered = appAttempt.getUpdateContext().checkAndAddToOutstandingDecreases(request, hostNode, current.getContainer());
        if (!registered) {
            appAttempt.addToUpdateContainerErrors(UpdateContainerError.newInstance(RMServerUtils.UPDATE_OUTSTANDING_ERROR, request));
            continue;
        }

        boolean isDemotion = ContainerUpdateType.DEMOTE_EXECUTION_TYPE == request.getContainerUpdateType();
        if (isDemotion) {
            RMContainer demoted = createDemotedRMContainer(appAttempt, opportunisticContext, current);
            appAttempt.addToNewlyDemotedContainers(request.getContainerId(), demoted);
        } else {
            RMContainer decreased = createDecreasedRMContainer(appAttempt, request, current);
            appAttempt.addToNewlyDecreasedContainers(request.getContainerId(), decreased);
        }
    }
}
Also used : OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) UpdateContainerRequest(org.apache.hadoop.yarn.api.records.UpdateContainerRequest) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 3 with OpportunisticContainerContext

use of org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testNodeRemovalDuringAllocate.

/**
 * Checks that allocate calls keep working when a node has been removed from
 * the scheduler but no removal event was sent to the AM service, and that the
 * attempt's opportunistic-container node map shrinks from two nodes to one.
 *
 * <p>Both phases poll: allocate is retried up to ten times with a 50 ms pause
 * until the node map reaches the expected size.
 *
 * @throws Exception if any interaction with the mock RM/NM/AM fails
 */
@Test(timeout = 60000)
public void testNodeRemovalDuringAllocate() throws Exception {
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    ApplicationAttemptId attemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler).getApplicationAttempt(attemptId).getOpportunisticContainerContext();
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // Both node 1 and node 2 will be applicable for scheduling.
    for (int i = 0; i < 10; i++) {
        am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        if (ctxt.getNodeMap().size() == 2) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(2, ctxt.getNodeMap().size());
    // Remove node from scheduler but not from AM Service.
    scheduler.handle(new NodeRemovedSchedulerEvent(rmNode1));
    // After removal of node 1, only 1 node will be applicable for scheduling.
    for (int i = 0; i < 10; i++) {
        try {
            am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2)), null);
        } catch (Exception e) {
            // Same failure type and message as Assert.fail(), but with the
            // underlying exception attached so the root cause is in the report.
            throw new AssertionError("Allocate request should be handled on node removal", e);
        }
        if (ctxt.getNodeMap().size() == 1) {
            break;
        }
        Thread.sleep(50);
    }
    Assert.assertEquals(1, ctxt.getNodeMap().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) Test(org.junit.Test)

Example 4 with OpportunisticContainerContext

use of org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerStart.

/**
 * Exercises promotion of an OPPORTUNISTIC container to GUARANTEED after the
 * node has reported the container as RUNNING.
 *
 * <p>Flow: register two mock nodes, allocate two OPPORTUNISTIC containers,
 * report the first as RUNNING, request its promotion, check that a duplicate
 * promotion request is rejected with {@code UPDATE_OUTSTANDING_ERROR}, then
 * heartbeat the container's node again and check that the promoted container
 * is returned, is still RUNNING in the RM, and that the queue metrics changed.
 *
 * @throws Exception if any interaction with the mock RM/NM/AM fails
 */
@Test(timeout = 60000)
public void testContainerPromoteAfterContainerStart() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // All nodes 1 to 2 will be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Baseline metrics before any OPPORTUNISTIC allocation.
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    // Verify that container is actually running wrt the RM..
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify Metrics After OPP allocation (Nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    // Verify metrics after the promotion request (nothing should change yet)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send Promotion req again... this should result in update error
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("UPDATE_OUTSTANDING_ERROR", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Heartbeat the container's node again (container still RUNNING); the
    // promoted container is expected in the next allocate response.
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    allocateResponse = am1.allocate(new ArrayList<>(), new ArrayList<>());
    Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
    Container uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
    Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
    // Expected value goes first so failure messages read correctly.
    Assert.assertEquals(container.getId(), uc.getId());
    Assert.assertEquals(container.getVersion() + 1, uc.getVersion());
    // Verify that the Container is still in RUNNING state wrt RM..
    rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Verify metrics after promotion:
    // Allocated cores+mem should have increased, available should decrease
    verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Example 5 with OpportunisticContainerContext

use of org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext in project hadoop by apache.

the class TestOpportunisticContainerAllocatorAMService method testContainerPromoteAfterContainerComplete.

/**
 * Checks that a promotion request for a container that has already completed
 * is rejected.
 *
 * <p>Flow: register two mock nodes, allocate two OPPORTUNISTIC containers,
 * report the first as RUNNING and then as COMPLETE, confirm the RM no longer
 * tracks it, then send a promotion request and check that the response
 * carries the completed-container status plus an {@code INVALID_CONTAINER_ID}
 * update error, with queue metrics unchanged throughout.
 *
 * @throws Exception if any interaction with the mock RM/NM/AM fails
 */
@Test(timeout = 600000)
public void testContainerPromoteAfterContainerComplete() throws Exception {
    HashMap<NodeId, MockNM> nodes = new HashMap<>();
    MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm1.getNodeId(), nm1);
    MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
    nodes.put(nm2.getNodeId(), nm2);
    nm1.registerNode();
    nm2.registerNode();
    OpportunisticContainerAllocatorAMService amservice = (OpportunisticContainerAllocatorAMService) rm.getApplicationMasterService();
    RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
    ResourceScheduler scheduler = rm.getResourceScheduler();
    RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    ((RMNodeImpl) rmNode1).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    ((RMNodeImpl) rmNode2).setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
    // Send add and update node events to AM Service.
    amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
    amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
    amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
    // All nodes 1 to 2 will be applicable for scheduling.
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    Thread.sleep(1000);
    QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue().getMetrics();
    // Baseline metrics before any OPPORTUNISTIC allocation.
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    AllocateResponse allocateResponse = am1.allocate(Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", Resources.createResource(1 * GB), 2, true, null, ExecutionTypeRequest.newInstance(ExecutionType.OPPORTUNISTIC, true))), null);
    List<Container> allocatedContainers = allocateResponse.getAllocatedContainers();
    Assert.assertEquals(2, allocatedContainers.size());
    Container container = allocatedContainers.get(0);
    MockNM allocNode = nodes.get(container.getNodeId());
    // Start Container in NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)), true);
    Thread.sleep(200);
    // Verify that container is actually running wrt the RM..
    RMContainer rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
    // Container Completed in the NM
    allocNode.nodeHeartbeat(Arrays.asList(ContainerStatus.newInstance(container.getId(), ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0)), true);
    Thread.sleep(200);
    // Verify that container has been removed..
    rmContainer = ((CapacityScheduler) scheduler).getApplicationAttempt(container.getId().getApplicationAttemptId()).getRMContainer(container.getId());
    Assert.assertNull(rmContainer);
    // Verify metrics after the container completed (nothing should change)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
    // Send Promotion req... this should result in update error
    // Since the container doesn't exist anymore..
    allocateResponse = am1.sendContainerUpdateRequest(Arrays.asList(UpdateContainerRequest.newInstance(0, container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE, null, ExecutionType.GUARANTEED)));
    Assert.assertEquals(1, allocateResponse.getCompletedContainersStatuses().size());
    Assert.assertEquals(container.getId(), allocateResponse.getCompletedContainersStatuses().get(0).getContainerId());
    Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
    Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
    Assert.assertEquals("INVALID_CONTAINER_ID", allocateResponse.getUpdateErrors().get(0).getReason());
    Assert.assertEquals(container.getId(), allocateResponse.getUpdateErrors().get(0).getUpdateContainerRequest().getContainerId());
    // Verify metrics after the rejected promotion (nothing should change again)
    verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) HashMap(java.util.HashMap) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) DistributedSchedulingAllocateResponse(org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler) OpportunisticContainerContext(org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext) RMNodeImpl(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl) CapacityScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler) Test(org.junit.Test)

Aggregations

OpportunisticContainerContext (org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext)7 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)5 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)5 Container (org.apache.hadoop.yarn.api.records.Container)4 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)4 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)4 RMNodeImpl (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl)4 ResourceScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler)4 NodeAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent)4 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)4 Test (org.junit.Test)4 HashMap (java.util.HashMap)3 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)3 NodeId (org.apache.hadoop.yarn.api.records.NodeId)3 DistributedSchedulingAllocateResponse (org.apache.hadoop.yarn.server.api.protocolrecords.DistributedSchedulingAllocateResponse)3 QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)3 ArrayList (java.util.ArrayList)2 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)2 SchedulerApplicationAttempt (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt)2 CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)2