
Example 46 with Container

Use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.

From the class TestFifoScheduler, method testFifoScheduling.

@Test(timeout = 60000)
public void testFifoScheduling() throws Exception {
    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.DEBUG);
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB);
    MockNM nm2 = rm.registerNode("127.0.0.2:5678", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 4GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    RMApp app2 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM2, remaining 2 GB on nm2
    nm2.nodeHeartbeat(true);
    RMAppAttempt attempt2 = app2.getCurrentAppAttempt();
    MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId());
    am2.registerAppAttempt();
    SchedulerNodeReport report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
    Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // add request for containers
    am2.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 3 * GB, 0, 1);
    // send the request
    AllocateResponse alloc2Response = am2.schedule();
    // kick the scheduler, 1 GB and 3 GB given to AM1 and AM2, remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(1000);
        alloc1Response = am1.schedule();
    }
    while (alloc2Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 2...");
        Thread.sleep(1000);
        alloc2Response = am2.schedule();
    }
    // kick the scheduler, nothing given remaining 2 GB.
    nm2.nodeHeartbeat(true);
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(1 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    List<Container> allocated2 = alloc2Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated2.size());
    Assert.assertEquals(3 * GB, allocated2.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId());
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm2.getAvailableResource().getMemorySize());
    Assert.assertEquals(6 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize());
    Container c1 = allocated1.get(0);
    Assert.assertEquals(GB, c1.getResource().getMemorySize());
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    int waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
        LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
        Thread.sleep(1000);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(5 * GB, report_nm1.getUsedResource().getMemorySize());
    rm.stop();
}
Also used : AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) SchedulerNodeReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Logger(org.apache.log4j.Logger) Test(org.junit.Test)
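
As a usage note, here is a minimal standalone sketch (not part of the Hadoop test above) of the kind of inspection the assertions perform on each allocated Container record returned by am1.schedule() / am2.schedule(); the AllocatedContainerInspector class name and the describe helper are illustrative assumptions:

import java.util.List;

import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;

public class AllocatedContainerInspector {

    /**
     * Prints the Container fields the test asserts on for every container
     * granted in one allocate round: container id, node id and memory size.
     */
    public static void describe(AllocateResponse response) {
        List<Container> allocated = response.getAllocatedContainers();
        for (Container container : allocated) {
            System.out.println("container " + container.getId()
                + " on " + container.getNodeId()
                + " with " + container.getResource().getMemorySize() + " MB");
        }
    }
}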

Example 47 with Container

Use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.

From the class TestFifoScheduler, method testBlackListNodes.

@Test(timeout = 50000)
public void testBlackListNodes() throws Exception {
    Configuration conf = new Configuration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, ResourceScheduler.class);
    MockRM rm = new MockRM(conf);
    rm.start();
    FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
    int rack_num_0 = 0;
    int rack_num_1 = 1;
    // Add 4 nodes in 2 racks
    // host_0_0 in rack0
    String host_0_0 = "127.0.0.1";
    RMNode n1 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_0);
    fs.handle(new NodeAddedSchedulerEvent(n1));
    // host_0_1 in rack0
    String host_0_1 = "127.0.0.2";
    RMNode n2 = MockNodes.newNodeInfo(rack_num_0, MockNodes.newResource(4 * GB), 1, host_0_1);
    fs.handle(new NodeAddedSchedulerEvent(n2));
    // host_1_0 in rack1
    String host_1_0 = "127.0.0.3";
    RMNode n3 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_0);
    fs.handle(new NodeAddedSchedulerEvent(n3));
    // host_1_1 in rack1
    String host_1_1 = "127.0.0.4";
    RMNode n4 = MockNodes.newNodeInfo(rack_num_1, MockNodes.newResource(4 * GB), 1, host_1_1);
    fs.handle(new NodeAddedSchedulerEvent(n4));
    // Add one application
    ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1);
    ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId(appId1, 1);
    createMockRMApp(appAttemptId1, rm.getRMContext());
    SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId1, "queue", "user");
    fs.handle(appEvent);
    SchedulerEvent attemptEvent = new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
    fs.handle(attemptEvent);
    List<ContainerId> emptyId = new ArrayList<ContainerId>();
    List<ResourceRequest> emptyAsk = new ArrayList<ResourceRequest>();
    // Allow rack-locality for rack_1, but blacklist host_1_0
    // Set up resource requests
    // Ask for a 1 GB container for app 1
    List<ResourceRequest> ask1 = new ArrayList<ResourceRequest>();
    ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), "rack1", BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
    ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1, RMNodeLabelsManager.NO_LABEL));
    fs.allocate(appAttemptId1, ask1, emptyId, Collections.singletonList(host_1_0), null, NULL_UPDATE_REQUESTS);
    // Trigger container assignment
    fs.handle(new NodeUpdateSchedulerEvent(n3));
    // Get the allocation for the application and verify no allocation on
    // blacklist node
    Allocation allocation1 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation1", 0, allocation1.getContainers().size());
    // verify host_1_1 can get allocated as not in blacklist
    fs.handle(new NodeUpdateSchedulerEvent(n4));
    Allocation allocation2 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation2", 1, allocation2.getContainers().size());
    List<Container> containerList = allocation2.getContainers();
    for (Container container : containerList) {
        Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
    }
    // Ask for a 1 GB container again for app 1
    List<ResourceRequest> ask2 = new ArrayList<ResourceRequest>();
    // this time, rack0 is also in blacklist, so only host_1_1 is available to
    // be assigned
    ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1));
    fs.allocate(appAttemptId1, ask2, emptyId, Collections.singletonList("rack0"), null, NULL_UPDATE_REQUESTS);
    // verify n1 is not qualified to be allocated
    fs.handle(new NodeUpdateSchedulerEvent(n1));
    Allocation allocation3 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation3", 0, allocation3.getContainers().size());
    // verify n2 is not qualified to be allocated
    fs.handle(new NodeUpdateSchedulerEvent(n2));
    Allocation allocation4 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation4", 0, allocation4.getContainers().size());
    // verify n3 is not qualified to be allocated
    fs.handle(new NodeUpdateSchedulerEvent(n3));
    Allocation allocation5 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation5", 0, allocation5.getContainers().size());
    fs.handle(new NodeUpdateSchedulerEvent(n4));
    Allocation allocation6 = fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, NULL_UPDATE_REQUESTS);
    Assert.assertEquals("allocation6", 1, allocation6.getContainers().size());
    containerList = allocation6.getContainers();
    for (Container container : containerList) {
        Assert.assertEquals("Container is allocated on n4", container.getNodeId(), n4.getNodeID());
    }
    rm.stop();
}
Also used : NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) NodeAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) AppAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent) ArrayList(java.util.ArrayList) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) NodeResourceUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent) SchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent) AppAttemptAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent) NodeRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent) RMNode(org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode) Container(org.apache.hadoop.yarn.api.records.Container) Allocation(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)
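
For comparison, a minimal sketch of the same 1 GB rack-plus-ANY ask built with the public ResourceRequest.newInstance factory instead of the server-side BuilderUtils helper used by the test; the OneGbAsk class name is an illustrative assumption:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class OneGbAsk {

    /** Builds a rack-level plus ANY-level ask for a single 1 GB container. */
    public static List<ResourceRequest> build(String rackName) {
        Priority priority = Priority.newInstance(0);
        Resource oneGb = Resource.newInstance(1024, 1); // 1024 MB, 1 vcore
        List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
        // Rack-local request, like the test's "rack1" entry.
        ask.add(ResourceRequest.newInstance(priority, rackName, oneGb, 1));
        // Off-switch (ANY) request, so the scheduler may fall back to any node.
        ask.add(ResourceRequest.newInstance(priority, ResourceRequest.ANY, oneGb, 1));
        return ask;
    }
}

The blacklist itself is passed separately to allocate(), as the Collections.singletonList(host_1_0) and Collections.singletonList("rack0") arguments in the test show.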

Example 48 with Container

Use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.

From the class TestFifoScheduler, method testResourceOverCommit.

@Test(timeout = 60000)
public void testResourceOverCommit() throws Exception {
    int waitCount;
    MockRM rm = new MockRM(conf);
    rm.start();
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 4 * GB);
    RMApp app1 = rm.submitApp(2048);
    // kick the scheduling, 2 GB given to AM1, remaining 2GB on nm1
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 2 GB used and 2 GB available
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // add request for containers
    am1.addRequests(new String[] { "127.0.0.1", "127.0.0.2" }, 2 * GB, 1, 1);
    // send the request
    AllocateResponse alloc1Response = am1.schedule();
    // kick the scheduler, 2 GB given to AM1, resource remaining 0
    nm1.nodeHeartbeat(true);
    while (alloc1Response.getAllocatedContainers().size() < 1) {
        LOG.info("Waiting for containers to be created for app 1...");
        Thread.sleep(1000);
        alloc1Response = am1.schedule();
    }
    List<Container> allocated1 = alloc1Response.getAllocatedContainers();
    Assert.assertEquals(1, allocated1.size());
    Assert.assertEquals(2 * GB, allocated1.get(0).getResource().getMemorySize());
    Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    // check node report, 4 GB used and 0 GB available
    Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    // check container is assigned with 2 GB.
    Container c1 = allocated1.get(0);
    Assert.assertEquals(2 * GB, c1.getResource().getMemorySize());
    // update node resource to 2 GB, so resource is over-consumed.
    Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<NodeId, ResourceOption>();
    nodeResourceMap.put(nm1.getNodeId(), ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1));
    UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    rm.getAdminService().updateNodeResource(request);
    waitCount = 0;
    while (waitCount++ != 20) {
        report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
        if (null != report_nm1 && report_nm1.getAvailableResource().getMemorySize() != 0) {
            break;
        }
        LOG.info("Waiting for RMNodeResourceUpdateEvent to be handled... Tried " + waitCount + " times already..");
        Thread.sleep(1000);
    }
    // Now the used resource is still 4 GB, and the available resource is
    // negative.
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(4 * GB, report_nm1.getUsedResource().getMemorySize());
    Assert.assertEquals(-2 * GB, report_nm1.getAvailableResource().getMemorySize());
    // Check container can complete successfully in case of resource
    // over-commitment.
    ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
    nm1.containerStatus(containerStatus);
    waitCount = 0;
    while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
        LOG.info("Waiting for containers to be finished for app 1... Tried " + waitCount + " times already..");
        Thread.sleep(100);
    }
    Assert.assertEquals(1, attempt1.getJustFinishedContainers().size());
    Assert.assertEquals(1, am1.schedule().getCompletedContainersStatuses().size());
    report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId());
    Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize());
    // As the container returns 2 GB back, the available resource becomes 0 again.
    Assert.assertEquals(0 * GB, report_nm1.getAvailableResource().getMemorySize());
    rm.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) UpdateNodeResourceRequest(org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ResourceOption(org.apache.hadoop.yarn.api.records.ResourceOption) SchedulerNodeReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport) NodeId(org.apache.hadoop.yarn.api.records.NodeId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Test(org.junit.Test)
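
For reference, a minimal sketch of building the same node-resource update that the test sends through rm.getAdminService().updateNodeResource(); the ShrinkNodeResource class name and the host/port parameters are illustrative assumptions, and the -1 over-commit timeout matches the value used in the test:

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;

public class ShrinkNodeResource {

    /** Builds a request that resizes one node to 2 GB / 1 vcore. */
    public static UpdateNodeResourceRequest buildRequest(String host, int port) {
        NodeId nodeId = NodeId.newInstance(host, port);
        // -1 disables the over-commit timeout, as in the test.
        ResourceOption twoGb =
            ResourceOption.newInstance(Resource.newInstance(2 * 1024, 1), -1);
        Map<NodeId, ResourceOption> nodeResourceMap =
            new HashMap<NodeId, ResourceOption>();
        nodeResourceMap.put(nodeId, twoGb);
        return UpdateNodeResourceRequest.newInstance(nodeResourceMap);
    }
}

Shrinking the node below its current usage is what drives the node's available memory negative in the assertions above.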

Example 49 with Container

Use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.

From the class TestAMRestart, method testAMRestartWithExistingContainers.

@Test(timeout = 30000)
public void testAMRestartWithExistingContainers() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    MockRM rm1 = new MockRM(conf);
    rm1.start();
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false, true);
    MockNM nm1 = new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
    nm1.registerNode();
    MockNM nm2 = new MockNM("127.0.0.1:2351", 4089, rm1.getResourceTrackerService());
    nm2.registerNode();
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    int NUM_CONTAINERS = 3;
    allocateContainers(nm1, am1, NUM_CONTAINERS);
    // launch the 2nd container, for testing that a running container is transferred.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // launch the 3rd container, for testing that a container allocated by the previous
    // attempt is completed by the next new attempt.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.RUNNING);
    ContainerId containerId3 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 3);
    rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
    // 4th container is still in the ACQUIRED state, for testing that an acquired container
    // is always killed.
    ContainerId containerId4 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 4);
    rm1.waitForState(nm1, containerId4, RMContainerState.ACQUIRED);
    // 5th container is in the ALLOCATED state, for testing that an allocated container
    // is always killed.
    am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    ContainerId containerId5 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 5);
    rm1.waitForState(nm1, containerId5, RMContainerState.ALLOCATED);
    // 6th container is in Reserved state.
    am1.allocate("127.0.0.1", 6000, 1, new ArrayList<ContainerId>());
    ContainerId containerId6 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 6);
    nm1.nodeHeartbeat(true);
    SchedulerApplicationAttempt schedulerAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId6);
    while (schedulerAttempt.getReservedContainers().isEmpty()) {
        System.out.println("Waiting for container " + containerId6 + " to be reserved.");
        nm1.nodeHeartbeat(true);
        Thread.sleep(200);
    }
    // assert containerId6 is reserved.
    Assert.assertEquals(containerId6, schedulerAttempt.getReservedContainers().get(0).getContainerId());
    // fail the AM by sending a CONTAINER_FINISHED event without unregistering.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // wait for some time; the previous AM's running containers should still remain
    // in the scheduler even though the AM failed
    Thread.sleep(3000);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // acquired/allocated containers are cleaned up.
    Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId4));
    Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId5));
    // wait for app to start a new attempt.
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    // assert this is a new AM.
    ApplicationAttemptId newAttemptId = app1.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
    // launch the new AM
    MockAM am2 = rm1.launchAM(app1, rm1, nm1);
    RegisterApplicationMasterResponse registerResponse = am2.registerAppAttempt();
    // Assert that two containers are running: containerId2 and containerId3.
    Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempts().size());
    boolean containerId2Exists = false, containerId3Exists = false;
    for (Container container : registerResponse.getContainersFromPreviousAttempts()) {
        if (container.getId().equals(containerId2)) {
            containerId2Exists = true;
        }
        if (container.getId().equals(containerId3)) {
            containerId3Exists = true;
        }
    }
    Assert.assertTrue(containerId2Exists && containerId3Exists);
    rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
    // complete a container by sending a container-complete event that carries the
    // earlier attempt's attemptId
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
    // Even though the completed-container event for containerId3 was sent to the
    // earlier failed attempt, the new RMAppAttempt also captures this container's
    // info.
    // The completed containerId4 is also transferred to the new attempt.
    RMAppAttempt newAttempt = app1.getRMAppAttempt(am2.getApplicationAttemptId());
    // 4 containers finished, acquired/allocated/reserved/completed.
    waitForContainersToFinish(4, newAttempt);
    boolean container3Exists = false, container4Exists = false, container5Exists = false, container6Exists = false;
    for (ContainerStatus status : newAttempt.getJustFinishedContainers()) {
        if (status.getContainerId().equals(containerId3)) {
            // containerId3 is the container ran by previous attempt but finished by the
            // new attempt.
            container3Exists = true;
        }
        if (status.getContainerId().equals(containerId4)) {
            // containerId4 is the Acquired Container killed by the previous attempt,
            // it's now inside new attempt's finished container list.
            container4Exists = true;
        }
        if (status.getContainerId().equals(containerId5)) {
            // containerId5 is the Allocated container killed by previous failed attempt.
            container5Exists = true;
        }
        if (status.getContainerId().equals(containerId6)) {
            // containerId6 is the reserved container killed by previous failed attempt.
            container6Exists = true;
        }
    }
    Assert.assertTrue(container3Exists && container4Exists && container5Exists && container6Exists);
    // New SchedulerApplicationAttempt also has the containers info.
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // record the scheduler attempt for testing.
    SchedulerApplicationAttempt schedulerNewAttempt = ((AbstractYarnScheduler) rm1.getResourceScheduler()).getCurrentAttemptForContainer(containerId2);
    // finish this application
    MockRM.finishAMAndVerifyAppState(app1, rm1, nm1, am2);
    // when the 2nd attempt finishes, it releases the 1st attempt's running
    // container.
    Assert.assertFalse(schedulerNewAttempt.getLiveContainers().contains(containerId2));
    // with the released containerId2, all 5 containers have now finished.
    System.out.println("New attempt's just finished containers: " + newAttempt.getJustFinishedContainers());
    waitForContainersToFinish(5, newAttempt);
    rm1.stop();
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Container(org.apache.hadoop.yarn.api.records.Container) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) SchedulerApplicationAttempt(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt) Test(org.junit.Test)
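
As a small usage sketch (not part of the test), the containerId2Exists / containerId3Exists loop can be reduced to set-membership checks over the ids handed back in getContainersFromPreviousAttempts(); the TransferredContainers class name is an illustrative assumption:

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;

public class TransferredContainers {

    /**
     * Collects the ids of containers handed over from earlier attempts, the
     * same list the test scans for containerId2 and containerId3.
     */
    public static Set<ContainerId> idsFromPreviousAttempts(
            RegisterApplicationMasterResponse registerResponse) {
        Set<ContainerId> ids = new HashSet<ContainerId>();
        for (Container container : registerResponse.getContainersFromPreviousAttempts()) {
            ids.add(container.getId());
        }
        return ids;
    }
}

With this helper, the check in the test becomes ids.contains(containerId2) && ids.contains(containerId3).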

Example 50 with Container

Use of org.apache.hadoop.yarn.api.records.Container in project hadoop by apache.

From the class TestAMRestart, method testAMRestartNotLostContainerAfterAttemptFailuresValidityInterval.

// Test restarting an AM launched with keepContainers and an AM reset window.
// After the AM reset window, even if the AM that failed was the last attempt,
// all containers launched by the previous AM should be kept.
@Test(timeout = 20000)
public void testAMRestartNotLostContainerAfterAttemptFailuresValidityInterval() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    // explicitly set max-am-retry count as 2.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    MockRM rm1 = new MockRM(conf);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
    nm1.registerNode();
    // set window size to 10s and enable keepContainers
    RMAppImpl app1 = (RMAppImpl) rm1.submitApp(200, 10000, true);
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    int NUM_CONTAINERS = 2;
    allocateContainers(nm1, am1, NUM_CONTAINERS);
    // launch the 2nd container, for testing running container transferred.
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
    ContainerId containerId2 = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
    // Fail attempt1 normally
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // launch the second attempt
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(2, app1.getAppAttempts().size());
    // It will be the last attempt.
    RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
    MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
    rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING);
    // wait for 10 seconds to reset AM failure count
    Thread.sleep(10 * 1000);
    // Fail attempt2 normally
    nm1.nodeHeartbeat(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    // can launch the third attempt successfully
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(3, app1.getAppAttempts().size());
    MockAM am3 = rm1.launchAM(app1, rm1, nm1);
    RegisterApplicationMasterResponse registerResponse = am3.registerAppAttempt();
    // keepContainers is applied, even if attempt2 was the last attempt.
    Assert.assertEquals(1, registerResponse.getContainersFromPreviousAttempts().size());
    boolean containerId2Exists = false;
    Container container = registerResponse.getContainersFromPreviousAttempts().get(0);
    if (container.getId().equals(containerId2)) {
        containerId2Exists = true;
    }
    Assert.assertTrue(containerId2Exists);
    rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
    rm1.stop();
}
Also used : RMAppImpl(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl) Container(org.apache.hadoop.yarn.api.records.Container) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) RegisterApplicationMasterResponse(org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) Test(org.junit.Test)
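
For context, a hedged sketch of the ApplicationSubmissionContext settings that the MockRM helper call submitApp(200, 10000, true) presumably corresponds to: keeping containers across attempts plus a 10-second attempt-failures validity interval. The mapping of the helper's arguments is an assumption, not taken from the test itself:

import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.util.Records;

public class KeepContainersSubmission {

    /** Configures an app so containers survive AM restarts within the window. */
    public static ApplicationSubmissionContext newContext() {
        ApplicationSubmissionContext context =
            Records.newRecord(ApplicationSubmissionContext.class);
        // Keep running containers when an AM attempt fails.
        context.setKeepContainersAcrossApplicationAttempts(true);
        // Failures older than 10 seconds no longer count against max attempts.
        context.setAttemptFailuresValidityInterval(10000L);
        return context;
    }
}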

Aggregations

Container (org.apache.hadoop.yarn.api.records.Container): 336
Test (org.junit.Test): 187
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 161
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 107
Resource (org.apache.hadoop.yarn.api.records.Resource): 84
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 83
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 77
Configuration (org.apache.hadoop.conf.Configuration): 73
ArrayList (java.util.ArrayList): 68
Priority (org.apache.hadoop.yarn.api.records.Priority): 56
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 55
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 55
TezConfiguration (org.apache.tez.dag.api.TezConfiguration): 48
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 47
AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse): 44
ContainerStatus (org.apache.hadoop.yarn.api.records.ContainerStatus): 42
TezTaskAttemptID (org.apache.tez.dag.records.TezTaskAttemptID): 40
ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 39
TaskCommunicatorManagerInterface (org.apache.tez.dag.app.TaskCommunicatorManagerInterface): 34
AMContainerMap (org.apache.tez.dag.app.rm.container.AMContainerMap): 33