Search in sources :

Example 26 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class AbstractYarnScheduler method recoverContainersOnNode.

public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
    try {
        writeLock.lock();
        if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || (containerReports != null && containerReports.isEmpty())) {
            return;
        }
        for (NMContainerStatus container : containerReports) {
            ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
            RMApp rmApp = rmContext.getRMApps().get(appId);
            if (rmApp == null) {
                LOG.error("Skip recovering container " + container + " for unknown application.");
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            SchedulerApplication<T> schedulerApp = applications.get(appId);
            if (schedulerApp == null) {
                LOG.info("Skip recovering container  " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            LOG.info("Recovering container " + container);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
            if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
                // Do not recover containers for stopped attempt or previous attempt.
                if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
                    LOG.info("Skip recovering container " + container + " for already stopped attempt.");
                    killOrphanContainerOnNode(nm, container);
                    continue;
                }
            }
            // create container
            RMContainer rmContainer = recoverAndCreateContainer(container, nm);
            // recover RMContainer
            rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
            // recover scheduler node
            SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
            schedulerNode.recoverContainer(rmContainer);
            // recover queue: update headroom etc.
            Queue queue = schedulerAttempt.getQueue();
            queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
            // recover scheduler attempt
            schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
            // set master container for the current running AMContainer for this
            // attempt.
            RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
            if (appAttempt != null) {
                Container masterContainer = appAttempt.getMasterContainer();
                // container ID stored in AppAttempt.
                if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
                    ((RMContainerImpl) rmContainer).setAMContainer(true);
                }
            }
            if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
                // release the container
                rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
                LOG.info(container.getContainerId() + " is released by application.");
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainerRecoverEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) RMContainerImpl(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 27 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class RMStateStore method removeApplication.

/**
   * Non-blocking API
   * ResourceManager services call this to remove an application from the state
   * store
   * This does not block the dispatcher threads
   * There is no notification of completion for this operation.
   */
@SuppressWarnings("unchecked")
public void removeApplication(RMApp app) {
    ApplicationStateData appState = ApplicationStateData.newInstance(app.getSubmitTime(), app.getStartTime(), app.getApplicationSubmissionContext(), app.getUser(), app.getCallerContext());
    for (RMAppAttempt appAttempt : app.getAppAttempts().values()) {
        appState.attempts.put(appAttempt.getAppAttemptId(), null);
    }
    getRMStateStoreEventHandler().handle(new RMStateStoreRemoveAppEvent(appState));
}
Also used : RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)

Example 28 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class RMAppImpl method getRMAppMetrics.

@Override
public RMAppMetrics getRMAppMetrics() {
    Resource resourcePreempted = Resource.newInstance(0, 0);
    int numAMContainerPreempted = 0;
    int numNonAMContainerPreempted = 0;
    long memorySeconds = 0;
    long vcoreSeconds = 0;
    long preemptedMemorySeconds = 0;
    long preemptedVcoreSeconds = 0;
    for (RMAppAttempt attempt : attempts.values()) {
        if (null != attempt) {
            RMAppAttemptMetrics attemptMetrics = attempt.getRMAppAttemptMetrics();
            Resources.addTo(resourcePreempted, attemptMetrics.getResourcePreempted());
            numAMContainerPreempted += attemptMetrics.getIsPreempted() ? 1 : 0;
            numNonAMContainerPreempted += attemptMetrics.getNumNonAMContainersPreempted();
            // getAggregateAppResourceUsage() will calculate resource usage stats
            // for both running and finished containers.
            AggregateAppResourceUsage resUsage = attempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage();
            memorySeconds += resUsage.getMemorySeconds();
            vcoreSeconds += resUsage.getVcoreSeconds();
            preemptedMemorySeconds += attemptMetrics.getPreemptedMemory();
            preemptedVcoreSeconds += attemptMetrics.getPreemptedVcore();
        }
    }
    return new RMAppMetrics(resourcePreempted, numNonAMContainerPreempted, numAMContainerPreempted, memorySeconds, vcoreSeconds, preemptedMemorySeconds, preemptedVcoreSeconds);
}
Also used : RMAppAttemptMetrics(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) Resource(org.apache.hadoop.yarn.api.records.Resource) AggregateAppResourceUsage(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage)

Example 29 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testQueueMetricsOnRMRestart.

@SuppressWarnings("resource")
@Test(timeout = 60000)
public void testQueueMetricsOnRMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // PHASE 1: create state in an RM
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
    resetQueueMetrics(qm1);
    assertQueueMetrics(qm1, 0, 0, 0, 0);
    // create app that gets launched and does allocate before RM restart
    RMApp app1 = rm1.submitApp(200);
    // Need to wait first for AppAttempt to be started (RMAppState.ACCEPTED)
    // and then for it to reach RMAppAttemptState.SCHEDULED
    // inorder to ensure appsPending metric is incremented
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
    assertQueueMetrics(qm1, 1, 1, 0, 0);
    nm1.nodeHeartbeat(true);
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
        nm1.nodeHeartbeat(true);
        conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    assertQueueMetrics(qm1, 1, 0, 1, 0);
    // PHASE 2: create new RM and start from old state
    // create new RM to represent restart and recover state
    MockRM rm2 = createMockRM(conf, memStore);
    QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
    resetQueueMetrics(qm2);
    assertQueueMetrics(qm2, 0, 0, 0, 0);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    // recover app
    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    nm1.nodeHeartbeat(true);
    nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
    NMContainerStatus status = TestRMRestart.createNMContainerStatus(loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(status), null);
    while (loadedApp1.getAppAttempts().size() != 2) {
        Thread.sleep(200);
    }
    attempt1 = loadedApp1.getCurrentAppAttempt();
    attemptId1 = attempt1.getAppAttemptId();
    rm2.waitForState(attemptId1, RMAppAttemptState.SCHEDULED);
    assertQueueMetrics(qm2, 1, 1, 0, 0);
    nm1.nodeHeartbeat(true);
    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    assertQueueMetrics(qm2, 1, 0, 1, 0);
    am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
    nm1.nodeHeartbeat(true);
    conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
        nm1.nodeHeartbeat(true);
        conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    // finish the AMs
    finishApplicationMaster(loadedApp1, rm2, nm1, am1);
    // now AppAttempt and App becomes FINISHED,
    // we should also grant APP_ATTEMPT_REMOVE/APP_REMOVE event
    // had processed by scheduler
    rm2.waitForAppRemovedFromScheduler(loadedApp1.getApplicationId());
    assertQueueMetrics(qm2, 1, 0, 0, 1);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) QueueMetrics(org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 30 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMRestart method testRMRestart.

@SuppressWarnings("rawtypes")
@Test(timeout = 180000)
public void testRMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // PHASE 1: create state in an RM
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    // start like normal because state is empty
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("127.0.0.2:5678", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // nm2 will not heartbeat with RM1
    nm2.registerNode();
    // create app that will finish and the final state should be saved.
    RMApp app0 = rm1.submitApp(200);
    RMAppAttempt attempt0 = app0.getCurrentAppAttempt();
    // spot check that app is saved
    Assert.assertEquals(1, rmAppState.size());
    nm1.nodeHeartbeat(true);
    MockAM am0 = rm1.sendAMLaunched(attempt0.getAppAttemptId());
    am0.registerAppAttempt();
    finishApplicationMaster(app0, rm1, nm1, am0);
    // create app that gets launched and does allocate before RM restart
    RMApp app1 = rm1.submitApp(200);
    // assert app1 info is saved
    ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
    //kick the scheduling to allocate AM container
    nm1.nodeHeartbeat(true);
    // assert app1 attempt is saved
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // launch the AM
    MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    // AM request for containers
    am1.allocate("127.0.0.1", 1000, 1, new ArrayList<ContainerId>());
    // kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
        nm1.nodeHeartbeat(true);
        conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    // create app that does not get launched by RM before RM restart
    RMApp app2 = rm1.submitApp(200);
    // assert app2 info is saved
    appState = rmAppState.get(app2.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app2.getApplicationSubmissionContext().getApplicationId());
    // create unmanaged app
    RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, null, conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null);
    ApplicationAttemptId unmanagedAttemptId = appUnmanaged.getCurrentAppAttempt().getAppAttemptId();
    // assert appUnmanaged info is saved
    ApplicationId unmanagedAppId = appUnmanaged.getApplicationId();
    appState = rmAppState.get(unmanagedAppId);
    Assert.assertNotNull(appState);
    // wait for attempt to reach LAUNCHED state 
    rm1.waitForState(unmanagedAttemptId, RMAppAttemptState.LAUNCHED);
    rm1.waitForState(unmanagedAppId, RMAppState.ACCEPTED);
    // assert unmanaged attempt info is saved
    Assert.assertEquals(1, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), appUnmanaged.getApplicationSubmissionContext().getApplicationId());
    // PHASE 2: create new RM and start from old state
    // create new RM to represent restart and recover state
    MockRM rm2 = createMockRM(conf, memStore);
    // start new RM
    rm2.start();
    // change NM to point to new RM
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm2.setResourceTrackerService(rm2.getResourceTrackerService());
    // verify load of old state
    // 4 apps are loaded.
    // FINISHED app and attempt is also loaded back.
    // Unmanaged app state is still loaded back but it cannot be restarted by
    // the RM. this will change with work preserving RM restart in which AMs/NMs
    // are not rebooted.
    Assert.assertEquals(4, rm2.getRMContext().getRMApps().size());
    // check that earlier finished app and attempt is also loaded back and move
    // to finished state.
    rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
    // verify correct number of attempts and other data
    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    Assert.assertNotNull(loadedApp1);
    Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(app1.getApplicationSubmissionContext().getApplicationId(), loadedApp1.getApplicationSubmissionContext().getApplicationId());
    RMApp loadedApp2 = rm2.getRMContext().getRMApps().get(app2.getApplicationId());
    Assert.assertNotNull(loadedApp2);
    //Assert.assertEquals(0, loadedApp2.getAppAttempts().size());
    Assert.assertEquals(app2.getApplicationSubmissionContext().getApplicationId(), loadedApp2.getApplicationSubmissionContext().getApplicationId());
    // verify state machine kicked into expected states
    rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
    rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED);
    // verify attempts for apps
    // The app for which AM was started will wait for previous am
    // container finish event to arrive. However for an application for which
    // no am container was running will start new application attempt.
    Assert.assertEquals(1, loadedApp1.getAppAttempts().size());
    Assert.assertEquals(1, loadedApp2.getAppAttempts().size());
    // verify old AM is not accepted
    // change running AM to talk to new RM
    am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext());
    try {
        am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
        Assert.fail();
    } catch (ApplicationAttemptNotFoundException e) {
        Assert.assertTrue(e instanceof ApplicationAttemptNotFoundException);
    }
    // NM should be rebooted on heartbeat, even first heartbeat for nm2
    NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
    // new NM to represent NM re-register
    nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
    nm2 = new MockNM("127.0.0.2:5678", 15120, rm2.getResourceTrackerService());
    NMContainerStatus status = TestRMRestart.createNMContainerStatus(loadedApp1.getCurrentAppAttempt().getAppAttemptId(), 1, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(status), null);
    nm2.registerNode();
    rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
    // wait for the 2nd attempt to be started.
    int timeoutSecs = 0;
    while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
        ;
        Thread.sleep(200);
    }
    // verify no more reboot response sent
    hbResponse = nm1.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
    hbResponse = nm2.nodeHeartbeat(true);
    Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction());
    // assert app1 attempt is saved
    attempt1 = loadedApp1.getCurrentAppAttempt();
    attemptId1 = attempt1.getAppAttemptId();
    rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp1.getApplicationId());
    attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // Nodes on which the AM's run 
    MockNM am1Node = nm1;
    if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
        am1Node = nm2;
    }
    // assert app2 attempt is saved
    RMAppAttempt attempt2 = loadedApp2.getCurrentAppAttempt();
    ApplicationAttemptId attemptId2 = attempt2.getAppAttemptId();
    rm2.waitForState(attemptId2, RMAppAttemptState.ALLOCATED);
    appState = rmAppState.get(loadedApp2.getApplicationId());
    attemptState = appState.getAttempt(attemptId2);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId2, 1), attemptState.getMasterContainer().getId());
    MockNM am2Node = nm1;
    if (attemptState.getMasterContainer().getNodeId().toString().contains("127.0.0.2")) {
        am2Node = nm2;
    }
    // start the AM's
    am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
    am1.registerAppAttempt();
    MockAM am2 = rm2.sendAMLaunched(attempt2.getAppAttemptId());
    am2.registerAppAttempt();
    //request for containers
    am1.allocate("127.0.0.1", 1000, 3, new ArrayList<ContainerId>());
    am2.allocate("127.0.0.2", 1000, 1, new ArrayList<ContainerId>());
    // verify container allocate continues to work
    nm1.nodeHeartbeat(true);
    nm2.nodeHeartbeat(true);
    conts = am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (conts.size() == 0) {
        nm1.nodeHeartbeat(true);
        nm2.nodeHeartbeat(true);
        conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }
    // finish the AMs
    finishApplicationMaster(loadedApp1, rm2, am1Node, am1);
    finishApplicationMaster(loadedApp2, rm2, am2Node, am2);
    // stop RM's
    rm2.stop();
    rm1.stop();
    // completed apps are not removed immediately after app finish
    // And finished app is also loaded back.
    Assert.assertEquals(4, rmAppState.size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) ArrayList(java.util.ArrayList) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Test(org.junit.Test)

Aggregations

RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)123 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)91 Test (org.junit.Test)71 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)40 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)35 Container (org.apache.hadoop.yarn.api.records.Container)31 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)30 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)28 ArrayList (java.util.ArrayList)26 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)22 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)22 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)21 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)19 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)18 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)16 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)14 HashMap (java.util.HashMap)13 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)13 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)12