Search in sources :

Example 96 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class RMStateStoreTestBase method testRemoveAttempt.

/**
 * Stores one application with two attempts, removes the first attempt through
 * {@code removeApplicationAttemptInternal}, and checks that only the second
 * attempt remains, both immediately and after the state is loaded again from
 * a store obtained from the helper.
 *
 * @param stateStoreHelper supplies the store under test and the
 *                         {@code attemptExists} probe used for verification
 * @throws Exception if any store operation fails
 */
public void testRemoveAttempt(RMStateStoreHelper stateStoreHelper) throws Exception {
    RMStateStore liveStore = stateStoreHelper.getRMStateStore();
    TestDispatcher eventDispatcher = new TestDispatcher();
    liveStore.setRMDispatcher(eventDispatcher);

    ApplicationId appId = ApplicationId.newInstance(1383183339, 6);
    storeApp(liveStore, appId, 123456, 564321);

    // First attempt: this is the one that gets removed below.
    ApplicationAttemptId removedId = ApplicationAttemptId.newInstance(appId, 1);
    String removedContainer = ContainerId.newContainerId(removedId, 1).toString();
    RMAppAttempt removedAttempt = storeAttempt(liveStore, removedId, removedContainer, null, null, eventDispatcher);

    // Second attempt: must survive the removal of the first.
    ApplicationAttemptId keptId = ApplicationAttemptId.newInstance(appId, 2);
    String keptContainer = ContainerId.newContainerId(keptId, 1).toString();
    RMAppAttempt keptAttempt = storeAttempt(liveStore, keptId, keptContainer, null, null, eventDispatcher);

    liveStore.removeApplicationAttemptInternal(removedId);
    Assert.assertFalse(stateStoreHelper.attemptExists(removedAttempt));
    Assert.assertTrue(stateStoreHelper.attemptExists(keptAttempt));

    // let things settle down
    Thread.sleep(1000);
    liveStore.close();

    // Load the state back through a store handed out by the helper.
    RMStateStore reloadedStore = stateStoreHelper.getRMStateStore();
    RMState loadedState = reloadedStore.loadState();
    Map<ApplicationId, ApplicationStateData> loadedApps = loadedState.getApplicationState();
    ApplicationStateData loadedApp = loadedApps.get(appId);

    // The application itself is still present ...
    assertNotNull(loadedApp);
    // ... its first attempt id is now 2, and only attempt 2 is stored.
    assertEquals(2, loadedApp.getFirstAttemptId());
    assertNull(loadedApp.getAttempt(removedId));
    assertNotNull(loadedApp.getAttempt(keptId));
}
Also used : RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)

Example 97 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestAMRestart method testRMRestartOrFailoverNotCountedForAMFailures.

// Test regular RM restart/failover: the new RM should not count the
// AM failure towards the max-retry count and should be able to
// re-launch the AM.
/**
 * With {@code RM_AM_MAX_ATTEMPTS} set to 1, starts a second RM on the same
 * in-memory state store, reports the first AM container as
 * {@code KILLED_BY_RESOURCEMANAGER}, and verifies that a second attempt is
 * still launched and can finish.
 *
 * <p>Both RMs are stopped in a {@code finally} block so that a failed
 * assertion or a wait timeout does not leave them running.
 *
 * @throws Exception if any RM, NM or AM interaction fails
 */
@Test(timeout = 50000)
public void testRMRestartOrFailoverNotCountedForAMFailures() throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, ResourceScheduler.class);
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, false);
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    // explicitly set max-am-retry count as 1.
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    // Created later, once rm1 has a running attempt; stays null if we fail before that.
    MockRM rm2 = null;
    try {
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8000, rm1.getResourceTrackerService());
        nm1.registerNode();
        RMApp app1 = rm1.submitApp(200);
        // AM should be restarted even though max-am-attempt is 1.
        MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
        RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
        // Restart rm.
        rm2 = new MockRM(conf, memStore);
        rm2.start();
        ApplicationStateData appState = memStore.getState().getApplicationState().get(app1.getApplicationId());
        // re-register the NM, reporting the first AM container as killed by the RM
        nm1.setResourceTrackerService(rm2.getResourceTrackerService());
        NMContainerStatus status = Records.newRecord(NMContainerStatus.class);
        status.setContainerExitStatus(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
        status.setContainerId(attempt1.getMasterContainer().getId());
        status.setContainerState(ContainerState.COMPLETE);
        status.setDiagnostics("");
        nm1.registerNode(Collections.singletonList(status), null);
        rm2.waitForState(attempt1.getAppAttemptId(), RMAppAttemptState.FAILED);
        Assert.assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, appState.getAttempt(am1.getApplicationAttemptId()).getAMContainerExitStatus());
        // Will automatically start a new AppAttempt in rm2
        rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
        MockAM am2 = rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
        MockRM.finishAMAndVerifyAppState(app1, rm2, nm1, am2);
        // Current attempt as seen by rm2 once am2 has finished.
        RMAppAttempt latestAttempt = rm2.getRMContext().getRMApps().get(app1.getApplicationId()).getCurrentAppAttempt();
        Assert.assertTrue(latestAttempt.shouldCountTowardsMaxAttemptRetry());
        Assert.assertEquals(ContainerExitStatus.INVALID, appState.getAttempt(am2.getApplicationAttemptId()).getAMContainerExitStatus());
    } finally {
        // Same stop order as before (rm1, then rm2); the nested finally makes
        // sure rm2 is stopped even if stopping rm1 throws.
        try {
            rm1.stop();
        } finally {
            if (rm2 != null) {
                rm2.stop();
            }
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) Test(org.junit.Test)

Example 98 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestWorkPreservingRMRestart method testContainerCompleteMsgNotLostAfterAMFailedAndRMRestart.

/**
 * Fails the first AM, relaunches it, then brings up a second RM on the same
 * state store and re-registers the node with one COMPLETE and two RUNNING
 * containers that belong to the first attempt. The current attempt of the
 * recovered application must then report exactly one just-finished container.
 *
 * @throws Exception if any RM, NM or AM interaction fails
 */
@Test(timeout = 20000)
public void testContainerCompleteMsgNotLostAfterAMFailedAndRMRestart() throws Exception {
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    rm1 = new MockRM(conf, stateStore);
    rm1.start();
    MockNM node = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    node.registerNode();

    // submit app with keepContainersAcrossApplicationAttempts true
    Resource amResource = Records.newRecord(Resource.class);
    amResource.setMemorySize(200);
    String submitter = UserGroupInformation.getCurrentUser().getShortUserName();
    RMApp app0 = rm1.submitApp(amResource, "", submitter, null, false, null, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, null, null, true, true, false, null, 0, null, true, null);
    MockAM firstAm = MockRM.launchAndRegisterAM(app0, rm1, node);

    // Ask for two containers, then keep heartbeating until at least one is handed out.
    firstAm.allocate("127.0.0.1", 1000, 2, new ArrayList<ContainerId>());
    node.nodeHeartbeat(true);
    List<Container> granted = firstAm.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
    while (granted.isEmpty()) {
        node.nodeHeartbeat(true);
        granted.addAll(firstAm.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers());
        Thread.sleep(500);
    }

    // am failed,and relaunch it
    node.nodeHeartbeat(firstAm.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    rm1.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED);
    // The returned MockAM for the relaunched attempt is not needed afterwards.
    MockRM.launchAndRegisterAM(app0, rm1, node);

    // rm failover
    rm2 = new MockRM(conf, stateStore);
    rm2.start();
    node.setResourceTrackerService(rm2.getResourceTrackerService());

    // container launched by first am completed
    NMContainerStatus amStatus = TestRMRestart.createNMContainerStatus(firstAm.getApplicationAttemptId(), 1, ContainerState.RUNNING);
    NMContainerStatus completedStatus = TestRMRestart.createNMContainerStatus(firstAm.getApplicationAttemptId(), 2, ContainerState.COMPLETE);
    NMContainerStatus runningStatus = TestRMRestart.createNMContainerStatus(firstAm.getApplicationAttemptId(), 3, ContainerState.RUNNING);
    List<NMContainerStatus> reported = Arrays.asList(amStatus, runningStatus, completedStatus);
    node.registerNode(reported, null);
    Thread.sleep(200);

    // check whether current am could get containerCompleteMsg
    RMAppAttempt currentAttempt = rm2.getRMContext().getRMApps().get(app0.getApplicationId()).getCurrentAppAttempt();
    assertEquals(1, currentAttempt.getJustFinishedContainers().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) Resource(org.apache.hadoop.yarn.api.records.Resource) ArrayList(java.util.ArrayList) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) Container(org.apache.hadoop.yarn.api.records.Container) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)

Example 99 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMApplicationHistoryWriter method testRMWritingMassiveHistory.

/**
 * Starts the given RM, runs one application that requests 10000 containers,
 * waits for all of them to be allocated, finishes the attempt, and then waits
 * for the same number of containers to be reported for cleanup.
 *
 * <p>The RM is stopped in a {@code finally} block so that a failed assertion
 * or a wait timeout does not leave the started RM running.
 *
 * @param rm the RM to start, drive and stop
 * @throws Exception if any RM, NM or AM interaction fails
 */
private void testRMWritingMassiveHistory(MockRM rm) throws Exception {
    try {
        rm.start();
        MockNM nm = rm.registerNode("127.0.0.1:1234", 1024 * 10100);
        RMApp app = rm.submitApp(1024);
        //Wait to make sure the attempt has the right state
        //TODO explore a better way than sleeping for a while (YARN-4929)
        Thread.sleep(1000);
        nm.nodeHeartbeat(true);
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
        am.registerAppAttempt();

        int request = 10000;
        am.allocate("127.0.0.1", 1024, request, new ArrayList<ContainerId>());
        nm.nodeHeartbeat(true);
        List<Container> allocated = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
        int waitCount = 0;
        int allocatedSize = allocated.size();
        // Each allocate call's containers are added to the running total;
        // poll at most 200 times, 300 ms apart.
        while (allocatedSize < request && waitCount++ < 200) {
            Thread.sleep(300);
            allocated = am.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()).getAllocatedContainers();
            allocatedSize += allocated.size();
            nm.nodeHeartbeat(true);
        }
        Assert.assertEquals(request, allocatedSize);

        am.unregisterAppAttempt();
        rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHING);
        nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
        rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);

        NodeHeartbeatResponse resp = nm.nodeHeartbeat(true);
        List<ContainerId> cleaned = resp.getContainersToCleanup();
        int cleanedSize = cleaned.size();
        waitCount = 0;
        // Same bounded polling for the containers reported for cleanup.
        while (cleanedSize < allocatedSize && waitCount++ < 200) {
            Thread.sleep(300);
            resp = nm.nodeHeartbeat(true);
            cleaned = resp.getContainersToCleanup();
            cleanedSize += cleaned.size();
        }
        Assert.assertEquals(allocatedSize, cleanedSize);
        rm.waitForState(app.getApplicationId(), RMAppState.FINISHED);
    } finally {
        rm.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ArrayList(java.util.ArrayList) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest)

Example 100 with RMAppAttempt

use of org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt in project hadoop by apache.

the class TestRMApplicationHistoryWriter method testWriteApplicationAttempt.

/**
 * Writes the "started" and then the "finished" event for one application
 * attempt and checks the fields of the history record read back from the
 * store. Each read is retried up to {@code MAX_RETRIES} times with a 100 ms
 * pause between tries.
 *
 * @throws Exception if writing or polling fails
 */
@Test
public void testWriteApplicationAttempt() throws Exception {
    // Attempt 1 of application (0, 1); used for writing, lookup and the expected container id.
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1);
    RMAppAttempt appAttempt = createRMAppAttempt(attemptId);

    writer.applicationAttemptStarted(appAttempt);
    ApplicationAttemptHistoryData history = null;
    for (int tryNo = 0; tryNo < MAX_RETRIES; tryNo++) {
        history = store.getApplicationAttempt(attemptId);
        if (history != null) {
            break;
        }
        Thread.sleep(100);
    }
    Assert.assertNotNull(history);
    Assert.assertEquals("test host", history.getHost());
    Assert.assertEquals(-100, history.getRPCPort());
    Assert.assertEquals(ContainerId.newContainerId(attemptId, 1), history.getMasterContainerId());

    writer.applicationAttemptFinished(appAttempt, RMAppAttemptState.FINISHED);
    // Re-read until the record carries an attempt state.
    for (int tryNo = 0; tryNo < MAX_RETRIES; tryNo++) {
        history = store.getApplicationAttempt(attemptId);
        if (history.getYarnApplicationAttemptState() != null) {
            break;
        }
        Thread.sleep(100);
    }
    Assert.assertEquals("test diagnostics info", history.getDiagnosticsInfo());
    Assert.assertEquals("test url", history.getTrackingURL());
    Assert.assertEquals(FinalApplicationStatus.UNDEFINED, history.getFinalApplicationStatus());
    Assert.assertEquals(YarnApplicationAttemptState.FINISHED, history.getYarnApplicationAttemptState());
}
Also used : RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationAttemptHistoryData(org.apache.hadoop.yarn.server.applicationhistoryservice.records.ApplicationAttemptHistoryData) Test(org.junit.Test)

Aggregations

RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)123 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)91 Test (org.junit.Test)71 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)40 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)35 Container (org.apache.hadoop.yarn.api.records.Container)31 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)30 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)28 ArrayList (java.util.ArrayList)26 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)22 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)22 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)21 AllocateResponse (org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse)19 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)18 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)16 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)14 HashMap (java.util.HashMap)13 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)13 UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)12