Search in sources :

Example 16 with ApplicationStateData

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.

The class TestRMRestart, method testFinishedAppRemovalAfterRMRestart.

/**
 * With the completed-application limit configured to 1, runs one application
 * to completion before an RM restart and a second one after it, then checks
 * that the first application is no longer present in either the RM context
 * or the state store's application map.
 */
@Test(timeout = 60000)
public void testFinishedAppRemovalAfterRMRestart() throws Exception {
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);
    stateStore.init(conf);
    RMState storedState = stateStore.getState();

    // bring up the first RM and register a node with it
    MockRM firstRm = createMockRM(conf, stateStore);
    firstRm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // run the first application to completion
    RMApp firstApp = firstRm.submitApp(200);
    MockAM firstAm = launchAM(firstApp, firstRm, node);
    finishApplicationMaster(firstApp, firstRm, node, firstAm);

    // restart: second RM shares the same state store
    MockRM secondRm = createMockRM(conf, stateStore);
    secondRm.start();
    node.setResourceTrackerService(secondRm.getResourceTrackerService());
    node = secondRm.registerNode("127.0.0.1:1234", 15120);

    Map<ApplicationId, ApplicationStateData> storedApps = storedState.getApplicationState();
    ApplicationId firstAppId = firstApp.getApplicationId();

    // the first app exists in both the state store and the rmContext
    ApplicationStateData firstAppData = storedApps.get(firstAppId);
    Assert.assertEquals(RMAppState.FINISHED, firstAppData.getState());
    secondRm.waitForState(firstAppId, RMAppState.FINISHED);

    // run a second application to completion on the restarted RM
    RMApp secondApp = secondRm.submitApp(200);
    MockAM secondAm = launchAM(secondApp, secondRm, node);
    finishApplicationMaster(secondApp, secondRm, node, secondAm);
    secondRm.drainEvents();

    // the first app has been kicked out of both the rmContext and the state store
    Assert.assertNull(secondRm.getRMContext().getRMApps().get(firstAppId));
    Assert.assertNull(storedApps.get(firstAppId));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 17 with ApplicationStateData

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.

The class TestRMRestart, method testRMRestartOnMaxAppAttempts.

/**
 * Submits two applications, one with maxAppAttempts = 1 and one with
 * maxAppAttempts = -1, restarts the RM and checks that:
 * <ul>
 *   <li>the second application's max attempts equals the globally configured
 *       value;</li>
 *   <li>the first application ends up FAILED (its single attempt was already
 *       used before the restart) and that state is recorded in the store;</li>
 *   <li>the second application is ACCEPTED and has no final state saved yet.</li>
 * </ul>
 */
@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    // Read the global limit back from the configuration instead of hard-coding
    // it, so the assertion below keeps tracking the configured value.
    int globalMaxAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // submit an app with maxAppAttempts equals to 1
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
    // submit an app with maxAppAttempts equals to -1
    RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
    // assert app1 info is saved
    ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    // expected value (what was submitted) first, stored value second
    Assert.assertEquals(app1.getApplicationSubmissionContext().getApplicationId(), appState.getApplicationSubmissionContext().getApplicationId());
    // Allocate the AM
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // Setting AMLivelinessMonitor interval to be 3 Secs.
    conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
    // start new RM
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    // verify that maxAppAttempts is set to global value
    Assert.assertEquals(globalMaxAttempts, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());
    // app1 and app2 are loaded back, but app1 failed because it's
    // hitting max-retry.
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
    rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
    rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
    // app1 failed state is saved in state store. app2 final saved state is not
    // determined yet.
    Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
    Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Test(org.junit.Test)

Example 18 with ApplicationStateData

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.

The class TestRMRestart, method testRMRestartFailAppAttempt.

/**
 * Fails the first attempt of an application, restarts the RM while the second
 * attempt is SCHEDULED, and checks that the recovered application still has
 * both attempts. Failing the second attempt on the restarted RM must then
 * drive the application to FAILED with the configured maximum number of
 * attempts recorded.
 */
@Test(timeout = 60000)
public void testRMRestartFailAppAttempt() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    int maxAttempt = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    ApplicationId applicationId = app0.getApplicationId();
    ApplicationAttemptId appAttemptId1 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(1, appAttemptId1.getAttemptId());
    // fail the 1st app attempt.
    rm1.failApplicationAttempt(appAttemptId1);
    rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED);
    rm1.waitForState(applicationId, RMAppState.ACCEPTED);
    ApplicationAttemptId appAttemptId2 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId);
    rm2.waitForState(applicationId, RMAppState.ACCEPTED);
    // the attempt that failed before the restart must be recovered as FAILED
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    //Wait to make sure the loadedApp0 has the right number of attempts
    //TODO explore a better way than sleeping for a while (YARN-4929)
    Thread.sleep(1000);
    Assert.assertEquals(2, loadedApp0.getAppAttempts().size());
    rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    // fail 2nd attempt
    rm2.failApplicationAttempt(appAttemptId2);
    rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED);
    rm2.waitForState(applicationId, RMAppState.FAILED);
    Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 19 with ApplicationStateData

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.

The class TestRMRestart, method testRMStateStoreDispatcherDrainedOnRMStop.

/**
 * Checks that application save requests still queued on the state store's
 * dispatcher are processed when the RM is stopped.
 *
 * <p>The store used here blocks most store events until {@code serviceStop()}
 * is invoked, so nothing is persisted while the RM is running; after
 * {@code rm1.stop()} every submitted application must be present in the store.
 */
@Test(timeout = 60000)
public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore() {

        // volatile: written by serviceStop() and polled by handleStoreEvent();
        // NOTE(review): presumably these run on different threads - confirm.
        volatile boolean wait = true;

        @Override
        public void serviceStop() throws Exception {
            // Unblock app saving request.
            wait = false;
            super.serviceStop();
        }

        @Override
        protected void handleStoreEvent(RMStateStoreEvent event) {
            // Skip if synchronous updation of DTToken
            if (!(event instanceof RMStateStoreAMRMTokenEvent) && !(event instanceof RMStateStoreRMDTEvent) && !(event instanceof RMStateStoreRMDTMasterKeyEvent)) {
                while (wait) {
                    // busy-wait until serviceStop() clears the flag
                }
            }
            super.handleStoreEvent(event);
        }
    };
    memStore.init(conf);
    // start RM
    final MockRM rm1 = createMockRM(conf, memStore);
    rm1.disableDrainEventsImplicitly();
    rm1.start();
    // create apps.
    final ArrayList<RMApp> appList = new ArrayList<RMApp>();
    final int NUM_APPS = 5;
    for (int i = 0; i < NUM_APPS; i++) {
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false);
        appList.add(app);
        rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
    }
    // all apps's saving request are now enqueued to RMStateStore's dispatcher
    // queue, and will be processed once rm.stop() is called.
    // Nothing exist in state store before stop is called.
    Map<ApplicationId, ApplicationStateData> rmAppState = memStore.getState().getApplicationState();
    // assertEquals reports the actual size on failure, unlike assertTrue(a == b)
    Assert.assertEquals(0, rmAppState.size());
    // stop rm
    rm1.stop();
    // Assert app info is still saved even if stop is called with pending saving
    // request on dispatcher.
    for (RMApp app : appList) {
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        // expected value (what was submitted) first, stored value second
        Assert.assertEquals(app.getApplicationSubmissionContext().getApplicationId(), appState.getApplicationSubmissionContext().getApplicationId());
    }
    Assert.assertEquals(NUM_APPS, rmAppState.size());
}
Also used : RMStateStoreAMRMTokenEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreAMRMTokenEvent) RMStateStoreRMDTMasterKeyEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTMasterKeyEvent) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ArrayList(java.util.ArrayList) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMStateStoreEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) RMStateStoreRMDTEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 20 with ApplicationStateData

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.

The class TestRMRestart, method testRMRestartKilledApp.

/**
 * Kills a running application, confirms the KILLED state of both the
 * application and its attempt is recorded in the state store, then restarts
 * the RM and checks that the application is recovered as KILLED with a single
 * attempt, matching diagnostics and the same original tracking URL.
 */
@Test(timeout = 60000)
public void testRMRestartKilledApp() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    RMState storedState = stateStore.getState();
    Map<ApplicationId, ApplicationStateData> storedApps = storedState.getApplicationState();

    // bring up the first RM and register a node with it
    MockRM firstRm = createMockRM(conf, stateStore);
    firstRm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // submit an application and get its AM running
    RMApp killedApp = firstRm.submitApp(200);
    MockAM appMaster = launchAM(killedApp, firstRm, node);
    ApplicationId killedAppId = killedApp.getApplicationId();

    // kill it and wait for both the app and its attempt to reach KILLED
    firstRm.killApp(killedAppId);
    firstRm.waitForState(killedAppId, RMAppState.KILLED);
    firstRm.waitForState(appMaster.getApplicationAttemptId(), RMAppAttemptState.KILLED);

    // the killed state must have been written to the store
    ApplicationStateData storedApp = storedApps.get(killedAppId);
    Assert.assertEquals(RMAppState.KILLED, storedApp.getState());
    Assert.assertEquals(RMAppAttemptState.KILLED, storedApp.getAttempt(appMaster.getApplicationAttemptId()).getState());
    String originalUrl = killedApp.getCurrentAppAttempt().getOriginalTrackingUrl();
    Assert.assertNotNull(originalUrl);

    // restart: second RM shares the same state store
    MockRM secondRm = createMockRM(conf, stateStore);
    secondRm.start();
    RMApp recoveredApp = secondRm.getRMContext().getRMApps().get(killedAppId);
    secondRm.waitForState(killedAppId, RMAppState.KILLED);
    secondRm.waitForState(appMaster.getApplicationAttemptId(), RMAppAttemptState.KILLED);

    // recovery must not have created another attempt
    Assert.assertEquals(1, recoveredApp.getAppAttempts().size());
    ApplicationReport report = verifyAppReportAfterRMRestart(killedApp, secondRm);
    Assert.assertEquals(killedApp.getDiagnostics().toString(), report.getDiagnostics());
    Assert.assertEquals(originalUrl, recoveredApp.getCurrentAppAttempt().getOriginalTrackingUrl());
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Aggregations

ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)51 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)32 Test (org.junit.Test)29 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)27 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)26 RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)21 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)14 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)13 ApplicationAttemptStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData)12 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)8 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)7 IOException (java.io.IOException)6 ApplicationSubmissionContext (org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext)6 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)6 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)6 ArrayList (java.util.ArrayList)5 YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException)5 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)5 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)5 HashMap (java.util.HashMap)4