Search in sources :

Example 46 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMRestartFailAppAttempt.

/**
 * Checks that a failed application attempt is still visible after the RM is
 * restarted from the same in-memory state store, and that failing the
 * follow-up attempt on the restarted RM moves the application to FAILED.
 *
 * NOTE(review): the last assertion compares the attempt count with
 * {@code maxAttempt} after exactly two attempts have failed, so the test
 * appears to rely on DEFAULT_RM_AM_MAX_ATTEMPTS being 2 -- confirm.
 *
 * @throws Exception if any RM, NM or state-wait operation fails
 */
@Test(timeout = 60000)
public void testRMRestartFailAppAttempt() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    int maxAttempt = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    // The same store instance is handed to both RMs so the second one
    // recovers whatever the first one saved.
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    ApplicationId applicationId = app0.getApplicationId();
    ApplicationAttemptId appAttemptId1 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(1, appAttemptId1.getAttemptId());
    // fail the 1st app attempt.
    rm1.failApplicationAttempt(appAttemptId1);
    rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED);
    // the app goes back to ACCEPTED and a second attempt becomes current
    rm1.waitForState(applicationId, RMAppState.ACCEPTED);
    ApplicationAttemptId appAttemptId2 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId);
    rm2.waitForState(applicationId, RMAppState.ACCEPTED);
    // the first attempt (the one am0 belongs to) must still be FAILED on rm2
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    //Wait to make sure the loadedApp0 has the right number of attempts
    //TODO explore a better way than sleeping for a while (YARN-4929)
    Thread.sleep(1000);
    Assert.assertEquals(2, loadedApp0.getAppAttempts().size());
    rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    // re-read the current attempt id from the app as loaded by rm2
    appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    // fail 2nd attempt
    rm2.failApplicationAttempt(appAttemptId2);
    rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED);
    rm2.waitForState(applicationId, RMAppState.FAILED);
    Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 47 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testSynchronouslyRenewDTOnRecovery.

// Verifies that the delegation token is renewed synchronously so that recovery
// events can be processed before any other external incoming events,
// specifically the ContainerFinished event sent on NM re-registration.
@Test(timeout = 20000)
public void testSynchronouslyRenewDTOnRecovery() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    // Bring up the first RM with one registered node and one running AM.
    MockRM firstRm = createMockRM(conf, stateStore);
    firstRm.start();
    final MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();
    RMApp submittedApp = firstRm.submitApp(200);
    final MockAM firstAm = MockRM.launchAndRegisterAM(submittedApp, firstRm, node);
    // The second RM shares the state store; its ResourceTrackerService is
    // overridden so that the node re-registers from inside serviceStart().
    MockRM restartedRm = new MockRM(conf, stateStore) {

        @Override
        protected ResourceTrackerService createResourceTrackerService() {
            return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {

                @Override
                protected void serviceStart() throws Exception {
                    super.serviceStart();
                    // Right after the tracker service is up, re-register the
                    // node and report container 1 of the first attempt as
                    // COMPLETE, which delivers the container-finished event.
                    node.setResourceTrackerService(getResourceTrackerService());
                    NMContainerStatus finishedContainer = TestRMRestart.createNMContainerStatus(firstAm.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
                    node.registerNode(Arrays.asList(finishedContainer), null);
                }
            };
        }
    };
    try {
        // Restart: recovery runs as part of start().
        restartedRm.start();
        RMApp recoveredApp = restartedRm.getRMContext().getRMApps().get(submittedApp.getApplicationId());
        // Poll for the second attempt: at most 40 sleeps of 200 ms each.
        for (int polls = 0; recoveredApp.getAppAttempts().size() != 2 && polls < 40; polls++) {
            Thread.sleep(200);
        }
        MockAM secondAm = MockRM.launchAndRegisterAM(recoveredApp, restartedRm, node);
        MockRM.finishAMAndVerifyAppState(recoveredApp, restartedRm, node, secondAm);
    } finally {
        restartedRm.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) Test(org.junit.Test)

Example 48 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMStateStoreDispatcherDrainedOnRMStop.

/**
 * Verifies that application save requests still pending in the state store
 * when the RM is stopped are processed as part of the stop.
 *
 * The store used here holds every store event, except the three token-related
 * event types checked below, until {@code serviceStop()} is called. The test
 * asserts that nothing is stored before {@code rm1.stop()} and that all
 * submitted apps are present afterwards.
 *
 * @throws Exception if any RM or state-wait operation fails
 */
@Test(timeout = 60000)
public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore() {

        // While true, handleStoreEvent spins instead of storing; volatile so
        // the write in serviceStop() is seen by the spinning thread.
        volatile boolean wait = true;

        @Override
        public void serviceStop() throws Exception {
            // Unblock app saving request.
            wait = false;
            super.serviceStop();
        }

        @Override
        protected void handleStoreEvent(RMStateStoreEvent event) {
            // Do not hold back AMRM-token, RM delegation-token and master-key
            // events (the original note says these are updated synchronously).
            if (!(event instanceof RMStateStoreAMRMTokenEvent) && !(event instanceof RMStateStoreRMDTEvent) && !(event instanceof RMStateStoreRMDTMasterKeyEvent)) {
                while (wait) {
                    // busy-wait until serviceStop() clears the flag
                }
            }
            super.handleStoreEvent(event);
        }
    };
    memStore.init(conf);
    // start RM
    final MockRM rm1 = createMockRM(conf, memStore);
    rm1.disableDrainEventsImplicitly();
    rm1.start();
    // create apps.
    final ArrayList<RMApp> appList = new ArrayList<RMApp>();
    final int NUM_APPS = 5;
    for (int i = 0; i < NUM_APPS; i++) {
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false);
        appList.add(app);
        rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
    }
    // all apps's saving request are now enqueued to RMStateStore's dispatcher
    // queue, and will be processed once rm.stop() is called.
    // Nothing exist in state store before stop is called.
    Map<ApplicationId, ApplicationStateData> rmAppState = memStore.getState().getApplicationState();
    Assert.assertEquals(0, rmAppState.size());
    // stop rm
    rm1.stop();
    // Assert app info is still saved even if stop is called with pending
    // saving request on dispatcher.
    for (RMApp app : appList) {
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        // expected: the id on the submitted app; actual: the id in the store
        Assert.assertEquals(app.getApplicationSubmissionContext().getApplicationId(), appState.getApplicationSubmissionContext().getApplicationId());
    }
    Assert.assertEquals(NUM_APPS, rmAppState.size());
}
Also used : RMStateStoreAMRMTokenEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreAMRMTokenEvent) RMStateStoreRMDTMasterKeyEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTMasterKeyEvent) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) ArrayList(java.util.ArrayList) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMStateStoreEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) RMStateStoreRMDTEvent(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreRMDTEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 49 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMRestartKilledApp.

/**
 * Checks that a killed application is recovered as KILLED after an RM
 * restart: its state and its attempt's state are saved in the store, no new
 * attempt is created, and diagnostics and original tracking URL are kept.
 *
 * @throws Exception if any RM, NM or state-wait operation fails
 */
@Test(timeout = 60000)
public void testRMRestartKilledApp() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore stateStore = new MemoryRMStateStore();
    stateStore.init(conf);
    // View of the applications held by the store, read again after the kill.
    Map<ApplicationId, ApplicationStateData> storedApps = stateStore.getState().getApplicationState();

    // First RM with a single registered node.
    MockRM firstRm = createMockRM(conf, stateStore);
    firstRm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // Submit an application and get its AM running.
    RMApp killedApp = firstRm.submitApp(200);
    MockAM killedAm = launchAM(killedApp, firstRm, node);
    ApplicationId appId = killedApp.getApplicationId();

    // Kill it and wait until both app and attempt report KILLED.
    firstRm.killApp(appId);
    firstRm.waitForState(appId, RMAppState.KILLED);
    firstRm.waitForState(killedAm.getApplicationAttemptId(), RMAppAttemptState.KILLED);

    // The KILLED state must have reached the store for app and attempt.
    ApplicationStateData storedApp = storedApps.get(appId);
    Assert.assertEquals(RMAppState.KILLED, storedApp.getState());
    Assert.assertEquals(RMAppAttemptState.KILLED, storedApp.getAttempt(killedAm.getApplicationAttemptId()).getState());
    String originalTrackingUrl = killedApp.getCurrentAppAttempt().getOriginalTrackingUrl();
    Assert.assertNotNull(originalTrackingUrl);

    // Second RM recovers from the same store.
    MockRM secondRm = createMockRM(conf, stateStore);
    secondRm.start();
    RMApp recoveredApp = secondRm.getRMContext().getRMApps().get(appId);
    secondRm.waitForState(appId, RMAppState.KILLED);
    secondRm.waitForState(killedAm.getApplicationAttemptId(), RMAppAttemptState.KILLED);

    // Recovery must not have added another attempt.
    Assert.assertEquals(1, recoveredApp.getAppAttempts().size());
    ApplicationReport report = verifyAppReportAfterRMRestart(killedApp, secondRm);
    Assert.assertEquals(killedApp.getDiagnostics().toString(), report.getDiagnostics());
    Assert.assertEquals(originalTrackingUrl, recoveredApp.getCurrentAppAttempt().getOriginalTrackingUrl());
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 50 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMHA method testFailoverClearsRMContext.

/**
 * Checks that when a transition to active fails, the RM context and cluster
 * metrics are empty afterwards.
 *
 * A first RM is transitioned to active and populated. A second RM, sharing
 * the same state store, is built with a ResourceTrackerService whose
 * {@code serviceStart()} always throws, so its transition to active must
 * fail. The test then asserts zeroed metrics and empty node and app maps.
 *
 * @throws Exception if any RM or admin-service operation fails unexpectedly
 */
@Test
public void testFailoverClearsRMContext() throws Exception {
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    configuration.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    Configuration conf = new YarnConfiguration(configuration);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // 1. start RM
    rm = new MockRM(conf, memStore);
    rm.init(conf);
    rm.start();
    StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    checkMonitorHealth();
    checkStandbyRMFunctionality();
    // 2. Transition to active
    rm.adminService.transitionToActive(requestInfo);
    checkMonitorHealth();
    checkActiveRMFunctionality();
    verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
    assertEquals(1, rm.getRMContext().getRMNodes().size());
    assertEquals(1, rm.getRMContext().getRMApps().size());
    // 3. Create new RM
    rm = new MockRM(conf, memStore) {

        @Override
        protected ResourceTrackerService createResourceTrackerService() {
            return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {

                @Override
                protected void serviceStart() throws Exception {
                    // Force the start of this service to fail.
                    throw new Exception("ResourceTracker service failed");
                }
            };
        }
    };
    rm.init(conf);
    rm.start();
    checkMonitorHealth();
    checkStandbyRMFunctionality();
    // 4. Try Transition to active, throw exception
    try {
        rm.adminService.transitionToActive(requestInfo);
        // AssertionError is not an Exception, so the catch below does not
        // swallow this failure.
        Assert.fail("Transitioned to Active should throw exception.");
    } catch (Exception e) {
        // The exception message must contain the expected text. Checking the
        // other way round would also accept an empty or partial message.
        String message = e.getMessage();
        assertTrue("Unexpected failure message: " + message, message != null && message.contains("Error when transitioning to Active mode"));
    }
    // 5. Clears the metrics
    verifyClusterMetrics(0, 0, 0, 0, 0, 0);
    assertEquals(0, rm.getRMContext().getRMNodes().size());
    assertEquals(0, rm.getRMContext().getRMApps().size());
}
Also used : MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) HealthCheckFailedException(org.apache.hadoop.ha.HealthCheckFailedException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) JSONException(org.codehaus.jettison.json.JSONException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Aggregations

MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)84 Test (org.junit.Test)81 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)68 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)27 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)23 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)22 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)21 RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)20 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)16 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)16 IOException (java.io.IOException)15 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)14 Configuration (org.apache.hadoop.conf.Configuration)12 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)10 ArrayList (java.util.ArrayList)9 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)9