Search in sources :

Example 11 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testRMRestartTimelineCollectorContext.

@Test(timeout = 60000)
public void testRMRestartTimelineCollectorContext() throws Exception {
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = null;
    MockRM rm2 = null;
    try {
        rm1 = createMockRM(conf, memStore);
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
        nm1.registerNode();
        // submit an app.
        RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
        // Check if app info has been saved.
        ApplicationStateData appState = rmAppState.get(app.getApplicationId());
        Assert.assertNotNull(appState);
        Assert.assertEquals(0, appState.getAttemptCount());
        Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app.getApplicationSubmissionContext().getApplicationId());
        // Allocate the AM
        nm1.nodeHeartbeat(true);
        RMAppAttempt attempt = app.getCurrentAppAttempt();
        ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
        rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
        ApplicationId appId = app.getApplicationId();
        TimelineCollectorContext contextBeforeRestart = rm1.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
        // Restart RM.
        rm2 = createMockRM(conf, memStore);
        rm2.start();
        Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
        rm2.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
        TimelineCollectorContext contextAfterRestart = rm2.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
        Assert.assertEquals("Collector contexts for an app should be same " + "across restarts", contextBeforeRestart, contextAfterRestart);
    } finally {
        conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, false);
        if (rm1 != null) {
            rm1.close();
        }
        if (rm2 != null) {
            rm2.close();
        }
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) TimelineCollectorContext(org.apache.hadoop.yarn.server.timelineservice.collector.TimelineCollectorContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 12 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testRMDelegationTokenRestoredOnRMRestart.

@Test(timeout = 60000)
public void testRMDelegationTokenRestoredOnRMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8032");
    UserGroupInformation.setConfiguration(conf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    Map<RMDelegationTokenIdentifier, Long> rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
    Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
    MockRM rm1 = new TestSecurityMockRM(conf, memStore);
    rm1.start();
    // create an empty credential
    Credentials ts = new Credentials();
    // request a token and add into credential
    GetDelegationTokenRequest request1 = GetDelegationTokenRequest.newInstance("renewer1");
    UserGroupInformation.getCurrentUser().setAuthenticationMethod(AuthMethod.KERBEROS);
    GetDelegationTokenResponse response1 = rm1.getClientRMService().getDelegationToken(request1);
    org.apache.hadoop.yarn.api.records.Token delegationToken1 = response1.getRMDelegationToken();
    Token<RMDelegationTokenIdentifier> token1 = ConverterUtils.convertFromYarn(delegationToken1, rmAddr);
    RMDelegationTokenIdentifier dtId1 = token1.decodeIdentifier();
    HashSet<RMDelegationTokenIdentifier> tokenIdentSet = new HashSet<RMDelegationTokenIdentifier>();
    ts.addToken(token1.getService(), token1);
    tokenIdentSet.add(dtId1);
    // submit an app with customized credential
    RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, ts);
    // assert app info is saved
    ApplicationStateData appState = rmAppState.get(app.getApplicationId());
    Assert.assertNotNull(appState);
    // assert all master keys are saved
    Set<DelegationKey> allKeysRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys();
    Assert.assertEquals(allKeysRM1, rmDTMasterKeyState);
    // assert all tokens are saved
    Map<RMDelegationTokenIdentifier, Long> allTokensRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertEquals(tokenIdentSet, allTokensRM1.keySet());
    Assert.assertEquals(allTokensRM1, rmDTState);
    // assert sequence number is saved
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rmState.getRMDTSecretManagerState().getDTSequenceNumber());
    // request one more token
    GetDelegationTokenRequest request2 = GetDelegationTokenRequest.newInstance("renewer2");
    GetDelegationTokenResponse response2 = rm1.getClientRMService().getDelegationToken(request2);
    org.apache.hadoop.yarn.api.records.Token delegationToken2 = response2.getRMDelegationToken();
    Token<RMDelegationTokenIdentifier> token2 = ConverterUtils.convertFromYarn(delegationToken2, rmAddr);
    RMDelegationTokenIdentifier dtId2 = token2.decodeIdentifier();
    // cancel token2
    try {
        rm1.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token2, UserGroupInformation.getCurrentUser().getUserName());
    } catch (Exception e) {
        Assert.fail();
    }
    // Assert the token which has the latest delegationTokenSequenceNumber is removed
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), dtId2.getSequenceNumber());
    Assert.assertFalse(rmDTState.containsKey(dtId2));
    // start new RM
    MockRM rm2 = new TestSecurityMockRM(conf, memStore);
    rm2.start();
    // assert master keys and tokens are populated back to DTSecretManager
    Map<RMDelegationTokenIdentifier, Long> allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertEquals(allTokensRM2.keySet(), allTokensRM1.keySet());
    // rm2 has its own master keys when it starts, we use containsAll here
    Assert.assertTrue(rm2.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys().containsAll(allKeysRM1));
    // assert sequenceNumber is properly recovered,
    // even though the token which has max sequenceNumber is not stored
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rm2.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber());
    // renewDate before renewing
    Long renewDateBeforeRenew = allTokensRM2.get(dtId1);
    try {
        // Sleep for one millisecond to make sure renewDataAfterRenew is greater
        Thread.sleep(1);
        // renew recovered token
        rm2.getRMContext().getRMDelegationTokenSecretManager().renewToken(token1, "renewer1");
    } catch (Exception e) {
        Assert.fail();
    }
    allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Long renewDateAfterRenew = allTokensRM2.get(dtId1);
    // assert token is renewed
    Assert.assertTrue(renewDateAfterRenew > renewDateBeforeRenew);
    // assert new token is added into state store
    Assert.assertTrue(rmDTState.containsValue(renewDateAfterRenew));
    // assert old token is removed from state store
    Assert.assertFalse(rmDTState.containsValue(renewDateBeforeRenew));
    try {
        rm2.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token1, UserGroupInformation.getCurrentUser().getUserName());
    } catch (Exception e) {
        Assert.fail();
    }
    // assert token is removed from state after its cancelled
    allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertFalse(allTokensRM2.containsKey(dtId1));
    Assert.assertFalse(rmDTState.containsKey(dtId1));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) GetDelegationTokenResponse(org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse) GetDelegationTokenRequest(org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) HashSet(java.util.HashSet) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMDelegationTokenIdentifier(org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) DelegationKey(org.apache.hadoop.security.token.delegation.DelegationKey) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) Matchers.anyLong(org.mockito.Matchers.anyLong) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 13 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testFinishedAppRemovalAfterRMRestart.

@Test(timeout = 60000)
public void testFinishedAppRemovalAfterRMRestart() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);
    memStore.init(conf);
    RMState rmState = memStore.getState();
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create an app and finish the app.
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    finishApplicationMaster(app0, rm1, nm1, am0);
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    nm1 = rm2.registerNode("127.0.0.1:1234", 15120);
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // app0 exits in both state store and rmContext
    Assert.assertEquals(RMAppState.FINISHED, rmAppState.get(app0.getApplicationId()).getState());
    rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
    // create one more app and finish the app.
    RMApp app1 = rm2.submitApp(200);
    MockAM am1 = launchAM(app1, rm2, nm1);
    finishApplicationMaster(app1, rm2, nm1, am1);
    rm2.drainEvents();
    // the first app0 get kicked out from both rmContext and state store
    Assert.assertNull(rm2.getRMContext().getRMApps().get(app0.getApplicationId()));
    Assert.assertNull(rmAppState.get(app0.getApplicationId()));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 14 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testRMRestartOnMaxAppAttempts.

@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // submit an app with maxAppAttempts equals to 1
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
    // submit an app with maxAppAttempts equals to -1
    RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
    // assert app1 info is saved
    ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    Assert.assertEquals(0, appState.getAttemptCount());
    Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
    // Allocate the AM
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    Assert.assertEquals(1, appState.getAttemptCount());
    ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // Setting AMLivelinessMonitor interval to be 3 Secs.
    conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
    // start new RM   
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    // verify that maxAppAttempts is set to global value
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());
    // app1 and app2 are loaded back, but app1 failed because it's
    // hitting max-retry.
    Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
    rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
    rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
    // app1 failed state is saved in state store. app2 final saved state is not
    // determined yet.
    Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
    Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Test(org.junit.Test)

Example 15 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testRMRestartFailAppAttempt.

@Test(timeout = 60000)
public void testRMRestartFailAppAttempt() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    int maxAttempt = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create app and launch the AM
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    ApplicationId applicationId = app0.getApplicationId();
    ApplicationAttemptId appAttemptId1 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(1, appAttemptId1.getAttemptId());
    // fail the 1st app attempt.
    rm1.failApplicationAttempt(appAttemptId1);
    rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED);
    rm1.waitForState(applicationId, RMAppState.ACCEPTED);
    ApplicationAttemptId appAttemptId2 = app0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId);
    rm2.waitForState(applicationId, RMAppState.ACCEPTED);
    rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    //Wait to make sure the loadedApp0 has the right number of attempts
    //TODO explore a better way than sleeping for a while (YARN-4929)
    Thread.sleep(1000);
    Assert.assertEquals(2, loadedApp0.getAppAttempts().size());
    rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
    appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId();
    Assert.assertEquals(2, appAttemptId2.getAttemptId());
    // fail 2nd attempt
    rm2.failApplicationAttempt(appAttemptId2);
    rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED);
    rm2.waitForState(applicationId, RMAppState.FAILED);
    Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Aggregations

RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)31 Test (org.junit.Test)24 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)21 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)21 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)20 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)18 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)8 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)6 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)6 ReservationId (org.apache.hadoop.yarn.api.records.ReservationId)5 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)5 Configuration (org.apache.hadoop.conf.Configuration)4 ReservationAllocationStateProto (org.apache.hadoop.yarn.proto.YarnProtos.ReservationAllocationStateProto)4 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)4 ApplicationAttemptStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 Credentials (org.apache.hadoop.security.Credentials)3 DelegationKey (org.apache.hadoop.security.token.delegation.DelegationKey)3 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)3