Search in sources :

Example 51 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMHA method testFailoverWhenTransitionToActiveThrowException.

@Test(timeout = 30000)
public void testFailoverWhenTransitionToActiveThrowException() throws Exception {
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    Configuration conf = new YarnConfiguration(configuration);
    MemoryRMStateStore memStore = new MemoryRMStateStore() {

        int count = 0;

        @Override
        public synchronized void startInternal() throws Exception {
            // first time throw exception
            if (count++ == 0) {
                throw new Exception("Session Expired");
            }
        }
    };
    // start RM
    memStore.init(conf);
    rm = new MockRM(conf, memStore);
    rm.init(conf);
    StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
    assertFalse("RM is ready to become active before being started", rm.adminService.getServiceStatus().isReadyToBecomeActive());
    checkMonitorHealth();
    rm.start();
    checkMonitorHealth();
    checkStandbyRMFunctionality();
    // 2. Try Transition to active, throw exception
    try {
        rm.adminService.transitionToActive(requestInfo);
        Assert.fail("Transitioned to Active should throw exception.");
    } catch (Exception e) {
        assertTrue("Error when transitioning to Active mode".contains(e.getMessage()));
    }
    // 3. Transition to active, success
    rm.adminService.transitionToActive(requestInfo);
    checkMonitorHealth();
    checkActiveRMFunctionality();
}
Also used : MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) HealthCheckFailedException(org.apache.hadoop.ha.HealthCheckFailedException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) JSONException(org.codehaus.jettison.json.JSONException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Example 52 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testDelegationTokenRestoredInDelegationTokenRenewer.

@Test(timeout = 60000)
public void testDelegationTokenRestoredInDelegationTokenRenewer() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    UserGroupInformation.setConfiguration(conf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = new TestSecurityMockRM(conf, memStore);
    rm1.start();
    HashSet<Token<RMDelegationTokenIdentifier>> tokenSet = new HashSet<Token<RMDelegationTokenIdentifier>>();
    // create an empty credential
    Credentials ts = new Credentials();
    // create tokens and add into credential
    Text userText1 = new Text("user1");
    RMDelegationTokenIdentifier dtId1 = new RMDelegationTokenIdentifier(userText1, new Text("renewer1"), userText1);
    Token<RMDelegationTokenIdentifier> token1 = new Token<RMDelegationTokenIdentifier>(dtId1, rm1.getRMContext().getRMDelegationTokenSecretManager());
    SecurityUtil.setTokenService(token1, rmAddr);
    ts.addToken(userText1, token1);
    tokenSet.add(token1);
    Text userText2 = new Text("user2");
    RMDelegationTokenIdentifier dtId2 = new RMDelegationTokenIdentifier(userText2, new Text("renewer2"), userText2);
    Token<RMDelegationTokenIdentifier> token2 = new Token<RMDelegationTokenIdentifier>(dtId2, rm1.getRMContext().getRMDelegationTokenSecretManager());
    SecurityUtil.setTokenService(token2, rmAddr);
    ts.addToken(userText2, token2);
    tokenSet.add(token2);
    // submit an app with customized credential
    RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, ts);
    // assert app info is saved
    ApplicationStateData appState = rmAppState.get(app.getApplicationId());
    Assert.assertNotNull(appState);
    // assert delegation tokens exist in rm1 DelegationTokenRenewr
    Assert.assertEquals(tokenSet, rm1.getRMContext().getDelegationTokenRenewer().getDelegationTokens());
    // assert delegation tokens are saved
    DataOutputBuffer dob = new DataOutputBuffer();
    ts.writeTokenStorageToStream(dob);
    ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    securityTokens.rewind();
    Assert.assertEquals(securityTokens, appState.getApplicationSubmissionContext().getAMContainerSpec().getTokens());
    // start new RM
    MockRM rm2 = new TestSecurityMockRM(conf, memStore);
    rm2.start();
    // Need to wait for a while as now token renewal happens on another thread
    // and is asynchronous in nature.
    waitForTokensToBeRenewed(rm2, tokenSet);
    // verify tokens are properly populated back to rm2 DelegationTokenRenewer
    Assert.assertEquals(tokenSet, rm2.getRMContext().getDelegationTokenRenewer().getDelegationTokens());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Token(org.apache.hadoop.security.token.Token) Text(org.apache.hadoop.io.Text) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMDelegationTokenIdentifier(org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier) ByteBuffer(java.nio.ByteBuffer) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Credentials(org.apache.hadoop.security.Credentials) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 53 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testAppAttemptTokensRestoredOnRMRestart.

@Test(timeout = 60000)
public void testAppAttemptTokensRestoredOnRMRestart() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
    conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
    UserGroupInformation.setConfiguration(conf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    MockRM rm1 = new TestSecurityMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("0.0.0.0:4321", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // submit an app
    RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), "default");
    // assert app info is saved
    ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
    Assert.assertNotNull(appState);
    // Allocate the AM
    nm1.nodeHeartbeat(true);
    RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
    // assert attempt info is saved
    ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
    Assert.assertNotNull(attemptState);
    Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
    // the clientTokenMasterKey that are generated when
    // RMAppAttempt is created,
    byte[] clientTokenMasterKey = attempt1.getClientTokenMasterKey().getEncoded();
    // assert application credentials are saved
    Credentials savedCredentials = attemptState.getAppAttemptTokens();
    Assert.assertArrayEquals("client token master key not saved", clientTokenMasterKey, savedCredentials.getSecretKey(RMStateStore.AM_CLIENT_TOKEN_MASTER_KEY_NAME));
    // start new RM
    MockRM rm2 = new TestSecurityMockRM(conf, memStore);
    rm2.start();
    RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    RMAppAttempt loadedAttempt1 = loadedApp1.getRMAppAttempt(attemptId1);
    // assert loaded attempt recovered
    Assert.assertNotNull(loadedAttempt1);
    // assert client token master key is recovered back to api-versioned
    // client token master key
    Assert.assertEquals("client token master key not restored", attempt1.getClientTokenMasterKey(), loadedAttempt1.getClientTokenMasterKey());
    // assert ClientTokenSecretManager also knows about the key
    Assert.assertArrayEquals(clientTokenMasterKey, rm2.getClientToAMTokenSecretManager().getMasterKey(attemptId1).getEncoded());
    // assert AMRMTokenSecretManager also knows about the AMRMToken password
    Token<AMRMTokenIdentifier> amrmToken = loadedAttempt1.getAMRMToken();
    Assert.assertArrayEquals(amrmToken.getPassword(), rm2.getRMContext().getAMRMTokenSecretManager().retrievePassword(amrmToken.decodeIdentifier()));
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AMRMTokenIdentifier(org.apache.hadoop.yarn.security.AMRMTokenIdentifier) ApplicationAccessType(org.apache.hadoop.yarn.api.records.ApplicationAccessType) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) Credentials(org.apache.hadoop.security.Credentials) Test(org.junit.Test)

Example 54 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMRestartSucceededApp.

@Test(timeout = 60000)
public void testRMRestartSucceededApp() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // start RM
    MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();
    // create an app and finish the app.
    RMApp app0 = rm1.submitApp(200);
    MockAM am0 = launchAM(app0, rm1, nm1);
    // unregister am
    FinishApplicationMasterRequest req = FinishApplicationMasterRequest.newInstance(FinalApplicationStatus.SUCCEEDED, "diagnostics", "trackingUrl");
    finishApplicationMaster(app0, rm1, nm1, am0, req);
    // check the state store about the unregistered info.
    ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
    ApplicationAttemptStateData attemptState0 = appState.getAttempt(am0.getApplicationAttemptId());
    Assert.assertEquals("diagnostics", attemptState0.getDiagnostics());
    Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, attemptState0.getFinalApplicationStatus());
    Assert.assertEquals("trackingUrl", attemptState0.getFinalTrackingUrl());
    Assert.assertEquals(app0.getFinishTime(), appState.getFinishTime());
    // restart rm
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    // verify application report returns the same app info as the app info
    // before RM restarts.
    ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
    Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
    Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
}
Also used : ApplicationReport(org.apache.hadoop.yarn.api.records.ApplicationReport) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ApplicationAttemptStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData) FinishApplicationMasterRequest(org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest) Test(org.junit.Test)

Example 55 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestFairScheduler method testRefreshQueuesWhenRMHA.

@Test(timeout = 120000)
public void testRefreshQueuesWhenRMHA() throws Exception {
    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
    conf.setBoolean(FairSchedulerConfiguration.ALLOW_UNDECLARED_POOLS, false);
    conf.setBoolean(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, false);
    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
    HAServiceProtocol.StateChangeRequestInfo requestInfo = new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    // 1. start a standby RM, file 'ALLOC_FILE' is empty, so there is no queues
    MockRM rm1 = new MockRM(conf, null);
    rm1.init(conf);
    rm1.start();
    rm1.getAdminService().transitionToStandby(requestInfo);
    // 2. add a new queue "test_queue"
    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
    out.println("<?xml version=\"1.0\"?>");
    out.println("<allocations>");
    out.println("<queue name=\"test_queue\">");
    out.println("  <maxRunningApps>3</maxRunningApps>");
    out.println("</queue>");
    out.println("</allocations>");
    out.close();
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // 3. start a active RM
    MockRM rm2 = new MockRM(conf, memStore);
    rm2.init(conf);
    rm2.start();
    MockNM nm = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
    nm.registerNode();
    rm2.getAdminService().transitionToActive(requestInfo);
    // 4. submit a app to the new added queue "test_queue"
    RMApp app = rm2.submitApp(200, "test_app", "user", null, "test_queue");
    RMAppAttempt attempt0 = app.getCurrentAppAttempt();
    nm.nodeHeartbeat(true);
    MockAM am0 = rm2.sendAMLaunched(attempt0.getAppAttemptId());
    am0.registerAppAttempt();
    assertEquals("root.test_queue", app.getQueue());
    // 5. Transit rm1 to active, recover app
    ((RMContextImpl) rm1.getRMContext()).setStateStore(memStore);
    rm1.getAdminService().transitionToActive(requestInfo);
    rm1.drainEvents();
    assertEquals(1, rm1.getRMContext().getRMApps().size());
    RMApp recoveredApp = rm1.getRMContext().getRMApps().values().iterator().next();
    assertEquals("root.test_queue", recoveredApp.getQueue());
    rm1.stop();
    rm2.stop();
}
Also used : MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) HAServiceProtocol(org.apache.hadoop.ha.HAServiceProtocol) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) FileWriter(java.io.FileWriter) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) RMContextImpl(org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl) PrintWriter(java.io.PrintWriter) Test(org.junit.Test)

Aggregations

MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)84 Test (org.junit.Test)81 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)68 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)27 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)23 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)22 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)21 RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)20 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)16 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)16 IOException (java.io.IOException)15 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)14 Configuration (org.apache.hadoop.conf.Configuration)12 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)10 ArrayList (java.util.ArrayList)9 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)9