use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMHA method testFailoverWhenTransitionToActiveThrowException.
@Test(timeout = 30000)
public void testFailoverWhenTransitionToActiveThrowException() throws Exception {
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
Configuration conf = new YarnConfiguration(configuration);
MemoryRMStateStore memStore = new MemoryRMStateStore() {
int count = 0;
@Override
public synchronized void startInternal() throws Exception {
// first time throw exception
if (count++ == 0) {
throw new Exception("Session Expired");
}
}
};
// start RM
memStore.init(conf);
rm = new MockRM(conf, memStore);
rm.init(conf);
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
assertFalse("RM is ready to become active before being started", rm.adminService.getServiceStatus().isReadyToBecomeActive());
checkMonitorHealth();
rm.start();
checkMonitorHealth();
checkStandbyRMFunctionality();
// 2. Try Transition to active, throw exception
try {
rm.adminService.transitionToActive(requestInfo);
Assert.fail("Transitioned to Active should throw exception.");
} catch (Exception e) {
assertTrue("Error when transitioning to Active mode".contains(e.getMessage()));
}
// 3. Transition to active, success
rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testDelegationTokenRestoredInDelegationTokenRenewer.
@Test(timeout = 60000)
public void testDelegationTokenRestoredInDelegationTokenRenewer() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = new TestSecurityMockRM(conf, memStore);
rm1.start();
HashSet<Token<RMDelegationTokenIdentifier>> tokenSet = new HashSet<Token<RMDelegationTokenIdentifier>>();
// create an empty credential
Credentials ts = new Credentials();
// create tokens and add into credential
Text userText1 = new Text("user1");
RMDelegationTokenIdentifier dtId1 = new RMDelegationTokenIdentifier(userText1, new Text("renewer1"), userText1);
Token<RMDelegationTokenIdentifier> token1 = new Token<RMDelegationTokenIdentifier>(dtId1, rm1.getRMContext().getRMDelegationTokenSecretManager());
SecurityUtil.setTokenService(token1, rmAddr);
ts.addToken(userText1, token1);
tokenSet.add(token1);
Text userText2 = new Text("user2");
RMDelegationTokenIdentifier dtId2 = new RMDelegationTokenIdentifier(userText2, new Text("renewer2"), userText2);
Token<RMDelegationTokenIdentifier> token2 = new Token<RMDelegationTokenIdentifier>(dtId2, rm1.getRMContext().getRMDelegationTokenSecretManager());
SecurityUtil.setTokenService(token2, rmAddr);
ts.addToken(userText2, token2);
tokenSet.add(token2);
// submit an app with customized credential
RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, ts);
// assert app info is saved
ApplicationStateData appState = rmAppState.get(app.getApplicationId());
Assert.assertNotNull(appState);
// assert delegation tokens exist in rm1 DelegationTokenRenewr
Assert.assertEquals(tokenSet, rm1.getRMContext().getDelegationTokenRenewer().getDelegationTokens());
// assert delegation tokens are saved
DataOutputBuffer dob = new DataOutputBuffer();
ts.writeTokenStorageToStream(dob);
ByteBuffer securityTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
securityTokens.rewind();
Assert.assertEquals(securityTokens, appState.getApplicationSubmissionContext().getAMContainerSpec().getTokens());
// start new RM
MockRM rm2 = new TestSecurityMockRM(conf, memStore);
rm2.start();
// Need to wait for a while as now token renewal happens on another thread
// and is asynchronous in nature.
waitForTokensToBeRenewed(rm2, tokenSet);
// verify tokens are properly populated back to rm2 DelegationTokenRenewer
Assert.assertEquals(tokenSet, rm2.getRMContext().getDelegationTokenRenewer().getDelegationTokens());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testAppAttemptTokensRestoredOnRMRestart.
@Test(timeout = 60000)
public void testAppAttemptTokensRestoredOnRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = new TestSecurityMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("0.0.0.0:4321", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// submit an app
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), "default");
// assert app info is saved
ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
Assert.assertNotNull(appState);
// Allocate the AM
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
// assert attempt info is saved
ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// the clientTokenMasterKey that are generated when
// RMAppAttempt is created,
byte[] clientTokenMasterKey = attempt1.getClientTokenMasterKey().getEncoded();
// assert application credentials are saved
Credentials savedCredentials = attemptState.getAppAttemptTokens();
Assert.assertArrayEquals("client token master key not saved", clientTokenMasterKey, savedCredentials.getSecretKey(RMStateStore.AM_CLIENT_TOKEN_MASTER_KEY_NAME));
// start new RM
MockRM rm2 = new TestSecurityMockRM(conf, memStore);
rm2.start();
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
RMAppAttempt loadedAttempt1 = loadedApp1.getRMAppAttempt(attemptId1);
// assert loaded attempt recovered
Assert.assertNotNull(loadedAttempt1);
// assert client token master key is recovered back to api-versioned
// client token master key
Assert.assertEquals("client token master key not restored", attempt1.getClientTokenMasterKey(), loadedAttempt1.getClientTokenMasterKey());
// assert ClientTokenSecretManager also knows about the key
Assert.assertArrayEquals(clientTokenMasterKey, rm2.getClientToAMTokenSecretManager().getMasterKey(attemptId1).getEncoded());
// assert AMRMTokenSecretManager also knows about the AMRMToken password
Token<AMRMTokenIdentifier> amrmToken = loadedAttempt1.getAMRMToken();
Assert.assertArrayEquals(amrmToken.getPassword(), rm2.getRMContext().getAMRMTokenSecretManager().retrievePassword(amrmToken.decodeIdentifier()));
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartSucceededApp.
@Test(timeout = 60000)
public void testRMRestartSucceededApp() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create an app and finish the app.
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
// unregister am
FinishApplicationMasterRequest req = FinishApplicationMasterRequest.newInstance(FinalApplicationStatus.SUCCEEDED, "diagnostics", "trackingUrl");
finishApplicationMaster(app0, rm1, nm1, am0, req);
// check the state store about the unregistered info.
ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
ApplicationAttemptStateData attemptState0 = appState.getAttempt(am0.getApplicationAttemptId());
Assert.assertEquals("diagnostics", attemptState0.getDiagnostics());
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, attemptState0.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", attemptState0.getFinalTrackingUrl());
Assert.assertEquals(app0.getFinishTime(), appState.getFinishTime());
// restart rm
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify application report returns the same app info as the app info
// before RM restarts.
ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestFairScheduler method testRefreshQueuesWhenRMHA.
@Test(timeout = 120000)
public void testRefreshQueuesWhenRMHA() throws Exception {
conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
conf.setBoolean(FairSchedulerConfiguration.ALLOW_UNDECLARED_POOLS, false);
conf.setBoolean(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, false);
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
HAServiceProtocol.StateChangeRequestInfo requestInfo = new HAServiceProtocol.StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
// 1. start a standby RM, file 'ALLOC_FILE' is empty, so there is no queues
MockRM rm1 = new MockRM(conf, null);
rm1.init(conf);
rm1.start();
rm1.getAdminService().transitionToStandby(requestInfo);
// 2. add a new queue "test_queue"
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
out.println("<?xml version=\"1.0\"?>");
out.println("<allocations>");
out.println("<queue name=\"test_queue\">");
out.println(" <maxRunningApps>3</maxRunningApps>");
out.println("</queue>");
out.println("</allocations>");
out.close();
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// 3. start a active RM
MockRM rm2 = new MockRM(conf, memStore);
rm2.init(conf);
rm2.start();
MockNM nm = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
nm.registerNode();
rm2.getAdminService().transitionToActive(requestInfo);
// 4. submit a app to the new added queue "test_queue"
RMApp app = rm2.submitApp(200, "test_app", "user", null, "test_queue");
RMAppAttempt attempt0 = app.getCurrentAppAttempt();
nm.nodeHeartbeat(true);
MockAM am0 = rm2.sendAMLaunched(attempt0.getAppAttemptId());
am0.registerAppAttempt();
assertEquals("root.test_queue", app.getQueue());
// 5. Transit rm1 to active, recover app
((RMContextImpl) rm1.getRMContext()).setStateStore(memStore);
rm1.getAdminService().transitionToActive(requestInfo);
rm1.drainEvents();
assertEquals(1, rm1.getRMContext().getRMApps().size());
RMApp recoveredApp = rm1.getRMContext().getRMApps().values().iterator().next();
assertEquals("root.test_queue", recoveredApp.getQueue());
rm1.stop();
rm2.stop();
}
Aggregations