use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartTimelineCollectorContext.
@Test(timeout = 60000)
public void testRMRestartTimelineCollectorContext() throws Exception {
conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = null;
MockRM rm2 = null;
try {
rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// submit an app.
RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
// Check if app info has been saved.
ApplicationStateData appState = rmAppState.get(app.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app.getApplicationSubmissionContext().getApplicationId());
// Allocate the AM
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
ApplicationId appId = app.getApplicationId();
TimelineCollectorContext contextBeforeRestart = rm1.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
// Restart RM.
rm2 = createMockRM(conf, memStore);
rm2.start();
Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
rm2.waitForState(app.getApplicationId(), RMAppState.ACCEPTED);
TimelineCollectorContext contextAfterRestart = rm2.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
Assert.assertEquals("Collector contexts for an app should be same " + "across restarts", contextBeforeRestart, contextAfterRestart);
} finally {
conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, false);
if (rm1 != null) {
rm1.close();
}
if (rm2 != null) {
rm2.close();
}
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMDelegationTokenRestoredOnRMRestart.
@Test(timeout = 60000)
public void testRMDelegationTokenRestoredOnRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8032");
UserGroupInformation.setConfiguration(conf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
Map<RMDelegationTokenIdentifier, Long> rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
MockRM rm1 = new TestSecurityMockRM(conf, memStore);
rm1.start();
// create an empty credential
Credentials ts = new Credentials();
// request a token and add into credential
GetDelegationTokenRequest request1 = GetDelegationTokenRequest.newInstance("renewer1");
UserGroupInformation.getCurrentUser().setAuthenticationMethod(AuthMethod.KERBEROS);
GetDelegationTokenResponse response1 = rm1.getClientRMService().getDelegationToken(request1);
org.apache.hadoop.yarn.api.records.Token delegationToken1 = response1.getRMDelegationToken();
Token<RMDelegationTokenIdentifier> token1 = ConverterUtils.convertFromYarn(delegationToken1, rmAddr);
RMDelegationTokenIdentifier dtId1 = token1.decodeIdentifier();
HashSet<RMDelegationTokenIdentifier> tokenIdentSet = new HashSet<RMDelegationTokenIdentifier>();
ts.addToken(token1.getService(), token1);
tokenIdentSet.add(dtId1);
// submit an app with customized credential
RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, ts);
// assert app info is saved
ApplicationStateData appState = rmAppState.get(app.getApplicationId());
Assert.assertNotNull(appState);
// assert all master keys are saved
Set<DelegationKey> allKeysRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys();
Assert.assertEquals(allKeysRM1, rmDTMasterKeyState);
// assert all tokens are saved
Map<RMDelegationTokenIdentifier, Long> allTokensRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
Assert.assertEquals(tokenIdentSet, allTokensRM1.keySet());
Assert.assertEquals(allTokensRM1, rmDTState);
// assert sequence number is saved
Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rmState.getRMDTSecretManagerState().getDTSequenceNumber());
// request one more token
GetDelegationTokenRequest request2 = GetDelegationTokenRequest.newInstance("renewer2");
GetDelegationTokenResponse response2 = rm1.getClientRMService().getDelegationToken(request2);
org.apache.hadoop.yarn.api.records.Token delegationToken2 = response2.getRMDelegationToken();
Token<RMDelegationTokenIdentifier> token2 = ConverterUtils.convertFromYarn(delegationToken2, rmAddr);
RMDelegationTokenIdentifier dtId2 = token2.decodeIdentifier();
// cancel token2
try {
rm1.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token2, UserGroupInformation.getCurrentUser().getUserName());
} catch (Exception e) {
Assert.fail();
}
// Assert the token which has the latest delegationTokenSequenceNumber is removed
Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), dtId2.getSequenceNumber());
Assert.assertFalse(rmDTState.containsKey(dtId2));
// start new RM
MockRM rm2 = new TestSecurityMockRM(conf, memStore);
rm2.start();
// assert master keys and tokens are populated back to DTSecretManager
Map<RMDelegationTokenIdentifier, Long> allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
Assert.assertEquals(allTokensRM2.keySet(), allTokensRM1.keySet());
// rm2 has its own master keys when it starts, we use containsAll here
Assert.assertTrue(rm2.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys().containsAll(allKeysRM1));
// assert sequenceNumber is properly recovered,
// even though the token which has max sequenceNumber is not stored
Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rm2.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber());
// renewDate before renewing
Long renewDateBeforeRenew = allTokensRM2.get(dtId1);
try {
// Sleep for one millisecond to make sure renewDataAfterRenew is greater
Thread.sleep(1);
// renew recovered token
rm2.getRMContext().getRMDelegationTokenSecretManager().renewToken(token1, "renewer1");
} catch (Exception e) {
Assert.fail();
}
allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
Long renewDateAfterRenew = allTokensRM2.get(dtId1);
// assert token is renewed
Assert.assertTrue(renewDateAfterRenew > renewDateBeforeRenew);
// assert new token is added into state store
Assert.assertTrue(rmDTState.containsValue(renewDateAfterRenew));
// assert old token is removed from state store
Assert.assertFalse(rmDTState.containsValue(renewDateBeforeRenew));
try {
rm2.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token1, UserGroupInformation.getCurrentUser().getUserName());
} catch (Exception e) {
Assert.fail();
}
// assert token is removed from state after its cancelled
allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
Assert.assertFalse(allTokensRM2.containsKey(dtId1));
Assert.assertFalse(rmDTState.containsKey(dtId1));
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartAfterPreemption.
@Test(timeout = 120000)
public void testRMRestartAfterPreemption() throws Exception {
Configuration conf = new Configuration();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
if (!getSchedulerType().equals(SchedulerType.CAPACITY)) {
return;
}
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// start RM
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
int CONTAINER_MEMORY = 1024;
// create app and launch the AM
RMApp app0 = rm1.submitApp(CONTAINER_MEMORY);
MockAM am0 = MockRM.launchAM(app0, rm1, nm1);
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(cs, am0.getApplicationAttemptId());
for (int i = 0; i < 4; i++) {
am0 = MockRM.launchAM(app0, rm1, nm1);
am0.registerAppAttempt();
// get scheduler app
FiCaSchedulerApp schedulerAppAttempt = cs.getSchedulerApplications().get(app0.getApplicationId()).getCurrentAppAttempt();
// kill app0-attempt
cs.markContainerForKillable(schedulerAppAttempt.getRMContainer(app0.getCurrentAppAttempt().getMasterContainer().getId()));
rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(cs, am0.getApplicationAttemptId());
}
am0 = MockRM.launchAM(app0, rm1, nm1);
am0.registerAppAttempt();
rm1.killApp(app0.getApplicationId());
rm1.waitForState(app0.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.KILLED);
MockRM rm2 = null;
// start RM2
try {
rm2 = new MockRM(conf, memStore);
rm2.start();
Assert.assertTrue("RM start successfully", true);
} catch (Exception e) {
LOG.debug("Exception on start", e);
Assert.fail("RM should start with out any issue");
} finally {
rm1.stop();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testFinishedAppRemovalAfterRMRestart.
@Test(timeout = 60000)
public void testFinishedAppRemovalAfterRMRestart() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);
memStore.init(conf);
RMState rmState = memStore.getState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create an app and finish the app.
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
finishApplicationMaster(app0, rm1, nm1, am0);
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm1 = rm2.registerNode("127.0.0.1:1234", 15120);
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// app0 exits in both state store and rmContext
Assert.assertEquals(RMAppState.FINISHED, rmAppState.get(app0.getApplicationId()).getState());
rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
// create one more app and finish the app.
RMApp app1 = rm2.submitApp(200);
MockAM am1 = launchAM(app1, rm2, nm1);
finishApplicationMaster(app1, rm2, nm1, am1);
rm2.drainEvents();
// the first app0 get kicked out from both rmContext and state store
Assert.assertNull(rm2.getRMContext().getRMApps().get(app0.getApplicationId()));
Assert.assertNull(rmAppState.get(app0.getApplicationId()));
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartOnMaxAppAttempts.
@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// submit an app with maxAppAttempts equals to 1
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
// submit an app with maxAppAttempts equals to -1
RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
// assert app1 info is saved
ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
// Allocate the AM
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
Assert.assertEquals(1, appState.getAttemptCount());
ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// Setting AMLivelinessMonitor interval to be 3 Secs.
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
// start new RM
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify that maxAppAttempts is set to global value
Assert.assertEquals(2, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());
// app1 and app2 are loaded back, but app1 failed because it's
// hitting max-retry.
Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
// app1 failed state is saved in state store. app2 final saved state is not
// determined yet.
Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
}
Aggregations