use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.
the class TestRMRestart method finishApplicationMaster.
/**
 * Unregisters the given AM, waits for the attempt and the application to
 * reach FINISHED, and then asserts that the RM's state store holds FINISHED
 * for both the application and the attempt.
 *
 * @param rmApp the application whose final state is checked
 * @param rm the RM whose state store (a MemoryRMStateStore) is inspected
 * @param nm the node used to report the completed AM container
 * @param am the application master to unregister
 * @param req the unregister request sent on behalf of the AM
 * @throws Exception if any RM/NM/AM interaction fails
 */
private void finishApplicationMaster(RMApp rmApp, MockRM rm, MockNM nm, MockAM am, FinishApplicationMasterRequest req) throws Exception {
  // Obtain the stored application map up front, before the AM is unregistered.
  MemoryRMStateStore store = (MemoryRMStateStore) rm.getRMContext().getStateStore();
  RMState storedState = store.getState();
  Map<ApplicationId, ApplicationStateData> savedApps = storedState.getApplicationState();

  // Unregister, then report the AM container as complete through the node.
  am.unregisterAppAttempt(req, true);
  rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHING);
  nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
  rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
  rm.waitForState(rmApp.getApplicationId(), RMAppState.FINISHED);

  // Both the app and its attempt must be saved with the final state.
  ApplicationStateData savedApp = savedApps.get(rmApp.getApplicationId());
  Assert.assertEquals(RMAppState.FINISHED, savedApp.getState());
  Assert.assertEquals(
      RMAppAttemptState.FINISHED,
      savedApp.getAttempt(am.getApplicationAttemptId()).getState());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.
the class TestRMRestart method testRMRestartOnMissingAttempts.
/**
 * Restarts the RM on top of a state store in which one attempt record has
 * been removed and another attempt has no saved state, and checks which
 * attempts the second RM recovers and in what state.
 */
@Test(timeout = 60000)
public void testRMRestartOnMissingAttempts() throws Exception {
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
  MemoryRMStateStore store = new MemoryRMStateStore();
  store.init(conf);

  // Bring up the first RM with one registered node.
  MockRM firstRm = createMockRM(conf, store);
  firstRm.start();
  MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
  node.registerNode();

  // Submit an app and keep a handle on its record in the store.
  RMApp app0 = firstRm.submitApp(200);
  ApplicationStateData storedApp0 =
      store.getState().getApplicationState().get(app0.getApplicationId());

  // Four attempts: the first three go through launchAndFailAM, the last
  // one is only launched.
  MockAM am0 = launchAndFailAM(app0, firstRm, node);
  MockAM am1 = launchAndFailAM(app0, firstRm, node);
  MockAM am2 = launchAndFailAM(app0, firstRm, node);
  MockAM am3 = launchAM(app0, firstRm, node);

  ApplicationAttemptId attempt0Id = am0.getApplicationAttemptId();
  ApplicationAttemptId attempt1Id = am1.getApplicationAttemptId();
  ApplicationAttemptId attempt2Id = am2.getApplicationAttemptId();
  ApplicationAttemptId attempt3Id = am3.getApplicationAttemptId();

  // The record of am1 is missing from MemoryRMStateStore.
  store.removeApplicationAttemptInternal(attempt1Id);

  // am2's record is inconsistent: MemoryRMStateStore saved only its initial
  // state and failed to store its final state.
  ApplicationAttemptStateData storedAttempt2 = storedApp0.getAttempt(attempt2Id);
  storedAttempt2.setState(null);

  // Restart: a second RM recovers from the same store.
  MockRM secondRm = createMockRM(conf, store);
  secondRm.start();
  Assert.assertEquals(1, secondRm.getRMContext().getRMApps().size());

  RMApp recoveredApp0 = secondRm.getRMContext().getRMApps().values().iterator().next();
  Map<ApplicationAttemptId, RMAppAttempt> recovered = recoveredApp0.getAppAttempts();
  Assert.assertEquals(3, recovered.size());
  Assert.assertEquals(RMAppAttemptState.FAILED, recovered.get(attempt0Id).getAppAttemptState());
  Assert.assertEquals(RMAppAttemptState.FAILED, recovered.get(attempt2Id).getAppAttemptState());
  Assert.assertEquals(RMAppAttemptState.LAUNCHED, recovered.get(attempt3Id).getAppAttemptState());

  // Note: this checks the app object obtained from the first RM (app0).
  Assert.assertEquals(5, ((RMAppImpl) app0).getNextAttemptId());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.
the class TestRMRestart method testRMRestartWaitForPreviousSucceededAttempt.
/**
 * Tests an RM restart that happens after the previous attempt succeeded and
 * was saved into the state store, but before the RMAppAttempt notified the
 * RMApp of its success. On recovery, the RMAppAttempt should send the
 * AttemptFinished event to the RMApp so that the RMApp can recover its state.
 */
@Test(timeout = 60000)
public void testRMRestartWaitForPreviousSucceededAttempt() throws Exception {
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);

  // A store that drops exactly the first application-state update and
  // delegates every later one to the real implementation.
  MemoryRMStateStore memStore = new MemoryRMStateStore() {
    boolean firstUpdateDropped = false;

    @Override
    public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appStateData) throws Exception {
      if (firstUpdateDropped) {
        super.updateApplicationStateInternal(appId, appStateData);
        return;
      }
      // do nothing; simulate app final state is not saved.
      LOG.info(appId + " final state is not saved.");
      firstUpdateDropped = true;
    }
  };
  memStore.init(conf);
  RMState storedState = memStore.getState();
  Map<ApplicationId, ApplicationStateData> savedApps = storedState.getApplicationState();

  // First RM: run one app and unregister its AM successfully.
  MockRM rm1 = createMockRM(conf, memStore);
  rm1.start();
  MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 15120);
  RMApp app0 = rm1.submitApp(200);
  MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
  FinishApplicationMasterRequest finishRequest =
      FinishApplicationMasterRequest.newInstance(FinalApplicationStatus.SUCCEEDED, "", "");
  am0.unregisterAppAttempt(finishRequest, true);
  rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FINISHING);

  // The app's final state is not saved. This guarantees that the RMApp
  // cannot be recovered from its own saved state, only through the event
  // notification sent by the RMAppAttempt on recovery.
  ApplicationId app0Id = app0.getApplicationId();
  Assert.assertNull(savedApps.get(app0Id).getState());

  // Second RM recovering from the same store.
  MockRM rm2 = createMockRM(conf, memStore);
  nm1.setResourceTrackerService(rm2.getResourceTrackerService());
  rm2.start();
  rm2.waitForState(app0.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.FINISHED);
  rm2.waitForState(app0Id, RMAppState.FINISHED);

  // The app's final state is now saved via the finish event from the attempt.
  Assert.assertEquals(RMAppState.FINISHED, savedApps.get(app0Id).getState());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.
the class TestRMRestart method testRMRestartTimelineCollectorContext.
/**
 * With timeline service v2 enabled, checks that the timeline collector
 * context registered for an application is equal before and after an RM
 * restart from the same state store.
 */
@Test(timeout = 60000)
public void testRMRestartTimelineCollectorContext() throws Exception {
  conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
  conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f);

  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  RMState storedState = memStore.getState();
  Map<ApplicationId, ApplicationStateData> savedApps = storedState.getApplicationState();

  MockRM rm1 = null;
  MockRM rm2 = null;
  try {
    rm1 = createMockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
    nm1.registerNode();

    // Submit an app.
    RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
    ApplicationId appId = app.getApplicationId();

    // The app must already be in the store, with no attempts recorded yet.
    ApplicationStateData savedApp = savedApps.get(appId);
    Assert.assertNotNull(savedApp);
    Assert.assertEquals(0, savedApp.getAttemptCount());
    Assert.assertEquals(
        savedApp.getApplicationSubmissionContext().getApplicationId(),
        app.getApplicationSubmissionContext().getApplicationId());

    // Allocate the AM.
    nm1.nodeHeartbeat(true);
    RMAppAttempt currentAttempt = app.getCurrentAppAttempt();
    ApplicationAttemptId attemptId1 = currentAttempt.getAppAttemptId();
    rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);

    // Collector context as seen by the first RM.
    TimelineCollectorContext contextBeforeRestart =
        rm1.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();

    // Restart RM.
    rm2 = createMockRM(conf, memStore);
    rm2.start();
    Assert.assertEquals(1, rm2.getRMContext().getRMApps().size());
    rm2.waitForState(appId, RMAppState.ACCEPTED);

    // Collector context as seen by the second RM.
    TimelineCollectorContext contextAfterRestart =
        rm2.getRMContext().getRMTimelineCollectorManager().get(appId).getTimelineEntityContext();
    Assert.assertEquals("Collector contexts for an app should be same " + "across restarts", contextBeforeRestart, contextAfterRestart);
  } finally {
    // Switch the timeline service back off in the shared conf, then
    // release whichever RMs were created.
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, false);
    if (rm1 != null) {
      rm1.close();
    }
    if (rm2 != null) {
      rm2.close();
    }
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData in project hadoop by apache.
the class TestRMRestart method testRMDelegationTokenRestoredOnRMRestart.
/**
 * Checks that RM delegation tokens, master keys and the delegation token
 * sequence number written to the state store by one RM are populated back
 * into the delegation token secret manager of a second RM started from the
 * same store, and that renewing and cancelling a recovered token is
 * reflected in the store.
 *
 * <p>Unexpected exceptions from cancel/renew calls are allowed to propagate
 * so the test report carries their cause and stack trace. Both RMs are
 * closed when the test ends, whether it passes or fails.
 *
 * @throws Exception if any RM interaction fails unexpectedly
 */
@Test(timeout = 60000)
public void testRMDelegationTokenRestoredOnRMRestart() throws Exception {
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
  conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
  conf.set(YarnConfiguration.RM_ADDRESS, "localhost:8032");
  UserGroupInformation.setConfiguration(conf);
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  RMState rmState = memStore.getState();
  Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
  Map<RMDelegationTokenIdentifier, Long> rmDTState = rmState.getRMDTSecretManagerState().getTokenState();
  Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
  MockRM rm1 = new TestSecurityMockRM(conf, memStore);
  MockRM rm2 = null;
  try {
    rm1.start();
    // create an empty credential
    Credentials ts = new Credentials();
    // request a token and add into credential
    GetDelegationTokenRequest request1 = GetDelegationTokenRequest.newInstance("renewer1");
    UserGroupInformation.getCurrentUser().setAuthenticationMethod(AuthMethod.KERBEROS);
    GetDelegationTokenResponse response1 = rm1.getClientRMService().getDelegationToken(request1);
    org.apache.hadoop.yarn.api.records.Token delegationToken1 = response1.getRMDelegationToken();
    Token<RMDelegationTokenIdentifier> token1 = ConverterUtils.convertFromYarn(delegationToken1, rmAddr);
    RMDelegationTokenIdentifier dtId1 = token1.decodeIdentifier();
    Set<RMDelegationTokenIdentifier> tokenIdentSet = new HashSet<RMDelegationTokenIdentifier>();
    ts.addToken(token1.getService(), token1);
    tokenIdentSet.add(dtId1);
    // submit an app with customized credential
    RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, ts);
    // assert app info is saved
    ApplicationStateData appState = rmAppState.get(app.getApplicationId());
    Assert.assertNotNull(appState);
    // assert all master keys are saved
    Set<DelegationKey> allKeysRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys();
    Assert.assertEquals(allKeysRM1, rmDTMasterKeyState);
    // assert all tokens are saved
    Map<RMDelegationTokenIdentifier, Long> allTokensRM1 = rm1.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertEquals(tokenIdentSet, allTokensRM1.keySet());
    Assert.assertEquals(allTokensRM1, rmDTState);
    // assert sequence number is saved
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rmState.getRMDTSecretManagerState().getDTSequenceNumber());
    // request one more token
    GetDelegationTokenRequest request2 = GetDelegationTokenRequest.newInstance("renewer2");
    GetDelegationTokenResponse response2 = rm1.getClientRMService().getDelegationToken(request2);
    org.apache.hadoop.yarn.api.records.Token delegationToken2 = response2.getRMDelegationToken();
    Token<RMDelegationTokenIdentifier> token2 = ConverterUtils.convertFromYarn(delegationToken2, rmAddr);
    RMDelegationTokenIdentifier dtId2 = token2.decodeIdentifier();
    // cancel token2; any exception propagates and fails the test with its cause
    rm1.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token2, UserGroupInformation.getCurrentUser().getUserName());
    // Assert the token which has the latest delegationTokenSequenceNumber is removed
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), dtId2.getSequenceNumber());
    Assert.assertFalse(rmDTState.containsKey(dtId2));
    // start new RM
    rm2 = new TestSecurityMockRM(conf, memStore);
    rm2.start();
    // assert master keys and tokens are populated back to DTSecretManager
    Map<RMDelegationTokenIdentifier, Long> allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertEquals(allTokensRM2.keySet(), allTokensRM1.keySet());
    // rm2 has its own master keys when it starts, we use containsAll here
    Assert.assertTrue(rm2.getRMContext().getRMDelegationTokenSecretManager().getAllMasterKeys().containsAll(allKeysRM1));
    // assert sequenceNumber is properly recovered,
    // even though the token which has max sequenceNumber is not stored
    Assert.assertEquals(rm1.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber(), rm2.getRMContext().getRMDelegationTokenSecretManager().getLatestDTSequenceNumber());
    // renewDate before renewing
    Long renewDateBeforeRenew = allTokensRM2.get(dtId1);
    // Sleep for one millisecond to make sure renewDateAfterRenew is greater
    Thread.sleep(1);
    // renew recovered token; any exception propagates and fails the test
    rm2.getRMContext().getRMDelegationTokenSecretManager().renewToken(token1, "renewer1");
    allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Long renewDateAfterRenew = allTokensRM2.get(dtId1);
    // assert token is renewed
    Assert.assertTrue(renewDateAfterRenew > renewDateBeforeRenew);
    // assert new token is added into state store
    Assert.assertTrue(rmDTState.containsValue(renewDateAfterRenew));
    // assert old token is removed from state store
    Assert.assertFalse(rmDTState.containsValue(renewDateBeforeRenew));
    // cancel the recovered token; any exception propagates and fails the test
    rm2.getRMContext().getRMDelegationTokenSecretManager().cancelToken(token1, UserGroupInformation.getCurrentUser().getUserName());
    // assert token is removed from state after it is cancelled
    allTokensRM2 = rm2.getRMContext().getRMDelegationTokenSecretManager().getAllTokens();
    Assert.assertFalse(allTokensRM2.containsKey(dtId1));
    Assert.assertFalse(rmDTState.containsKey(dtId1));
  } finally {
    // Release both RMs; the nested finally makes sure rm2 is still closed
    // when closing rm1 throws.
    try {
      rm1.close();
    } finally {
      if (rm2 != null) {
        rm2.close();
      }
    }
  }
}
Aggregations