use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData in project hadoop by apache.
the class TestRMRestart method testRMRestartOnMaxAppAttempts.
@Test(timeout = 60000)
public void testRMRestartOnMaxAppAttempts() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// submit an app with maxAppAttempts equals to 1
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", 1, null);
// submit an app with maxAppAttempts equals to -1
RMApp app2 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null);
// assert app1 info is saved
ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app1.getApplicationSubmissionContext().getApplicationId());
// Allocate the AM
nm1.nodeHeartbeat(true);
RMAppAttempt attempt = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
Assert.assertEquals(1, appState.getAttemptCount());
ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// Setting AMLivelinessMonitor interval to be 3 Secs.
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
// start new RM
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify that maxAppAttempts is set to global value
Assert.assertEquals(2, rm2.getRMContext().getRMApps().get(app2.getApplicationId()).getMaxAppAttempts());
// app1 and app2 are loaded back, but app1 failed because it's
// hitting max-retry.
Assert.assertEquals(2, rm2.getRMContext().getRMApps().size());
rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED);
rm2.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
// app1 failed state is saved in state store. app2 final saved state is not
// determined yet.
Assert.assertEquals(RMAppState.FAILED, rmAppState.get(app1.getApplicationId()).getState());
Assert.assertNull(rmAppState.get(app2.getApplicationId()).getState());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData in project hadoop by apache.
the class TestRMRestart method testAppAttemptTokensRestoredOnRMRestart.
@Test(timeout = 60000)
public void testAppAttemptTokensRestoredOnRMRestart() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
MockRM rm1 = new TestSecurityMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("0.0.0.0:4321", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// submit an app
RMApp app1 = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), "default");
// assert app info is saved
ApplicationStateData appState = rmAppState.get(app1.getApplicationId());
Assert.assertNotNull(appState);
// Allocate the AM
nm1.nodeHeartbeat(true);
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
// assert attempt info is saved
ApplicationAttemptStateData attemptState = appState.getAttempt(attemptId1);
Assert.assertNotNull(attemptState);
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), attemptState.getMasterContainer().getId());
// the clientTokenMasterKey that are generated when
// RMAppAttempt is created,
byte[] clientTokenMasterKey = attempt1.getClientTokenMasterKey().getEncoded();
// assert application credentials are saved
Credentials savedCredentials = attemptState.getAppAttemptTokens();
Assert.assertArrayEquals("client token master key not saved", clientTokenMasterKey, savedCredentials.getSecretKey(RMStateStore.AM_CLIENT_TOKEN_MASTER_KEY_NAME));
// start new RM
MockRM rm2 = new TestSecurityMockRM(conf, memStore);
rm2.start();
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
RMAppAttempt loadedAttempt1 = loadedApp1.getRMAppAttempt(attemptId1);
// assert loaded attempt recovered
Assert.assertNotNull(loadedAttempt1);
// assert client token master key is recovered back to api-versioned
// client token master key
Assert.assertEquals("client token master key not restored", attempt1.getClientTokenMasterKey(), loadedAttempt1.getClientTokenMasterKey());
// assert ClientTokenSecretManager also knows about the key
Assert.assertArrayEquals(clientTokenMasterKey, rm2.getClientToAMTokenSecretManager().getMasterKey(attemptId1).getEncoded());
// assert AMRMTokenSecretManager also knows about the AMRMToken password
Token<AMRMTokenIdentifier> amrmToken = loadedAttempt1.getAMRMToken();
Assert.assertArrayEquals(amrmToken.getPassword(), rm2.getRMContext().getAMRMTokenSecretManager().retrievePassword(amrmToken.decodeIdentifier()));
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData in project hadoop by apache.
the class TestRMRestart method testRMRestartSucceededApp.
@Test(timeout = 60000)
public void testRMRestartSucceededApp() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create an app and finish the app.
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
// unregister am
FinishApplicationMasterRequest req = FinishApplicationMasterRequest.newInstance(FinalApplicationStatus.SUCCEEDED, "diagnostics", "trackingUrl");
finishApplicationMaster(app0, rm1, nm1, am0, req);
// check the state store about the unregistered info.
ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
ApplicationAttemptStateData attemptState0 = appState.getAttempt(am0.getApplicationAttemptId());
Assert.assertEquals("diagnostics", attemptState0.getDiagnostics());
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, attemptState0.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", attemptState0.getFinalTrackingUrl());
Assert.assertEquals(app0.getFinishTime(), appState.getFinishTime());
// restart rm
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
// verify application report returns the same app info as the app info
// before RM restarts.
ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED, appReport.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData in project hadoop by apache.
the class TestZKRMStateStore method testFencedState.
@Test
public void testFencedState() throws Exception {
TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
RMStateStore store = zkTester.getRMStateStore();
// Move state to FENCED from ACTIVE
store.updateFencedState();
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
long submitTime = System.currentTimeMillis();
long startTime = submitTime + 1000;
// Add a new app
RMApp mockApp = mock(RMApp.class);
ApplicationSubmissionContext context = new ApplicationSubmissionContextPBImpl();
when(mockApp.getSubmitTime()).thenReturn(submitTime);
when(mockApp.getStartTime()).thenReturn(startTime);
when(mockApp.getApplicationSubmissionContext()).thenReturn(context);
when(mockApp.getUser()).thenReturn("test");
store.storeNewApplication(mockApp);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// Add a new attempt
ClientToAMTokenSecretManagerInRM clientToAMTokenMgr = new ClientToAMTokenSecretManagerInRM();
ApplicationAttemptId attemptId = ApplicationAttemptId.fromString("appattempt_1234567894321_0001_000001");
SecretKey clientTokenMasterKey = clientToAMTokenMgr.createMasterKey(attemptId);
RMAppAttemptMetrics mockRmAppAttemptMetrics = mock(RMAppAttemptMetrics.class);
Container container = new ContainerPBImpl();
container.setId(ContainerId.fromString("container_1234567891234_0001_01_000001"));
RMAppAttempt mockAttempt = mock(RMAppAttempt.class);
when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
when(mockAttempt.getMasterContainer()).thenReturn(container);
when(mockAttempt.getClientTokenMasterKey()).thenReturn(clientTokenMasterKey);
when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(mockRmAppAttemptMetrics);
when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()).thenReturn(new AggregateAppResourceUsage(0, 0));
store.storeNewApplicationAttempt(mockAttempt);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
long finishTime = submitTime + 1000;
// Update attempt
ApplicationAttemptStateData newAttemptState = ApplicationAttemptStateData.newInstance(attemptId, container, store.getCredentialsFromAppAttempt(mockAttempt), startTime, RMAppAttemptState.FINISHED, "testUrl", "test", FinalApplicationStatus.SUCCEEDED, 100, finishTime, 0, 0, 0, 0);
store.updateApplicationAttemptState(newAttemptState);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// Update app
ApplicationStateData appState = ApplicationStateData.newInstance(submitTime, startTime, context, "test");
store.updateApplicationState(appState);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// Remove app
store.removeApplication(mockApp);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// store RM delegation token;
RMDelegationTokenIdentifier dtId1 = new RMDelegationTokenIdentifier(new Text("owner1"), new Text("renewer1"), new Text("realuser1"));
Long renewDate1 = new Long(System.currentTimeMillis());
dtId1.setSequenceNumber(1111);
store.storeRMDelegationToken(dtId1, renewDate1);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
store.updateRMDelegationToken(dtId1, renewDate1);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// remove delegation key;
store.removeRMDelegationToken(dtId1);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// store delegation master key;
DelegationKey key = new DelegationKey(1234, 4321, "keyBytes".getBytes());
store.storeRMDTMasterKey(key);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// remove delegation master key;
store.removeRMDTMasterKey(key);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
// store or update AMRMToken;
store.storeOrUpdateAMRMTokenSecretManager(null, false);
assertEquals("RMStateStore should have been in fenced state", true, store.isFencedState());
store.close();
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData in project hadoop by apache.
the class TestWorkPreservingRMRestart method testAppStateSavedButAttemptStateNotSaved.
// Test that if application state was saved, but attempt state was not saved.
// RM should start correctly.
@Test(timeout = 20000)
public void testAppStateSavedButAttemptStateNotSaved() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore() {
@Override
public synchronized void updateApplicationAttemptStateInternal(ApplicationAttemptId appAttemptId, ApplicationAttemptStateData attemptState) {
// do nothing;
// simulate the failure that attempt final state is not saved.
}
};
memStore.init(conf);
rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app1 = rm1.submitApp(200);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
MockRM.finishAMAndVerifyAppState(app1, rm1, nm1, am1);
ApplicationStateData appSavedState = memStore.getState().getApplicationState().get(app1.getApplicationId());
// check that app state is saved.
assertEquals(RMAppState.FINISHED, appSavedState.getState());
// check that attempt state is not saved.
assertNull(appSavedState.getAttempt(am1.getApplicationAttemptId()).getState());
rm2 = new MockRM(conf, memStore);
rm2.start();
RMApp recoveredApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
assertEquals(RMAppState.FINISHED, recoveredApp1.getState());
// check that attempt state is recovered correctly.
assertEquals(RMAppAttemptState.FINISHED, recoveredApp1.getCurrentAppAttempt().getState());
}
Aggregations