use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestRMRestart method testRMRestartFailedApp.
@Test(timeout = 60000)
public void testRMRestartFailedApp() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create app and launch the AM
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
// fail the AM by sending CONTAINER_FINISHED event without registering.
nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
rm1.waitForState(app0.getApplicationId(), RMAppState.FAILED);
// assert the app/attempt failed state is saved.
ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
Assert.assertEquals(RMAppState.FAILED, appState.getState());
Assert.assertEquals(RMAppAttemptState.FAILED, appState.getAttempt(am0.getApplicationAttemptId()).getState());
// start new RM
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
rm2.waitForState(app0.getApplicationId(), RMAppState.FAILED);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
// no new attempt is created.
Assert.assertEquals(1, loadedApp0.getAppAttempts().size());
verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertTrue(app0.getDiagnostics().toString().contains("Failing the application."));
// failed diagnostics from attempt is lost because the diagnostics from
// attempt is not yet available by the time app is saving the app state.
}
Aggregations