use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartFailAppAttempt.
@Test(timeout = 60000)
public void testRMRestartFailAppAttempt() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
int maxAttempt = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create app and launch the AM
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
ApplicationId applicationId = app0.getApplicationId();
ApplicationAttemptId appAttemptId1 = app0.getCurrentAppAttempt().getAppAttemptId();
Assert.assertEquals(1, appAttemptId1.getAttemptId());
// fail the 1st app attempt.
rm1.failApplicationAttempt(appAttemptId1);
rm1.waitForState(appAttemptId1, RMAppAttemptState.FAILED);
rm1.waitForState(applicationId, RMAppState.ACCEPTED);
ApplicationAttemptId appAttemptId2 = app0.getCurrentAppAttempt().getAppAttemptId();
Assert.assertEquals(2, appAttemptId2.getAttemptId());
rm1.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
// restart rm
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(applicationId);
rm2.waitForState(applicationId, RMAppState.ACCEPTED);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
//Wait to make sure the loadedApp0 has the right number of attempts
//TODO explore a better way than sleeping for a while (YARN-4929)
Thread.sleep(1000);
Assert.assertEquals(2, loadedApp0.getAppAttempts().size());
rm2.waitForState(appAttemptId2, RMAppAttemptState.SCHEDULED);
appAttemptId2 = loadedApp0.getCurrentAppAttempt().getAppAttemptId();
Assert.assertEquals(2, appAttemptId2.getAttemptId());
// fail 2nd attempt
rm2.failApplicationAttempt(appAttemptId2);
rm2.waitForState(appAttemptId2, RMAppAttemptState.FAILED);
rm2.waitForState(applicationId, RMAppState.FAILED);
Assert.assertEquals(maxAttempt, loadedApp0.getAppAttempts().size());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testSynchronouslyRenewDTOnRecovery.
// Test Delegation token is renewed synchronously so that recover events
// can be processed before any other external incoming events, specifically
// the ContainerFinished event on NM re-registraton.
@Test(timeout = 20000)
public void testSynchronouslyRenewDTOnRecovery() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, "kerberos");
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
final MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
RMApp app0 = rm1.submitApp(200);
final MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1);
MockRM rm2 = new MockRM(conf, memStore) {
@Override
protected ResourceTrackerService createResourceTrackerService() {
return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {
@Override
protected void serviceStart() throws Exception {
// send the container_finished event as soon as the
// ResourceTrackerService is started.
super.serviceStart();
nm1.setResourceTrackerService(getResourceTrackerService());
NMContainerStatus status = TestRMRestart.createNMContainerStatus(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
nm1.registerNode(Arrays.asList(status), null);
}
};
}
};
try {
// Re-start RM
rm2.start();
// wait for the 2nd attempt to be started.
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
int timeoutSecs = 0;
while (loadedApp0.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {
Thread.sleep(200);
}
MockAM am1 = MockRM.launchAndRegisterAM(loadedApp0, rm2, nm1);
MockRM.finishAMAndVerifyAppState(loadedApp0, rm2, nm1, am1);
} finally {
rm2.stop();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMStateStoreDispatcherDrainedOnRMStop.
@Test(timeout = 60000)
public void testRMStateStoreDispatcherDrainedOnRMStop() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore() {
volatile boolean wait = true;
@Override
public void serviceStop() throws Exception {
// Unblock app saving request.
wait = false;
super.serviceStop();
}
@Override
protected void handleStoreEvent(RMStateStoreEvent event) {
// Skip if synchronous updation of DTToken
if (!(event instanceof RMStateStoreAMRMTokenEvent) && !(event instanceof RMStateStoreRMDTEvent) && !(event instanceof RMStateStoreRMDTMasterKeyEvent)) {
while (wait) ;
}
super.handleStoreEvent(event);
}
};
memStore.init(conf);
// start RM
final MockRM rm1 = createMockRM(conf, memStore);
rm1.disableDrainEventsImplicitly();
rm1.start();
// create apps.
final ArrayList<RMApp> appList = new ArrayList<RMApp>();
final int NUM_APPS = 5;
for (int i = 0; i < NUM_APPS; i++) {
RMApp app = rm1.submitApp(200, "name", "user", new HashMap<ApplicationAccessType, String>(), false, "default", -1, null, "MAPREDUCE", false);
appList.add(app);
rm1.waitForState(app.getApplicationId(), RMAppState.NEW_SAVING);
}
// all apps's saving request are now enqueued to RMStateStore's dispatcher
// queue, and will be processed once rm.stop() is called.
// Nothing exist in state store before stop is called.
Map<ApplicationId, ApplicationStateData> rmAppState = memStore.getState().getApplicationState();
Assert.assertTrue(rmAppState.size() == 0);
// stop rm
rm1.stop();
// request on dispatcher.
for (RMApp app : appList) {
ApplicationStateData appState = rmAppState.get(app.getApplicationId());
Assert.assertNotNull(appState);
Assert.assertEquals(0, appState.getAttemptCount());
Assert.assertEquals(appState.getApplicationSubmissionContext().getApplicationId(), app.getApplicationSubmissionContext().getApplicationId());
}
Assert.assertTrue(rmAppState.size() == NUM_APPS);
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartKilledApp.
@Test(timeout = 60000)
public void testRMRestartKilledApp() throws Exception {
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
RMState rmState = memStore.getState();
Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
// start RM
MockRM rm1 = createMockRM(conf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create app and launch the AM
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
// kill the app.
rm1.killApp(app0.getApplicationId());
rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);
rm1.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);
// killed state is saved.
ApplicationStateData appState = rmAppState.get(app0.getApplicationId());
Assert.assertEquals(RMAppState.KILLED, appState.getState());
Assert.assertEquals(RMAppAttemptState.KILLED, appState.getAttempt(am0.getApplicationAttemptId()).getState());
String trackingUrl = app0.getCurrentAppAttempt().getOriginalTrackingUrl();
Assert.assertNotNull(trackingUrl);
// restart rm
MockRM rm2 = createMockRM(conf, memStore);
rm2.start();
RMApp loadedApp0 = rm2.getRMContext().getRMApps().get(app0.getApplicationId());
rm2.waitForState(app0.getApplicationId(), RMAppState.KILLED);
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.KILLED);
// no new attempt is created.
Assert.assertEquals(1, loadedApp0.getAppAttempts().size());
ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertEquals(app0.getDiagnostics().toString(), appReport.getDiagnostics());
Assert.assertEquals(trackingUrl, loadedApp0.getCurrentAppAttempt().getOriginalTrackingUrl());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMHA method testFailoverClearsRMContext.
@Test
public void testFailoverClearsRMContext() throws Exception {
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
configuration.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true);
Configuration conf = new YarnConfiguration(configuration);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// 1. start RM
rm = new MockRM(conf, memStore);
rm.init(conf);
rm.start();
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
checkMonitorHealth();
checkStandbyRMFunctionality();
// 2. Transition to active
rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth();
checkActiveRMFunctionality();
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
assertEquals(1, rm.getRMContext().getRMNodes().size());
assertEquals(1, rm.getRMContext().getRMApps().size());
// 3. Create new RM
rm = new MockRM(conf, memStore) {
@Override
protected ResourceTrackerService createResourceTrackerService() {
return new ResourceTrackerService(this.rmContext, this.nodesListManager, this.nmLivelinessMonitor, this.rmContext.getContainerTokenSecretManager(), this.rmContext.getNMTokenSecretManager()) {
@Override
protected void serviceStart() throws Exception {
throw new Exception("ResourceTracker service failed");
}
};
}
};
rm.init(conf);
rm.start();
checkMonitorHealth();
checkStandbyRMFunctionality();
// 4. Try Transition to active, throw exception
try {
rm.adminService.transitionToActive(requestInfo);
Assert.fail("Transitioned to Active should throw exception.");
} catch (Exception e) {
assertTrue("Error when transitioning to Active mode".contains(e.getMessage()));
}
// 5. Clears the metrics
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
assertEquals(0, rm.getRMContext().getRMNodes().size());
assertEquals(0, rm.getRMContext().getRMApps().size());
}
Aggregations