use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testAppRecoveredInOrderOnRMRestart.
/**
 * Verifies that, on RM start, applications held in the state store are passed
 * to {@code recoverApplication} in strictly increasing ApplicationId order,
 * although they are inserted into the store here in decreasing id order.
 */
@Test(timeout = 20000)
public void testAppRecoveredInOrderOnRMRestart() throws Exception {
  MemoryRMStateStore stateStore = new MemoryRMStateStore();
  stateStore.init(conf);

  // Seed the store with ids 10 down to 1, each backed by mocked state data.
  int seq = 10;
  while (seq > 0) {
    ApplicationId appId = ApplicationId.newInstance(1234, seq);
    ApplicationSubmissionContext submissionContext = mock(ApplicationSubmissionContext.class);
    when(submissionContext.getApplicationId()).thenReturn(appId);
    ApplicationStateData storedApp = mock(ApplicationStateData.class);
    when(storedApp.getApplicationSubmissionContext()).thenReturn(submissionContext);
    stateStore.getState().getApplicationState().put(appId, storedApp);
    seq--;
  }

  MockRM rm1 = new MockRM(conf, stateStore) {
    /** App manager that asserts the recovery order instead of recovering. */
    class TestRMAppManager extends RMAppManager {
      // Id seen by the previous recoverApplication call; sequence 0 is below every seeded id.
      ApplicationId lastRecoveredId = ApplicationId.newInstance(1234, 0);

      public TestRMAppManager(RMContext context, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationACLsManager applicationACLsManager, Configuration conf) {
        super(context, scheduler, masterService, applicationACLsManager, conf);
      }

      @Override
      protected void recoverApplication(ApplicationStateData appState, RMState rmState) throws Exception {
        // the state being recovered must contain applications
        Assert.assertTrue(rmState.getApplicationState().size() > 0);
        // each recovered id must be greater than the one recovered before it
        ApplicationId currentId = appState.getApplicationSubmissionContext().getApplicationId();
        Assert.assertTrue(currentId.compareTo(lastRecoveredId) > 0);
        lastRecoveredId = currentId;
      }
    }

    @Override
    protected RMAppManager createRMAppManager() {
      return new TestRMAppManager(this.rmContext, this.scheduler, this.masterService, this.applicationACLsManager, conf);
    }
  };

  try {
    rm1.start();
  } finally {
    rm1.stop();
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMRestartFailedApp.
/**
 * With a single allowed AM attempt, fails the AM of an application, checks
 * that the FAILED app/attempt state is saved in the store, then starts a
 * second RM on the same store and checks that the app is loaded as FAILED
 * with no additional attempt.
 */
@Test(timeout = 60000)
public void testRMRestartFailedApp() throws Exception {
  conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
  MemoryRMStateStore stateStore = new MemoryRMStateStore();
  stateStore.init(conf);
  Map<ApplicationId, ApplicationStateData> savedApps = stateStore.getState().getApplicationState();

  // first RM with one registered node
  MockRM firstRm = createMockRM(conf, stateStore);
  firstRm.start();
  MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
  node.registerNode();

  // submit an application and launch its AM
  RMApp app0 = firstRm.submitApp(200);
  ApplicationId appId = app0.getApplicationId();
  MockAM am0 = launchAM(app0, firstRm, node);

  // the AM never registers; report its container as COMPLETE so the attempt fails
  node.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
  firstRm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
  firstRm.waitForState(appId, RMAppState.FAILED);

  // the store must now hold FAILED for both the app and its attempt
  ApplicationStateData savedApp = savedApps.get(appId);
  Assert.assertEquals(RMAppState.FAILED, savedApp.getState());
  Assert.assertEquals(RMAppAttemptState.FAILED, savedApp.getAttempt(am0.getApplicationAttemptId()).getState());

  // second RM on the same store
  MockRM secondRm = createMockRM(conf, stateStore);
  secondRm.start();
  RMApp recoveredApp = secondRm.getRMContext().getRMApps().get(appId);
  secondRm.waitForState(appId, RMAppState.FAILED);
  secondRm.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);

  // recovery must not have created another attempt
  Assert.assertEquals(1, recoveredApp.getAppAttempts().size());
  verifyAppReportAfterRMRestart(app0, secondRm);

  // Only the app-level message is checked: the failed diagnostics from the
  // attempt is lost because it is not yet available by the time the app is
  // saving the app state.
  String diagnostics = app0.getDiagnostics().toString();
  Assert.assertTrue(diagnostics.contains("Failing the application."));
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestRMRestart method testRMShutdown.
/**
 * Tests that the RM does not hang on shutdown: the state store's
 * {@code checkVersion} is overridden to always throw, so starting the RM must
 * fail with that error and the RM must end up in the STOPPED state (all within
 * the test timeout).
 */
@Test(timeout = 10000)
public void testRMShutdown() throws Exception {
  MemoryRMStateStore memStore = new MemoryRMStateStore() {
    @Override
    public synchronized void checkVersion() throws Exception {
      throw new Exception("Invalid version.");
    }
  };
  // start RM
  memStore.init(conf);
  MockRM rm1 = null;
  try {
    rm1 = createMockRM(conf, memStore);
    rm1.start();
    Assert.fail("RM start should have failed because checkVersion throws.");
  } catch (Exception e) {
    // Guard against a null message so an unrelated failure is reported
    // with its cause instead of surfacing as a NullPointerException.
    String message = e.getMessage();
    Assert.assertTrue("Unexpected RM start failure: " + e, message != null && message.contains("Invalid version."));
  }
  // If createMockRM itself threw, rm1 is still null; report that explicitly.
  Assert.assertNotNull("MockRM was not created", rm1);
  // assertEquals reports the actual state when the check fails.
  Assert.assertEquals(STATE.STOPPED, rm1.getServiceState());
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestApplicationLifetimeMonitor method testUpdateApplicationTimeoutForStateStoreUpdateFail.
/**
 * Verifies the behaviour when the state store rejects an application timeout
 * update: the client call must fail with a YarnException whose message names
 * the application, the application's LIFETIME timeout must stay unchanged,
 * and the application must still finish (KILLED) after that original timeout.
 */
@Test(timeout = 60000)
public void testUpdateApplicationTimeoutForStateStoreUpdateFail() throws Exception {
  MockRM rm1 = null;
  try {
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    MemoryRMStateStore memStore = new MemoryRMStateStore() {
      private int count = 0;

      @Override
      public synchronized void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appState) throws Exception {
        // fail only the first update; every later update is delegated to the real store.
        if (count++ == 0) {
          throw new Exception("State-store update failed");
        }
        super.updateApplicationStateInternal(appId, appState);
      }
    };
    memStore.init(conf);
    rm1 = new MockRM(conf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm1.nodeHeartbeat(true);

    // submit app1 with an initial LIFETIME timeout of 30
    long appLifetime = 30L;
    Map<ApplicationTimeoutType, Long> timeouts = new HashMap<ApplicationTimeoutType, Long>();
    timeouts.put(ApplicationTimeoutType.LIFETIME, appLifetime);
    RMApp app1 = rm1.submitApp(200, Priority.newInstance(0), timeouts);

    // build an update request whose new expiry is now + newLifetime seconds, in ISO8601 form
    Map<ApplicationTimeoutType, String> updateTimeout = new HashMap<ApplicationTimeoutType, String>();
    long newLifetime = 10L;
    updateTimeout.put(ApplicationTimeoutType.LIFETIME, Times.formatISO8601(System.currentTimeMillis() + newLifetime * 1000));
    UpdateApplicationTimeoutsRequest request = UpdateApplicationTimeoutsRequest.newInstance(app1.getApplicationId(), updateTimeout);

    // remember the timeout value before the (failing) update
    Map<ApplicationTimeoutType, Long> applicationTimeouts = app1.getApplicationTimeouts();
    long beforeUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
    try {
      // try to update app1's lifetime; the store override above makes this first update fail
      rm1.getRMContext().getClientRMService().updateApplicationTimeouts(request);
      Assert.fail("Update application should fail.");
    } catch (YarnException e) {
      // expected
      Assert.assertTrue("State-store exception does not contain appId", e.getMessage().contains(app1.getApplicationId().toString()));
    }

    // the failed update must have left the old timeout value in place
    applicationTimeouts = app1.getApplicationTimeouts();
    long afterUpdate = applicationTimeouts.get(ApplicationTimeoutType.LIFETIME);
    Assert.assertEquals("Application timeout is updated", beforeUpdate, afterUpdate);

    // NOTE(review): presumably the lifetime monitor kills the app once the timeout passes - confirm
    rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
    // the app must not have finished before the (unchanged) lifetime value
    Assert.assertTrue("Application killed before lifetime value", app1.getFinishTime() > afterUpdate);
  } finally {
    stopRM(rm1);
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.
the class TestAbstractYarnScheduler method testReleasedContainerIfAppAttemptisNull.
/**
 * Tests that pending-release containers are cleared from an application
 * attempt even if one of the applications known to the scheduler has a null
 * current attempt (no attempt).
 */
@SuppressWarnings({ "rawtypes" })
@Test(timeout = 10000)
public void testReleasedContainerIfAppAttemptisNull() throws Exception {
  YarnConfiguration conf = getConf();
  conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
  MemoryRMStateStore memStore = new MemoryRMStateStore();
  memStore.init(conf);
  MockRM rm1 = new MockRM(conf, memStore);
  try {
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm1.getResourceScheduler();

    // Mock app without any attempt
    RMApp mockApp = new MockRMApp(125, System.currentTimeMillis(), RMAppState.NEW);
    SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>(null, mockApp.getUser());

    // Second app with one app attempt
    RMApp app = rm1.submitApp(200);
    MockAM am1 = MockRM.launchAndRegisterAM(app, rm1, nm1);

    // Ask the RM to release a container id under this attempt
    final ContainerId runningContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
    am1.allocate(null, Arrays.asList(runningContainer));

    // Look up the real scheduler app, then add the attempt-less one to the same map
    Map schedulerApplications = scheduler.getSchedulerApplications();
    SchedulerApplication schedulerApp = (SchedulerApplication) schedulerApplications.get(app.getApplicationId());
    schedulerApplications.put(mockApp.getApplicationId(), application);

    scheduler.clearPendingContainerCache();

    // expected value first, so a failure reports "expected:<0> but was:<n>"
    Assert.assertEquals("Pending containers are not released when one of the application attempt is null !", 0, schedulerApp.getCurrentAppAttempt().getPendingRelease().size());
  } finally {
    // rm1 is assigned before the try block, so it is never null here
    rm1.stop();
  }
}
Aggregations