use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestWorkPreservingRMRestart method testCapacityLeafQueueBecomesParentOnRecovery.
//Test behavior of an app if queue is changed from leaf to parent during
//recovery. Test case does following:
//1. Add an app to QueueB and start the attempt.
//2. Add 2 subqueues(QueueB1 and QueueB2) to QueueB, restart the RM, once with
// fail fast config as false and once with fail fast as true.
//3. Verify that app was killed if fail fast is false.
//4. Verify that QueueException was thrown if fail fast is true.
@Test(timeout = 30000)
public void testCapacityLeafQueueBecomesParentOnRecovery() throws Exception {
if (getSchedulerType() != SchedulerType.CAPACITY) {
return;
}
conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
setupQueueConfiguration(csConf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(csConf);
rm1 = new MockRM(csConf, memStore);
rm1.start();
MockNM nm = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
nm.registerNode();
// Submit an app to QueueB.
RMApp app = rm1.submitApp(1024, "app", USER_2, null, B);
MockRM.launchAndRegisterAM(app, rm1, nm);
assertEquals(rm1.getApplicationReport(app.getApplicationId()).getYarnApplicationState(), YarnApplicationState.RUNNING);
// Take a copy of state store so that it can be reset to this state.
RMState state = memStore.loadState();
// Change scheduler config with child queues added to QueueB.
csConf = new CapacitySchedulerConfiguration(conf);
setupQueueConfigurationChildOfB(csConf);
String diags = "Application killed on recovery as it was submitted to " + "queue QueueB which is no longer a leaf queue after restart.";
verifyAppRecoveryWithWrongQueueConfig(csConf, app, diags, memStore, state);
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestWorkPreservingRMRestart method testCapacitySchedulerQueueRemovedRecovery.
//Test behavior of an app if queue is removed during recovery. Test case does
//following:
//1. Add some apps to two queues, attempt to add an app to a non-existant
// queue to verify that the new logic is not in effect during normal app
// submission
//2. Remove one of the queues, restart the RM, once with fail fast config as
// false and once with fail fast as true.
//3. Verify that app was killed if fail fast is false.
//4. Verify that QueueException was thrown if fail fast is true.
@Test(timeout = 30000)
public void testCapacitySchedulerQueueRemovedRecovery() throws Exception {
if (getSchedulerType() != SchedulerType.CAPACITY) {
return;
}
conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
setupQueueConfiguration(csConf);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(csConf);
rm1 = new MockRM(csConf, memStore);
rm1.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
MockNM nm2 = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
nm1.registerNode();
nm2.registerNode();
RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A);
MockAM am1_1 = MockRM.launchAndRegisterAM(app1_1, rm1, nm1);
RMApp app1_2 = rm1.submitApp(1024, "app1_2", USER_1, null, A);
MockAM am1_2 = MockRM.launchAndRegisterAM(app1_2, rm1, nm2);
RMApp app2 = rm1.submitApp(1024, "app2", USER_2, null, B);
MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm2);
assertEquals(rm1.getApplicationReport(app2.getApplicationId()).getYarnApplicationState(), YarnApplicationState.RUNNING);
//Submit an app with a non existant queue to make sure it does not
//cause a fatal failure in the non-recovery case
RMApp appNA = rm1.submitApp(1024, "app1_2", USER_1, null, QUEUE_DOESNT_EXIST, false);
// clear queue metrics
rm1.clearQueueMetrics(app1_1);
rm1.clearQueueMetrics(app1_2);
rm1.clearQueueMetrics(app2);
// Take a copy of state store so that it can be reset to this state.
RMState state = memStore.loadState();
// Set new configuration with QueueB removed.
csConf = new CapacitySchedulerConfiguration(conf);
setupQueueConfigurationOnlyA(csConf);
String diags = "Application killed on recovery as it was submitted to " + "queue QueueB which no longer exists after restart.";
verifyAppRecoveryWithWrongQueueConfig(csConf, app2, diags, memStore, state);
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestRMAppTransitions method testAppsRecoveringStates.
@Test(timeout = 30000)
public void testAppsRecoveringStates() throws Exception {
RMState state = new RMState();
Map<ApplicationId, ApplicationStateData> applicationState = state.getApplicationState();
createRMStateForApplications(applicationState, RMAppState.FINISHED);
createRMStateForApplications(applicationState, RMAppState.KILLED);
createRMStateForApplications(applicationState, RMAppState.FAILED);
for (ApplicationStateData appState : applicationState.values()) {
testRecoverApplication(appState, state);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestRMAppTransitions method testCreateAppSubmittedRecovery.
protected RMApp testCreateAppSubmittedRecovery(ApplicationSubmissionContext submissionContext) throws IOException {
RMApp application = createNewTestApp(submissionContext);
// NEW => SUBMITTED event RMAppEventType.RECOVER
RMState state = new RMState();
ApplicationStateData appState = ApplicationStateData.newInstance(123, 123, null, "user", null);
state.getApplicationState().put(application.getApplicationId(), appState);
RMAppEvent event = new RMAppRecoverEvent(application.getApplicationId(), state);
application.handle(event);
assertStartTimeSet(application);
assertAppState(RMAppState.SUBMITTED, application);
return application;
}
use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.
the class TestRMDelegationTokens method testRemoveExpiredMasterKeyInRMStateStore.
// Test all expired keys are removed from state-store.
@Test(timeout = 15000)
public void testRemoveExpiredMasterKeyInRMStateStore() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(testConf);
RMState rmState = memStore.getState();
Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
MockRM rm1 = new MyMockRM(testConf, memStore);
rm1.start();
RMDelegationTokenSecretManager dtSecretManager = rm1.getRMContext().getRMDelegationTokenSecretManager();
// assert all master keys are saved
Assert.assertEquals(dtSecretManager.getAllMasterKeys(), rmDTMasterKeyState);
Set<DelegationKey> expiringKeys = new HashSet<DelegationKey>();
expiringKeys.addAll(dtSecretManager.getAllMasterKeys());
// wait for expiringKeys to expire
while (true) {
boolean allExpired = true;
for (DelegationKey key : expiringKeys) {
if (rmDTMasterKeyState.contains(key)) {
allExpired = false;
}
}
if (allExpired)
break;
Thread.sleep(500);
}
rm1.stop();
}
Aggregations