Search in sources :

Example 1 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestWorkPreservingRMRestart method testCapacityLeafQueueBecomesParentOnRecovery.

// Tests the behavior of an app when its queue is changed from a leaf queue to
// a parent queue during recovery. The test case does the following:
// 1. Adds an app to QueueB and starts the attempt.
// 2. Adds 2 subqueues (QueueB1 and QueueB2) to QueueB and restarts the RM,
//    once with the fail-fast config as false and once with it as true.
// 3. Verifies that the app was killed if fail fast is false.
// 4. Verifies that QueueException was thrown if fail fast is true.
// NOTE(review): the restart/verification steps are presumably performed by
// verifyAppRecoveryWithWrongQueueConfig, which is not visible here.
@Test(timeout = 30000)
public void testCapacityLeafQueueBecomesParentOnRecovery() throws Exception {
    // This scenario only applies to the capacity scheduler; do nothing otherwise.
    if (getSchedulerType() != SchedulerType.CAPACITY) {
        return;
    }
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfiguration(csConf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(csConf);
    rm1 = new MockRM(csConf, memStore);
    rm1.start();
    MockNM nm = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
    nm.registerNode();
    // Submit an app to QueueB.
    RMApp app = rm1.submitApp(1024, "app", USER_2, null, B);
    MockRM.launchAndRegisterAM(app, rm1, nm);
    // JUnit's argument order is (expected, actual); keeping it that way makes
    // a failure message label the two values correctly.
    assertEquals(YarnApplicationState.RUNNING, rm1.getApplicationReport(app.getApplicationId()).getYarnApplicationState());
    // Take a copy of state store so that it can be reset to this state.
    RMState state = memStore.loadState();
    // Change scheduler config with child queues added to QueueB.
    csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfigurationChildOfB(csConf);
    String diags = "Application killed on recovery as it was submitted to " + "queue QueueB which is no longer a leaf queue after restart.";
    verifyAppRecoveryWithWrongQueueConfig(csConf, app, diags, memStore, state);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Test(org.junit.Test)

Example 2 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestWorkPreservingRMRestart method testCapacitySchedulerQueueRemovedRecovery.

// Tests the behavior of an app when its queue is removed during recovery. The
// test case does the following:
// 1. Adds some apps to two queues, and attempts to add an app to a
//    non-existent queue to verify that the new logic is not in effect during
//    normal app submission.
// 2. Removes one of the queues and restarts the RM, once with the fail-fast
//    config as false and once with it as true.
// 3. Verifies that the app was killed if fail fast is false.
// 4. Verifies that QueueException was thrown if fail fast is true.
// NOTE(review): the restart/verification steps are presumably performed by
// verifyAppRecoveryWithWrongQueueConfig, which is not visible here.
@Test(timeout = 30000)
public void testCapacitySchedulerQueueRemovedRecovery() throws Exception {
    // This scenario only applies to the capacity scheduler; do nothing otherwise.
    if (getSchedulerType() != SchedulerType.CAPACITY) {
        return;
    }
    conf.setBoolean(CapacitySchedulerConfiguration.ENABLE_USER_METRICS, true);
    conf.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName());
    CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfiguration(csConf);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(csConf);
    rm1 = new MockRM(csConf, memStore);
    rm1.start();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
    MockNM nm2 = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService());
    nm1.registerNode();
    nm2.registerNode();
    // Two apps in queue A and one in queue B. The AM handles returned by
    // launchAndRegisterAM are not needed afterwards, so they are not kept.
    RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A);
    MockRM.launchAndRegisterAM(app1_1, rm1, nm1);
    RMApp app1_2 = rm1.submitApp(1024, "app1_2", USER_1, null, A);
    MockRM.launchAndRegisterAM(app1_2, rm1, nm2);
    RMApp app2 = rm1.submitApp(1024, "app2", USER_2, null, B);
    MockRM.launchAndRegisterAM(app2, rm1, nm2);
    // JUnit's argument order is (expected, actual); keeping it that way makes
    // a failure message label the two values correctly.
    assertEquals(YarnApplicationState.RUNNING, rm1.getApplicationReport(app2.getApplicationId()).getYarnApplicationState());
    // Submit an app with a non-existent queue to make sure it does not
    // cause a fatal failure in the non-recovery case. Only the call matters;
    // the returned app is not inspected.
    rm1.submitApp(1024, "app1_2", USER_1, null, QUEUE_DOESNT_EXIST, false);
    // clear queue metrics
    rm1.clearQueueMetrics(app1_1);
    rm1.clearQueueMetrics(app1_2);
    rm1.clearQueueMetrics(app2);
    // Take a copy of state store so that it can be reset to this state.
    RMState state = memStore.loadState();
    // Set new configuration with QueueB removed.
    csConf = new CapacitySchedulerConfiguration(conf);
    setupQueueConfigurationOnlyA(csConf);
    String diags = "Application killed on recovery as it was submitted to " + "queue QueueB which no longer exists after restart.";
    verifyAppRecoveryWithWrongQueueConfig(csConf, app2, diags, memStore, state);
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Test(org.junit.Test)

Example 3 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMAppTransitions method testAppsRecoveringStates.

// Seeds a fresh RMState with application entries for the FINISHED, KILLED and
// FAILED app states (in that order), then runs testRecoverApplication on every
// stored entry.
@Test(timeout = 30000)
public void testAppsRecoveringStates() throws Exception {
    final RMState rmState = new RMState();
    final Map<ApplicationId, ApplicationStateData> storedApps = rmState.getApplicationState();
    final RMAppState[] statesToSeed = { RMAppState.FINISHED, RMAppState.KILLED, RMAppState.FAILED };
    for (RMAppState seedState : statesToSeed) {
        createRMStateForApplications(storedApps, seedState);
    }
    for (ApplicationStateData storedApp : storedApps.values()) {
        testRecoverApplication(storedApp, rmState);
    }
}
Also used : ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 4 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMAppTransitions method testCreateAppSubmittedRecovery.

// Creates a new test app from the given submission context, delivers it a
// recover event backed by an RMState that holds a stored entry for that app,
// and asserts that the start time is set and the app is in SUBMITTED state.
// Returns the app so callers can continue driving it.
protected RMApp testCreateAppSubmittedRecovery(ApplicationSubmissionContext submissionContext) throws IOException {
    final RMApp recoveredApp = createNewTestApp(submissionContext);
    // NEW => SUBMITTED event RMAppEventType.RECOVER
    final RMState storedState = new RMState();
    final ApplicationStateData storedAppData = ApplicationStateData.newInstance(123, 123, null, "user", null);
    storedState.getApplicationState().put(recoveredApp.getApplicationId(), storedAppData);
    recoveredApp.handle(new RMAppRecoverEvent(recoveredApp.getApplicationId(), storedState));
    assertStartTimeSet(recoveredApp);
    assertAppState(RMAppState.SUBMITTED, recoveredApp);
    return recoveredApp;
}
Also used : ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)

Example 5 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMDelegationTokens method testRemoveExpiredMasterKeyInRMStateStore.

// Test all expired keys are removed from state-store.
// Starts an RM on an in-memory state store, checks that the master keys known
// to the delegation-token secret manager match the ones in the store, then
// polls until none of those keys remain in the store. The @Test timeout is
// the only bound on the polling loop.
@Test(timeout = 15000)
public void testRemoveExpiredMasterKeyInRMStateStore() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(testConf);
    RMState rmState = memStore.getState();
    Set<DelegationKey> rmDTMasterKeyState = rmState.getRMDTSecretManagerState().getMasterKeyState();
    MockRM rm1 = new MyMockRM(testConf, memStore);
    rm1.start();
    try {
        RMDelegationTokenSecretManager dtSecretManager = rm1.getRMContext().getRMDelegationTokenSecretManager();
        // assert all master keys are saved
        Assert.assertEquals(dtSecretManager.getAllMasterKeys(), rmDTMasterKeyState);
        // Snapshot the current keys; these are the ones expected to expire.
        Set<DelegationKey> expiringKeys = new HashSet<DelegationKey>(dtSecretManager.getAllMasterKeys());
        // wait for expiringKeys to expire
        while (true) {
            boolean allExpired = true;
            for (DelegationKey key : expiringKeys) {
                if (rmDTMasterKeyState.contains(key)) {
                    // One surviving key is enough to know we must keep waiting.
                    allExpired = false;
                    break;
                }
            }
            if (allExpired) {
                break;
            }
            Thread.sleep(500);
        }
    } finally {
        // Stop the RM even when the assertion or the wait fails, so a failing
        // run does not leave a started RM behind.
        rm1.stop();
    }
}
Also used : MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) DelegationKey(org.apache.hadoop.security.token.delegation.DelegationKey) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) TestSecurityMockRM(org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)31 Test (org.junit.Test)24 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)21 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)21 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)20 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)18 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)8 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)6 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)6 ReservationId (org.apache.hadoop.yarn.api.records.ReservationId)5 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)5 Configuration (org.apache.hadoop.conf.Configuration)4 ReservationAllocationStateProto (org.apache.hadoop.yarn.proto.YarnProtos.ReservationAllocationStateProto)4 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)4 ApplicationAttemptStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 Credentials (org.apache.hadoop.security.Credentials)3 DelegationKey (org.apache.hadoop.security.token.delegation.DelegationKey)3 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)3