Search in sources :

Example 26 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestReservationSystemWithRMHA method testDeleteReservationAndCheckAfterFailover.

/**
 * Submits a reservation through rm1, deletes it again, triggers
 * {@code explicitFailover()} and then checks that the state loaded from
 * rm2's state store holds no reservation entry for
 * {@code ReservationSystemTestUtil.reservationQ}.
 *
 * @throws Exception if any RM interaction fails; the exception is propagated
 *         unchanged so the test report keeps its type and stack trace.
 */
@Test
public void testDeleteReservationAndCheckAfterFailover() throws Exception {
    startRMs();
    addNodeCapacityToPlan(rm1, 102400, 100);
    ClientRMService clientService = rm1.getClientRMService();
    ReservationId reservationID = getNewReservation(clientService).getReservationId();
    // Check the id before it is used to build the request.
    Assert.assertNotNull(reservationID);
    // Create a reservation. A failure here is deliberately not caught:
    // converting it to Assert.fail(e.getMessage()) would drop the stack trace
    // (and the message itself may be null).
    ReservationSubmissionRequest request = createReservationSubmissionRequest(reservationID);
    ReservationSubmissionResponse response = clientService.submitReservation(request);
    Assert.assertNotNull(response);
    // Delete the reservation
    ReservationDeleteRequest deleteRequest = ReservationDeleteRequest.newInstance(reservationID);
    clientService.deleteReservation(deleteRequest);
    // Do the failover
    explicitFailover();
    rm2.registerNode("127.0.0.1:1", 102400, 100);
    // The deleted reservation must not show up in the state seen by rm2.
    RMState state = rm2.getRMContext().getStateStore().loadState();
    Assert.assertNull(state.getReservationState().get(ReservationSystemTestUtil.reservationQ));
}
Also used : ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) ReservationSubmissionRequest(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest) ReservationDeleteRequest(org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteRequest) ReservationSubmissionResponse(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 27 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestReservationSystemWithRMHA method testSubmitReservationAndCheckAfterFailover.

/**
 * Submits a reservation through rm1, triggers {@code explicitFailover()} and
 * then checks that the state loaded from rm2's state store contains an entry
 * for that reservation under {@code ReservationSystemTestUtil.reservationQ}.
 *
 * @throws Exception if any RM interaction fails; the exception is propagated
 *         unchanged so the test report keeps its type and stack trace.
 */
@Test
public void testSubmitReservationAndCheckAfterFailover() throws Exception {
    startRMs();
    addNodeCapacityToPlan(rm1, 102400, 100);
    ClientRMService clientService = rm1.getClientRMService();
    ReservationId reservationID = getNewReservation(clientService).getReservationId();
    // Check the id before it is used to build the request.
    Assert.assertNotNull(reservationID);
    // Create a reservation. A failure here is deliberately not caught:
    // converting it to Assert.fail(e.getMessage()) would drop the stack trace
    // (and the message itself may be null).
    ReservationSubmissionRequest request = createReservationSubmissionRequest(reservationID);
    ReservationSubmissionResponse response = clientService.submitReservation(request);
    Assert.assertNotNull(response);
    LOG.info("Submit reservation response: " + reservationID);
    // Do the failover
    explicitFailover();
    rm2.registerNode("127.0.0.1:1", 102400, 100);
    // The submitted reservation must be present in the state seen by rm2.
    RMState state = rm2.getRMContext().getStateStore().loadState();
    Map<ReservationId, ReservationAllocationStateProto> reservationStateMap = state.getReservationState().get(ReservationSystemTestUtil.reservationQ);
    Assert.assertNotNull(reservationStateMap);
    Assert.assertNotNull(reservationStateMap.get(reservationID));
}
Also used : ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) ReservationSubmissionRequest(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest) ReservationSubmissionResponse(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ReservationAllocationStateProto(org.apache.hadoop.yarn.proto.YarnProtos.ReservationAllocationStateProto) Test(org.junit.Test)

Example 28 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestReservationSystemWithRMHA method testUpdateReservationAndCheckAfterFailover.

/**
 * Submits a reservation through rm1, updates its deadline, triggers
 * {@code explicitFailover()} and then checks that the state loaded from
 * rm2's state store carries the updated deadline for that reservation.
 *
 * @throws Exception if any RM interaction fails; the exception is propagated
 *         unchanged so the test report keeps its type and stack trace.
 */
@Test
public void testUpdateReservationAndCheckAfterFailover() throws Exception {
    startRMs();
    addNodeCapacityToPlan(rm1, 102400, 100);
    ClientRMService clientService = rm1.getClientRMService();
    ReservationId reservationID = getNewReservation(clientService).getReservationId();
    // Check the id before it is used to build the request.
    Assert.assertNotNull(reservationID);
    // Create a reservation. A failure here is deliberately not caught:
    // converting it to Assert.fail(e.getMessage()) would drop the stack trace
    // (and the message itself may be null).
    ReservationSubmissionRequest request = createReservationSubmissionRequest(reservationID);
    ReservationSubmissionResponse response = clientService.submitReservation(request);
    Assert.assertNotNull(response);
    LOG.info("Submit reservation response: " + reservationID);
    ReservationDefinition reservationDefinition = request.getReservationDefinition();
    // Change any field; the deadline is used as the marker for the update.
    long newDeadline = reservationDefinition.getDeadline() + 100;
    reservationDefinition.setDeadline(newDeadline);
    ReservationUpdateRequest updateRequest = ReservationUpdateRequest.newInstance(reservationDefinition, reservationID);
    rm1.updateReservationState(updateRequest);
    // Do the failover
    explicitFailover();
    rm2.registerNode("127.0.0.1:1", 102400, 100);
    RMState state = rm2.getRMContext().getStateStore().loadState();
    Map<ReservationId, ReservationAllocationStateProto> reservationStateMap = state.getReservationState().get(ReservationSystemTestUtil.reservationQ);
    Assert.assertNotNull(reservationStateMap);
    ReservationAllocationStateProto reservationState = reservationStateMap.get(reservationID);
    // Fail with an assertion rather than a NullPointerException if the
    // reservation entry is missing after the failover.
    Assert.assertNotNull(reservationState);
    Assert.assertEquals(newDeadline, reservationState.getReservationDefinition().getDeadline());
}
Also used : ReservationUpdateRequest(org.apache.hadoop.yarn.api.protocolrecords.ReservationUpdateRequest) ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) ReservationDefinition(org.apache.hadoop.yarn.api.records.ReservationDefinition) ReservationSubmissionRequest(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest) ReservationSubmissionResponse(org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionResponse) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) ReservationAllocationStateProto(org.apache.hadoop.yarn.proto.YarnProtos.ReservationAllocationStateProto) Test(org.junit.Test)

Example 29 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testRMRestartWaitForPreviousAMToFinish.

/**
 * Restarts the RM several times against the same {@code MemoryRMStateStore}
 * and checks when a new application attempt is (or is not) created.
 *
 * @throws Exception if any RM/NM interaction or wait fails.
 */
@Test(timeout = 60000)
public void testRMRestartWaitForPreviousAMToFinish() throws Exception {
    // testing 3 cases
    // After RM restarts
    // 1) New application attempt is not started until previous AM container
    // finish event is reported back to RM as a part of nm registration.
    // 2) If previous AM container finish event is never reported back (i.e.
    // node manager on which this AM container was running also went down) in
    // that case AMLivelinessMonitor should time out previous attempt and start
    // new attempt.
    // 3) If all the stored attempts had finished then new attempt should
    // be started immediately.
    YarnConfiguration conf = new YarnConfiguration(this.conf);
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 40);
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    RMState rmState = memStore.getState();
    Map<ApplicationId, ApplicationStateData> rmAppState = rmState.getApplicationState();
    // start RM
    final MockRM rm1 = createMockRM(conf, memStore);
    rm1.start();
    AbstractYarnScheduler ys = (AbstractYarnScheduler) rm1.getResourceScheduler();
    MockNM nm1 = new MockNM("127.0.0.1:1234", 16382, rm1.getResourceTrackerService());
    nm1.registerNode();
    // submitting app
    RMApp app1 = rm1.submitApp(200);
    rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    MockAM am1 = launchAM(app1, rm1, nm1);
    nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    // Fail first AM.
    rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(ys, am1.getApplicationAttemptId());
    // launch another AM.
    MockAM am2 = launchAM(app1, rm1, nm1);
    Assert.assertEquals(1, rmAppState.size());
    // assertEquals takes (expected, actual); keep that order so failure
    // messages report the right side as "expected".
    Assert.assertEquals(RMAppState.RUNNING, app1.getState());
    Assert.assertEquals(RMAppAttemptState.RUNNING, app1.getAppAttempts().get(app1.getCurrentAppAttempt().getAppAttemptId()).getAppAttemptState());
    //  start new RM.
    MockRM rm2 = createMockRM(conf, memStore);
    rm2.start();
    nm1.setResourceTrackerService(rm2.getResourceTrackerService());
    NodeHeartbeatResponse res = nm1.nodeHeartbeat(true);
    Assert.assertEquals(NodeAction.RESYNC, res.getNodeAction());
    RMApp rmApp = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
    // application should be in ACCEPTED state
    rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState());
    // new attempt should not be started
    Assert.assertEquals(2, rmApp.getAppAttempts().size());
    // am1 attempt should be in FAILED state where as am2 attempt should be in
    // LAUNCHED state
    rm2.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.LAUNCHED);
    Assert.assertEquals(RMAppAttemptState.FAILED, rmApp.getAppAttempts().get(am1.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(RMAppAttemptState.LAUNCHED, rmApp.getAppAttempts().get(am2.getApplicationAttemptId()).getAppAttemptState());
    // Case 1: report am2's container as COMPLETE through NM registration.
    NMContainerStatus status = TestRMRestart.createNMContainerStatus(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    nm1.registerNode(Arrays.asList(status), null);
    rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    ys = (AbstractYarnScheduler) rm2.getResourceScheduler();
    TestSchedulerUtils.waitSchedulerApplicationAttemptStopped(ys, am2.getApplicationAttemptId());
    launchAM(rmApp, rm2, nm1);
    Assert.assertEquals(3, rmApp.getAppAttempts().size());
    rm2.waitForState(rmApp.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.RUNNING);
    // Now restart RM ...
    // Setting AMLivelinessMonitor interval to be 10 Secs.
    conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 10000);
    MockRM rm3 = createMockRM(conf, memStore);
    rm3.start();
    // Wait for RM to process all the events as a part of rm recovery.
    nm1.setResourceTrackerService(rm3.getResourceTrackerService());
    rmApp = rm3.getRMContext().getRMApps().get(app1.getApplicationId());
    // application should be in ACCEPTED state
    rm3.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState());
    // new attempt should not be started
    Assert.assertEquals(3, rmApp.getAppAttempts().size());
    // am1 and am2 attempts should be in FAILED state where as am3 should be
    // in LAUNCHED state
    rm3.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    rm3.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    ApplicationAttemptId latestAppAttemptId = rmApp.getCurrentAppAttempt().getAppAttemptId();
    rm3.waitForState(latestAppAttemptId, RMAppAttemptState.LAUNCHED);
    Assert.assertEquals(RMAppAttemptState.FAILED, rmApp.getAppAttempts().get(am1.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(RMAppAttemptState.FAILED, rmApp.getAppAttempts().get(am2.getApplicationAttemptId()).getAppAttemptState());
    Assert.assertEquals(RMAppAttemptState.LAUNCHED, rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());
    // Case 2: no container status is reported, so the third attempt is only
    // expected to fail once the configured AM expiry interval has elapsed.
    rm3.waitForState(latestAppAttemptId, RMAppAttemptState.FAILED);
    rm3.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED);
    final int maxRetry = 10;
    final int checkIntervalMs = 100;
    final RMApp rmAppForCheck = rmApp;
    // The third argument of waitFor is the total wait in milliseconds, not a
    // retry count; scale it by the check interval so that up to maxRetry
    // checks can actually happen.
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            return rmAppForCheck.getAppAttempts().size() == 4;
        }
    }, checkIntervalMs, checkIntervalMs * maxRetry);
    Assert.assertEquals(RMAppAttemptState.FAILED, rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());
    latestAppAttemptId = rmApp.getCurrentAppAttempt().getAppAttemptId();
    // The 4th attempt has started but is not yet saved into RMStateStore
    // It will be saved only when we launch AM.
    // submitting app but not starting AM for it.
    RMApp app2 = rm3.submitApp(200);
    rm3.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(1, app2.getAppAttempts().size());
    Assert.assertEquals(0, memStore.getState().getApplicationState().get(app2.getApplicationId()).getAttemptCount());
    // Case 3: all stored attempts of app1 have finished.
    MockRM rm4 = createMockRM(conf, memStore);
    rm4.start();
    rmApp = rm4.getRMContext().getRMApps().get(app1.getApplicationId());
    rm4.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED);
    // Wait (at most 40 * 200 ms) for the 4th attempt to be created. The
    // polled count must match the count asserted below, otherwise the loop
    // can never exit early.
    int retries = 0;
    while (rmApp.getAppAttempts().size() != 4 && retries++ < 40) {
        Thread.sleep(200);
    }
    Assert.assertEquals(4, rmApp.getAppAttempts().size());
    Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState());
    rm4.waitForState(latestAppAttemptId, RMAppAttemptState.SCHEDULED);
    Assert.assertEquals(RMAppAttemptState.SCHEDULED, rmApp.getAppAttempts().get(latestAppAttemptId).getAppAttemptState());
    // The initial application for which an AM was not started should be in
    // ACCEPTED state with one application attempt started.
    app2 = rm4.getRMContext().getRMApps().get(app2.getApplicationId());
    rm4.waitForState(app2.getApplicationId(), RMAppState.ACCEPTED);
    Assert.assertEquals(RMAppState.ACCEPTED, app2.getState());
    Assert.assertEquals(1, app2.getAppAttempts().size());
    rm4.waitForState(app2.getCurrentAppAttempt().getAppAttemptId(), RMAppAttemptState.SCHEDULED);
    Assert.assertEquals(RMAppAttemptState.SCHEDULED, app2.getCurrentAppAttempt().getAppAttemptState());
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) NodeHeartbeatResponse(org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 30 with RMState

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState in project hadoop by apache.

the class TestRMRestart method testAppRecoveredInOrderOnRMRestart.

/**
 * Seeds a {@code MemoryRMStateStore} with ten mocked applications, inserted
 * with ids 10 down to 1, and starts a {@code MockRM} whose app manager
 * asserts that every application passed to {@code recoverApplication} has an
 * id greater than the one passed before it.
 *
 * @throws Exception if starting or stopping the RM fails.
 */
@Test(timeout = 20000)
public void testAppRecoveredInOrderOnRMRestart() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    // Insert in descending id order so the order check below is meaningful.
    for (int seq = 10; seq >= 1; seq--) {
        ApplicationSubmissionContext submissionContext = mock(ApplicationSubmissionContext.class);
        when(submissionContext.getApplicationId()).thenReturn(ApplicationId.newInstance(1234, seq));
        ApplicationStateData storedApp = mock(ApplicationStateData.class);
        when(storedApp.getApplicationSubmissionContext()).thenReturn(submissionContext);
        ApplicationId storedId = storedApp.getApplicationSubmissionContext().getApplicationId();
        memStore.getState().getApplicationState().put(storedId, storedApp);
    }
    MockRM rm1 = new MockRM(conf, memStore) {

        /** App manager that only verifies the order of recovered applications. */
        class OrderCheckingAppManager extends RMAppManager {

            // Id of the most recently recovered application; starts below
            // every id seeded above.
            ApplicationId lastRecoveredId = ApplicationId.newInstance(1234, 0);

            public OrderCheckingAppManager(RMContext context, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationACLsManager applicationACLsManager, Configuration conf) {
                super(context, scheduler, masterService, applicationACLsManager, conf);
            }

            @Override
            protected void recoverApplication(ApplicationStateData appState, RMState rmState) throws Exception {
                // check application is recovered in order.
                Assert.assertTrue(!rmState.getApplicationState().isEmpty());
                ApplicationId currentId = appState.getApplicationSubmissionContext().getApplicationId();
                Assert.assertTrue(currentId.compareTo(lastRecoveredId) > 0);
                lastRecoveredId = currentId;
            }
        }

        @Override
        protected RMAppManager createRMAppManager() {
            return new OrderCheckingAppManager(this.rmContext, this.scheduler, this.masterService, this.applicationACLsManager, conf);
        }
    };
    try {
        rm1.start();
    } finally {
        rm1.stop();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) YarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Aggregations

RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)31 Test (org.junit.Test)24 MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)21 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)21 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)20 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)18 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)8 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)6 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)6 ReservationId (org.apache.hadoop.yarn.api.records.ReservationId)5 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)5 Configuration (org.apache.hadoop.conf.Configuration)4 ReservationAllocationStateProto (org.apache.hadoop.yarn.proto.YarnProtos.ReservationAllocationStateProto)4 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)4 ApplicationAttemptStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData)4 HashMap (java.util.HashMap)3 HashSet (java.util.HashSet)3 Credentials (org.apache.hadoop.security.Credentials)3 DelegationKey (org.apache.hadoop.security.token.delegation.DelegationKey)3 ReservationSubmissionRequest (org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest)3