Search in sources :

Example 76 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testAppRecoveredInOrderOnRMRestart.

/**
 * Seeds an in-memory state store with ten mocked applications, inserted with
 * descending ids, and asserts that the RM hands them to
 * {@code recoverApplication} in strictly increasing id order when it starts.
 */
@Test(timeout = 20000)
public void testAppRecoveredInOrderOnRMRestart() throws Exception {
    MemoryRMStateStore store = new MemoryRMStateStore();
    store.init(conf);
    // Insert ids 10 down to 1 so that insertion order is the reverse of id order.
    for (int seq = 10; seq >= 1; seq--) {
        ApplicationSubmissionContext submissionContext = mock(ApplicationSubmissionContext.class);
        when(submissionContext.getApplicationId()).thenReturn(ApplicationId.newInstance(1234, seq));
        ApplicationStateData storedApp = mock(ApplicationStateData.class);
        when(storedApp.getApplicationSubmissionContext()).thenReturn(submissionContext);
        ApplicationId key = storedApp.getApplicationSubmissionContext().getApplicationId();
        store.getState().getApplicationState().put(key, storedApp);
    }
    MockRM rm1 = new MockRM(conf, store) {

        class TestRMAppManager extends RMAppManager {

            // Id of the most recently recovered application; starts below every seeded id.
            ApplicationId lastRecoveredId = ApplicationId.newInstance(1234, 0);

            public TestRMAppManager(RMContext context, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationACLsManager applicationACLsManager, Configuration conf) {
                super(context, scheduler, masterService, applicationACLsManager, conf);
            }

            @Override
            protected void recoverApplication(ApplicationStateData appState, RMState rmState) throws Exception {
                // The recovered state must not be empty.
                Assert.assertTrue(rmState.getApplicationState().size() > 0);
                // Each application must carry a larger id than the one recovered before it.
                ApplicationId currentId = appState.getApplicationSubmissionContext().getApplicationId();
                Assert.assertTrue(currentId.compareTo(lastRecoveredId) > 0);
                lastRecoveredId = currentId;
            }
        }

        @Override
        protected RMAppManager createRMAppManager() {
            return new TestRMAppManager(this.rmContext, this.scheduler, this.masterService, this.applicationACLsManager, conf);
        }
    };
    try {
        rm1.start();
    } finally {
        rm1.stop();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) AbstractYarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler) YarnScheduler(org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler) ApplicationSubmissionContext(org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 77 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMRestartFailedApp.

/**
 * Fails an application's only AM attempt on a first RM, checks that the FAILED
 * app/attempt states were written to the state store, then starts a second RM
 * on the same store and checks that the app is loaded as FAILED with no new
 * attempt created.
 */
@Test(timeout = 60000)
public void testRMRestartFailedApp() throws Exception {
    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1);
    MemoryRMStateStore store = new MemoryRMStateStore();
    store.init(conf);
    Map<ApplicationId, ApplicationStateData> savedApps = store.getState().getApplicationState();

    // Bring up the first RM with one registered node.
    MockRM firstRm = createMockRM(conf, store);
    firstRm.start();
    MockNM node = new MockNM("127.0.0.1:1234", 15120, firstRm.getResourceTrackerService());
    node.registerNode();

    // Submit an app and launch its AM.
    RMApp app = firstRm.submitApp(200);
    MockAM am = launchAM(app, firstRm, node);
    ApplicationId appId = app.getApplicationId();

    // Fail the AM: report its container as COMPLETE without the AM registering.
    node.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    firstRm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FAILED);
    firstRm.waitForState(appId, RMAppState.FAILED);

    // The FAILED state of both the app and its attempt must be in the store.
    ApplicationStateData savedApp = savedApps.get(appId);
    Assert.assertEquals(RMAppState.FAILED, savedApp.getState());
    Assert.assertEquals(RMAppAttemptState.FAILED, savedApp.getAttempt(am.getApplicationAttemptId()).getState());

    // Start a second RM on the same store.
    MockRM secondRm = createMockRM(conf, store);
    secondRm.start();
    RMApp recoveredApp = secondRm.getRMContext().getRMApps().get(appId);
    secondRm.waitForState(appId, RMAppState.FAILED);
    secondRm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FAILED);

    // Recovery must not have created another attempt.
    Assert.assertEquals(1, recoveredApp.getAppAttempts().size());
    verifyAppReportAfterRMRestart(app, secondRm);
    String diagnostics = app.getDiagnostics().toString();
    Assert.assertTrue(diagnostics.contains("Failing the application."));
    // Only the app-level diagnostics are checked: the attempt's failure
    // diagnostics are lost because they are not yet available at the time
    // the app state is saved.
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) RMState(org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState) Test(org.junit.Test)

Example 78 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestRMRestart method testRMShutdown.

// Tests that the RM does not hang on shutdown after a failed start: the state
// store's checkVersion() is overridden to throw, start() is expected to
// surface that failure, and the RM must afterwards report the STOPPED state.
@Test(timeout = 10000)
public void testRMShutdown() throws Exception {
    MemoryRMStateStore memStore = new MemoryRMStateStore() {

        @Override
        public synchronized void checkVersion() throws Exception {
            throw new Exception("Invalid version.");
        }
    };
    // start RM
    memStore.init(conf);
    MockRM rm1 = null;
    try {
        rm1 = createMockRM(conf, memStore);
        rm1.start();
        Assert.fail("RM start should fail when the state store version check throws.");
    } catch (Exception e) {
        // Guard against a null message so an unrelated exception is reported
        // as an assertion failure instead of a NullPointerException.
        String message = e.getMessage();
        Assert.assertTrue("Unexpected exception: " + e, message != null && message.contains("Invalid version."));
    }
    // If creating the RM itself failed there is nothing to inspect below.
    Assert.assertNotNull("RM was never created.", rm1);
    // assertEquals reports the actual service state when the check fails.
    Assert.assertEquals(STATE.STOPPED, rm1.getServiceState());
}
Also used : MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) Test(org.junit.Test)

Example 79 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestApplicationLifetimeMonitor method testUpdateApplicationTimeoutForStateStoreUpdateFail.

/**
 * Uses a state store whose first application-state update throws, requests a
 * LIFETIME timeout update for a running application, and checks that the
 * request fails with a {@code YarnException} mentioning the application id,
 * that the application's timeout value is unchanged, and that the application
 * is eventually KILLED with a finish time later than that timeout value.
 */
@Test(timeout = 60000)
public void testUpdateApplicationTimeoutForStateStoreUpdateFail() throws Exception {
    MockRM rm = null;
    try {
        conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
        // Store that throws on the first updateApplicationStateInternal call
        // and defers to the superclass on every later call.
        MemoryRMStateStore flakyStore = new MemoryRMStateStore() {

            private int updateCalls = 0;

            @Override
            public synchronized void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appState) throws Exception {
                boolean firstCall = updateCalls == 0;
                updateCalls++;
                if (firstCall) {
                    throw new Exception("State-store update failed");
                }
                super.updateApplicationStateInternal(appId, appState);
            }
        };
        flakyStore.init(conf);
        rm = new MockRM(conf, flakyStore);
        rm.start();
        MockNM node = new MockNM("127.0.0.1:1234", 8192, rm.getResourceTrackerService());
        node.registerNode();
        node.nodeHeartbeat(true);

        // Submit an application with a LIFETIME timeout of 30.
        long initialLifetime = 30L;
        Map<ApplicationTimeoutType, Long> submitTimeouts = new HashMap<ApplicationTimeoutType, Long>();
        submitTimeouts.put(ApplicationTimeoutType.LIFETIME, initialLifetime);
        RMApp app = rm.submitApp(200, Priority.newInstance(0), submitTimeouts);

        // Build an update request whose LIFETIME is the ISO8601 form of "now + 10 seconds".
        long extraSeconds = 10L;
        String requestedExpiry = Times.formatISO8601(System.currentTimeMillis() + extraSeconds * 1000);
        Map<ApplicationTimeoutType, String> requestedTimeouts = new HashMap<ApplicationTimeoutType, String>();
        requestedTimeouts.put(ApplicationTimeoutType.LIFETIME, requestedExpiry);
        UpdateApplicationTimeoutsRequest request = UpdateApplicationTimeoutsRequest.newInstance(app.getApplicationId(), requestedTimeouts);

        // Timeout value held by the application before the update attempt.
        long beforeUpdate = app.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);
        try {
            // The update is expected to fail because of the failing state store.
            rm.getRMContext().getClientRMService().updateApplicationTimeouts(request);
            fail("Update application should fail.");
        } catch (YarnException e) {
            // expected; the error must identify the application
            String appIdText = app.getApplicationId().toString();
            assertTrue("State-store exception does not containe appId", e.getMessage().contains(appIdText));
        }

        // The failed update must have left the timeout value untouched.
        long afterUpdate = app.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);
        Assert.assertEquals("Application timeout is updated", beforeUpdate, afterUpdate);

        rm.waitForState(app.getApplicationId(), RMAppState.KILLED);
        // The application must not have finished before its (unchanged) timeout value.
        Assert.assertTrue("Application killed before lifetime value", app.getFinishTime() > afterUpdate);
    } finally {
        stopRM(rm);
    }
}
Also used : HashMap(java.util.HashMap) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) ApplicationTimeoutType(org.apache.hadoop.yarn.api.records.ApplicationTimeoutType) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) UpdateApplicationTimeoutsRequest(org.apache.hadoop.yarn.api.protocolrecords.UpdateApplicationTimeoutsRequest) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Test(org.junit.Test)

Example 80 with MemoryRMStateStore

use of org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore in project hadoop by apache.

the class TestAbstractYarnScheduler method testReleasedContainerIfAppAttemptisNull.

/*
   * This test case checks that pending-release containers are cleared from an
   * application's current attempt even if one of the applications registered
   * with the scheduler has no current attempt (null).
   */
@SuppressWarnings({ "rawtypes" })
@Test(timeout = 10000)
public void testReleasedContainerIfAppAttemptisNull() throws Exception {
    YarnConfiguration conf = getConf();
    conf.set(YarnConfiguration.RM_STORE, MemoryRMStateStore.class.getName());
    MemoryRMStateStore memStore = new MemoryRMStateStore();
    memStore.init(conf);
    MockRM rm1 = new MockRM(conf, memStore);
    try {
        rm1.start();
        MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService());
        nm1.registerNode();
        AbstractYarnScheduler scheduler = (AbstractYarnScheduler) rm1.getResourceScheduler();
        // Mock app without any attempt
        RMApp mockApp = new MockRMApp(125, System.currentTimeMillis(), RMAppState.NEW);
        SchedulerApplication<FiCaSchedulerApp> application = new SchedulerApplication<FiCaSchedulerApp>(null, mockApp.getUser());
        // Second app with one app attempt
        RMApp app = rm1.submitApp(200);
        MockAM am1 = MockRM.launchAndRegisterAM(app, rm1, nm1);
        // Ask the RM to release a container id built for this attempt.
        final ContainerId runningContainer = ContainerId.newContainerId(am1.getApplicationAttemptId(), 2);
        am1.allocate(null, Arrays.asList(runningContainer));
        Map schedulerApplications = scheduler.getSchedulerApplications();
        SchedulerApplication schedulerApp = (SchedulerApplication) scheduler.getSchedulerApplications().get(app.getApplicationId());
        // Register the attempt-less app alongside the real one before clearing.
        schedulerApplications.put(mockApp.getApplicationId(), application);
        scheduler.clearPendingContainerCache();
        // Expected value goes first so a failure reports "expected:<0> but was:<n>".
        Assert.assertEquals("Pending containers are not released " + "when one of the application attempt is null !", 0, schedulerApp.getCurrentAppAttempt().getPendingRelease().size());
    } finally {
        // rm1 is assigned before the try block, so it is never null here.
        rm1.stop();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) MockRMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp) MockNM(org.apache.hadoop.yarn.server.resourcemanager.MockNM) MockRM(org.apache.hadoop.yarn.server.resourcemanager.MockRM) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) MockAM(org.apache.hadoop.yarn.server.resourcemanager.MockAM) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore)84 Test (org.junit.Test)81 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)68 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)28 ApplicationStateData (org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData)27 NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus)23 TestSecurityMockRM (org.apache.hadoop.yarn.server.resourcemanager.TestRMRestart.TestSecurityMockRM)22 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)21 RMState (org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState)21 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)20 MockNM (org.apache.hadoop.yarn.server.resourcemanager.MockNM)20 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)19 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)16 RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt)16 IOException (java.io.IOException)15 MockAM (org.apache.hadoop.yarn.server.resourcemanager.MockAM)14 Configuration (org.apache.hadoop.conf.Configuration)12 AbstractYarnScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler)10 ArrayList (java.util.ArrayList)9 ApplicationAccessType (org.apache.hadoop.yarn.api.records.ApplicationAccessType)9