Search in sources :

Example 6 with StateChangeRequestInfo

use of org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo in project hadoop by apache.

the class TestRMStoreCommands method testRemoveApplicationFromStateStoreCmdForZK.

/**
 * Verifies that {@code ResourceManager.removeApplication} removes an
 * application's node from a ZooKeeper-backed RM state store.
 *
 * <p>A MockRM is started against an in-process Curator testing server,
 * transitioned to active and closed again (presumably so that the state-store
 * root nodes exist before the test touches them -- confirm against
 * ZKRMStateStore). An application node is then created directly through
 * Curator, removed through the ResourceManager API, and the application root
 * path is checked to be empty.
 *
 * @throws Exception if the testing server, the Curator client or the RM fails
 */
@Test
public void testRemoveApplicationFromStateStoreCmdForZK() throws Exception {
    StateChangeRequestInfo req = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    try (TestingServer curatorTestingServer = TestZKRMStateStore.setupCuratorServer();
        CuratorFramework curatorFramework = TestZKRMStateStore.setupCuratorFramework(curatorTestingServer)) {
        Configuration conf = TestZKRMStateStore.createHARMConf("rm1,rm2", "rm1", 1234, false, curatorTestingServer);
        ResourceManager rm = new MockRM(conf);
        try {
            rm.start();
            rm.getRMContext().getRMAdminService().transitionToActive(req);
        } finally {
            // Always release the RM, even when start-up or the transition fails.
            rm.close();
        }
        String appId = ApplicationId.newInstance(System.currentTimeMillis(), 1).toString();
        String appRootPath = YarnConfiguration.DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH + "/" + ZKRMStateStore.ROOT_ZNODE_NAME + "/" + RMStateStore.RM_APP_ROOT;
        String appIdPath = appRootPath + "/" + appId;
        // Create the application node by hand so the removal has something to delete.
        curatorFramework.create().forPath(appIdPath);
        assertEquals("Application node for " + appId + " should exist", appId, curatorFramework.getChildren().forPath(appRootPath).get(0));
        try {
            ResourceManager.removeApplication(conf, appId);
        } catch (Exception e) {
            // Fail like Assert.fail() would, but keep the original exception as the cause.
            throw new AssertionError("Exception should not be thrown while removing app from " + "rm state store.", e);
        }
        assertTrue("After remove app from store there should be no child nodes" + " in app root path", curatorFramework.getChildren().forPath(appRootPath).isEmpty());
    }
}
Also used : TestingServer(org.apache.curator.test.TestingServer) CuratorFramework(org.apache.curator.framework.CuratorFramework) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) Test(org.junit.Test)

Example 7 with StateChangeRequestInfo

use of org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo in project hadoop by apache.

the class TestRMAdminService method testRMHAWithFileSystemBasedConfiguration.

/**
 * Checks that a capacity-scheduler configuration refreshed on the active RM
 * is picked up by the other RM once it becomes active after a failover.
 *
 * <p>Two MockRM instances ("rm1" and "rm2") are started from copies of the
 * shared test configuration. rm1 becomes active, a new
 * {@code capacity-scheduler.xml} with a maximum of 5000 system applications
 * is uploaded and refreshed on rm1, and rm2 is expected to keep reporting
 * 10000 until it is made active, after which it must report 5000 as well.
 *
 * @throws IOException if uploading configuration or an HA transition fails
 * @throws YarnException if refreshing the queues fails
 */
@Test
public void testRMHAWithFileSystemBasedConfiguration() throws IOException, YarnException {
    StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    updateConfigurationForRMHA();
    Configuration conf1 = new Configuration(configuration);
    conf1.set(YarnConfiguration.RM_HA_ID, "rm1");
    Configuration conf2 = new Configuration(configuration);
    conf2.set(YarnConfiguration.RM_HA_ID, "rm2");
    // upload default configurations
    uploadDefaultConfiguration();
    MockRM rm1 = null;
    MockRM rm2 = null;
    try {
        rm1 = new MockRM(conf1);
        rm1.init(conf1);
        rm1.start();
        Assert.assertEquals(HAServiceState.STANDBY, rm1.getRMContext().getHAServiceState());
        rm2 = new MockRM(conf2);
        // rm2 is initialized with its own configuration (HA id "rm2"), matching
        // the configuration it was constructed with.
        rm2.init(conf2);
        rm2.start();
        Assert.assertEquals(HAServiceState.STANDBY, rm2.getRMContext().getHAServiceState());
        rm1.adminService.transitionToActive(requestInfo);
        Assert.assertEquals(HAServiceState.ACTIVE, rm1.getRMContext().getHAServiceState());
        CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
        csConf.set(CapacitySchedulerConfiguration.MAXIMUM_SYSTEM_APPLICATIONS, "5000");
        uploadConfiguration(csConf, "capacity-scheduler.xml");
        rm1.adminService.refreshQueues(RefreshQueuesRequest.newInstance());
        int maxApps = ((CapacityScheduler) rm1.getRMContext().getScheduler()).getConfiguration().getMaximumSystemApplications();
        Assert.assertEquals(5000, maxApps);
        // Before failover happens, the maxApps is
        // still the default value on the standby rm : rm2
        int maxAppsBeforeFailOver = ((CapacityScheduler) rm2.getRMContext().getScheduler()).getConfiguration().getMaximumSystemApplications();
        Assert.assertEquals(10000, maxAppsBeforeFailOver);
        // Do the failover
        rm1.adminService.transitionToStandby(requestInfo);
        rm2.adminService.transitionToActive(requestInfo);
        Assert.assertEquals(HAServiceState.STANDBY, rm1.getRMContext().getHAServiceState());
        Assert.assertEquals(HAServiceState.ACTIVE, rm2.getRMContext().getHAServiceState());
        // After failover the newly active rm2 must see the refreshed value.
        int maxAppsAfter = ((CapacityScheduler) rm2.getRMContext().getScheduler()).getConfiguration().getMaximumSystemApplications();
        Assert.assertEquals(5000, maxAppsAfter);
    } finally {
        if (rm1 != null) {
            rm1.stop();
        }
        if (rm2 != null) {
            rm2.stop();
        }
    }
}
Also used : CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Configuration(org.apache.hadoop.conf.Configuration) DynamicResourceConfiguration(org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) CapacitySchedulerConfiguration(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration) Test(org.junit.Test)

Example 8 with StateChangeRequestInfo

use of org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo in project hadoop by apache.

the class TestRMHA method testTransitionedToActiveRefreshFail.

/**
 * Exercises the transition to active when the admin service's
 * {@code refreshAll()} fails on its first invocation.
 *
 * <p>The RM under test is a MockRM whose AdminService throws a
 * {@link ServiceFailedException} from the first {@code refreshAll()} call and
 * delegates to the real implementation afterwards, and whose dispatcher is a
 * {@code FailFastDispatcher}. The test expects the first transition to active
 * to fail and exactly one event to be counted by the dispatcher, then expects
 * a second transition to active (and a following transition to standby) to
 * succeed.
 *
 * @throws Exception if any transition fails unexpectedly
 */
@Test(timeout = 9000000)
public void testTransitionedToActiveRefreshFail() throws Exception {
    // Automatic failover is switched off; transitions below are requested explicitly.
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    rm = new MockRM(configuration) {

        @Override
        protected AdminService createAdminService() {
            return new AdminService(this, getRMContext()) {

                // Number of failures already simulated; only the first refreshAll() call fails.
                int counter = 0;

                @Override
                protected void setConfig(Configuration conf) {
                    // Ignores the supplied conf and always applies the test's shared configuration.
                    super.setConfig(configuration);
                }

                @Override
                protected void refreshAll() throws ServiceFailedException {
                    if (counter == 0) {
                        counter++;
                        throw new ServiceFailedException("Simulate RefreshFail");
                    } else {
                        super.refreshAll();
                    }
                }
            };
        }

        @Override
        protected Dispatcher createDispatcher() {
            return new FailFastDispatcher();
        }
    };
    rm.init(configuration);
    rm.start();
    final StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    FailFastDispatcher dispatcher = ((FailFastDispatcher) rm.rmContext.getDispatcher());
    // Verify the transition to standby: no event counted, HA state is STANDBY
    rm.adminService.transitionToStandby(requestInfo);
    assertEquals("Fatal Event should be 0", 0, dispatcher.getEventCount());
    assertEquals("HA state should be in standBy State", HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
    try {
        // Verify that the refreshAll() call fails and that a failure event is dispatched
        rm.adminService.transitionToActive(requestInfo);
        Assert.fail("Transition to Active should have failed for refreshAll()");
    } catch (Exception e) {
        assertTrue("Service fail Exception expected", e instanceof ServiceFailedException);
    }
    // Since refreshAll() failed, a fatal event is expected to be sent.
    // When a fatal event is sent, the RM shuts down.
    dispatcher.await();
    assertEquals("Fatal Event to be received", 1, dispatcher.getEventCount());
    // Check that once refreshAll() succeeds the RM can become active
    rm.adminService.transitionToActive(requestInfo);
    assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
    rm.adminService.transitionToStandby(requestInfo);
    assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) HealthCheckFailedException(org.apache.hadoop.ha.HealthCheckFailedException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) JSONException(org.codehaus.jettison.json.JSONException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Example 9 with StateChangeRequestInfo

use of org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo in project hadoop by apache.

the class TestRMHA method testTransitionedToStandbyShouldNotHang.

/**
 * Checks that transitioning to standby completes when a store-fenced failure
 * is reported while another thread is already transitioning to standby.
 *
 * <p>The RM uses a memory state store whose
 * {@code updateApplicationState} always reports a
 * {@link StoreFencedException}, and its {@code stopActiveServices()} is slowed
 * down by a ten second sleep. A background thread requests the standby
 * transition while the test thread triggers the failing store update; the
 * test then joins the thread and verifies standby behaviour.
 *
 * @throws Exception if a transition or the thread join fails
 */
@Test
public void testTransitionedToStandbyShouldNotHang() throws Exception {
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    Configuration yarnConf = new YarnConfiguration(configuration);
    // State store that reports a fenced store on every application-state update.
    MemoryRMStateStore fencedStore = new MemoryRMStateStore() {

        @Override
        public void updateApplicationState(ApplicationStateData appState) {
            notifyStoreOperationFailed(new StoreFencedException());
        }
    };
    fencedStore.init(yarnConf);
    rm = new MockRM(yarnConf, fencedStore) {

        @Override
        void stopActiveServices() {
            // Delay the shutdown of active services before delegating to the real one.
            try {
                Thread.sleep(10000);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            super.stopActiveServices();
        }
    };
    rm.init(yarnConf);
    final StateChangeRequestInfo userRequest = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);

    // Before start: the RM is still initializing and must not claim readiness.
    assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
    assertFalse("RM is ready to become active before being started", rm.adminService.getServiceStatus().isReadyToBecomeActive());
    checkMonitorHealth();

    rm.start();
    checkMonitorHealth();
    checkStandbyRMFunctionality();

    // Become active first.
    rm.adminService.transitionToActive(userRequest);

    // Then request the standby transition from a separate thread.
    Runnable standbyTask = new Runnable() {

        @Override
        public void run() {
            try {
                rm.transitionToStandby(true);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    };
    Thread standbyThread = new Thread(standbyTask);
    standbyThread.start();

    // Trigger the fenced-store failure while that transition is in flight.
    rm.getRMContext().getStateStore().updateApplicationState(null);

    // Wait for the background transition to finish.
    standbyThread.join();

    rm.adminService.transitionToStandby(userRequest);
    checkStandbyRMFunctionality();
    rm.stop();
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) ApplicationStateData(org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData) IOException(java.io.IOException) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) HealthCheckFailedException(org.apache.hadoop.ha.HealthCheckFailedException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) JSONException(org.codehaus.jettison.json.JSONException) AccessControlException(org.apache.hadoop.security.AccessControlException) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) MemoryRMStateStore(org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) Test(org.junit.Test)

Example 10 with StateChangeRequestInfo

use of org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo in project hadoop by apache.

the class TestRMHA method testRMDispatcherForHA.

/**
 * Checks that repeated standby/active transitions leave the RM with the same
 * number of dispatcher event handlers and services as right after
 * {@code init}, and that the dispatcher in use before a transition to standby
 * is stopped by that transition.
 *
 * @throws IOException if an HA transition fails
 */
@Test
public void testRMDispatcherForHA() throws IOException {
    final String handlerCountMsg = "Expect to get the same number of handlers";
    final String serviceCountMsg = "Expect to get the same number of services";
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    Configuration haConf = new YarnConfiguration(configuration);
    rm = new MockRM(haConf) {

        @Override
        protected Dispatcher createDispatcher() {
            return new MyCountingDispatcher();
        }
    };
    rm.init(haConf);

    // Baseline counts taken right after init.
    MyCountingDispatcher initialDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    int baselineHandlers = initialDispatcher.getEventHandlerCount();
    int baselineServices = rm.getServices().size();
    assertTrue(baselineHandlers != 0);

    StateChangeRequestInfo userRequest = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
    assertFalse("RM is ready to become active before being started", rm.adminService.getServiceStatus().isReadyToBecomeActive());
    rm.start();

    // Toggle standby -> active twice, then end up in standby.
    for (int round = 0; round < 2; round++) {
        rm.adminService.transitionToStandby(userRequest);
        rm.adminService.transitionToActive(userRequest);
    }
    rm.adminService.transitionToStandby(userRequest);
    MyCountingDispatcher standbyDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertFalse(standbyDispatcher.isStopped());

    // Active again: the counts must match the baseline.
    rm.adminService.transitionToActive(userRequest);
    MyCountingDispatcher activeDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertEquals(handlerCountMsg, baselineHandlers, activeDispatcher.getEventHandlerCount());
    assertEquals(serviceCountMsg, baselineServices, rm.getServices().size());

    // Hold on to the dispatcher that is in use before going to standby.
    MyCountingDispatcher dispatcherBeforeStandby = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    rm.adminService.transitionToStandby(userRequest);
    MyCountingDispatcher dispatcherAfterStandby = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertEquals(handlerCountMsg, baselineHandlers, dispatcherAfterStandby.getEventHandlerCount());
    assertEquals(serviceCountMsg, baselineServices, rm.getServices().size());
    assertTrue(dispatcherBeforeStandby.isStopped());
    rm.stop();
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Test(org.junit.Test)

Aggregations

StateChangeRequestInfo (org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo): 17; Test (org.junit.Test): 15; Configuration (org.apache.hadoop.conf.Configuration): 12; YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 12; AccessControlException (org.apache.hadoop.security.AccessControlException): 5; IOException (java.io.IOException): 4; HealthCheckFailedException (org.apache.hadoop.ha.HealthCheckFailedException): 4; ServiceFailedException (org.apache.hadoop.ha.ServiceFailedException): 4; YarnRuntimeException (org.apache.hadoop.yarn.exceptions.YarnRuntimeException): 4; StoreFencedException (org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException): 4; JSONException (org.codehaus.jettison.json.JSONException): 4; MemoryRMStateStore (org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore): 3; CuratorFramework (org.apache.curator.framework.CuratorFramework): 2; TestingServer (org.apache.curator.test.TestingServer): 2; Path (org.apache.hadoop.fs.Path): 2; Dispatcher (org.apache.hadoop.yarn.event.Dispatcher): 2; DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 2; MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM): 2; ResourceManager (org.apache.hadoop.yarn.server.resourcemanager.ResourceManager): 2; FileSystem (org.apache.hadoop.fs.FileSystem): 1