Search in sources :

Example 36 with Dispatcher

Use of org.apache.hadoop.yarn.event.Dispatcher in the Apache Hadoop project.

From the class TestRMHA, method testTransitionedToActiveRefreshFail.

/**
 * Checks that a failing refreshAll() during transition-to-active surfaces
 * as a ServiceFailedException and results in exactly one event being
 * counted by the FailFastDispatcher, and that a subsequent transition
 * (where refreshAll() succeeds) brings the RM to ACTIVE.
 */
@Test(timeout = 9000000)
public void testTransitionedToActiveRefreshFail() throws Exception {
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    rm = new MockRM(configuration) {

        @Override
        protected Dispatcher createDispatcher() {
            return new FailFastDispatcher();
        }

        @Override
        protected AdminService createAdminService() {
            return new AdminService(this, getRMContext()) {

                // Number of refreshAll() failures simulated so far.
                int simulatedFailures = 0;

                @Override
                protected void setConfig(Configuration conf) {
                    // Always hand the test's own configuration to the parent,
                    // regardless of the argument passed in.
                    super.setConfig(configuration);
                }

                @Override
                protected void refreshAll() throws ServiceFailedException {
                    if (simulatedFailures != 0) {
                        // Only the very first call is made to fail.
                        super.refreshAll();
                        return;
                    }
                    simulatedFailures++;
                    throw new ServiceFailedException("Simulate RefreshFail");
                }
            };
        }
    };
    rm.init(configuration);
    rm.start();
    final StateChangeRequestInfo userRequest = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    FailFastDispatcher failFastDispatcher = (FailFastDispatcher) rm.rmContext.getDispatcher();

    // Going to standby must not produce any event on the dispatcher.
    rm.adminService.transitionToStandby(userRequest);
    assertEquals("Fatal Event should be 0", 0, failFastDispatcher.getEventCount());
    assertEquals("HA state should be in standBy State", HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());

    // First transition to active: refreshAll() is rigged to fail, so the
    // call itself must throw a ServiceFailedException.
    try {
        rm.adminService.transitionToActive(userRequest);
        Assert.fail("Transition to Active should have failed for refreshAll()");
    } catch (Exception thrown) {
        assertTrue("Service fail Exception expected", thrown instanceof ServiceFailedException);
    }

    // Because refreshAll() failed, a fatal event is expected to have been
    // sent; wait on the dispatcher before checking the count.
    failFastDispatcher.await();
    assertEquals("Fatal Event to be received", 1, failFastDispatcher.getEventCount());

    // Second attempt: refreshAll() now succeeds, so the RM can become active
    // and then go back to standby.
    rm.adminService.transitionToActive(userRequest);
    assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
    rm.adminService.transitionToStandby(userRequest);
    assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) StoreFencedException(org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFencedException) ServiceFailedException(org.apache.hadoop.ha.ServiceFailedException) HealthCheckFailedException(org.apache.hadoop.ha.HealthCheckFailedException) IOException(java.io.IOException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) JSONException(org.codehaus.jettison.json.JSONException) AccessControlException(org.apache.hadoop.security.AccessControlException) Test(org.junit.Test)

Example 37 with Dispatcher

Use of org.apache.hadoop.yarn.event.Dispatcher in the Apache Hadoop project.

From the class TestRMHA, method testRMDispatcherForHA.

/**
 * Checks that repeated standby/active transitions leave the RM with the
 * same number of dispatcher event handlers and services as right after
 * init, and that the dispatcher held before a transition to standby is
 * stopped by that transition.
 */
@Test
public void testRMDispatcherForHA() throws IOException {
    final String handlerCountMessage = "Expect to get the same number of handlers";
    final String serviceCountMessage = "Expect to get the same number of services";
    configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
    Configuration conf = new YarnConfiguration(configuration);
    rm = new MockRM(conf) {

        @Override
        protected Dispatcher createDispatcher() {
            return new MyCountingDispatcher();
        }
    };
    rm.init(conf);

    // Baseline counts taken right after init, before any HA transition.
    MyCountingDispatcher initialDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    final int baselineHandlers = initialDispatcher.getEventHandlerCount();
    final int baselineServices = rm.getServices().size();
    assertFalse(baselineHandlers == 0);

    StateChangeRequestInfo userRequest = new StateChangeRequestInfo(HAServiceProtocol.RequestSource.REQUEST_BY_USER);
    assertEquals(STATE_ERR, HAServiceState.INITIALIZING, rm.adminService.getServiceStatus().getState());
    assertFalse("RM is ready to become active before being started", rm.adminService.getServiceStatus().isReadyToBecomeActive());
    rm.start();

    // Bounce between standby and active a couple of times, ending in standby.
    for (int round = 0; round < 2; round++) {
        rm.adminService.transitionToStandby(userRequest);
        rm.adminService.transitionToActive(userRequest);
    }
    rm.adminService.transitionToStandby(userRequest);

    // The dispatcher currently registered in the context must not be stopped.
    MyCountingDispatcher standbyDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertFalse(standbyDispatcher.isStopped());

    rm.adminService.transitionToActive(userRequest);
    MyCountingDispatcher activeDispatcher = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertEquals(handlerCountMessage, baselineHandlers, activeDispatcher.getEventHandlerCount());
    assertEquals(serviceCountMessage, baselineServices, rm.getServices().size());

    // Hold on to the dispatcher that is in place before going to standby, so
    // that its stopped state can be checked afterwards.
    MyCountingDispatcher dispatcherBeforeStandby = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    rm.adminService.transitionToStandby(userRequest);
    MyCountingDispatcher dispatcherAfterStandby = (MyCountingDispatcher) rm.getRMContext().getDispatcher();
    assertEquals(handlerCountMessage, baselineHandlers, dispatcherAfterStandby.getEventHandlerCount());
    assertEquals(serviceCountMessage, baselineServices, rm.getServices().size());
    assertTrue(dispatcherBeforeStandby.isStopped());
    rm.stop();
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) StateChangeRequestInfo(org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Test(org.junit.Test)

Example 38 with Dispatcher

Use of org.apache.hadoop.yarn.event.Dispatcher in the Apache Hadoop project.

From the class TestNodeStatusUpdater, method testCompletedContainerStatusBackup.

/**
 * Verifies that completed container statuses are kept as a backup when a
 * heartbeat is lost and are delivered with the next heartbeat.
 *
 * <p>The test starts a NodeManager wired with MyNodeStatusUpdater2 and
 * MyNMContext, waits (up to roughly ten seconds) for the heartbeat counter
 * to pass 4, and then checks the failure flag set by the background thread
 * as well as the presence of the "token1" credential for application
 * (1234, 1).
 */
@Test(timeout = 200000)
public void testCompletedContainerStatusBackup() throws Exception {
    nm = new NodeManager() {

        @Override
        protected NMContext createNMContext(NMContainerTokenSecretManager containerTokenSecretManager, NMTokenSecretManagerInNM nmTokenSecretManager, NMStateStoreService store, boolean isDistributedSchedulingEnabled, Configuration config) {
            return new MyNMContext(containerTokenSecretManager, nmTokenSecretManager, config);
        }

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater2(context, dispatcher, healthChecker, metrics);
        }
    };
    YarnConfiguration conf = createNMConfig();
    nm.init(conf);
    nm.start();

    // Poll every 500 ms until more than four heartbeats were seen, giving up
    // once the attempt budget is used.
    int attempts = 0;
    while (heartBeatID <= 4) {
        if (attempts++ == 20) {
            break;
        }
        Thread.sleep(500);
    }
    if (heartBeatID <= 4) {
        Assert.fail("Failed to get all heartbeats in time, heartbeatID:" + heartBeatID);
    }

    // Assertions made off the test thread are reported through this flag.
    Assert.assertFalse("ContainerStatus Backup failed", assertionFailedInThread.get());

    ApplicationId appId = ApplicationId.newInstance(1234, 1);
    Text tokenAlias = new Text("token1");
    Assert.assertNotNull(nm.getNMContext().getSystemCredentialsForApps().get(appId).getToken(tokenAlias));
    nm.stop();
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Configuration(org.apache.hadoop.conf.Configuration) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NMTokenSecretManagerInNM(org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM) Text(org.apache.hadoop.io.Text) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) NMStateStoreService(org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) NMContainerTokenSecretManager(org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager) Test(org.junit.Test)

Example 39 with Dispatcher

Use of org.apache.hadoop.yarn.event.Dispatcher in the Apache Hadoop project.

From the class TestNodeStatusUpdater, method testRMVersionLessThanMinimum.

/**
 * Starts a NodeManager configured with a minimum ResourceManager version of
 * "3.0.0" against a stub resource tracker that reports RM version "3.0.0"
 * and a NORMAL heartbeat action, and checks that the NodeManager reaches
 * the STARTED state.
 *
 * @throws InterruptedException if the wait for startup is interrupted
 * @throws IOException declared by the test signature
 */
@Test
public void testRMVersionLessThanMinimum() throws InterruptedException, IOException {
    // Counts calls to cleanUpApplicationsOnNMShutDown(); it is incremented by
    // the container manager below but not asserted on in this test.
    final AtomicInteger numCleanups = new AtomicInteger(0);
    YarnConfiguration conf = createNMConfig();
    conf.set(YarnConfiguration.NM_RESOURCEMANAGER_MINIMUM_VERSION, "3.0.0");
    nm = new NodeManager() {

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            MyNodeStatusUpdater myNodeStatusUpdater = new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
            // Replace the resource tracker with a stub reporting RM version
            // "3.0.0" and a NORMAL heartbeat action.
            MyResourceTracker2 myResourceTracker2 = new MyResourceTracker2();
            myResourceTracker2.heartBeatNodeAction = NodeAction.NORMAL;
            myResourceTracker2.rmVersion = "3.0.0";
            myNodeStatusUpdater.resourceTracker = myResourceTracker2;
            return myNodeStatusUpdater;
        }

        @Override
        protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService dirsHandler) {
            return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, metrics, dirsHandler) {

                @Override
                public void cleanUpApplicationsOnNMShutDown() {
                    super.cleanUpApplicationsOnNMShutDown();
                    numCleanups.incrementAndGet();
                }
            };
        }
    };
    nm.init(conf);
    nm.start();
    // NM takes a while to reach the STARTED state.
    int waitCount = 0;
    while (nm.getServiceState() != STATE.STARTED && waitCount++ != 20) {
        // The loop waits for startup, so the log message says "start".
        LOG.info("Waiting for NM to start..");
        Thread.sleep(1000);
    }
    // assertEquals reports the actual state on failure, unlike assertTrue(a == b).
    Assert.assertEquals("NodeManager did not reach the STARTED state", STATE.STARTED, nm.getServiceState());
    nm.stop();
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Test(org.junit.Test)

Example 40 with Dispatcher

Use of org.apache.hadoop.yarn.event.Dispatcher in the Apache Hadoop project.

From the class TestNodeStatusUpdater, method testSignalContainerToContainerManager.

/**
 * Verifies that a signalContainer request can be dispatched from
 * NodeStatusUpdaterImpl to ContainerManagerImpl.
 *
 * <p>The test starts a NodeManager wired with MyNodeStatusUpdater and
 * MyContainerManager, waits for it to start and for more than three
 * heartbeats, and then checks that exactly one node registered and that the
 * container manager's {@code signaled} flag is set.
 */
@Test
public void testSignalContainerToContainerManager() throws Exception {
    nm = new NodeManager() {

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics, true);
        }

        @Override
        protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService diskhandler) {
            return new MyContainerManager(context, exec, del, nodeStatusUpdater, metrics, diskhandler);
        }
    };
    YarnConfiguration conf = createNMConfig();
    nm.init(conf);
    nm.start();
    System.out.println(" ----- thread already started.." + nm.getServiceState());
    int waitCount = 0;
    while (nm.getServiceState() == STATE.INITED && waitCount++ != 20) {
        LOG.info("Waiting for NM to start..");
        // Read the shared error once so the null check and the uses below
        // see the same value.
        Throwable startError = nmStartError;
        if (startError != null) {
            LOG.error("Error during startup. ", startError);
            // The error may have no cause; fall back to its own message
            // rather than failing with a NullPointerException that would
            // hide the real startup problem.
            Throwable cause = startError.getCause();
            Assert.fail(cause != null ? cause.getMessage() : startError.getMessage());
        }
        Thread.sleep(1000);
    }
    if (nm.getServiceState() != STATE.STARTED) {
        // NM could have failed.
        Assert.fail("NodeManager failed to start");
    }
    // Poll every 500 ms until more than three heartbeats were seen, or the
    // attempt budget is used up.
    waitCount = 0;
    while (heartBeatID <= 3 && waitCount++ != 20) {
        Thread.sleep(500);
    }
    Assert.assertFalse(heartBeatID <= 3);
    Assert.assertEquals("Number of registered NMs is wrong!!", 1, this.registeredNodes.size());
    MyContainerManager containerManager = (MyContainerManager) nm.getContainerManager();
    Assert.assertTrue(containerManager.signaled);
    nm.stop();
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Test(org.junit.Test)

Aggregations

Dispatcher (org.apache.hadoop.yarn.event.Dispatcher)56 Test (org.junit.Test)35 Configuration (org.apache.hadoop.conf.Configuration)26 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)21 Event (org.apache.hadoop.yarn.event.Event)18 AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher)15 ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext)14 FileContext (org.apache.hadoop.fs.FileContext)11 DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher)11 EventHandler (org.apache.hadoop.yarn.event.EventHandler)10 NMContext (org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext)10 RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext)10 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)9 IOException (java.io.IOException)8 Job (org.apache.hadoop.mapreduce.v2.app.job.Job)7 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)7 Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container)7 Path (org.apache.hadoop.fs.Path)6 JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId)6 ArrayList (java.util.ArrayList)5