Search in sources :

Example 11 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

the class ResourceManager method resetDispatcher.

/**
 * Swaps the current {@code rmDispatcher} for a newly created one.
 *
 * <p>The replacement comes from {@code setupDispatcher()} and is initialised
 * with this object's configuration and started first. Only then is the
 * outgoing dispatcher removed from the service list and stopped. Finally the
 * replacement is stored in {@code rmDispatcher}, passed to
 * {@code addIfService} and set on {@code rmContext}.
 */
private void resetDispatcher() {
    // Bring the replacement fully up before touching the current dispatcher.
    final Dispatcher replacement = setupDispatcher();
    final Service replacementService = (Service) replacement;
    replacementService.init(this.conf);
    replacementService.start();

    final Service outgoingService = (Service) rmDispatcher;
    removeService(outgoingService);
    // The outgoing dispatcher must be stopped before the field is overwritten;
    // otherwise its "AsyncDispatcher event handler" thread is leaked.
    outgoingService.stop();

    rmDispatcher = replacement;
    addIfService(rmDispatcher);
    rmContext.setDispatcher(rmDispatcher);
}
Also used : Service(org.apache.hadoop.service.Service) CompositeService(org.apache.hadoop.service.CompositeService) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) EventDispatcher(org.apache.hadoop.yarn.event.EventDispatcher)

Example 12 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

the class RMHATestBase method startRMs.

/**
 * Creates the two {@code MockRM} instances {@code rm1} and {@code rm2} and
 * passes them, together with their configurations, to
 * {@code startRMs(rm1, confForRM1, rm2, confForRM2)}.
 *
 * <p>Both instances override {@code createDispatcher()} so that a
 * {@code DrainDispatcher} is returned.
 *
 * @throws IOException declared by this method; presumably propagated from the
 *         four-argument {@code startRMs} overload — confirm against that method
 */
protected void startRMs() throws IOException {
    // First RM, built from confForRM1, with a DrainDispatcher as its dispatcher.
    rm1 = new MockRM(confForRM1, null, false, false) {

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }
    };
    // Second RM, built from confForRM2, with the same dispatcher override.
    rm2 = new MockRM(confForRM2, null, false, false) {

        @Override
        protected Dispatcher createDispatcher() {
            return new DrainDispatcher();
        }
    };
    // Delegate the actual start-up to the overload that takes both RMs.
    startRMs(rm1, confForRM1, rm2, confForRM2);
}
Also used : DrainDispatcher(org.apache.hadoop.yarn.event.DrainDispatcher) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher)

Example 13 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

the class TestEventFlow method testSuccessfulContainerLaunch.

/**
 * Starts a single container through a {@code DummyContainerManager}, waits for
 * it to reach {@code RUNNING} or {@code SCHEDULED}, stops it again and waits
 * for it to reach {@code COMPLETE}.
 *
 * <p>The test first deletes and recreates the local, log and remote-log
 * directories, then wires up an {@code NMContext}, a
 * {@code NodeStatusUpdaterImpl} whose RM client is a {@code LocalRMInterface}
 * and whose status-updater thread is never started, and the container manager
 * itself.
 *
 * <p>Once the container manager has been started it is always stopped again,
 * even when one of the requests or waits fails, so that a failing run does not
 * leave it running.
 *
 * @throws InterruptedException declared by this test method
 * @throws IOException declared by this test method
 * @throws YarnException declared by this test method
 */
@Test
public void testSuccessfulContainerLaunch() throws InterruptedException, IOException, YarnException {
    // Start from clean directories.
    FileContext localFS = FileContext.getLocalFSFileContext();
    localFS.delete(new Path(localDir.getAbsolutePath()), true);
    localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
    localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
    localDir.mkdir();
    localLogDir.mkdir();
    remoteLogDir.mkdir();
    YarnConfiguration conf = new YarnConfiguration();
    Context context = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, null, new NMNullStateStoreService(), false, conf) {

        @Override
        public int getHttpPort() {
            return 1234;
        }
    };
    conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());
    conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(8040, 10));
    ContainerExecutor exec = new DefaultContainerExecutor();
    exec.setConf(conf);
    DeletionService del = new DeletionService(exec);
    Dispatcher dispatcher = new AsyncDispatcher();
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
    healthChecker.init(conf);
    NodeManagerMetrics metrics = NodeManagerMetrics.create();
    NodeStatusUpdater nodeStatusUpdater = new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {

        @Override
        protected ResourceTracker getRMClient() {
            return new LocalRMInterface();
        }

        @Override
        protected void stopRMProxy() {
            // Nothing to stop in this override.
        }

        @Override
        protected void startStatusUpdater() {
            // Don't start any updating thread.
        }

        @Override
        public long getRMIdentifier() {
            return SIMULATED_RM_IDENTIFIER;
        }
    };
    DummyContainerManager containerManager = new DummyContainerManager(context, exec, del, nodeStatusUpdater, metrics, dirsHandler);
    nodeStatusUpdater.init(conf);
    ((NMContext) context).setContainerManager(containerManager);
    nodeStatusUpdater.start();
    ((NMContext) context).setNodeStatusUpdater(nodeStatusUpdater);
    containerManager.init(conf);
    containerManager.start();
    try {
        // Build and submit a start request for one container.
        ContainerLaunchContext launchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
        ApplicationId applicationId = ApplicationId.newInstance(0, 0);
        ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0);
        ContainerId cID = ContainerId.newContainerId(applicationAttemptId, 0);
        String user = "testing";
        StartContainerRequest scRequest = StartContainerRequest.newInstance(launchContext, TestContainerManager.createContainerToken(cID, SIMULATED_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
        List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
        list.add(scRequest);
        StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
        containerManager.startContainers(allRequests);
        BaseContainerManagerTest.waitForContainerState(containerManager, cID, Arrays.asList(ContainerState.RUNNING, ContainerState.SCHEDULED), 20);

        // Stop the same container and wait for it to complete.
        List<ContainerId> containerIds = new ArrayList<ContainerId>();
        containerIds.add(cID);
        StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
        containerManager.stopContainers(stopRequest);
        BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);
    } finally {
        // Always stop the started container manager, including when a request
        // or wait above throws, so a failing run does not leave it running.
        containerManager.stop();
    }
}
Also used : ArrayList(java.util.ArrayList) NMTokenSecretManagerInNM(org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) AsyncDispatcher(org.apache.hadoop.yarn.event.AsyncDispatcher) NMNullStateStoreService(org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NMContainerTokenSecretManager(org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager) NodeManagerMetrics(org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics) StopContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest) Path(org.apache.hadoop.fs.Path) FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) StartContainersRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) StartContainerRequest(org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) BaseContainerManagerTest(org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest) Test(org.junit.Test)

Example 14 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

the class TestNodeStatusUpdater method testNMConnectionToRM.

/**
 * Checks how long the NodeManager keeps trying to reach the RM in two
 * scenarios driven by {@code MyNodeStatusUpdater4}.
 *
 * <ol>
 *   <li>Start-up is expected to fail; the time until the failure must be at
 *       least {@code connectionWaitMs} and less than
 *       {@code connectionWaitMs + delta}.</li>
 *   <li>Start-up is expected to succeed; the time it takes must be at least
 *       {@code rmStartIntervalMS} and less than
 *       {@code rmStartIntervalMS + delta}.</li>
 * </ol>
 *
 * @throws Exception if start-up fails unexpectedly or the first scenario's
 *         failure falls outside the permitted time window
 */
@Test(timeout = 150000)
public void testNMConnectionToRM() throws Exception {
    // Slack added on top of each expected duration.
    final long delta = 50000;
    final long connectionWaitMs = 5000;
    final long connectionRetryIntervalMs = 1000;
    // After waiting for rmStartIntervalMS, the RM will be started.
    final long rmStartIntervalMS = 2 * 1000;
    conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitMs);
    conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs);

    // Scenario 1: the NM tries to connect to the RM several times but fails in the end.
    NodeManagerWithCustomNodeStatusUpdater nmWithUpdater = new NodeManagerWithCustomNodeStatusUpdater() {

        @Override
        protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater4(context, dispatcher, healthChecker, metrics, rmStartIntervalMS, true);
        }
    };
    nm = nmWithUpdater;
    nm.init(conf);
    long waitStartTime = System.currentTimeMillis();
    try {
        nm.start();
        Assert.fail("NM should have failed to start due to RM connect failure");
    } catch (Exception e) {
        final long elapsedMs = System.currentTimeMillis() - waitStartTime;
        final long upperBoundMs = connectionWaitMs + delta;
        if (elapsedMs < connectionWaitMs || elapsedMs >= upperBoundMs) {
            // Rethrow with the original failure attached as the cause.
            throw new Exception("NM should have tried re-connecting to RM during "
                + "period of at least " + connectionWaitMs + " ms, but "
                + "stopped retrying within " + upperBoundMs + " ms: " + e, e);
        }
    }

    // Scenario 2: the first connection attempts fail, but the NM connects in the end.
    nmWithUpdater = new NodeManagerWithCustomNodeStatusUpdater() {

        @Override
        protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater4(context, dispatcher, healthChecker, metrics, rmStartIntervalMS, false);
        }
    };
    nm = nmWithUpdater;
    nm.init(conf);
    final NodeStatusUpdater updater = nmWithUpdater.getUpdater();
    Assert.assertNotNull("Updater not yet created ", updater);
    waitStartTime = System.currentTimeMillis();
    try {
        nm.start();
    } catch (Exception ex) {
        LOG.error("NM should have started successfully " + "after connecting to RM.", ex);
        throw ex;
    }
    final long startupMs = System.currentTimeMillis() - waitStartTime;
    final MyNodeStatusUpdater4 myUpdater = (MyNodeStatusUpdater4) updater;
    Assert.assertTrue("NM started before updater triggered", myUpdater.isTriggered());
    Assert.assertTrue(
        "NM should have connected to RM after " + "the start interval of " + rmStartIntervalMS
            + ": actual " + startupMs + " " + myUpdater,
        startupMs >= rmStartIntervalMS);
    Assert.assertTrue(
        "NM should have connected to RM less than " + (rmStartIntervalMS + delta)
            + " milliseconds of RM starting up: actual " + startupMs + " " + myUpdater,
        startupMs < (rmStartIntervalMS + delta));
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) EOFException(java.io.EOFException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) Test(org.junit.Test)

Example 15 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

the class TestNodeStatusUpdater method testNoRegistrationWhenNMServicesFail.

/**
 * Verifies that the RM stays oblivious to an NM that could not start its
 * ContainerManager RPC server.
 *
 * <p>If the NM reported to the RM even though not all of its servers had
 * started properly, the RM would consider the NM alive and would only retire
 * it after the NM_EXPIRY interval. See MAPREDUCE-2749.
 *
 * <p>The failure is simulated by a {@code ContainerManagerImpl} whose
 * {@code serviceStart()} always throws a {@code YarnRuntimeException}.
 */
@Test
public void testNoRegistrationWhenNMServicesFail() throws Exception {
    // One message, used both for the simulated failure and for its verification.
    final String rpcStartFailureMessage = "Starting of RPC Server failed";

    nm = new NodeManager() {

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context nmContext, Dispatcher eventDispatcher, NodeHealthCheckerService healthCheckerService) {
            return new MyNodeStatusUpdater(nmContext, eventDispatcher, healthCheckerService, metrics);
        }

        @Override
        protected ContainerManagerImpl createContainerManager(Context nmContext, ContainerExecutor executor, DeletionService deletionSvc, NodeStatusUpdater statusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService dirsHandlerSvc) {
            return new ContainerManagerImpl(nmContext, executor, deletionSvc, statusUpdater, metrics, dirsHandlerSvc) {

                @Override
                protected void serviceStart() {
                    // Simulate a failure while starting the RPC server.
                    throw new YarnRuntimeException(rpcStartFailureMessage);
                }
            };
        }
    };
    verifyNodeStartFailure(rpcStartFailureMessage);
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) Test(org.junit.Test)

Aggregations

Dispatcher (org.apache.hadoop.yarn.event.Dispatcher) 56, Test (org.junit.Test) 35, Configuration (org.apache.hadoop.conf.Configuration) 26, YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration) 21, Event (org.apache.hadoop.yarn.event.Event) 18, AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher) 15, ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext) 14, FileContext (org.apache.hadoop.fs.FileContext) 11, DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher) 11, EventHandler (org.apache.hadoop.yarn.event.EventHandler) 10, NMContext (org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) 10, RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext) 10, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId) 9, IOException (java.io.IOException) 8, Job (org.apache.hadoop.mapreduce.v2.app.job.Job) 7, ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId) 7, Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) 7, Path (org.apache.hadoop.fs.Path) 6, JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId) 6, ArrayList (java.util.ArrayList) 5