Search in sources :

Example 41 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

From the class TestNodeStatusUpdater, method testStopReentrant.

/**
 * Starts a NodeManager whose status updater uses a {@code MyResourceTracker2}
 * configured with {@code heartBeatNodeAction = NodeAction.SHUTDOWN}, waits for
 * the first heartbeat, calls {@code nm.stop()} directly, and then asserts that
 * the NM reaches {@code STATE.STOPPED} and that
 * {@code cleanUpApplicationsOnNMShutDown()} was invoked exactly once.
 *
 * @throws Exception if NM setup fails or a polling sleep is interrupted
 */
@Test
public void testStopReentrant() throws Exception {
    // Counts invocations of cleanUpApplicationsOnNMShutDown() on the container manager.
    final AtomicInteger cleanupCalls = new AtomicInteger(0);
    nm = new NodeManager() {

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            MyNodeStatusUpdater updater = new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
            MyResourceTracker2 shutdownTracker = new MyResourceTracker2();
            shutdownTracker.heartBeatNodeAction = NodeAction.SHUTDOWN;
            updater.resourceTracker = shutdownTracker;
            return updater;
        }

        @Override
        protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService dirsHandler) {
            return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, metrics, dirsHandler) {

                @Override
                public void cleanUpApplicationsOnNMShutDown() {
                    super.cleanUpApplicationsOnNMShutDown();
                    cleanupCalls.incrementAndGet();
                }
            };
        }
    };
    YarnConfiguration nmConf = createNMConfig();
    nm.init(nmConf);
    nm.start();

    // Poll (up to 200 x 500 ms) until at least one heartbeat has been counted.
    for (int attempt = 0; heartBeatID < 1 && attempt != 200; attempt++) {
        Thread.sleep(500);
    }
    Assert.assertFalse(heartBeatID < 1);

    // Meanwhile call stop directly as the shutdown hook would
    nm.stop();

    // NM takes a while to reach the STOPPED state; poll up to 20 x 1 s.
    for (int attempt = 0; nm.getServiceState() != STATE.STOPPED && attempt != 20; attempt++) {
        LOG.info("Waiting for NM to stop..");
        Thread.sleep(1000);
    }
    Assert.assertEquals(STATE.STOPPED, nm.getServiceState());

    // The clean-up may complete some time after STOPPED is reached; poll up to 20 x 1 s.
    for (int attempt = 0; cleanupCalls.get() == 0 && attempt != 20; attempt++) {
        LOG.info("Waiting for NM shutdown..");
        Thread.sleep(1000);
    }
    Assert.assertEquals(1, cleanupCalls.get());
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) ContainerManagerImpl(org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl) ApplicationACLsManager(org.apache.hadoop.yarn.server.security.ApplicationACLsManager) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Test(org.junit.Test)

Example 42 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

From the class TestNodeStatusUpdater, method testNMRegistration.

/**
 * Starts a NodeManager on a background thread and verifies that it reaches
 * {@code STATE.STARTED}, that more than three heartbeats are counted, and that
 * exactly one node ends up in {@code registeredNodes}.
 *
 * <p>Before starting, it also checks that the last service registered with the
 * NM is the {@code NodeStatusUpdater}.
 *
 * @throws InterruptedException if a polling sleep is interrupted
 * @throws IOException declared by the original signature
 */
@Test
public void testNMRegistration() throws InterruptedException, IOException {
    nm = new NodeManager() {

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
        }
    };
    YarnConfiguration conf = createNMConfig();
    nm.init(conf);
    // verify that the last service is the nodeStatusUpdater (ie registration
    // with RM)
    Object[] services = nm.getServices().toArray();
    Object lastService = services[services.length - 1];
    Assert.assertTrue("last service is NOT the node status updater", lastService instanceof NodeStatusUpdater);
    new Thread() {

        @Override
        public void run() {
            try {
                nm.start();
            } catch (Throwable e) {
                // Publish the failure so the polling loop below can report it.
                TestNodeStatusUpdater.this.nmStartError = e;
                throw new YarnRuntimeException(e);
            }
        }
    }.start();
    System.out.println(" ----- thread already started.." + nm.getServiceState());
    int waitCount = 0;
    while (nm.getServiceState() == STATE.INITED && waitCount++ != 50) {
        LOG.info("Waiting for NM to start..");
        // Read the field once so the null check and the uses see the same value.
        Throwable startError = nmStartError;
        if (startError != null) {
            LOG.error("Error during startup. ", startError);
            // Throwable.getCause() returns null when there is no cause; fall back
            // to the throwable itself instead of failing with a NullPointerException
            // that would hide the real startup error.
            Throwable cause = startError.getCause();
            Assert.fail((cause != null ? cause : startError).getMessage());
        }
        Thread.sleep(2000);
    }
    if (nm.getServiceState() != STATE.STARTED) {
        // NM could have failed.
        Assert.fail("NodeManager failed to start");
    }
    // Poll (up to 200 x 1 s) until more than three heartbeats have been counted.
    waitCount = 0;
    while (heartBeatID <= 3 && waitCount++ != 200) {
        Thread.sleep(1000);
    }
    Assert.assertFalse(heartBeatID <= 3);
    Assert.assertEquals("Number of registered NMs is wrong!!", 1, this.registeredNodes.size());
    nm.stop();
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Test(org.junit.Test)

Example 43 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

From the class TestNodeStatusUpdater, method testNMRMConnectionConf.

/**
 * Configures both the NM-specific and the generic RM connection wait/retry
 * settings, starts a NodeManager that is expected to fail to start, and checks
 * that the time until the failure lies in
 * {@code [nmMaxWaitMs, nmMaxWaitMs + toleranceMs)}.
 *
 * @throws Exception if the NM gave up outside the accepted time window
 */
@Test(timeout = 100000)
public void testNMRMConnectionConf() throws Exception {
    // Slack allowed on top of the NM-specific maximum wait.
    final long toleranceMs = 50000;
    // NM-specific connection settings.
    final long nmMaxWaitMs = 100;
    final long nmRetryIntervalMs = 100;
    // Generic RM connection settings.
    final long genericMaxWaitMs = -1;
    final long genericRetryIntervalMs = 1000;
    // Interval handed to MyNodeStatusUpdater6 (original note: after waiting
    // for this interval, the RM will be started).
    final long rmStartDelayMs = 2 * 1000;

    conf.setLong(YarnConfiguration.NM_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, nmMaxWaitMs);
    conf.setLong(YarnConfiguration.NM_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, nmRetryIntervalMs);
    conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, genericMaxWaitMs);
    conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, genericRetryIntervalMs);
    conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 1);

    // Test NM try to connect to RM several times, but finally fail
    nm = new NodeManagerWithCustomNodeStatusUpdater() {

        @Override
        protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
            return new MyNodeStatusUpdater6(context, dispatcher, healthChecker, metrics, rmStartDelayMs, true);
        }
    };
    nm.init(conf);

    final long startedAt = System.currentTimeMillis();
    try {
        nm.start();
        Assert.fail("NM should have failed to start due to RM connect failure");
    } catch (Exception e) {
        final long elapsedMs = System.currentTimeMillis() - startedAt;
        final boolean outsideWindow = elapsedMs < nmMaxWaitMs || elapsedMs >= (nmMaxWaitMs + toleranceMs);
        if (outsideWindow) {
            // throw exception if NM doesn't retry long enough
            throw new Exception("NM should have tried re-connecting to RM during period of at least " + nmMaxWaitMs + " ms, but stopped retrying within " + (nmMaxWaitMs + toleranceMs) + " ms: " + e, e);
        }
    }
}
Also used : FileContext(org.apache.hadoop.fs.FileContext) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) EOFException(java.io.EOFException) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) Test(org.junit.Test)

Example 44 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

From the class TestNodeStatusUpdaterForLabels, method testNodeStatusUpdaterForNodeLabels.

/**
 * Runs a NodeManager whose status updater talks to an in-test
 * {@code ResourceTrackerForLabels} and checks the value of
 * {@code resourceTracker.labels} after registration and after a series of
 * out-of-band heartbeats: with updated labels, with unchanged labels, with a
 * provider returning {@code null}, and across several heartbeats with a
 * label resync interval configured.
 *
 * @throws InterruptedException if the sleep between heartbeats is interrupted
 * @throws IOException declared by the original signature
 */
@Test(timeout = 20000)
public void testNodeStatusUpdaterForNodeLabels() throws InterruptedException, IOException {
    final ResourceTrackerForLabels resourceTracker = new ResourceTrackerForLabels();
    nm = new NodeManager() {

        // Supply the test's dummy labels provider instead of a configured one.
        @Override
        protected NodeLabelsProvider createNodeLabelsProvider(Configuration conf) throws IOException {
            return dummyLabelsProviderRef;
        }

        @Override
        protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeLabelsProvider labelsProvider) {
            return new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics, labelsProvider) {

                // Route RM calls to the in-test tracker.
                @Override
                protected ResourceTracker getRMClient() {
                    return resourceTracker;
                }

                // Nothing to stop: no RM proxy is created by getRMClient() above.
                @Override
                protected void stopRMProxy() {
                    return;
                }
            };
        }
    };
    YarnConfiguration conf = createNMConfigForDistributeNodeLabels();
    conf.setLong(YarnConfiguration.NM_NODE_LABELS_RESYNC_INTERVAL, 2000);
    // Bind the localizer to a port obtained from ServerSocketUtil (starting at 8040).
    conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(8040, 10));
    nm.init(conf);
    resourceTracker.resetNMHeartbeatReceiveFlag();
    nm.start();
    // After registration the tracker should hold the provider's current labels.
    resourceTracker.waitTillRegister();
    assertNLCollectionEquals(dummyLabelsProviderRef.getNodeLabels(), resourceTracker.labels);
    // wait till the first heartbeat
    resourceTracker.waitTillHeartbeat();
    resourceTracker.resetNMHeartbeatReceiveFlag();
    // heartbeat with updated labels
    dummyLabelsProviderRef.setNodeLabels(toNodeLabelSet("P"));
    sendOutofBandHeartBeat();
    resourceTracker.waitTillHeartbeat();
    assertNLCollectionEquals(dummyLabelsProviderRef.getNodeLabels(), resourceTracker.labels);
    resourceTracker.resetNMHeartbeatReceiveFlag();
    // heartbeat without updating labels
    sendOutofBandHeartBeat();
    resourceTracker.waitTillHeartbeat();
    // NOTE(review): the flag is reset before the assertion here, unlike the
    // other steps; presumably the reset does not touch 'labels' - confirm.
    resourceTracker.resetNMHeartbeatReceiveFlag();
    assertNull("If no change in labels then null should be sent as part of request", resourceTracker.labels);
    // provider returns null labels
    dummyLabelsProviderRef.setNodeLabels(null);
    sendOutofBandHeartBeat();
    resourceTracker.waitTillHeartbeat();
    assertNotNull("If provider sends null then empty label set should be sent and not null", resourceTracker.labels);
    assertTrue("If provider sends null then empty labels should be sent", resourceTracker.labels.isEmpty());
    resourceTracker.resetNMHeartbeatReceiveFlag();
    // The resync interval is set to 2 sec above, so labels are expected to be
    // sent along with every alternate heartbeat. The loop sleeps for 1 sec per
    // iteration so that roughly one heartbeat is sent every second.
    int nullLabels = 0;
    int nonNullLabels = 0;
    dummyLabelsProviderRef.setNodeLabels(toNodeLabelSet("P1"));
    for (int i = 0; i < 5; i++) {
        sendOutofBandHeartBeat();
        resourceTracker.waitTillHeartbeat();
        // Tally heartbeats by whether labels were present on the tracker.
        if (null == resourceTracker.labels) {
            nullLabels++;
        } else {
            Assert.assertEquals("In heartbeat PI labels should be send", toNodeLabelSet("P1"), resourceTracker.labels);
            nonNullLabels++;
        }
        resourceTracker.resetNMHeartbeatReceiveFlag();
        Thread.sleep(1000);
    }
    // Out of the five heartbeats, at least two of each kind are required.
    Assert.assertTrue("More than one heartbeat with empty labels expected", nullLabels > 1);
    Assert.assertTrue("More than one heartbeat with labels expected", nonNullLabels > 1);
    nm.stop();
}
Also used : YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.hadoop.conf.Configuration) NodeLabelsProvider(org.apache.hadoop.yarn.server.nodemanager.nodelabels.NodeLabelsProvider) ResourceTracker(org.apache.hadoop.yarn.server.api.ResourceTracker) IOException(java.io.IOException) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Test(org.junit.Test)

Example 45 with Dispatcher

use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.

From the class TestContainerLaunch, method verifyTailErrorLogOnContainerExit.

/**
 * Launches a mocked container whose command line uses {@code INVALID_JAVA_HOME}
 * and redirects stderr to {@code containerLogDir + errorFileName}, then asserts
 * that the {@code ContainerExitHandler} registered with the mocked dispatcher
 * reports a container-exit event.
 *
 * @param conf configuration passed to the {@code ContainerLaunch}
 * @param errorFileName suffix appended directly to the container log directory
 *        to form the stderr redirect target (no separator is inserted here)
 * @param testForMultipleErrFiles flag forwarded to the {@code ContainerExitHandler}
 * @throws Exception if the launch or any of the set-up calls fails
 */
private void verifyTailErrorLogOnContainerExit(Configuration conf, String errorFileName, boolean testForMultipleErrFiles) throws Exception {
    // Identity of the application / container under test.
    final ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    final ContainerId containerId = ContainerId.newContainerId(ApplicationAttemptId.newInstance(appId, 1), 1);

    final Container container = mock(Container.class);
    when(container.getContainerId()).thenReturn(containerId);
    when(container.getUser()).thenReturn("test");

    final String relativeLogDir = ContainerLaunch.getRelativeContainerLogDir(appId.toString(), containerId.toString());
    final Path containerLogDir = dirsHandler.getLogPathForWrite(relativeLogDir, false);

    // Launch command; JAVA_HOME is overridden with INVALID_JAVA_HOME in the environment below.
    final List<String> launchCommand = new ArrayList<>();
    Collections.addAll(launchCommand,
        "$JAVA_HOME/bin/java",
        "-Djava.io.tmpdir=$PWD/tmp",
        "-Dlog4j.configuration=container-log4j.properties",
        "-Dyarn.app.container.log.dir=" + containerLogDir,
        "-Dyarn.app.container.log.filesize=0",
        "-Dhadoop.root.logger=INFO,CLA",
        "-Dhadoop.root.logfile=syslog",
        "-Xmx1024m",
        "org.apache.hadoop.mapreduce.v2.app.MRAppMaster",
        "1>" + containerLogDir + "/stdout",
        "2>" + containerLogDir + errorFileName);

    // User-supplied environment for the launch context.
    final Map<String, String> launchEnv = new HashMap<>();
    launchEnv.put(Environment.CONTAINER_ID.name(), "user_set_container_id");
    launchEnv.put("JAVA_HOME", INVALID_JAVA_HOME);
    launchEnv.put(Environment.NM_HOST.name(), "user_set_NM_HOST");
    launchEnv.put(Environment.NM_PORT.name(), "user_set_NM_PORT");
    launchEnv.put(Environment.NM_HTTP_PORT.name(), "user_set_NM_HTTP_PORT");
    launchEnv.put(Environment.LOCAL_DIRS.name(), "user_set_LOCAL_DIR");
    launchEnv.put(Environment.USER.key(), "user_set_" + Environment.USER.key());
    launchEnv.put(Environment.LOGNAME.name(), "user_set_LOGNAME");
    launchEnv.put(Environment.PWD.name(), "user_set_PWD");
    launchEnv.put(Environment.HOME.name(), "user_set_HOME");
    launchEnv.put(Environment.CLASSPATH.name(), "APATH");

    final ContainerLaunchContext launchContext = mock(ContainerLaunchContext.class);
    when(launchContext.getCommands()).thenReturn(launchCommand);
    when(launchContext.getEnvironment()).thenReturn(launchEnv);
    when(container.getLaunchContext()).thenReturn(launchContext);
    when(container.getLocalizedResources()).thenReturn(Collections.<Path, List<String>>emptyMap());

    // Dispatcher that hands every event to the exit-tracking handler.
    final Dispatcher dispatcher = mock(Dispatcher.class);
    @SuppressWarnings("rawtypes") ContainerExitHandler exitHandler = new ContainerExitHandler(testForMultipleErrFiles);
    when(dispatcher.getEventHandler()).thenReturn(exitHandler);

    final Application app = mock(Application.class);
    when(app.getAppId()).thenReturn(appId);
    when(app.getUser()).thenReturn("test");

    final Credentials credentials = mock(Credentials.class);
    when(container.getCredentials()).thenReturn(credentials);

    ((NMContext) context).setNodeId(NodeId.newInstance("127.0.0.1", HTTP_PORT));

    final ContainerLaunch launch = new ContainerLaunch(context, conf, dispatcher, exec, app, container, dirsHandler, containerManager);
    launch.call();
    Assert.assertTrue("ContainerExitEvent should have occured", exitHandler.isContainerExitEventOccured());
}
Also used : Path(org.apache.hadoop.fs.Path) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Dispatcher(org.apache.hadoop.yarn.event.Dispatcher) Container(org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container) NMContext(org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Application(org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application) Credentials(org.apache.hadoop.security.Credentials)

Aggregations

Dispatcher (org.apache.hadoop.yarn.event.Dispatcher): 56, Test (org.junit.Test): 35, Configuration (org.apache.hadoop.conf.Configuration): 26, YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 21, Event (org.apache.hadoop.yarn.event.Event): 18, AsyncDispatcher (org.apache.hadoop.yarn.event.AsyncDispatcher): 15, ContainerLaunchContext (org.apache.hadoop.yarn.api.records.ContainerLaunchContext): 14, FileContext (org.apache.hadoop.fs.FileContext): 11, DrainDispatcher (org.apache.hadoop.yarn.event.DrainDispatcher): 11, EventHandler (org.apache.hadoop.yarn.event.EventHandler): 10, NMContext (org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext): 10, RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext): 10, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 9, IOException (java.io.IOException): 8, Job (org.apache.hadoop.mapreduce.v2.app.job.Job): 7, ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 7, Container (org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container): 7, Path (org.apache.hadoop.fs.Path): 6, JobId (org.apache.hadoop.mapreduce.v2.api.records.JobId): 6, ArrayList (java.util.ArrayList): 5