use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdater method testStopReentrant.
@Test
public void testStopReentrant() throws Exception {
final AtomicInteger numCleanups = new AtomicInteger(0);
nm = new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
MyNodeStatusUpdater myNodeStatusUpdater = new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
MyResourceTracker2 myResourceTracker2 = new MyResourceTracker2();
myResourceTracker2.heartBeatNodeAction = NodeAction.SHUTDOWN;
myNodeStatusUpdater.resourceTracker = myResourceTracker2;
return myNodeStatusUpdater;
}
@Override
protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService dirsHandler) {
return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, metrics, dirsHandler) {
@Override
public void cleanUpApplicationsOnNMShutDown() {
super.cleanUpApplicationsOnNMShutDown();
numCleanups.incrementAndGet();
}
};
}
};
YarnConfiguration conf = createNMConfig();
nm.init(conf);
nm.start();
int waitCount = 0;
while (heartBeatID < 1 && waitCount++ != 200) {
Thread.sleep(500);
}
Assert.assertFalse(heartBeatID < 1);
// Meanwhile call stop directly as the shutdown hook would
nm.stop();
// NM takes a while to reach the STOPPED state.
waitCount = 0;
while (nm.getServiceState() != STATE.STOPPED && waitCount++ != 20) {
LOG.info("Waiting for NM to stop..");
Thread.sleep(1000);
}
Assert.assertEquals(STATE.STOPPED, nm.getServiceState());
// It further takes a while after NM reached the STOPPED state.
waitCount = 0;
while (numCleanups.get() == 0 && waitCount++ != 20) {
LOG.info("Waiting for NM shutdown..");
Thread.sleep(1000);
}
Assert.assertEquals(1, numCleanups.get());
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdater method testNMRegistration.
@Test
public void testNMRegistration() throws InterruptedException, IOException {
nm = new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
return new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
}
};
YarnConfiguration conf = createNMConfig();
nm.init(conf);
// verify that the last service is the nodeStatusUpdater (ie registration
// with RM)
Object[] services = nm.getServices().toArray();
Object lastService = services[services.length - 1];
Assert.assertTrue("last service is NOT the node status updater", lastService instanceof NodeStatusUpdater);
new Thread() {
public void run() {
try {
nm.start();
} catch (Throwable e) {
TestNodeStatusUpdater.this.nmStartError = e;
throw new YarnRuntimeException(e);
}
}
}.start();
System.out.println(" ----- thread already started.." + nm.getServiceState());
int waitCount = 0;
while (nm.getServiceState() == STATE.INITED && waitCount++ != 50) {
LOG.info("Waiting for NM to start..");
if (nmStartError != null) {
LOG.error("Error during startup. ", nmStartError);
Assert.fail(nmStartError.getCause().getMessage());
}
Thread.sleep(2000);
}
if (nm.getServiceState() != STATE.STARTED) {
// NM could have failed.
Assert.fail("NodeManager failed to start");
}
waitCount = 0;
while (heartBeatID <= 3 && waitCount++ != 200) {
Thread.sleep(1000);
}
Assert.assertFalse(heartBeatID <= 3);
Assert.assertEquals("Number of registered NMs is wrong!!", 1, this.registeredNodes.size());
nm.stop();
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdater method testNMRMConnectionConf.
@Test(timeout = 100000)
public void testNMRMConnectionConf() throws Exception {
final long delta = 50000;
final long nmRmConnectionWaitMs = 100;
final long nmRmRetryInterval = 100;
final long connectionWaitMs = -1;
final long connectionRetryIntervalMs = 1000;
//Waiting for rmStartIntervalMS, RM will be started
final long rmStartIntervalMS = 2 * 1000;
conf.setLong(YarnConfiguration.NM_RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, nmRmConnectionWaitMs);
conf.setLong(YarnConfiguration.NM_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, nmRmRetryInterval);
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitMs);
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs);
conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 1);
//Test NM try to connect to RM Several times, but finally fail
NodeManagerWithCustomNodeStatusUpdater nmWithUpdater;
nm = nmWithUpdater = new NodeManagerWithCustomNodeStatusUpdater() {
@Override
protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater6(context, dispatcher, healthChecker, metrics, rmStartIntervalMS, true);
return nodeStatusUpdater;
}
};
nm.init(conf);
long waitStartTime = System.currentTimeMillis();
try {
nm.start();
Assert.fail("NM should have failed to start due to RM connect failure");
} catch (Exception e) {
long t = System.currentTimeMillis();
long duration = t - waitStartTime;
boolean waitTimeValid = (duration >= nmRmConnectionWaitMs) && (duration < (nmRmConnectionWaitMs + delta));
if (!waitTimeValid) {
// throw exception if NM doesn't retry long enough
throw new Exception("NM should have tried re-connecting to RM during " + "period of at least " + nmRmConnectionWaitMs + " ms, but " + "stopped retrying within " + (nmRmConnectionWaitMs + delta) + " ms: " + e, e);
}
}
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdaterForLabels method testNodeStatusUpdaterForNodeLabels.
@Test(timeout = 20000)
public void testNodeStatusUpdaterForNodeLabels() throws InterruptedException, IOException {
final ResourceTrackerForLabels resourceTracker = new ResourceTrackerForLabels();
nm = new NodeManager() {
@Override
protected NodeLabelsProvider createNodeLabelsProvider(Configuration conf) throws IOException {
return dummyLabelsProviderRef;
}
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeLabelsProvider labelsProvider) {
return new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics, labelsProvider) {
@Override
protected ResourceTracker getRMClient() {
return resourceTracker;
}
@Override
protected void stopRMProxy() {
return;
}
};
}
};
YarnConfiguration conf = createNMConfigForDistributeNodeLabels();
conf.setLong(YarnConfiguration.NM_NODE_LABELS_RESYNC_INTERVAL, 2000);
conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(8040, 10));
nm.init(conf);
resourceTracker.resetNMHeartbeatReceiveFlag();
nm.start();
resourceTracker.waitTillRegister();
assertNLCollectionEquals(dummyLabelsProviderRef.getNodeLabels(), resourceTracker.labels);
// wait till the first heartbeat
resourceTracker.waitTillHeartbeat();
resourceTracker.resetNMHeartbeatReceiveFlag();
// heartbeat with updated labels
dummyLabelsProviderRef.setNodeLabels(toNodeLabelSet("P"));
sendOutofBandHeartBeat();
resourceTracker.waitTillHeartbeat();
assertNLCollectionEquals(dummyLabelsProviderRef.getNodeLabels(), resourceTracker.labels);
resourceTracker.resetNMHeartbeatReceiveFlag();
// heartbeat without updating labels
sendOutofBandHeartBeat();
resourceTracker.waitTillHeartbeat();
resourceTracker.resetNMHeartbeatReceiveFlag();
assertNull("If no change in labels then null should be sent as part of request", resourceTracker.labels);
// provider return with null labels
dummyLabelsProviderRef.setNodeLabels(null);
sendOutofBandHeartBeat();
resourceTracker.waitTillHeartbeat();
assertNotNull("If provider sends null then empty label set should be sent and not null", resourceTracker.labels);
assertTrue("If provider sends null then empty labels should be sent", resourceTracker.labels.isEmpty());
resourceTracker.resetNMHeartbeatReceiveFlag();
// Since the resync interval is set to 2 sec in every alternate heartbeat
// the labels will be send along with heartbeat.In loop we sleep for 1 sec
// so that every sec 1 heartbeat is send.
int nullLabels = 0;
int nonNullLabels = 0;
dummyLabelsProviderRef.setNodeLabels(toNodeLabelSet("P1"));
for (int i = 0; i < 5; i++) {
sendOutofBandHeartBeat();
resourceTracker.waitTillHeartbeat();
if (null == resourceTracker.labels) {
nullLabels++;
} else {
Assert.assertEquals("In heartbeat PI labels should be send", toNodeLabelSet("P1"), resourceTracker.labels);
nonNullLabels++;
}
resourceTracker.resetNMHeartbeatReceiveFlag();
Thread.sleep(1000);
}
Assert.assertTrue("More than one heartbeat with empty labels expected", nullLabels > 1);
Assert.assertTrue("More than one heartbeat with labels expected", nonNullLabels > 1);
nm.stop();
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestContainerLaunch method verifyTailErrorLogOnContainerExit.
private void verifyTailErrorLogOnContainerExit(Configuration conf, String errorFileName, boolean testForMultipleErrFiles) throws Exception {
Container container = mock(Container.class);
ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
ContainerId containerId = ContainerId.newContainerId(ApplicationAttemptId.newInstance(appId, 1), 1);
when(container.getContainerId()).thenReturn(containerId);
when(container.getUser()).thenReturn("test");
String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir(appId.toString(), containerId.toString());
Path containerLogDir = dirsHandler.getLogPathForWrite(relativeContainerLogDir, false);
ContainerLaunchContext clc = mock(ContainerLaunchContext.class);
List<String> invalidCommand = new ArrayList<String>();
invalidCommand.add("$JAVA_HOME/bin/java");
invalidCommand.add("-Djava.io.tmpdir=$PWD/tmp");
invalidCommand.add("-Dlog4j.configuration=container-log4j.properties");
invalidCommand.add("-Dyarn.app.container.log.dir=" + containerLogDir);
invalidCommand.add("-Dyarn.app.container.log.filesize=0");
invalidCommand.add("-Dhadoop.root.logger=INFO,CLA");
invalidCommand.add("-Dhadoop.root.logfile=syslog");
invalidCommand.add("-Xmx1024m");
invalidCommand.add("org.apache.hadoop.mapreduce.v2.app.MRAppMaster");
invalidCommand.add("1>" + containerLogDir + "/stdout");
invalidCommand.add("2>" + containerLogDir + errorFileName);
when(clc.getCommands()).thenReturn(invalidCommand);
Map<String, String> userSetEnv = new HashMap<String, String>();
userSetEnv.put(Environment.CONTAINER_ID.name(), "user_set_container_id");
userSetEnv.put("JAVA_HOME", INVALID_JAVA_HOME);
userSetEnv.put(Environment.NM_HOST.name(), "user_set_NM_HOST");
userSetEnv.put(Environment.NM_PORT.name(), "user_set_NM_PORT");
userSetEnv.put(Environment.NM_HTTP_PORT.name(), "user_set_NM_HTTP_PORT");
userSetEnv.put(Environment.LOCAL_DIRS.name(), "user_set_LOCAL_DIR");
userSetEnv.put(Environment.USER.key(), "user_set_" + Environment.USER.key());
userSetEnv.put(Environment.LOGNAME.name(), "user_set_LOGNAME");
userSetEnv.put(Environment.PWD.name(), "user_set_PWD");
userSetEnv.put(Environment.HOME.name(), "user_set_HOME");
userSetEnv.put(Environment.CLASSPATH.name(), "APATH");
when(clc.getEnvironment()).thenReturn(userSetEnv);
when(container.getLaunchContext()).thenReturn(clc);
when(container.getLocalizedResources()).thenReturn(Collections.<Path, List<String>>emptyMap());
Dispatcher dispatcher = mock(Dispatcher.class);
@SuppressWarnings("rawtypes") ContainerExitHandler eventHandler = new ContainerExitHandler(testForMultipleErrFiles);
when(dispatcher.getEventHandler()).thenReturn(eventHandler);
Application app = mock(Application.class);
when(app.getAppId()).thenReturn(appId);
when(app.getUser()).thenReturn("test");
Credentials creds = mock(Credentials.class);
when(container.getCredentials()).thenReturn(creds);
((NMContext) context).setNodeId(NodeId.newInstance("127.0.0.1", HTTP_PORT));
ContainerLaunch launch = new ContainerLaunch(context, conf, dispatcher, exec, app, container, dirsHandler, containerManager);
launch.call();
Assert.assertTrue("ContainerExitEvent should have occured", eventHandler.isContainerExitEventOccured());
}
Aggregations