use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class ResourceManager method resetDispatcher.
private void resetDispatcher() {
Dispatcher dispatcher = setupDispatcher();
((Service) dispatcher).init(this.conf);
((Service) dispatcher).start();
removeService((Service) rmDispatcher);
// Need to stop previous rmDispatcher before assigning new dispatcher
// otherwise causes "AsyncDispatcher event handler" thread leak
((Service) rmDispatcher).stop();
rmDispatcher = dispatcher;
addIfService(rmDispatcher);
rmContext.setDispatcher(rmDispatcher);
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class RMHATestBase method startRMs.
protected void startRMs() throws IOException {
rm1 = new MockRM(confForRM1, null, false, false) {
@Override
protected Dispatcher createDispatcher() {
return new DrainDispatcher();
}
};
rm2 = new MockRM(confForRM2, null, false, false) {
@Override
protected Dispatcher createDispatcher() {
return new DrainDispatcher();
}
};
startRMs(rm1, confForRM1, rm2, confForRM2);
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestEventFlow method testSuccessfulContainerLaunch.
@Test
public void testSuccessfulContainerLaunch() throws InterruptedException, IOException, YarnException {
FileContext localFS = FileContext.getLocalFSFileContext();
localFS.delete(new Path(localDir.getAbsolutePath()), true);
localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
localDir.mkdir();
localLogDir.mkdir();
remoteLogDir.mkdir();
YarnConfiguration conf = new YarnConfiguration();
Context context = new NMContext(new NMContainerTokenSecretManager(conf), new NMTokenSecretManagerInNM(), null, null, new NMNullStateStoreService(), false, conf) {
@Override
public int getHttpPort() {
return 1234;
}
};
conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(8040, 10));
ContainerExecutor exec = new DefaultContainerExecutor();
exec.setConf(conf);
DeletionService del = new DeletionService(exec);
Dispatcher dispatcher = new AsyncDispatcher();
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
healthChecker.init(conf);
NodeManagerMetrics metrics = NodeManagerMetrics.create();
NodeStatusUpdater nodeStatusUpdater = new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {
@Override
protected ResourceTracker getRMClient() {
return new LocalRMInterface();
}
;
@Override
protected void stopRMProxy() {
return;
}
@Override
protected void startStatusUpdater() {
// Don't start any updating thread.
return;
}
@Override
public long getRMIdentifier() {
return SIMULATED_RM_IDENTIFIER;
}
};
DummyContainerManager containerManager = new DummyContainerManager(context, exec, del, nodeStatusUpdater, metrics, dirsHandler);
nodeStatusUpdater.init(conf);
((NMContext) context).setContainerManager(containerManager);
nodeStatusUpdater.start();
((NMContext) context).setNodeStatusUpdater(nodeStatusUpdater);
containerManager.init(conf);
containerManager.start();
ContainerLaunchContext launchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class);
ApplicationId applicationId = ApplicationId.newInstance(0, 0);
ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0);
ContainerId cID = ContainerId.newContainerId(applicationAttemptId, 0);
String user = "testing";
StartContainerRequest scRequest = StartContainerRequest.newInstance(launchContext, TestContainerManager.createContainerToken(cID, SIMULATED_RM_IDENTIFIER, context.getNodeId(), user, context.getContainerTokenSecretManager()));
List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
list.add(scRequest);
StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForContainerState(containerManager, cID, Arrays.asList(ContainerState.RUNNING, ContainerState.SCHEDULED), 20);
List<ContainerId> containerIds = new ArrayList<ContainerId>();
containerIds.add(cID);
StopContainersRequest stopRequest = StopContainersRequest.newInstance(containerIds);
containerManager.stopContainers(stopRequest);
BaseContainerManagerTest.waitForContainerState(containerManager, cID, ContainerState.COMPLETE);
containerManager.stop();
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdater method testNMConnectionToRM.
@Test(timeout = 150000)
public void testNMConnectionToRM() throws Exception {
final long delta = 50000;
final long connectionWaitMs = 5000;
final long connectionRetryIntervalMs = 1000;
//Waiting for rmStartIntervalMS, RM will be started
final long rmStartIntervalMS = 2 * 1000;
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitMs);
conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs);
//Test NM try to connect to RM Several times, but finally fail
NodeManagerWithCustomNodeStatusUpdater nmWithUpdater;
nm = nmWithUpdater = new NodeManagerWithCustomNodeStatusUpdater() {
@Override
protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(context, dispatcher, healthChecker, metrics, rmStartIntervalMS, true);
return nodeStatusUpdater;
}
};
nm.init(conf);
long waitStartTime = System.currentTimeMillis();
try {
nm.start();
Assert.fail("NM should have failed to start due to RM connect failure");
} catch (Exception e) {
long t = System.currentTimeMillis();
long duration = t - waitStartTime;
boolean waitTimeValid = (duration >= connectionWaitMs) && (duration < (connectionWaitMs + delta));
if (!waitTimeValid) {
//reject with the inner stack trace
throw new Exception("NM should have tried re-connecting to RM during " + "period of at least " + connectionWaitMs + " ms, but " + "stopped retrying within " + (connectionWaitMs + delta) + " ms: " + e, e);
}
}
//Test NM connect to RM, fail at first several attempts,
//but finally success.
nm = nmWithUpdater = new NodeManagerWithCustomNodeStatusUpdater() {
@Override
protected NodeStatusUpdater createUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
NodeStatusUpdater nodeStatusUpdater = new MyNodeStatusUpdater4(context, dispatcher, healthChecker, metrics, rmStartIntervalMS, false);
return nodeStatusUpdater;
}
};
nm.init(conf);
NodeStatusUpdater updater = nmWithUpdater.getUpdater();
Assert.assertNotNull("Updater not yet created ", updater);
waitStartTime = System.currentTimeMillis();
try {
nm.start();
} catch (Exception ex) {
LOG.error("NM should have started successfully " + "after connecting to RM.", ex);
throw ex;
}
long duration = System.currentTimeMillis() - waitStartTime;
MyNodeStatusUpdater4 myUpdater = (MyNodeStatusUpdater4) updater;
Assert.assertTrue("NM started before updater triggered", myUpdater.isTriggered());
Assert.assertTrue("NM should have connected to RM after " + "the start interval of " + rmStartIntervalMS + ": actual " + duration + " " + myUpdater, (duration >= rmStartIntervalMS));
Assert.assertTrue("NM should have connected to RM less than " + (rmStartIntervalMS + delta) + " milliseconds of RM starting up: actual " + duration + " " + myUpdater, (duration < (rmStartIntervalMS + delta)));
}
use of org.apache.hadoop.yarn.event.Dispatcher in project hadoop by apache.
the class TestNodeStatusUpdater method testNoRegistrationWhenNMServicesFail.
/**
* Verifies that if for some reason NM fails to start ContainerManager RPC
* server, RM is oblivious to NM's presence. The behaviour is like this
* because otherwise, NM will report to RM even if all its servers are not
* started properly, RM will think that the NM is alive and will retire the NM
* only after NM_EXPIRY interval. See MAPREDUCE-2749.
*/
@Test
public void testNoRegistrationWhenNMServicesFail() throws Exception {
nm = new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
return new MyNodeStatusUpdater(context, dispatcher, healthChecker, metrics);
}
@Override
protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, NodeStatusUpdater nodeStatusUpdater, ApplicationACLsManager aclsManager, LocalDirsHandlerService diskhandler) {
return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, metrics, diskhandler) {
@Override
protected void serviceStart() {
// Simulating failure of starting RPC server
throw new YarnRuntimeException("Starting of RPC Server failed");
}
};
}
};
verifyNodeStartFailure("Starting of RPC Server failed");
}
Aggregations