use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestAMRMClientOnRMRestart method testAMRMClientOnAMRMTokenRollOverOnRMRestart.
// Test verify for AM issued with rolled-over AMRMToken
// is still able to communicate with restarted RM.
@Test(timeout = 30000)
public void testAMRMClientOnAMRMTokenRollOverOnRMRestart() throws Exception {
conf.setLong(YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS, rolling_interval_sec);
conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// start first RM
MyResourceManager2 rm1 = new MyResourceManager2(conf, memStore);
rm1.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
Long startTime = System.currentTimeMillis();
// Submit the application
RMApp app = rm1.submitApp(1024);
dispatcher.await();
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// Node heartbeat
nm1.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm1.sendAMLaunched(appAttemptId);
dispatcher.await();
AMRMTokenSecretManager amrmTokenSecretManagerForRM1 = rm1.getRMContext().getAMRMTokenSecretManager();
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
ugi.addTokenIdentifier(token.decodeIdentifier());
AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
amClient.init(conf);
amClient.start();
amClient.registerApplicationMaster("h1", 10000, "");
amClient.allocate(0.1f);
// At mean time, the old AMRMToken should continue to work
while (System.currentTimeMillis() - startTime < rolling_interval_sec * 1000) {
amClient.allocate(0.1f);
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
// DO NOTHING
}
}
Assert.assertTrue(amrmTokenSecretManagerForRM1.getMasterKey().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId());
amClient.allocate(0.1f);
// active the nextMasterKey, and replace the currentMasterKey
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> newToken = amrmTokenSecretManagerForRM1.createAndGetAMRMToken(appAttemptId);
int waitCount = 0;
while (waitCount++ <= 50) {
if (amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() != token.decodeIdentifier().getKeyId()) {
break;
}
try {
amClient.allocate(0.1f);
} catch (Exception ex) {
break;
}
Thread.sleep(500);
}
Assert.assertTrue(amrmTokenSecretManagerForRM1.getNextMasterKeyData() == null);
Assert.assertTrue(amrmTokenSecretManagerForRM1.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
// start 2nd RM
conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + ServerSocketUtil.getPort(45020, 10));
final MyResourceManager2 rm2 = new MyResourceManager2(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
AMRMTokenSecretManager amrmTokenSecretManagerForRM2 = rm2.getRMContext().getAMRMTokenSecretManager();
Assert.assertTrue(amrmTokenSecretManagerForRM2.getCurrnetMasterKeyData().getMasterKey().getKeyId() == newToken.decodeIdentifier().getKeyId());
Assert.assertTrue(amrmTokenSecretManagerForRM2.getNextMasterKeyData() == null);
try {
UserGroupInformation testUser = UserGroupInformation.createRemoteUser("testUser");
SecurityUtil.setTokenService(token, rm2.getApplicationMasterService().getBindAddress());
testUser.addToken(token);
testUser.doAs(new PrivilegedAction<ApplicationMasterProtocol>() {
@Override
public ApplicationMasterProtocol run() {
return (ApplicationMasterProtocol) YarnRPC.create(conf).getProxy(ApplicationMasterProtocol.class, rm2.getApplicationMasterService().getBindAddress(), conf);
}
}).allocate(Records.newRecord(AllocateRequest.class));
Assert.fail("The old Token should not work");
} catch (Exception ex) {
Assert.assertTrue(ex instanceof InvalidToken);
Assert.assertTrue(ex.getMessage().contains("Invalid AMRMToken from " + token.decodeIdentifier().getApplicationAttemptId()));
}
// make sure the recovered AMRMToken works for new RM
amClient.allocate(0.1f);
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
amClient.stop();
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestAMRMClientOnRMRestart method testAMRMClientForUnregisterAMOnRMRestart.
// Test verify for
// 1. AM try to unregister without registering
// 2. AM register to RM, and try to unregister immediately after RM restart
@Test(timeout = 60000)
public void testAMRMClientForUnregisterAMOnRMRestart() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
memStore.init(conf);
// Phase-1 Start 1st RM
MyResourceManager rm1 = new MyResourceManager(conf, memStore);
rm1.start();
DrainDispatcher dispatcher = (DrainDispatcher) rm1.getRMContext().getDispatcher();
// Submit the application
RMApp app = rm1.submitApp(1024);
dispatcher.await();
MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// Node heartbeat
nm1.nodeHeartbeat(true);
dispatcher.await();
ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt().getAppAttemptId();
rm1.sendAMLaunched(appAttemptId);
dispatcher.await();
org.apache.hadoop.security.token.Token<AMRMTokenIdentifier> token = rm1.getRMContext().getRMApps().get(appAttemptId.getApplicationId()).getRMAppAttempt(appAttemptId).getAMRMToken();
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
ugi.addTokenIdentifier(token.decodeIdentifier());
AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
amClient.init(conf);
amClient.start();
amClient.registerApplicationMaster("h1", 10000, "");
amClient.allocate(0.1f);
// Phase-2 start 2nd RM is up
MyResourceManager rm2 = new MyResourceManager(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
((MyAMRMClientImpl) amClient).updateRMProxy(rm2);
dispatcher = (DrainDispatcher) rm2.getRMContext().getDispatcher();
// NM should be rebooted on heartbeat, even first heartbeat for nm2
NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true);
Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction());
// new NM to represent NM re-register
nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
ContainerId containerId = ContainerId.newContainerId(appAttemptId, 1);
NMContainerStatus containerReport = NMContainerStatus.newInstance(containerId, 0, ContainerState.RUNNING, Resource.newInstance(1024, 1), "recover container", 0, Priority.newInstance(0), 0);
nm1.registerNode(Arrays.asList(containerReport), null);
nm1.nodeHeartbeat(true);
dispatcher.await();
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null);
rm2.waitForState(appAttemptId, RMAppAttemptState.FINISHING);
nm1.nodeHeartbeat(appAttemptId, 1, ContainerState.COMPLETE);
rm2.waitForState(appAttemptId, RMAppAttemptState.FINISHED);
rm2.waitForState(app.getApplicationId(), RMAppState.FINISHED);
amClient.stop();
rm1.stop();
rm2.stop();
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class RMHATestBase method startRMs.
protected void startRMs() throws IOException {
rm1 = new MockRM(confForRM1, null, false, false) {
@Override
protected Dispatcher createDispatcher() {
return new DrainDispatcher();
}
};
rm2 = new MockRM(confForRM2, null, false, false) {
@Override
protected Dispatcher createDispatcher() {
return new DrainDispatcher();
}
};
startRMs(rm1, confForRM1, rm2, confForRM2);
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class ReservationACLsTestBase method registerNode.
private void registerNode(String host, int memory, int vCores) throws Exception {
try {
resourceManager.registerNode(host, memory, vCores);
int attempts = 10;
Collection<Plan> plans;
do {
DrainDispatcher dispatcher = (DrainDispatcher) resourceManager.getRMContext().getDispatcher();
dispatcher.await();
LOG.info("Waiting for node capacity to be added to plan");
plans = resourceManager.getRMContext().getReservationSystem().getAllPlans().values();
if (checkCapacity(plans)) {
break;
}
Thread.sleep(100);
} while (attempts-- > 0);
if (attempts <= 0) {
Assert.fail("Exhausted attempts in checking if node capacity was " + "added to the plan");
}
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
use of org.apache.hadoop.yarn.event.DrainDispatcher in project hadoop by apache.
the class TestResourceLocalizationService method testPublicResourceInitializesLocalDir.
@Test
@SuppressWarnings("unchecked")
public void testPublicResourceInitializesLocalDir() throws Exception {
// Setup state to simulate restart NM with existing state meaning no
// directory creation during initialization
NMStateStoreService spyStateStore = spy(nmContext.getNMStateStore());
when(spyStateStore.canRecover()).thenReturn(true);
NMContext spyContext = spy(nmContext);
when(spyContext.getNMStateStore()).thenReturn(spyStateStore);
List<Path> localDirs = new ArrayList<Path>();
String[] sDirs = new String[4];
for (int i = 0; i < 4; ++i) {
localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
sDirs[i] = localDirs.get(i).toString();
}
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
DrainDispatcher dispatcher = new DrainDispatcher();
EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
dispatcher.register(ApplicationEventType.class, applicationBus);
EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
dispatcher.register(ContainerEventType.class, containerBus);
ContainerExecutor exec = mock(ContainerExecutor.class);
DeletionService delService = mock(DeletionService.class);
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
dirsHandler.init(conf);
dispatcher.init(conf);
dispatcher.start();
try {
ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, delService, dirsHandler, spyContext);
ResourceLocalizationService spyService = spy(rawService);
doReturn(mockServer).when(spyService).createServer();
doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
spyService.init(conf);
final FsPermission defaultPerm = new FsPermission((short) 0755);
// verify directory is not created at initialization
for (Path p : localDirs) {
p = new Path((new URI(p.toString())).getPath());
Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
verify(spylfs, never()).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
}
spyService.start();
final String user = "user0";
// init application
final Application app = mock(Application.class);
final ApplicationId appId = BuilderUtils.newApplicationId(314159265358979L, 3);
when(app.getUser()).thenReturn(user);
when(app.getAppId()).thenReturn(appId);
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
dispatcher.await();
// init container.
final Container c = getMockContainer(appId, 42, user);
// init resources
Random r = new Random();
long seed = r.nextLong();
System.out.println("SEED: " + seed);
r.setSeed(seed);
// Queue up public resource localization
final LocalResource pubResource1 = getPublicMockedResource(r);
final LocalResourceRequest pubReq1 = new LocalResourceRequest(pubResource1);
LocalResource pubResource2 = null;
do {
pubResource2 = getPublicMockedResource(r);
} while (pubResource2 == null || pubResource2.equals(pubResource1));
// above call to make sure we don't get identical resources.
final LocalResourceRequest pubReq2 = new LocalResourceRequest(pubResource2);
Set<LocalResourceRequest> pubRsrcs = new HashSet<LocalResourceRequest>();
pubRsrcs.add(pubReq1);
pubRsrcs.add(pubReq2);
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req.put(LocalResourceVisibility.PUBLIC, pubRsrcs);
spyService.handle(new ContainerLocalizationRequestEvent(c, req));
dispatcher.await();
verify(spyService, times(1)).checkAndInitializeLocalDirs();
// verify directory creation
for (Path p : localDirs) {
p = new Path((new URI(p.toString())).getPath());
Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
verify(spylfs).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
}
} finally {
dispatcher.stop();
}
}
Aggregations