use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService in project hadoop by apache.
the class ContainerManagerImpl method recover.
@SuppressWarnings("unchecked")
private void recover() throws IOException, URISyntaxException {
NMStateStoreService stateStore = context.getNMStateStore();
if (stateStore.canRecover()) {
rsrcLocalizationSrvc.recoverLocalizedResources(stateStore.loadLocalizationState());
RecoveredApplicationsState appsState = stateStore.loadApplicationsState();
for (ContainerManagerApplicationProto proto : appsState.getApplications()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering application with state: " + proto.toString());
}
recoverApplication(proto);
}
for (RecoveredContainerState rcs : stateStore.loadContainersState()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recovering container with state: " + rcs);
}
recoverContainer(rcs);
}
} else {
LOG.info("Not a recoverable state store. Nothing to recover.");
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService in project hadoop by apache.
the class TestResourceLocalizationService method testPublicResourceInitializesLocalDir.
@Test
@SuppressWarnings("unchecked")
public void testPublicResourceInitializesLocalDir() throws Exception {
// Setup state to simulate restart NM with existing state meaning no
// directory creation during initialization
NMStateStoreService spyStateStore = spy(nmContext.getNMStateStore());
when(spyStateStore.canRecover()).thenReturn(true);
NMContext spyContext = spy(nmContext);
when(spyContext.getNMStateStore()).thenReturn(spyStateStore);
List<Path> localDirs = new ArrayList<Path>();
String[] sDirs = new String[4];
for (int i = 0; i < 4; ++i) {
localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
sDirs[i] = localDirs.get(i).toString();
}
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
DrainDispatcher dispatcher = new DrainDispatcher();
EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
dispatcher.register(ApplicationEventType.class, applicationBus);
EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
dispatcher.register(ContainerEventType.class, containerBus);
ContainerExecutor exec = mock(ContainerExecutor.class);
DeletionService delService = mock(DeletionService.class);
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
dirsHandler.init(conf);
dispatcher.init(conf);
dispatcher.start();
try {
ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, delService, dirsHandler, spyContext);
ResourceLocalizationService spyService = spy(rawService);
doReturn(mockServer).when(spyService).createServer();
doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
spyService.init(conf);
final FsPermission defaultPerm = new FsPermission((short) 0755);
// verify directory is not created at initialization
for (Path p : localDirs) {
p = new Path((new URI(p.toString())).getPath());
Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
verify(spylfs, never()).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
}
spyService.start();
final String user = "user0";
// init application
final Application app = mock(Application.class);
final ApplicationId appId = BuilderUtils.newApplicationId(314159265358979L, 3);
when(app.getUser()).thenReturn(user);
when(app.getAppId()).thenReturn(appId);
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
dispatcher.await();
// init container.
final Container c = getMockContainer(appId, 42, user);
// init resources
Random r = new Random();
long seed = r.nextLong();
System.out.println("SEED: " + seed);
r.setSeed(seed);
// Queue up public resource localization
final LocalResource pubResource1 = getPublicMockedResource(r);
final LocalResourceRequest pubReq1 = new LocalResourceRequest(pubResource1);
LocalResource pubResource2 = null;
do {
pubResource2 = getPublicMockedResource(r);
} while (pubResource2 == null || pubResource2.equals(pubResource1));
// above call to make sure we don't get identical resources.
final LocalResourceRequest pubReq2 = new LocalResourceRequest(pubResource2);
Set<LocalResourceRequest> pubRsrcs = new HashSet<LocalResourceRequest>();
pubRsrcs.add(pubReq1);
pubRsrcs.add(pubReq2);
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req.put(LocalResourceVisibility.PUBLIC, pubRsrcs);
spyService.handle(new ContainerLocalizationRequestEvent(c, req));
dispatcher.await();
verify(spyService, times(1)).checkAndInitializeLocalDirs();
// verify directory creation
for (Path p : localDirs) {
p = new Path((new URI(p.toString())).getPath());
Path publicCache = new Path(p, ContainerLocalizer.FILECACHE);
verify(spylfs).mkdir(eq(publicCache), eq(defaultPerm), eq(true));
}
} finally {
dispatcher.stop();
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService in project hadoop by apache.
the class TestLocalResourcesTrackerImpl method testStateStoreSuccessfulLocalization.
@Test
@SuppressWarnings("unchecked")
public void testStateStoreSuccessfulLocalization() throws Exception {
final String user = "someuser";
final ApplicationId appId = ApplicationId.newInstance(1, 1);
// This is a random path. NO File creation will take place at this place.
final Path localDir = new Path("/tmp");
Configuration conf = new YarnConfiguration();
DrainDispatcher dispatcher = null;
dispatcher = createDispatcher(conf);
EventHandler<LocalizerEvent> localizerEventHandler = mock(EventHandler.class);
EventHandler<LocalizerEvent> containerEventHandler = mock(EventHandler.class);
dispatcher.register(LocalizerEventType.class, localizerEventHandler);
dispatcher.register(ContainerEventType.class, containerEventHandler);
DeletionService mockDelService = mock(DeletionService.class);
NMStateStoreService stateStore = mock(NMStateStoreService.class);
try {
LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user, appId, dispatcher, false, conf, stateStore);
// Container 1 needs lr1 resource
ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.APPLICATION);
LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
// Container 1 requests lr1 to be localized
ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1, LocalResourceVisibility.APPLICATION, lc1);
tracker.handle(reqEvent1);
dispatcher.await();
// Simulate the process of localization of lr1
Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir, null);
ArgumentCaptor<LocalResourceProto> localResourceCaptor = ArgumentCaptor.forClass(LocalResourceProto.class);
ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
verify(stateStore).startResourceLocalization(eq(user), eq(appId), localResourceCaptor.capture(), pathCaptor.capture());
LocalResourceProto lrProto = localResourceCaptor.getValue();
Path localizedPath1 = pathCaptor.getValue();
Assert.assertEquals(lr1, new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent());
// Simulate lr1 getting localized
ResourceLocalizedEvent rle1 = new ResourceLocalizedEvent(lr1, pathCaptor.getValue(), 120);
tracker.handle(rle1);
dispatcher.await();
ArgumentCaptor<LocalizedResourceProto> localizedProtoCaptor = ArgumentCaptor.forClass(LocalizedResourceProto.class);
verify(stateStore).finishResourceLocalization(eq(user), eq(appId), localizedProtoCaptor.capture());
LocalizedResourceProto localizedProto = localizedProtoCaptor.getValue();
Assert.assertEquals(lr1, new LocalResourceRequest(new LocalResourcePBImpl(localizedProto.getResource())));
Assert.assertEquals(localizedPath1.toString(), localizedProto.getLocalPath());
LocalizedResource localizedRsrc1 = tracker.getLocalizedResource(lr1);
Assert.assertNotNull(localizedRsrc1);
// simulate release and retention processing
tracker.handle(new ResourceReleaseEvent(lr1, cId1));
dispatcher.await();
boolean removeResult = tracker.remove(localizedRsrc1, mockDelService);
Assert.assertTrue(removeResult);
verify(stateStore).removeLocalizedResource(eq(user), eq(appId), eq(localizedPath1));
} finally {
if (dispatcher != null) {
dispatcher.stop();
}
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService in project hadoop by apache.
the class TestLocalResourcesTrackerImpl method testStateStoreFailedLocalization.
@Test
@SuppressWarnings("unchecked")
public void testStateStoreFailedLocalization() throws Exception {
final String user = "someuser";
final ApplicationId appId = ApplicationId.newInstance(1, 1);
// This is a random path. NO File creation will take place at this place.
final Path localDir = new Path("/tmp");
Configuration conf = new YarnConfiguration();
DrainDispatcher dispatcher = null;
dispatcher = createDispatcher(conf);
EventHandler<LocalizerEvent> localizerEventHandler = mock(EventHandler.class);
EventHandler<LocalizerEvent> containerEventHandler = mock(EventHandler.class);
dispatcher.register(LocalizerEventType.class, localizerEventHandler);
dispatcher.register(ContainerEventType.class, containerEventHandler);
NMStateStoreService stateStore = mock(NMStateStoreService.class);
try {
LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user, appId, dispatcher, false, conf, stateStore);
// Container 1 needs lr1 resource
ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1);
LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.APPLICATION);
LocalizerContext lc1 = new LocalizerContext(user, cId1, null);
// Container 1 requests lr1 to be localized
ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1, LocalResourceVisibility.APPLICATION, lc1);
tracker.handle(reqEvent1);
dispatcher.await();
// Simulate the process of localization of lr1
Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir, null);
ArgumentCaptor<LocalResourceProto> localResourceCaptor = ArgumentCaptor.forClass(LocalResourceProto.class);
ArgumentCaptor<Path> pathCaptor = ArgumentCaptor.forClass(Path.class);
verify(stateStore).startResourceLocalization(eq(user), eq(appId), localResourceCaptor.capture(), pathCaptor.capture());
LocalResourceProto lrProto = localResourceCaptor.getValue();
Path localizedPath1 = pathCaptor.getValue();
Assert.assertEquals(lr1, new LocalResourceRequest(new LocalResourcePBImpl(lrProto)));
Assert.assertEquals(hierarchicalPath1, localizedPath1.getParent());
ResourceFailedLocalizationEvent rfe1 = new ResourceFailedLocalizationEvent(lr1, new Exception("Test").toString());
tracker.handle(rfe1);
dispatcher.await();
verify(stateStore).removeLocalizedResource(eq(user), eq(appId), eq(localizedPath1));
} finally {
if (dispatcher != null) {
dispatcher.stop();
}
}
}
use of org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService in project hadoop by apache.
the class TestContainerManagerRecovery method testApplicationRecovery.
@Test
public void testApplicationRecovery() throws Exception {
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user");
NMStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
Context context = createContext(conf, stateStore);
ContainerManagerImpl cm = createContainerManager(context);
cm.init(conf);
cm.start();
// add an application by starting a container
String appUser = "app_user1";
String modUser = "modify_user1";
String viewUser = "view_user1";
String enemyUser = "enemy_user";
ApplicationId appId = ApplicationId.newInstance(0, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId cid = ContainerId.newContainerId(attemptId, 1);
Map<String, LocalResource> localResources = Collections.emptyMap();
Map<String, String> containerEnv = Collections.emptyMap();
List<String> containerCmds = Collections.emptyList();
Map<String, ByteBuffer> serviceData = Collections.emptyMap();
Credentials containerCreds = new Credentials();
DataOutputBuffer dob = new DataOutputBuffer();
containerCreds.writeTokenStorageToStream(dob);
ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>();
acls.put(ApplicationAccessType.MODIFY_APP, modUser);
acls.put(ApplicationAccessType.VIEW_APP, viewUser);
ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);
// create the logAggregationContext
LogAggregationContext logAggregationContext = LogAggregationContext.newInstance("includePattern", "excludePattern", "includePatternInRollingAggregation", "excludePatternInRollingAggregation");
StartContainersResponse startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
assertTrue(startResponse.getFailedRequests().isEmpty());
assertEquals(1, context.getApplications().size());
Application app = context.getApplications().get(appId);
assertNotNull(app);
waitForAppState(app, ApplicationState.INITING);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// reset container manager and verify app recovered with proper acls
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertEquals(1, context.getApplications().size());
app = context.getApplications().get(appId);
assertNotNull(app);
// check whether LogAggregationContext is recovered correctly
LogAggregationContext recovered = ((ApplicationImpl) app).getLogAggregationContext();
assertNotNull(recovered);
assertEquals(logAggregationContext.getIncludePattern(), recovered.getIncludePattern());
assertEquals(logAggregationContext.getExcludePattern(), recovered.getExcludePattern());
assertEquals(logAggregationContext.getRolledLogsIncludePattern(), recovered.getRolledLogsIncludePattern());
assertEquals(logAggregationContext.getRolledLogsExcludePattern(), recovered.getRolledLogsExcludePattern());
waitForAppState(app, ApplicationState.INITING);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// simulate application completion
List<ApplicationId> finishedApps = new ArrayList<ApplicationId>();
finishedApps.add(appId);
app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
// restart and verify app is marked for finishing
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertEquals(1, context.getApplications().size());
app = context.getApplications().get(appId);
assertNotNull(app);
// no longer saving FINISH_APP event in NM stateStore,
// simulate by resending FINISH_APP event
app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));
// simulate log aggregation completion
app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
assertEquals(app.getApplicationState(), ApplicationState.FINISHED);
app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));
// restart and verify app is no longer present after recovery
cm.stop();
context = createContext(conf, stateStore);
cm = createContainerManager(context);
cm.init(conf);
cm.start();
assertTrue(context.getApplications().isEmpty());
cm.stop();
}
Aggregations