Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent in the Apache Hadoop project.
From the class TestResourceLocalizationService, method testRecovery.
/**
 * Checks that resources which completed localization before a simulated
 * restart are restored by {@code recoverLocalizedResources} from the state
 * store with the same request, local path, size and LOCALIZED state, and that
 * a resource which was still downloading is absent after recovery.
 *
 * NOTE(review): the public tracker is re-fetched after recovery, but this
 * method makes no assertions on the recovered public resources.
 */
@Test
// mocked generics
@SuppressWarnings("unchecked")
public void testRecovery() throws Exception {
final String user1 = "user1";
final String user2 = "user2";
final ApplicationId appId1 = ApplicationId.newInstance(1, 1);
final ApplicationId appId2 = ApplicationId.newInstance(1, 2);
// Configure four qualified local dirs under basedir and enable NM recovery
List<Path> localDirs = new ArrayList<Path>();
String[] sDirs = new String[4];
for (int i = 0; i < 4; ++i) {
localDirs.add(lfs.makeQualified(new Path(basedir, i + "")));
sDirs[i] = localDirs.get(i).toString();
}
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
// The same state store instance is handed to both service instances below,
// so whatever the first one records is what the second one recovers from
NMMemoryStateStoreService stateStore = new NMMemoryStateStoreService();
stateStore.init(conf);
stateStore.start();
DrainDispatcher dispatcher = new DrainDispatcher();
dispatcher.init(conf);
dispatcher.start();
// Application and container events are routed to mocks
EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
dispatcher.register(ApplicationEventType.class, applicationBus);
EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
dispatcher.register(ContainerEventType.class, containerBus);
//Ignore actual localization
EventHandler<LocalizerEvent> localizerBus = mock(EventHandler.class);
dispatcher.register(LocalizerEventType.class, localizerBus);
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
dirsHandler.init(conf);
ResourceLocalizationService spyService = createSpyService(dispatcher, dirsHandler, stateStore);
try {
spyService.init(conf);
spyService.start();
// Two mocked applications, each owned by a different user
final Application app1 = mock(Application.class);
when(app1.getUser()).thenReturn(user1);
when(app1.getAppId()).thenReturn(appId1);
final Application app2 = mock(Application.class);
when(app2.getUser()).thenReturn(user2);
when(app2.getAppId()).thenReturn(appId2);
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app1));
spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app2));
dispatcher.await();
//Get a handle on the trackers after they're setup with INIT_APP_RESOURCES
LocalResourcesTracker appTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user1, appId1);
LocalResourcesTracker privTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE, user1, null);
LocalResourcesTracker appTracker2 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user2, appId2);
LocalResourcesTracker pubTracker = spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC, null, null);
// init containers
final Container c1 = getMockContainer(appId1, 1, user1);
final Container c2 = getMockContainer(appId2, 2, user2);
// init resources
// The seed is printed before being applied to the generator that feeds
// the mocked resources
Random r = new Random();
long seed = r.nextLong();
System.out.println("SEED: " + seed);
r.setSeed(seed);
// Send localization requests of each type.
final LocalResource privResource1 = getPrivateMockedResource(r);
final LocalResourceRequest privReq1 = new LocalResourceRequest(privResource1);
final LocalResource privResource2 = getPrivateMockedResource(r);
final LocalResourceRequest privReq2 = new LocalResourceRequest(privResource2);
final LocalResource pubResource1 = getPublicMockedResource(r);
final LocalResourceRequest pubReq1 = new LocalResourceRequest(pubResource1);
final LocalResource pubResource2 = getPublicMockedResource(r);
final LocalResourceRequest pubReq2 = new LocalResourceRequest(pubResource2);
final LocalResource appResource1 = getAppMockedResource(r);
final LocalResourceRequest appReq1 = new LocalResourceRequest(appResource1);
final LocalResource appResource2 = getAppMockedResource(r);
final LocalResourceRequest appReq2 = new LocalResourceRequest(appResource2);
final LocalResource appResource3 = getAppMockedResource(r);
final LocalResourceRequest appReq3 = new LocalResourceRequest(appResource3);
// Container c1 asks for two private, one public and one app resource
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req1 = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req1.put(LocalResourceVisibility.PRIVATE, Arrays.asList(new LocalResourceRequest[] { privReq1, privReq2 }));
req1.put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq1));
req1.put(LocalResourceVisibility.APPLICATION, Collections.singletonList(appReq1));
// Container c2 asks for two app resources and one public resource
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req2 = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
req2.put(LocalResourceVisibility.APPLICATION, Arrays.asList(new LocalResourceRequest[] { appReq2, appReq3 }));
req2.put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq2));
// Send Request event
spyService.handle(new ContainerLocalizationRequestEvent(c1, req1));
spyService.handle(new ContainerLocalizationRequestEvent(c2, req2));
dispatcher.await();
// Simulate start of localization for all resources
privTracker1.getPathForLocalization(privReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE + user1), null);
privTracker1.getPathForLocalization(privReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.USERCACHE + user1), null);
LocalizedResource privLr1 = privTracker1.getLocalizedResource(privReq1);
LocalizedResource privLr2 = privTracker1.getLocalizedResource(privReq2);
appTracker1.getPathForLocalization(appReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId1), null);
LocalizedResource appLr1 = appTracker1.getLocalizedResource(appReq1);
appTracker2.getPathForLocalization(appReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId2), null);
LocalizedResource appLr2 = appTracker2.getLocalizedResource(appReq2);
appTracker2.getPathForLocalization(appReq3, dirsHandler.getLocalPathForWrite(ContainerLocalizer.APPCACHE + appId2), null);
LocalizedResource appLr3 = appTracker2.getLocalizedResource(appReq3);
pubTracker.getPathForLocalization(pubReq1, dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE), null);
LocalizedResource pubLr1 = pubTracker.getLocalizedResource(pubReq1);
pubTracker.getPathForLocalization(pubReq2, dirsHandler.getLocalPathForWrite(ContainerLocalizer.FILECACHE), null);
LocalizedResource pubLr2 = pubTracker.getLocalizedResource(pubReq2);
// Simulate completion of localization for most resources with
// possibly different sizes than in the request
// (appReq2 is deliberately left without a ResourceLocalizedEvent)
assertNotNull("Localization not started", privLr1.getLocalPath());
privTracker1.handle(new ResourceLocalizedEvent(privReq1, privLr1.getLocalPath(), privLr1.getSize() + 5));
assertNotNull("Localization not started", privLr2.getLocalPath());
privTracker1.handle(new ResourceLocalizedEvent(privReq2, privLr2.getLocalPath(), privLr2.getSize() + 10));
assertNotNull("Localization not started", appLr1.getLocalPath());
appTracker1.handle(new ResourceLocalizedEvent(appReq1, appLr1.getLocalPath(), appLr1.getSize()));
assertNotNull("Localization not started", appLr3.getLocalPath());
appTracker2.handle(new ResourceLocalizedEvent(appReq3, appLr3.getLocalPath(), appLr3.getSize() + 7));
assertNotNull("Localization not started", pubLr1.getLocalPath());
pubTracker.handle(new ResourceLocalizedEvent(pubReq1, pubLr1.getLocalPath(), pubLr1.getSize() + 1000));
assertNotNull("Localization not started", pubLr2.getLocalPath());
pubTracker.handle(new ResourceLocalizedEvent(pubReq2, pubLr2.getLocalPath(), pubLr2.getSize() + 99999));
dispatcher.await();
// Every resource that got a localized event is LOCALIZED; appLr2 got none
// and is still DOWNLOADING
assertEquals(ResourceState.LOCALIZED, privLr1.getState());
assertEquals(ResourceState.LOCALIZED, privLr2.getState());
assertEquals(ResourceState.LOCALIZED, appLr1.getState());
assertEquals(ResourceState.DOWNLOADING, appLr2.getState());
assertEquals(ResourceState.LOCALIZED, appLr3.getState());
assertEquals(ResourceState.LOCALIZED, pubLr1.getState());
assertEquals(ResourceState.LOCALIZED, pubLr2.getState());
// restart and recover
// A fresh service instance is built over the same dispatcher, dirs handler
// and state store; it is init'ed (start() is not called here) and then
// asked to recover from the stored localization state
spyService = createSpyService(dispatcher, dirsHandler, stateStore);
spyService.init(conf);
spyService.recoverLocalizedResources(stateStore.loadLocalizationState());
dispatcher.await();
// Re-fetch the trackers from the new service instance
appTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user1, appId1);
privTracker1 = spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE, user1, null);
appTracker2 = spyService.getLocalResourcesTracker(LocalResourceVisibility.APPLICATION, user2, appId2);
pubTracker = spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC, null, null);
// Each recovered resource must match its pre-restart counterpart
LocalizedResource recoveredRsrc = privTracker1.getLocalizedResource(privReq1);
assertEquals(privReq1, recoveredRsrc.getRequest());
assertEquals(privLr1.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(privLr1.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
recoveredRsrc = privTracker1.getLocalizedResource(privReq2);
assertEquals(privReq2, recoveredRsrc.getRequest());
assertEquals(privLr2.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(privLr2.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
recoveredRsrc = appTracker1.getLocalizedResource(appReq1);
assertEquals(appReq1, recoveredRsrc.getRequest());
assertEquals(appLr1.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(appLr1.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
// appReq2 was still downloading at restart, so it must not be recovered
recoveredRsrc = appTracker2.getLocalizedResource(appReq2);
assertNull("in-progress resource should not be present", recoveredRsrc);
recoveredRsrc = appTracker2.getLocalizedResource(appReq3);
assertEquals(appReq3, recoveredRsrc.getRequest());
assertEquals(appLr3.getLocalPath(), recoveredRsrc.getLocalPath());
assertEquals(appLr3.getSize(), recoveredRsrc.getSize());
assertEquals(ResourceState.LOCALIZED, recoveredRsrc.getState());
} finally {
// Release the dispatcher and state store whether or not the test passed
dispatcher.stop();
stateStore.close();
}
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent in the Apache Hadoop project.
From the class TestContainerManagerRecovery, method testApplicationRecovery.
/**
 * Exercises application recovery across several container manager restarts
 * that share one state store:
 * <ol>
 *   <li>start a container so an application is created, and check its ACLs;</li>
 *   <li>restart and check that the application, its
 *       {@link LogAggregationContext} and its ACLs are recovered;</li>
 *   <li>finish the application, restart, resend the finish event and check
 *       the ACLs again;</li>
 *   <li>signal resource cleanup and log handling completion, restart, and
 *       check that no application is left in the context.</li>
 * </ol>
 */
@Test
public void testApplicationRecovery() throws Exception {
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
  conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
  conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user");
  // One state store is reused by every container manager created below
  NMStateStoreService stateStore = new NMMemoryStateStoreService();
  stateStore.init(conf);
  stateStore.start();
  Context context = createContext(conf, stateStore);
  ContainerManagerImpl cm = createContainerManager(context);
  cm.init(conf);
  cm.start();

  // add an application by starting a container
  String appUser = "app_user1";
  String modUser = "modify_user1";
  String viewUser = "view_user1";
  String enemyUser = "enemy_user";
  ApplicationId appId = ApplicationId.newInstance(0, 1);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId cid = ContainerId.newContainerId(attemptId, 1);
  Map<String, LocalResource> localResources = Collections.emptyMap();
  Map<String, String> containerEnv = Collections.emptyMap();
  List<String> containerCmds = Collections.emptyList();
  Map<String, ByteBuffer> serviceData = Collections.emptyMap();
  // Serialize an empty credentials object to use as the container tokens
  Credentials containerCreds = new Credentials();
  DataOutputBuffer dob = new DataOutputBuffer();
  containerCreds.writeTokenStorageToStream(dob);
  ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
  Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>();
  acls.put(ApplicationAccessType.MODIFY_APP, modUser);
  acls.put(ApplicationAccessType.VIEW_APP, viewUser);
  ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, containerTokens, acls);
  // create the logAggregationContext
  LogAggregationContext logAggregationContext = LogAggregationContext.newInstance("includePattern", "excludePattern", "includePatternInRollingAggregation", "excludePatternInRollingAggregation");
  StartContainersResponse startResponse = startContainer(context, cm, cid, clc, logAggregationContext);
  assertTrue(startResponse.getFailedRequests().isEmpty());
  assertEquals(1, context.getApplications().size());
  Application app = context.getApplications().get(appId);
  assertNotNull(app);
  waitForAppState(app, ApplicationState.INITING);
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));

  // reset container manager and verify app recovered with proper acls
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertEquals(1, context.getApplications().size());
  app = context.getApplications().get(appId);
  assertNotNull(app);
  // check whether LogAggregationContext is recovered correctly
  LogAggregationContext recovered = ((ApplicationImpl) app).getLogAggregationContext();
  assertNotNull(recovered);
  assertEquals(logAggregationContext.getIncludePattern(), recovered.getIncludePattern());
  assertEquals(logAggregationContext.getExcludePattern(), recovered.getExcludePattern());
  assertEquals(logAggregationContext.getRolledLogsIncludePattern(), recovered.getRolledLogsIncludePattern());
  assertEquals(logAggregationContext.getRolledLogsExcludePattern(), recovered.getRolledLogsExcludePattern());
  waitForAppState(app, ApplicationState.INITING);
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));

  // simulate application completion
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);

  // restart and verify app is marked for finishing
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertEquals(1, context.getApplications().size());
  app = context.getApplications().get(appId);
  assertNotNull(app);
  // no longer saving FINISH_APP event in NM stateStore,
  // simulate by resending FINISH_APP event
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(modUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.MODIFY_APP, appUser, appId));
  assertTrue(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(viewUser), ApplicationAccessType.VIEW_APP, appUser, appId));
  assertFalse(context.getApplicationACLsManager().checkAccess(UserGroupInformation.createRemoteUser(enemyUser), ApplicationAccessType.VIEW_APP, appUser, appId));

  // simulate log aggregation completion
  app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
  // expected value first, so a failure reports expected/actual correctly
  assertEquals(ApplicationState.FINISHED, app.getApplicationState());
  app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));

  // restart and verify app is no longer present after recovery
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertTrue(context.getApplications().isEmpty());
  cm.stop();
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent in the Apache Hadoop project.
From the class ResourceLocalizationService, method handleDestroyApplicationResources.
/**
 * Tears down the localization state of a finished application.
 *
 * <p>Removes the application's resource tracker, asks the state store to
 * forget every tracked resource that has a local path, submits the
 * application's user-cache and nmPrivate directories for deletion on each
 * local dir returned by {@code dirsHandler.getLocalDirsForCleanup()}, and
 * finally dispatches {@code APPLICATION_RESOURCES_CLEANEDUP} for the
 * application.
 *
 * <p>A state store failure for one resource is logged and does not stop the
 * remaining cleanup.
 *
 * @param application the application whose resources are being destroyed
 */
@SuppressWarnings({ "unchecked" })
private void handleDestroyApplicationResources(Application application) {
  String userName = application.getUser();
  ApplicationId appId = application.getAppId();
  // NOTE(review): the directory name below comes from application.toString(),
  // while the tracker key uses appId.toString(); presumably both yield the
  // same string -- confirm against the Application implementation.
  String appIDStr = application.toString();

  LocalResourcesTracker appLocalRsrcsTracker = appRsrc.remove(appId.toString());
  if (appLocalRsrcsTracker != null) {
    for (LocalizedResource rsrc : appLocalRsrcsTracker) {
      Path localPath = rsrc.getLocalPath();
      if (localPath != null) {
        try {
          stateStore.removeLocalizedResource(userName, appId, localPath);
        } catch (IOException e) {
          // Best effort: keep cleaning up the remaining resources
          LOG.error("Unable to remove resource " + rsrc + " for " + appIDStr + " from state store", e);
        }
      }
    }
  } else {
    LOG.warn("Removing uninitialized application " + application);
  }

  // Delete the application directories
  for (String localDir : dirsHandler.getLocalDirsForCleanup()) {
    // Delete the user-owned app-dir
    Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
    Path userdir = new Path(usersdir, userName);
    Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
    Path appDir = new Path(allAppsdir, appIDStr);
    submitDirForDeletion(userName, appDir);

    // Delete the nmPrivate app-dir
    Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
    Path appSysDir = new Path(sysDir, appIDStr);
    submitDirForDeletion(null, appSysDir);
  }

  // TODO: decrement reference counts of all resources associated with this
  // app
  dispatcher.getEventHandler().handle(new ApplicationEvent(appId, ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent in the Apache Hadoop project.
From the class LogAggregationService, method initApp.
/**
 * Prepares log aggregation for an application and reports the outcome.
 *
 * <p>Verifies/creates the remote log directory and initializes the
 * application's aggregator. Dispatches
 * {@code APPLICATION_LOG_HANDLING_INITED} when both steps complete, or
 * {@code APPLICATION_LOG_HANDLING_FAILED} when either throws a
 * {@link YarnRuntimeException} (which is logged, not rethrown).
 *
 * @param appId application being initialized
 * @param user passed through to the aggregator initialization
 * @param credentials passed through to the aggregator initialization
 * @param appAcls passed through to the aggregator initialization
 * @param logAggregationContext passed through to the aggregator initialization
 * @param recoveredLogInitedTime passed through to the aggregator initialization
 */
@SuppressWarnings("unchecked")
private void initApp(final ApplicationId appId, String user, Credentials credentials, Map<ApplicationAccessType, String> appAcls, LogAggregationContext logAggregationContext, long recoveredLogInitedTime) {
  // Decide which outcome to report; the event itself is built once below
  ApplicationEventType outcome;
  try {
    verifyAndCreateRemoteLogDir(getConfig());
    initAppAggregator(appId, user, credentials, appAcls, logAggregationContext, recoveredLogInitedTime);
    outcome = ApplicationEventType.APPLICATION_LOG_HANDLING_INITED;
  } catch (YarnRuntimeException e) {
    LOG.warn("Application failed to init aggregation", e);
    outcome = ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED;
  }
  this.dispatcher.getEventHandler().handle(new ApplicationEvent(appId, outcome));
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent in the Apache Hadoop project.
From the class TestContainerManagerRecovery, method testNMRecoveryForAppFinishedWithLogAggregationFailure.
/**
 * Checks recovery of an application that finished but whose log handling
 * fails. After the finish and resource-cleanup events the application is
 * FINISHED yet still present in the context, and is still present after a
 * restart. Once {@code APPLICATION_LOG_HANDLING_FAILED} is delivered, a
 * further restart recovers no applications.
 */
@Test
public void testNMRecoveryForAppFinishedWithLogAggregationFailure() throws Exception {
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
  conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
  // One state store is reused by every container manager created below
  NMStateStoreService stateStore = new NMMemoryStateStoreService();
  stateStore.init(conf);
  stateStore.start();
  Context context = createContext(conf, stateStore);
  ContainerManagerImpl cm = createContainerManager(context);
  cm.init(conf);
  cm.start();

  // add an application by starting a container
  ApplicationId appId = ApplicationId.newInstance(0, 1);
  ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
  ContainerId cid = ContainerId.newContainerId(attemptId, 1);
  Map<String, LocalResource> localResources = Collections.emptyMap();
  Map<String, String> containerEnv = Collections.emptyMap();
  List<String> containerCmds = Collections.emptyList();
  Map<String, ByteBuffer> serviceData = Collections.emptyMap();
  ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, null, null);
  StartContainersResponse startResponse = startContainer(context, cm, cid, clc, null);
  assertTrue(startResponse.getFailedRequests().isEmpty());
  assertEquals(1, context.getApplications().size());
  Application app = context.getApplications().get(appId);
  assertNotNull(app);
  waitForAppState(app, ApplicationState.INITING);

  // simulate application completion
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
  app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
  // expected value first, so a failure reports expected/actual correctly
  assertEquals(ApplicationState.FINISHED, app.getApplicationState());
  // application is still in NM context.
  assertEquals(1, context.getApplications().size());

  // restart and verify app is still there and marked as finished.
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertEquals(1, context.getApplications().size());
  app = context.getApplications().get(appId);
  assertNotNull(app);
  // no longer saving FINISH_APP event in NM stateStore,
  // simulate by resending FINISH_APP event
  app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
  waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
  // TODO need to figure out why additional APPLICATION_RESOURCES_CLEANEDUP
  // is needed.
  app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
  assertEquals(ApplicationState.FINISHED, app.getApplicationState());

  // simulate log aggregation failed.
  app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED));

  // restart and verify app is no longer present after recovery
  cm.stop();
  context = createContext(conf, stateStore);
  cm = createContainerManager(context);
  cm.init(conf);
  cm.start();
  assertTrue(context.getApplications().isEmpty());
  cm.stop();
}
Aggregations