Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application in the Apache Hadoop project.
Class NMWebServices, method getNodeApps.
/**
 * Lists the applications held in the NodeManager context, optionally
 * filtered by application state and/or owning user.
 *
 * <p>Both query parameters are validated once, before any application is
 * examined, so a malformed request is rejected regardless of how many
 * applications are currently registered.
 *
 * @param stateQuery optional state filter; ignored when {@code null} or
 *     empty. Must name an {@code ApplicationState} constant exactly
 *     (validated with {@code ApplicationState.valueOf}); matching against
 *     each application's reported state is then case-insensitive.
 * @param userQuery optional user filter; ignored when {@code null}. Compared
 *     to each application's user with an exact, case-sensitive match.
 * @return the applications that pass every supplied filter (possibly none)
 * @throws BadRequestException if {@code userQuery} is supplied but empty
 * @throws IllegalArgumentException if {@code stateQuery} is non-empty and is
 *     not a valid {@code ApplicationState} name (raised by {@code valueOf})
 */
@GET
@Path("/apps")
@Produces({ MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8, MediaType.APPLICATION_XML + "; " + JettyUtils.UTF_8 })
public AppsInfo getNodeApps(@QueryParam("state") String stateQuery, @QueryParam("user") String userQuery) {
    init();

    // Validation does not depend on any individual application, so do it up
    // front instead of once per loop iteration.
    final boolean filterByState = stateQuery != null && !stateQuery.isEmpty();
    if (filterByState) {
        // Called only for its side effect: rejects unknown state names.
        ApplicationState.valueOf(stateQuery);
    }
    final boolean filterByUser = userQuery != null;
    if (filterByUser && userQuery.isEmpty()) {
        String msg = "Error: You must specify a non-empty string for the user";
        throw new BadRequestException(msg);
    }

    AppsInfo allApps = new AppsInfo();
    for (Application app : this.nmContext.getApplications().values()) {
        AppInfo appInfo = new AppInfo(app);
        if (filterByState && !appInfo.getState().equalsIgnoreCase(stateQuery)) {
            continue;
        }
        if (filterByUser && !appInfo.getUser().equals(userQuery)) {
            continue;
        }
        allApps.add(appInfo);
    }
    return allApps;
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application in the Apache Hadoop project.
Class TestContainerManagerRecovery, method testContainerResizeRecovery.
/**
 * Starts a container, increases its resource allocation, restarts the
 * container manager against the same state store, and checks that the
 * application is still present and the container reports the increased
 * capability after recovery.
 *
 * @throws Exception if any setup, start, resize, or status call fails
 */
@Test
public void testContainerResizeRecovery() throws Exception {
    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
    NMStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.init(conf);
    stateStore.start();
    Context context = createContext(conf, stateStore);
    ContainerManagerImpl cm = createContainerManager(context, delSrvc);
    cm.init(conf);
    cm.start();
    // add an application by starting a container
    ApplicationId appId = ApplicationId.newInstance(0, 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cid = ContainerId.newContainerId(attemptId, 1);
    Map<String, String> containerEnv = Collections.emptyMap();
    Map<String, ByteBuffer> serviceData = Collections.emptyMap();
    Credentials containerCreds = new Credentials();
    DataOutputBuffer dob = new DataOutputBuffer();
    containerCreds.writeTokenStorageToStream(dob);
    ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
    Map<ApplicationAccessType, String> acls = Collections.emptyMap();
    File tmpDir = new File("target", this.getClass().getSimpleName() + "-tmpDir");
    File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
    // Write a long-running script for the container to execute.
    // try-with-resources guarantees the writer is closed even if a write fails.
    try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
        if (Shell.WINDOWS) {
            fileWriter.println("@ping -n 100 127.0.0.1 >nul");
        } else {
            fileWriter.write("\numask 0");
            fileWriter.write("\nexec sleep 100");
        }
    }
    // Register the script as the container's single local resource.
    FileContext localFS = FileContext.getLocalFSFileContext();
    URL resource_alpha = URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
    LocalResource rsrc_alpha = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(LocalResource.class);
    rsrc_alpha.setResource(resource_alpha);
    rsrc_alpha.setSize(-1);
    rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
    rsrc_alpha.setType(LocalResourceType.FILE);
    rsrc_alpha.setTimestamp(scriptFile.lastModified());
    String destinationFile = "dest_file";
    Map<String, LocalResource> localResources = new HashMap<>();
    localResources.put(destinationFile, rsrc_alpha);
    List<String> commands = Arrays.asList(Shell.getRunScriptCommand(scriptFile));
    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, commands, serviceData, containerTokens, acls);
    StartContainersResponse startResponse = startContainer(context, cm, cid, clc, null);
    assertTrue(startResponse.getFailedRequests().isEmpty());
    assertEquals(1, context.getApplications().size());
    Application app = context.getApplications().get(appId);
    assertNotNull(app);
    // make sure the container reaches RUNNING state
    waitForNMContainerState(cm, cid, org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.RUNNING);
    Resource targetResource = Resource.newInstance(2048, 2);
    IncreaseContainersResourceResponse increaseResponse = increaseContainersResource(context, cm, cid, targetResource);
    assertTrue(increaseResponse.getFailedRequests().isEmpty());
    // check status
    ContainerStatus containerStatus = getContainerStatus(context, cm, cid);
    assertEquals(targetResource, containerStatus.getCapability());
    // restart and verify container is running and recovered
    // to the correct size
    cm.stop();
    context = createContext(conf, stateStore);
    cm = createContainerManager(context);
    cm.init(conf);
    cm.start();
    assertEquals(1, context.getApplications().size());
    app = context.getApplications().get(appId);
    assertNotNull(app);
    containerStatus = getContainerStatus(context, cm, cid);
    assertEquals(targetResource, containerStatus.getCapability());
    // Stop the restarted container manager so it does not outlive the test.
    cm.stop();
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application in the Apache Hadoop project.
Class TestContainerManagerRecovery, method testNMRecoveryForAppFinishedWithLogAggregationFailure.
/**
 * Drives an application to FINISHED, restarts the container manager and
 * checks the application is still recovered; then signals a log-handling
 * failure, restarts again, and checks the application is no longer present
 * in the recovered context.
 *
 * @throws Exception if any setup, start, or state-wait call fails
 */
@Test
public void testNMRecoveryForAppFinishedWithLogAggregationFailure() throws Exception {
    conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
    NMStateStoreService stateStore = new NMMemoryStateStoreService();
    stateStore.init(conf);
    stateStore.start();
    Context context = createContext(conf, stateStore);
    ContainerManagerImpl cm = createContainerManager(context);
    cm.init(conf);
    cm.start();
    // add an application by starting a container
    ApplicationId appId = ApplicationId.newInstance(0, 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId cid = ContainerId.newContainerId(attemptId, 1);
    Map<String, LocalResource> localResources = Collections.emptyMap();
    Map<String, String> containerEnv = Collections.emptyMap();
    List<String> containerCmds = Collections.emptyList();
    Map<String, ByteBuffer> serviceData = Collections.emptyMap();
    ContainerLaunchContext clc = ContainerLaunchContext.newInstance(localResources, containerEnv, containerCmds, serviceData, null, null);
    StartContainersResponse startResponse = startContainer(context, cm, cid, clc, null);
    assertTrue(startResponse.getFailedRequests().isEmpty());
    assertEquals(1, context.getApplications().size());
    Application app = context.getApplications().get(appId);
    assertNotNull(app);
    waitForAppState(app, ApplicationState.INITING);
    // simulate application completion
    app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
    waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
    app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
    // Expected value first, so a failure message reports the states the right way round.
    assertEquals(ApplicationState.FINISHED, app.getApplicationState());
    // application is still in NM context.
    assertEquals(1, context.getApplications().size());
    // restart and verify app is still there and marked as finished.
    cm.stop();
    context = createContext(conf, stateStore);
    cm = createContainerManager(context);
    cm.init(conf);
    cm.start();
    assertEquals(1, context.getApplications().size());
    app = context.getApplications().get(appId);
    assertNotNull(app);
    // no longer saving FINISH_APP event in NM stateStore,
    // simulate by resending FINISH_APP event
    app.handle(new ApplicationFinishEvent(appId, "Application killed by ResourceManager"));
    waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
    // TODO need to figure out why additional APPLICATION_RESOURCES_CLEANEDUP
    // is needed.
    app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
    assertEquals(ApplicationState.FINISHED, app.getApplicationState());
    // simulate log aggregation failed.
    app.handle(new ApplicationEvent(app.getAppId(), ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED));
    // restart and verify app is no longer present after recovery
    cm.stop();
    context = createContext(conf, stateStore);
    cm = createContainerManager(context);
    cm.init(conf);
    cm.start();
    assertTrue(context.getApplications().isEmpty());
    cm.stop();
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application in the Apache Hadoop project.
Class TestContainerLaunch, method verifyTailErrorLogOnContainerExit.
/**
 * Launches a mocked container whose command line points at an invalid
 * JAVA_HOME and redirects stderr to {@code errorFileName} under the
 * container log directory, then asserts that the exit handler observed a
 * container-exit event.
 *
 * @param conf configuration handed to the {@code ContainerLaunch}
 * @param errorFileName appended directly to the container log directory to
 *     form the stderr redirect target (no separator is inserted here)
 * @param testForMultipleErrFiles forwarded to the {@code ContainerExitHandler}
 * @throws Exception if resolving the log directory or the launch itself fails
 */
private void verifyTailErrorLogOnContainerExit(Configuration conf, String errorFileName, boolean testForMultipleErrFiles) throws Exception {
    // Identity of the container under test.
    final ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    final ContainerId containerId = ContainerId.newContainerId(ApplicationAttemptId.newInstance(appId, 1), 1);
    final Container container = mock(Container.class);
    when(container.getContainerId()).thenReturn(containerId);
    when(container.getUser()).thenReturn("test");

    // Resolve where this container's logs will be written.
    final Path containerLogDir = dirsHandler.getLogPathForWrite(
        ContainerLaunch.getRelativeContainerLogDir(appId.toString(), containerId.toString()), false);

    // Command line that will fail because JAVA_HOME is invalid.
    final List<String> launchCommand = new ArrayList<String>();
    Collections.addAll(launchCommand,
        "$JAVA_HOME/bin/java",
        "-Djava.io.tmpdir=$PWD/tmp",
        "-Dlog4j.configuration=container-log4j.properties",
        "-Dyarn.app.container.log.dir=" + containerLogDir,
        "-Dyarn.app.container.log.filesize=0",
        "-Dhadoop.root.logger=INFO,CLA",
        "-Dhadoop.root.logfile=syslog",
        "-Xmx1024m",
        "org.apache.hadoop.mapreduce.v2.app.MRAppMaster",
        "1>" + containerLogDir + "/stdout",
        "2>" + containerLogDir + errorFileName);

    // Environment values supplied by the (simulated) user.
    final Map<String, String> env = new HashMap<String, String>();
    env.put(Environment.CONTAINER_ID.name(), "user_set_container_id");
    env.put("JAVA_HOME", INVALID_JAVA_HOME);
    env.put(Environment.NM_HOST.name(), "user_set_NM_HOST");
    env.put(Environment.NM_PORT.name(), "user_set_NM_PORT");
    env.put(Environment.NM_HTTP_PORT.name(), "user_set_NM_HTTP_PORT");
    env.put(Environment.LOCAL_DIRS.name(), "user_set_LOCAL_DIR");
    env.put(Environment.USER.key(), "user_set_" + Environment.USER.key());
    env.put(Environment.LOGNAME.name(), "user_set_LOGNAME");
    env.put(Environment.PWD.name(), "user_set_PWD");
    env.put(Environment.HOME.name(), "user_set_HOME");
    env.put(Environment.CLASSPATH.name(), "APATH");

    // Launch context exposing the command and environment built above.
    final ContainerLaunchContext launchContext = mock(ContainerLaunchContext.class);
    when(launchContext.getCommands()).thenReturn(launchCommand);
    when(launchContext.getEnvironment()).thenReturn(env);
    when(container.getLaunchContext()).thenReturn(launchContext);
    when(container.getLocalizedResources()).thenReturn(Collections.<Path, List<String>>emptyMap());

    // Dispatcher whose only handler records whether a container-exit event arrived.
    @SuppressWarnings("rawtypes") ContainerExitHandler exitHandler = new ContainerExitHandler(testForMultipleErrFiles);
    final Dispatcher dispatcher = mock(Dispatcher.class);
    when(dispatcher.getEventHandler()).thenReturn(exitHandler);

    final Application application = mock(Application.class);
    when(application.getAppId()).thenReturn(appId);
    when(application.getUser()).thenReturn("test");

    final Credentials credentials = mock(Credentials.class);
    when(container.getCredentials()).thenReturn(credentials);

    ((NMContext) context).setNodeId(NodeId.newInstance("127.0.0.1", HTTP_PORT));

    new ContainerLaunch(context, conf, dispatcher, exec, application, container, dirsHandler, containerManager).call();

    Assert.assertTrue("ContainerExitEvent should have occured", exitHandler.isContainerExitEventOccured());
}
Use of org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application in the Apache Hadoop project.
Class TestResourceLocalizationService, method testLocalizerHeartbeatWhenAppCleaningUp.
/**
 * Checks that a localizer heartbeat processed while its application's
 * resources are being destroyed does not throw, and that the next heartbeat
 * response tells the localizer to DIE.
 *
 * @throws Exception if service setup or any localization call fails
 */
@Test(timeout = 20000)
@SuppressWarnings("unchecked")
public void testLocalizerHeartbeatWhenAppCleaningUp() throws Exception {
    conf.set(YarnConfiguration.NM_LOCAL_DIRS, lfs.makeQualified(new Path(basedir, 0 + "")).toString());
    // Start dispatcher.
    DrainDispatcher dispatcher = new DrainDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
    dispatcher.register(ApplicationEventType.class, mock(EventHandler.class));
    dispatcher.register(ContainerEventType.class, mock(EventHandler.class));
    DummyExecutor exec = new DummyExecutor();
    LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
    dirsHandler.init(conf);
    // Start resource localization service.
    ResourceLocalizationService rawService = new ResourceLocalizationService(dispatcher, exec, mock(DeletionService.class), dirsHandler, nmContext);
    ResourceLocalizationService spyService = spy(rawService);
    doReturn(mockServer).when(spyService).createServer();
    doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
    try {
        spyService.init(conf);
        spyService.start();
        // Init application resources.
        final Application app = mock(Application.class);
        final ApplicationId appId = BuilderUtils.newApplicationId(1234567890L, 3);
        when(app.getUser()).thenReturn("user0");
        when(app.getAppId()).thenReturn(appId);
        when(app.toString()).thenReturn(appId.toString());
        spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
        dispatcher.await();
        // Initialize localizer.
        // The seed is printed, presumably so a failing run can be reproduced.
        Random r = new Random();
        long seed = r.nextLong();
        System.out.println("SEED: " + seed);
        r.setSeed(seed);
        final Container c = getMockContainer(appId, 46, "user0");
        FSDataOutputStream out = new FSDataOutputStream(new DataOutputBuffer(), null);
        doReturn(out).when(spylfs).createInternal(isA(Path.class), isA(EnumSet.class), isA(FsPermission.class), anyInt(), anyShort(), anyLong(), isA(Progressable.class), isA(ChecksumOpt.class), anyBoolean());
        final LocalResource resource1 = getAppMockedResource(r);
        final LocalResource resource2 = getAppMockedResource(r);
        // Send localization requests for container.
        // 2 resources generated with APPLICATION visibility.
        final LocalResourceRequest req1 = new LocalResourceRequest(resource1);
        final LocalResourceRequest req2 = new LocalResourceRequest(resource2);
        Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs = new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
        List<LocalResourceRequest> appResourceList = Arrays.asList(req1, req2);
        rsrcs.put(LocalResourceVisibility.APPLICATION, appResourceList);
        spyService.handle(new ContainerLocalizationRequestEvent(c, rsrcs));
        dispatcher.await();
        // Wait for localization to begin.
        exec.waitForLocalizers(1);
        final String containerIdStr = c.getContainerId().toString();
        LocalizerRunner locRunnerForContainer = spyService.getLocalizerRunner(containerIdStr);
        // Heartbeats from container localizer
        LocalResourceStatus rsrcSuccess = mock(LocalResourceStatus.class);
        LocalizerStatus stat = mock(LocalizerStatus.class);
        when(stat.getLocalizerId()).thenReturn(containerIdStr);
        when(rsrcSuccess.getResource()).thenReturn(resource1);
        when(rsrcSuccess.getLocalSize()).thenReturn(4344L);
        when(rsrcSuccess.getLocalPath()).thenReturn(getPath("/some/path"));
        when(rsrcSuccess.getStatus()).thenReturn(ResourceStatusType.FETCH_SUCCESS);
        when(stat.getResources()).thenReturn(Collections.<LocalResourceStatus>emptyList());
        // First heartbeat which schedules first resource.
        LocalizerHeartbeatResponse response = spyService.heartbeat(stat);
        assertEquals("NM should tell localizer to be LIVE in Heartbeat.", LocalizerAction.LIVE, response.getLocalizerAction());
        // Cleanup application.
        spyService.handle(new ContainerLocalizationCleanupEvent(c, rsrcs));
        spyService.handle(new ApplicationLocalizationEvent(LocalizationEventType.DESTROY_APPLICATION_RESOURCES, app));
        dispatcher.await();
        try {
            // Directly send heartbeat to introduce race as app is being cleaned up.
            locRunnerForContainer.processHeartbeat(Collections.singletonList(rsrcSuccess));
        } catch (Exception e) {
            // Fail with the same message, but keep the original exception as the
            // cause so the failure report shows what actually went wrong.
            throw new AssertionError("Exception should not have been thrown on processing heartbeat", e);
        }
        // Send another heartbeat.
        response = spyService.heartbeat(stat);
        assertEquals("NM should tell localizer to DIE in Heartbeat.", LocalizerAction.DIE, response.getLocalizerAction());
        exec.setStopLocalization();
    } finally {
        spyService.stop();
        dispatcher.stop();
    }
}
Aggregations