Use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in the Apache Hadoop project.
Class ContainerManagerImpl, method handle.
/**
 * Processes a {@link ContainerManagerEvent} by dispatching on its type.
 *
 * <ul>
 *   <li>{@code FINISH_APPS}: sends an {@link ApplicationFinishEvent} for each
 *       listed application that is present in the context and has no
 *       recovering container.</li>
 *   <li>{@code FINISH_CONTAINERS}: sends a {@link ContainerKillEvent} for each
 *       listed container that is known to its application and is not
 *       recovering.</li>
 *   <li>{@code DECREASE_CONTAINERS_RESOURCE}: calls
 *       {@code changeContainerResourceInternal} for each container; failures
 *       are logged and not rethrown.</li>
 *   <li>{@code SIGNAL_CONTAINERS}: passes each request to
 *       {@code internalSignalToContainer}.</li>
 * </ul>
 *
 * @param event the event to process
 * @throws YarnRuntimeException if the event type is none of the above
 */
@SuppressWarnings("unchecked")
@Override
public void handle(ContainerManagerEvent event) {
  switch (event.getType()) {
    case FINISH_APPS: {
      CMgrCompletedAppsEvent finishApps = (CMgrCompletedAppsEvent) event;
      nextApp:
      for (ApplicationId appID : finishApps.getAppsToCleanup()) {
        Application application = this.context.getApplications().get(appID);
        if (application == null) {
          LOG.warn("couldn't find application " + appID
              + " while processing FINISH_APPS event");
          continue;
        }

        // An application with any recovering container is skipped entirely.
        for (Container candidate : application.getContainers().values()) {
          if (candidate.isRecovering()) {
            LOG.info("drop FINISH_APPS event to " + appID + " because container "
                + candidate.getContainerId() + " is recovering");
            continue nextApp;
          }
        }

        // The diagnostic stays empty for any reason other than the two below.
        final String diagnostic;
        if (finishApps.getReason() == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) {
          diagnostic = "Application killed on shutdown";
        } else if (finishApps.getReason()
            == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
          diagnostic = "Application killed by ResourceManager";
        } else {
          diagnostic = "";
        }
        this.dispatcher.getEventHandler()
            .handle(new ApplicationFinishEvent(appID, diagnostic));
      }
      break;
    }

    case FINISH_CONTAINERS: {
      CMgrCompletedContainersEvent finishContainers =
          (CMgrCompletedContainersEvent) event;
      for (ContainerId containerId : finishContainers.getContainersToCleanup()) {
        ApplicationId owningAppId =
            containerId.getApplicationAttemptId().getApplicationId();
        Application owningApp = this.context.getApplications().get(owningAppId);
        if (owningApp == null) {
          LOG.warn("couldn't find app " + owningAppId
              + " while processing FINISH_CONTAINERS event");
          continue;
        }

        Container target = owningApp.getContainers().get(containerId);
        if (target == null) {
          LOG.warn("couldn't find container " + containerId
              + " while processing FINISH_CONTAINERS event");
          continue;
        }

        if (target.isRecovering()) {
          LOG.info("drop FINISH_CONTAINERS event to " + containerId
              + " because container is recovering");
          continue;
        }

        this.dispatcher.getEventHandler().handle(
            new ContainerKillEvent(
                containerId,
                ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
                "Container Killed by ResourceManager"));
      }
      break;
    }

    case DECREASE_CONTAINERS_RESOURCE: {
      CMgrDecreaseContainersResourceEvent decreaseEvent =
          (CMgrDecreaseContainersResourceEvent) event;
      // Fully qualified: this is the API record type, not the Container type
      // used in the other cases.
      for (org.apache.hadoop.yarn.api.records.Container toDecrease
          : decreaseEvent.getContainersToDecrease()) {
        try {
          changeContainerResourceInternal(
              toDecrease.getId(),
              toDecrease.getVersion(),
              toDecrease.getResource(),
              false);
        } catch (YarnException e) {
          LOG.error("Unable to decrease container resource", e);
        } catch (IOException e) {
          LOG.error("Unable to update container resource in store", e);
        }
      }
      break;
    }

    case SIGNAL_CONTAINERS: {
      CMgrSignalContainersEvent signalEvent = (CMgrSignalContainersEvent) event;
      for (SignalContainerRequest signalRequest : signalEvent.getContainersToSignal()) {
        internalSignalToContainer(signalRequest, "ResourceManager");
      }
      break;
    }

    default:
      throw new YarnRuntimeException(
          "Got an unknown ContainerManagerEvent type: " + event.getType());
  }
}
Use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in the Apache Hadoop project.
Class TestContainerManager, method testLocalFilesCleanup.
/**
 * Starts a container with one application-visible local resource, waits for
 * the container to complete, then sends a FINISH_APPS event and checks that
 * the application's and container's local directories are deleted.
 */
@Test
public void testLocalFilesCleanup() throws InterruptedException, IOException, YarnException {
  // Real deletion service.
  delSrvc = new DeletionService(exec);
  delSrvc.init(conf);
  containerManager = createContainerManager(delSrvc);
  containerManager.init(conf);
  containerManager.start();

  // Create the resource for the container.
  File dir = new File(tmpDir, "dir");
  dir.mkdirs();
  File file = new File(dir, "file");
  // try-with-resources closes the writer even if the write fails.
  try (PrintWriter fileWriter = new PrintWriter(file)) {
    fileWriter.write("Hello World!");
  }

  // Construct the container id.
  ContainerId cId = createContainerId(0);
  ApplicationId appId = cId.getApplicationAttemptId().getApplicationId();

  // Construct the container spec.
  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);
  URL resource_alpha = URL.fromPath(
      FileContext.getLocalFSFileContext().makeQualified(new Path(file.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(file.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);

  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), user,
          context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  containerManager.startContainers(allRequests);

  BaseContainerManagerTest.waitForContainerState(containerManager, cId,
      ContainerState.COMPLETE);
  BaseContainerManagerTest.waitForApplicationState(containerManager, appId,
      ApplicationState.RUNNING);

  // Compute the local directories used for this application and container.
  String appIDStr = appId.toString();
  String containerIDStr = cId.toString();
  File userCacheDir = new File(localDir, ContainerLocalizer.USERCACHE);
  File userDir = new File(userCacheDir, user);
  File appCache = new File(userDir, ContainerLocalizer.APPCACHE);
  File appDir = new File(appCache, appIDStr);
  File containerDir = new File(appDir, containerIDStr);
  File targetFile = new File(containerDir, destinationFile);
  File sysDir = new File(localDir, ResourceLocalizationService.NM_PRIVATE_DIR);
  File appSysDir = new File(sysDir, appIDStr);
  File containerSysDir = new File(appSysDir, containerIDStr);

  // The container has completed but the application is still running:
  // application-level directories must exist, container-level ones must not.
  Assert.assertTrue("AppDir " + appDir.getAbsolutePath() + " doesn't exist!!",
      appDir.exists());
  Assert.assertTrue("AppSysDir " + appSysDir.getAbsolutePath() + " doesn't exist!!",
      appSysDir.exists());
  for (File f : new File[] { containerDir, containerSysDir }) {
    Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
  }
  Assert.assertFalse(targetFile.getAbsolutePath() + " exists!!", targetFile.exists());

  // Send a FINISH_APPS event for the application and wait for it to finish.
  containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(appId),
      CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
  BaseContainerManagerTest.waitForApplicationState(containerManager, appId,
      ApplicationState.FINISHED);

  // Now ascertain that everything has been cleaned up. Deletion can happen
  // long after the application finishes because the DeletionService is
  // asynchronous, so poll each path for up to 15 seconds before asserting.
  for (File f : new File[] { appDir, containerDir, appSysDir, containerSysDir, targetFile }) {
    int timeout = 0;
    while (f.exists() && timeout++ < 15) {
      Thread.sleep(1000);
    }
    Assert.assertFalse(f.getAbsolutePath() + " exists!!", f.exists());
  }
}
Use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in the Apache Hadoop project.
Class ContainerManagerImpl, method cleanUpApplicationsOnNMShutDown.
/**
 * Asks every application still registered in the context to finish and then
 * waits, for at most {@code waitForContainersOnShutdownMillis}, until the
 * application map is empty.
 *
 * <p>Nothing is done when there are no applications, or when the state store
 * can recover, the node is not decommissioned and supervised recovery is
 * enabled in the configuration.
 *
 * <p>If the calling thread is interrupted while waiting, its interrupt status
 * is restored and the wait ends early, so the interruption is not lost and
 * the loop does not keep sleeping against a pending interrupt.
 */
public void cleanUpApplicationsOnNMShutDown() {
  Map<ApplicationId, Application> applications = this.context.getApplications();
  if (applications.isEmpty()) {
    return;
  }
  LOG.info("Applications still running : " + applications.keySet());

  if (this.context.getNMStateStore().canRecover() && !this.context.getDecommissioned()) {
    if (getConfig().getBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED,
        YarnConfiguration.DEFAULT_NM_RECOVERY_SUPERVISED)) {
      // do not cleanup apps as they can be recovered on restart
      return;
    }
  }

  // Copy the keys so the event carries its own list of application ids.
  List<ApplicationId> appIds = new ArrayList<ApplicationId>(applications.keySet());
  this.handle(new CMgrCompletedAppsEvent(appIds, CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));

  LOG.info("Waiting for Applications to be Finished");
  long waitStartTime = System.currentTimeMillis();
  while (!applications.isEmpty()
      && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException ex) {
      LOG.warn("Interrupted while sleeping on applications finish on shutdown", ex);
      // Preserve the interrupt for callers and stop waiting: with the flag
      // set, further sleeps would throw immediately anyway.
      Thread.currentThread().interrupt();
      break;
    }
  }

  if (applications.isEmpty()) {
    LOG.info("All applications in FINISHED state");
  } else {
    LOG.info("Done waiting for Applications to be Finished. Still alive: "
        + applications.keySet());
  }
}
Use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent in the Apache Hadoop project.
Class TestLogAggregationService, method testLogAggregationForRealContainerLaunch.
/**
 * Launches a container that runs a bash script writing to stdout, stderr and
 * syslog files under {@code localLogDir}, waits for the container to
 * complete, sends a FINISH_APPS event for its application and stops the
 * container manager.
 */
@Test
public void testLogAggregationForRealContainerLaunch()
    throws IOException, InterruptedException, YarnException {
  this.containerManager.start();

  // Write the script the container will run; try-with-resources closes the
  // writer even if a write fails.
  File scriptFile = new File(tmpDir, "scriptFile.sh");
  try (PrintWriter fileWriter = new PrintWriter(scriptFile)) {
    fileWriter.write("\necho Hello World! Stdout! > " + new File(localLogDir, "stdout"));
    fileWriter.write("\necho Hello World! Stderr! > " + new File(localLogDir, "stderr"));
    fileWriter.write("\necho Hello World! Syslog! > " + new File(localLogDir, "syslog"));
  }

  ContainerLaunchContext containerLaunchContext =
      recordFactory.newRecordInstance(ContainerLaunchContext.class);

  // Construct the container id.
  ApplicationId appId = ApplicationId.newInstance(0, 0);
  ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 1);
  ContainerId cId = BuilderUtils.newContainerId(appAttemptId, 0);

  // Register the script as an application-visible local resource.
  URL resource_alpha =
      URL.fromPath(localFS.makeQualified(new Path(scriptFile.getAbsolutePath())));
  LocalResource rsrc_alpha = recordFactory.newRecordInstance(LocalResource.class);
  rsrc_alpha.setResource(resource_alpha);
  rsrc_alpha.setSize(-1);
  rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
  rsrc_alpha.setType(LocalResourceType.FILE);
  rsrc_alpha.setTimestamp(scriptFile.lastModified());
  String destinationFile = "dest_file";
  Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
  localResources.put(destinationFile, rsrc_alpha);
  containerLaunchContext.setLocalResources(localResources);

  // The container command runs the script through bash.
  List<String> commands = new ArrayList<String>();
  commands.add("/bin/bash");
  commands.add(scriptFile.getAbsolutePath());
  containerLaunchContext.setCommands(commands);

  StartContainerRequest scRequest = StartContainerRequest.newInstance(
      containerLaunchContext,
      TestContainerManager.createContainerToken(cId, DUMMY_RM_IDENTIFIER,
          context.getNodeId(), user, context.getContainerTokenSecretManager()));
  List<StartContainerRequest> list = new ArrayList<StartContainerRequest>();
  list.add(scRequest);
  StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
  this.containerManager.startContainers(allRequests);

  BaseContainerManagerTest.waitForContainerState(this.containerManager, cId,
      ContainerState.COMPLETE);

  this.containerManager.handle(new CMgrCompletedAppsEvent(Arrays.asList(appId),
      CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
  this.containerManager.stop();
}
Aggregations