use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent in project hadoop by apache.
the class ContainerManagerImpl method handle.
@SuppressWarnings("unchecked")
@Override
public void handle(ContainerManagerEvent event) {
switch(event.getType()) {
case FINISH_APPS:
CMgrCompletedAppsEvent appsFinishedEvent = (CMgrCompletedAppsEvent) event;
for (ApplicationId appID : appsFinishedEvent.getAppsToCleanup()) {
Application app = this.context.getApplications().get(appID);
if (app == null) {
LOG.warn("couldn't find application " + appID + " while processing" + " FINISH_APPS event");
continue;
}
boolean shouldDropEvent = false;
for (Container container : app.getContainers().values()) {
if (container.isRecovering()) {
LOG.info("drop FINISH_APPS event to " + appID + " because " + "container " + container.getContainerId() + " is recovering");
shouldDropEvent = true;
break;
}
}
if (shouldDropEvent) {
continue;
}
String diagnostic = "";
if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) {
diagnostic = "Application killed on shutdown";
} else if (appsFinishedEvent.getReason() == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
diagnostic = "Application killed by ResourceManager";
}
this.dispatcher.getEventHandler().handle(new ApplicationFinishEvent(appID, diagnostic));
}
break;
case FINISH_CONTAINERS:
CMgrCompletedContainersEvent containersFinishedEvent = (CMgrCompletedContainersEvent) event;
for (ContainerId containerId : containersFinishedEvent.getContainersToCleanup()) {
ApplicationId appId = containerId.getApplicationAttemptId().getApplicationId();
Application app = this.context.getApplications().get(appId);
if (app == null) {
LOG.warn("couldn't find app " + appId + " while processing" + " FINISH_CONTAINERS event");
continue;
}
Container container = app.getContainers().get(containerId);
if (container == null) {
LOG.warn("couldn't find container " + containerId + " while processing FINISH_CONTAINERS event");
continue;
}
if (container.isRecovering()) {
LOG.info("drop FINISH_CONTAINERS event to " + containerId + " because container is recovering");
continue;
}
this.dispatcher.getEventHandler().handle(new ContainerKillEvent(containerId, ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, "Container Killed by ResourceManager"));
}
break;
case DECREASE_CONTAINERS_RESOURCE:
CMgrDecreaseContainersResourceEvent containersDecreasedEvent = (CMgrDecreaseContainersResourceEvent) event;
for (org.apache.hadoop.yarn.api.records.Container container : containersDecreasedEvent.getContainersToDecrease()) {
try {
changeContainerResourceInternal(container.getId(), container.getVersion(), container.getResource(), false);
} catch (YarnException e) {
LOG.error("Unable to decrease container resource", e);
} catch (IOException e) {
LOG.error("Unable to update container resource in store", e);
}
}
break;
case SIGNAL_CONTAINERS:
CMgrSignalContainersEvent containersSignalEvent = (CMgrSignalContainersEvent) event;
for (SignalContainerRequest request : containersSignalEvent.getContainersToSignal()) {
internalSignalToContainer(request, "ResourceManager");
}
break;
default:
throw new YarnRuntimeException("Got an unknown ContainerManagerEvent type: " + event.getType());
}
}
use of org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent in project hadoop by apache.
the class ContainerManagerImpl method cleanupContainersOnNMResync.
public void cleanupContainersOnNMResync() {
Map<ContainerId, Container> containers = context.getContainers();
if (containers.isEmpty()) {
return;
}
LOG.info("Containers still running on " + CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC + " : " + containers.keySet());
List<ContainerId> containerIds = new ArrayList<ContainerId>(containers.keySet());
LOG.info("Waiting for containers to be killed");
this.handle(new CMgrCompletedContainersEvent(containerIds, CMgrCompletedContainersEvent.Reason.ON_NODEMANAGER_RESYNC));
/*
* We will wait till all the containers change their state to COMPLETE. We
* will not remove the container statuses from nm context because these
* are used while re-registering node manager with resource manager.
*/
boolean allContainersCompleted = false;
while (!containers.isEmpty() && !allContainersCompleted) {
allContainersCompleted = true;
for (Entry<ContainerId, Container> container : containers.entrySet()) {
if (((ContainerImpl) container.getValue()).getCurrentState() != ContainerState.COMPLETE) {
allContainersCompleted = false;
try {
Thread.sleep(1000);
} catch (InterruptedException ex) {
LOG.warn("Interrupted while sleeping on container kill on resync", ex);
}
break;
}
}
}
// All containers killed
if (allContainersCompleted) {
LOG.info("All containers in DONE state");
} else {
LOG.info("Done waiting for containers to be killed. Still alive: " + containers.keySet());
}
}
Aggregations