use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.
the class AbstractYarnScheduler method recoverContainersOnNode.
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
try {
writeLock.lock();
if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || (containerReports != null && containerReports.isEmpty())) {
return;
}
for (NMContainerStatus container : containerReports) {
ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
RMApp rmApp = rmContext.getRMApps().get(appId);
if (rmApp == null) {
LOG.error("Skip recovering container " + container + " for unknown application.");
killOrphanContainerOnNode(nm, container);
continue;
}
SchedulerApplication<T> schedulerApp = applications.get(appId);
if (schedulerApp == null) {
LOG.info("Skip recovering container " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
killOrphanContainerOnNode(nm, container);
continue;
}
LOG.info("Recovering container " + container);
SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
// Do not recover containers for stopped attempt or previous attempt.
if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
LOG.info("Skip recovering container " + container + " for already stopped attempt.");
killOrphanContainerOnNode(nm, container);
continue;
}
}
// create container
RMContainer rmContainer = recoverAndCreateContainer(container, nm);
// recover RMContainer
rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
// recover scheduler node
SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
schedulerNode.recoverContainer(rmContainer);
// recover queue: update headroom etc.
Queue queue = schedulerAttempt.getQueue();
queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
// recover scheduler attempt
schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
// set master container for the current running AMContainer for this
// attempt.
RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
if (appAttempt != null) {
Container masterContainer = appAttempt.getMasterContainer();
// container ID stored in AppAttempt.
if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
((RMContainerImpl) rmContainer).setAMContainer(true);
}
}
if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
// release the container
rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
LOG.info(container.getContainerId() + " is released by application.");
}
}
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.
the class AbstractYarnScheduler method completedContainer.
@VisibleForTesting
@Private
public // clean up a completed container
void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
if (rmContainer == null) {
LOG.info("Container " + containerStatus.getContainerId() + " completed with event " + event + ", but corresponding RMContainer doesn't exist.");
return;
}
if (rmContainer.getExecutionType() == ExecutionType.GUARANTEED) {
completedContainerInternal(rmContainer, containerStatus, event);
completeOustandingUpdatesWhichAreReserved(rmContainer, containerStatus, event);
} else {
ContainerId containerId = rmContainer.getContainerId();
// Inform the container
rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
SchedulerApplicationAttempt schedulerAttempt = getCurrentAttemptForContainer(containerId);
if (schedulerAttempt != null) {
schedulerAttempt.removeRMContainer(containerId);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event);
}
getSchedulerNode(rmContainer.getNodeId()).releaseContainer(rmContainer.getContainerId(), false);
}
// If the container is getting killed in ACQUIRED state, the requester (AM
// for regular containers and RM itself for AM container) will not know what
// happened. Simply add the ResourceRequest back again so that requester
// doesn't need to do anything conditionally.
recoverResourceRequestForContainer(rmContainer);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.
the class FSAppAttempt method containerCompleted.
void containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
try {
writeLock.lock();
Container container = rmContainer.getContainer();
ContainerId containerId = container.getId();
// Remove from the list of containers
if (liveContainers.remove(containerId) == null) {
LOG.info("Additional complete request on completed container " + rmContainer.getContainerId());
return;
}
// Remove from the list of newly allocated containers if found
newlyAllocatedContainers.remove(rmContainer);
// Inform the container
rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
if (LOG.isDebugEnabled()) {
LOG.debug("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event);
}
untrackContainerForPreemption(rmContainer);
if (containerStatus.getDiagnostics().equals(SchedulerUtils.PREEMPTED_CONTAINER)) {
queue.getMetrics().preemptContainer();
}
Resource containerResource = rmContainer.getContainer().getResource();
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId, containerResource);
// Update usage metrics
queue.getMetrics().releaseResources(getUser(), 1, containerResource);
this.attemptResourceUsage.decUsed(containerResource);
// Clear resource utilization metrics cache.
lastMemoryAggregateAllocationUpdateTime = -1;
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.
the class FiCaSchedulerApp method containerCompleted.
public boolean containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event, String partition) {
try {
writeLock.lock();
ContainerId containerId = rmContainer.getContainerId();
// Remove from the list of containers
if (null == liveContainers.remove(containerId)) {
return false;
}
// Remove from the list of newly allocated containers if found
newlyAllocatedContainers.remove(rmContainer);
// Inform the container
rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
containersToPreempt.remove(containerId);
Resource containerResource = rmContainer.getContainer().getResource();
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId, containerResource);
// Update usage metrics
queue.getMetrics().releaseResources(getUser(), 1, containerResource);
attemptResourceUsage.decUsed(partition, containerResource);
// Clear resource utilization metrics cache.
lastMemoryAggregateAllocationUpdateTime = -1;
return true;
} finally {
writeLock.unlock();
}
}
Aggregations