Search in sources :

Example 1 with RMContainerFinishedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.

In the class AbstractYarnScheduler, the method recoverContainersOnNode.

/**
 * Recovers scheduler-side state for the containers reported by a node.
 *
 * <p>Does nothing when work-preserving recovery is disabled or when the
 * report list is {@code null} or empty. Otherwise each reported container is
 * either killed as an orphan (unknown application, unknown scheduler
 * application, or, when containers are not kept across attempts, a stopped
 * or non-current attempt) or recovered into the RMContainer, the scheduler
 * node, the queue and the scheduler attempt, in that order.
 *
 * @param containerReports container statuses reported by the node; may be
 *                         {@code null} or empty
 * @param nm               the node that reported the containers
 */
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
    // Acquire the lock BEFORE entering the try block: if lock() itself fails,
    // the finally clause must not attempt to unlock a lock we never held.
    writeLock.lock();
    try {
        if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || containerReports.isEmpty()) {
            return;
        }
        for (NMContainerStatus container : containerReports) {
            ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
            RMApp rmApp = rmContext.getRMApps().get(appId);
            if (rmApp == null) {
                LOG.error("Skip recovering container " + container + " for unknown application.");
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            SchedulerApplication<T> schedulerApp = applications.get(appId);
            if (schedulerApp == null) {
                LOG.info("Skip recovering container  " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            LOG.info("Recovering container " + container);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
            if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
                // Do not recover containers for stopped attempt or previous attempt.
                if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
                    LOG.info("Skip recovering container " + container + " for already stopped attempt.");
                    killOrphanContainerOnNode(nm, container);
                    continue;
                }
            }
            // create container
            RMContainer rmContainer = recoverAndCreateContainer(container, nm);
            // recover RMContainer
            rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
            // recover scheduler node
            SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
            schedulerNode.recoverContainer(rmContainer);
            // recover queue: update headroom etc.
            Queue queue = schedulerAttempt.getQueue();
            queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
            // recover scheduler attempt
            schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
            // set master container for the current running AMContainer for this
            // attempt.
            RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
            if (appAttempt != null) {
                Container masterContainer = appAttempt.getMasterContainer();
                // Flag the recovered container as the AM container when its ID
                // matches the master container ID stored in the app attempt.
                if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
                    ((RMContainerImpl) rmContainer).setAMContainer(true);
                }
            }
            // If a release for this container was pending, finish it now.
            if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
                // release the container
                rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
                LOG.info(container.getContainerId() + " is released by application.");
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainerRecoverEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) RMContainerImpl(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 2 with RMContainerFinishedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.

In the class AbstractYarnScheduler, the method completedContainer.

/**
 * Cleans up a completed container.
 *
 * <p>When {@code rmContainer} is {@code null} the call is only logged.
 * Containers whose execution type is {@code GUARANTEED} are delegated to
 * {@code completedContainerInternal} and
 * {@code completeOustandingUpdatesWhichAreReserved}. Any other container is
 * sent an {@link RMContainerFinishedEvent}, removed from its current
 * scheduler attempt (if one is found) and released from its scheduler node
 * (if one is found). In both non-null cases
 * {@code recoverResourceRequestForContainer} is called last.
 *
 * @param rmContainer     the completed container; may be {@code null}
 * @param containerStatus the final status reported for the container
 * @param event           the event type that caused the completion
 */
@VisibleForTesting
@Private
public void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
    if (rmContainer == null) {
        LOG.info("Container " + containerStatus.getContainerId() + " completed with event " + event + ", but corresponding RMContainer doesn't exist.");
        return;
    }
    if (rmContainer.getExecutionType() == ExecutionType.GUARANTEED) {
        completedContainerInternal(rmContainer, containerStatus, event);
        completeOustandingUpdatesWhichAreReserved(rmContainer, containerStatus, event);
    } else {
        ContainerId containerId = rmContainer.getContainerId();
        // Inform the container
        rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
        SchedulerApplicationAttempt schedulerAttempt = getCurrentAttemptForContainer(containerId);
        if (schedulerAttempt != null) {
            schedulerAttempt.removeRMContainer(containerId);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event);
        }
        // The node lookup can miss just like the attempt lookup above; guard
        // it the same way instead of failing with a NullPointerException.
        SchedulerNode node = getSchedulerNode(rmContainer.getNodeId());
        if (node != null) {
            node.releaseContainer(rmContainer.getContainerId(), false);
        }
    }
    // If the container is getting killed in ACQUIRED state, the requester (AM
    // for regular containers and RM itself for AM container) will not know what
    // happened. Simply add the ResourceRequest back again so that requester
    // doesn't need to do anything conditionally.
    recoverResourceRequestForContainer(rmContainer);
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Private(org.apache.hadoop.classification.InterfaceAudience.Private)

Example 3 with RMContainerFinishedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.

In the class FSAppAttempt, the method containerCompleted.

/**
 * Handles completion of a container belonging to this attempt.
 *
 * <p>If the container is not in {@code liveContainers} the call is logged
 * and ignored. Otherwise the container is removed from the live and
 * newly-allocated collections, sent an {@link RMContainerFinishedEvent},
 * untracked for preemption, audit-logged, and its resource is subtracted
 * from the queue metrics and from this attempt's resource usage.
 *
 * @param rmContainer     the container that completed
 * @param containerStatus the final status reported for the container
 * @param event           the event type that caused the completion
 */
void containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
    // Acquire the lock BEFORE entering the try block: if lock() itself fails,
    // the finally clause must not attempt to unlock a lock we never held.
    writeLock.lock();
    try {
        Container container = rmContainer.getContainer();
        ContainerId containerId = container.getId();
        // Remove from the list of containers
        if (liveContainers.remove(containerId) == null) {
            LOG.info("Additional complete request on completed container " + rmContainer.getContainerId());
            return;
        }
        // Remove from the list of newly allocated containers if found
        newlyAllocatedContainers.remove(rmContainer);
        // Inform the container
        rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
        if (LOG.isDebugEnabled()) {
            LOG.debug("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event);
        }
        untrackContainerForPreemption(rmContainer);
        // Constant-first comparison so that null diagnostics cannot throw.
        if (SchedulerUtils.PREEMPTED_CONTAINER.equals(containerStatus.getDiagnostics())) {
            queue.getMetrics().preemptContainer();
        }
        Resource containerResource = rmContainer.getContainer().getResource();
        RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId, containerResource);
        // Update usage metrics
        queue.getMetrics().releaseResources(getUser(), 1, containerResource);
        this.attemptResourceUsage.decUsed(containerResource);
        // Clear resource utilization metrics cache.
        lastMemoryAggregateAllocationUpdateTime = -1;
    } finally {
        writeLock.unlock();
    }
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) Resource(org.apache.hadoop.yarn.api.records.Resource)

Example 4 with RMContainerFinishedEvent

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent in project hadoop by apache.

In the class FiCaSchedulerApp, the method containerCompleted.

/**
 * Handles completion of a container belonging to this application attempt.
 *
 * <p>If the container is not in {@code liveContainers} nothing else is done.
 * Otherwise the container is removed from the live, newly-allocated and
 * to-preempt collections, sent an {@link RMContainerFinishedEvent},
 * audit-logged, and its resource is subtracted from the queue metrics and
 * from this attempt's usage for the given partition.
 *
 * @param rmContainer     the container that completed
 * @param containerStatus the final status reported for the container
 * @param event           the event type that caused the completion
 * @param partition       the partition passed to
 *                        {@code attemptResourceUsage.decUsed}
 * @return {@code true} if the container was live and has been cleaned up;
 *         {@code false} if it was not found in {@code liveContainers}
 */
public boolean containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event, String partition) {
    // Acquire the lock BEFORE entering the try block: if lock() itself fails,
    // the finally clause must not attempt to unlock a lock we never held.
    writeLock.lock();
    try {
        ContainerId containerId = rmContainer.getContainerId();
        // Remove from the list of containers
        if (null == liveContainers.remove(containerId)) {
            return false;
        }
        // Remove from the list of newly allocated containers if found
        newlyAllocatedContainers.remove(rmContainer);
        // Inform the container
        rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, event));
        containersToPreempt.remove(containerId);
        Resource containerResource = rmContainer.getContainer().getResource();
        RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId, containerResource);
        // Update usage metrics
        queue.getMetrics().releaseResources(getUser(), 1, containerResource);
        attemptResourceUsage.decUsed(partition, containerResource);
        // Clear resource utilization metrics cache.
        lastMemoryAggregateAllocationUpdateTime = -1;
        return true;
    } finally {
        writeLock.unlock();
    }
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) Resource(org.apache.hadoop.yarn.api.records.Resource)

Aggregations

RMContainerFinishedEvent (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent): 4; ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 3; Container (org.apache.hadoop.yarn.api.records.Container): 2; Resource (org.apache.hadoop.yarn.api.records.Resource): 2; RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 2; VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1; Private (org.apache.hadoop.classification.InterfaceAudience.Private): 1; ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1; NMContainerStatus (org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus): 1; RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 1; RMAppAttempt (org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt): 1; RMContainerImpl (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl): 1; RMContainerRecoverEvent (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent): 1