Search in sources :

Example 71 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class FifoScheduler method removeNode.

/**
 * Drops a node from this scheduler.
 *
 * Every container still listed as running on the node is first completed
 * with an abnormal {@code LOST_CONTAINER} status and a {@code KILL} event;
 * only then is the node removed from the node tracker. Nothing happens if
 * the tracker has no entry for the node.
 *
 * @param nodeInfo the node to remove
 */
private synchronized void removeNode(RMNode nodeInfo) {
    FiCaSchedulerNode trackedNode = nodeTracker.getNode(nodeInfo.getNodeID());
    if (trackedNode != null) {
        // Iterate over a copy of the running list while completing each container.
        for (RMContainer running : trackedNode.getCopiedListOfRunningContainers()) {
            super.completedContainer(
                running,
                SchedulerUtils.createAbnormalContainerStatus(
                    running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
                RMContainerEventType.KILL);
        }
        nodeTracker.removeNode(nodeInfo.getNodeID());
    }
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 72 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class FifoScheduler method completedContainerInternal.

/**
 * Handles a finished container: notifies the owning application attempt,
 * releases the container on its node and subtracts its resource from the
 * scheduler's used-resource total.
 *
 * If no current attempt is found for the container, the event is only
 * logged and no bookkeeping is changed.
 *
 * @param rmContainer the container that finished
 * @param containerStatus the final status passed on to the application attempt
 * @param event the event that caused the completion
 */
@Lock(FifoScheduler.class)
@Override
protected synchronized void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
    Container finished = rmContainer.getContainer();
    // Look up the attempt that owns the finished container
    FifoAppAttempt attempt = getCurrentAttemptForContainer(finished.getId());
    ApplicationId owningAppId = finished.getId().getApplicationAttemptId().getApplicationId();
    // Look up the node on which the container was allocated
    FiCaSchedulerNode hostNode = (FiCaSchedulerNode) getNode(finished.getNodeId());
    if (attempt != null) {
        // Application first, then node, then the scheduler-wide usage counter
        attempt.containerCompleted(rmContainer, containerStatus, event, RMNodeLabelsManager.NO_LABEL);
        hostNode.releaseContainer(rmContainer.getContainerId(), false);
        Resources.subtractFrom(usedResource, finished.getResource());
        LOG.info("Application attempt " + attempt.getApplicationAttemptId()
            + " released container " + finished.getId()
            + " on node: " + hostNode
            + " with event: " + event);
    } else {
        LOG.info("Unknown application: " + owningAppId
            + " released container " + finished.getId()
            + " on node: " + hostNode
            + " with event: " + event);
    }
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Lock(org.apache.hadoop.yarn.server.utils.Lock)

Example 73 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class FSPreemptionThread method preemptContainers.

/**
 * Marks the given containers for preemption and schedules the task that
 * kills them after {@code warnTimeBeforeKill}.
 *
 * Each container's application attempt is looked up in the scheduler and
 * told to track the container for preemption. If the scheduler no longer
 * knows the attempt, the lookup yields {@code null}; that container is then
 * only logged and not tracked, so the remaining containers are still
 * processed and the kill task is still scheduled.
 *
 * @param containers the containers to preempt
 */
private void preemptContainers(List<RMContainer> containers) {
    // Warn application about containers to be killed
    for (RMContainer container : containers) {
        ApplicationAttemptId appAttemptId = container.getApplicationAttemptId();
        FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId);
        if (app == null) {
            // Without this guard a missing attempt would throw a
            // NullPointerException and abort the whole preemption round.
            LOG.info("Preempting container " + container + " of unknown application attempt " + appAttemptId);
            continue;
        }
        LOG.info("Preempting container " + container + " from queue " + app.getQueueName());
        app.trackContainerForPreemption(container);
    }
    // Schedule timer task to kill containers
    preemptionTimer.schedule(new PreemptContainersTask(containers), warnTimeBeforeKill);
}
Also used : ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 74 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class AbstractYarnScheduler method recoverContainersOnNode.

/**
 * Recovers the containers reported by a node into the scheduler's state.
 *
 * The whole operation runs under the scheduler write lock. It does nothing
 * when work-preserving recovery is disabled or when there are no reports.
 * For each reported container:
 * <ul>
 *   <li>if the application is unknown to the RM context or to the
 *       scheduler, the container is killed as an orphan;</li>
 *   <li>if the application does not keep containers across attempts and
 *       the current attempt is stopped or is not the container's attempt,
 *       the container is killed as an orphan;</li>
 *   <li>otherwise the container is recreated and recovered into the
 *       RMContainer, the scheduler node, the queue and the attempt, marked
 *       as the AM container when it matches the attempt's master container,
 *       and finally released if the attempt had it pending release.</li>
 * </ul>
 *
 * @param containerReports container statuses reported by the node; may be
 *        {@code null} or empty, in which case nothing is recovered
 * @param nm the node that reported the containers
 */
public void recoverContainersOnNode(List<NMContainerStatus> containerReports, RMNode nm) {
    // Acquire the lock before entering the try block: if lock() itself
    // fails, the finally clause must not unlock a lock that is not held.
    writeLock.lock();
    try {
        if (!rmContext.isWorkPreservingRecoveryEnabled() || containerReports == null || containerReports.isEmpty()) {
            return;
        }
        for (NMContainerStatus container : containerReports) {
            ApplicationId appId = container.getContainerId().getApplicationAttemptId().getApplicationId();
            RMApp rmApp = rmContext.getRMApps().get(appId);
            if (rmApp == null) {
                LOG.error("Skip recovering container " + container + " for unknown application.");
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            SchedulerApplication<T> schedulerApp = applications.get(appId);
            if (schedulerApp == null) {
                LOG.info("Skip recovering container  " + container + " for unknown SchedulerApplication. " + "Application current state is " + rmApp.getState());
                killOrphanContainerOnNode(nm, container);
                continue;
            }
            LOG.info("Recovering container " + container);
            SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt();
            if (!rmApp.getApplicationSubmissionContext().getKeepContainersAcrossApplicationAttempts()) {
                // Do not recover containers for stopped attempt or previous attempt.
                if (schedulerAttempt.isStopped() || !schedulerAttempt.getApplicationAttemptId().equals(container.getContainerId().getApplicationAttemptId())) {
                    LOG.info("Skip recovering container " + container + " for already stopped attempt.");
                    killOrphanContainerOnNode(nm, container);
                    continue;
                }
            }
            // create container
            RMContainer rmContainer = recoverAndCreateContainer(container, nm);
            // recover RMContainer
            rmContainer.handle(new RMContainerRecoverEvent(container.getContainerId(), container));
            // recover scheduler node
            SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
            schedulerNode.recoverContainer(rmContainer);
            // recover queue: update headroom etc.
            Queue queue = schedulerAttempt.getQueue();
            queue.recoverContainer(getClusterResource(), schedulerAttempt, rmContainer);
            // recover scheduler attempt
            schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
            // set master container for the current running AMContainer for this
            // attempt.
            RMAppAttempt appAttempt = rmApp.getCurrentAppAttempt();
            if (appAttempt != null) {
                Container masterContainer = appAttempt.getMasterContainer();
                // Flag the recovered container as the AM container when its id
                // matches the master container ID stored in AppAttempt.
                if (masterContainer != null && masterContainer.getId().equals(rmContainer.getContainerId())) {
                    ((RMContainerImpl) rmContainer).setAMContainer(true);
                }
            }
            if (schedulerAttempt.getPendingRelease().remove(container.getContainerId())) {
                // release the container
                rmContainer.handle(new RMContainerFinishedEvent(container.getContainerId(), SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED));
                LOG.info(container.getContainerId() + " is released by application.");
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) RMAppAttempt(org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) RMContainerRecoverEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) RMContainerImpl(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl) NMContainerStatus(org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus) RMContainerFinishedEvent(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 75 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class AbstractYarnScheduler method createSchedContainerChangeRequest.

/**
 * Builds the scheduler-side change request for an
 * {@link UpdateContainerRequest} after checking that the target container
 * is known.
 *
 * <pre>
 * - A returned (non-null) value means the check passed
 * - An exception is thrown when the container cannot be found
 * </pre>
 *
 * @param request the update request naming the container and the capability
 * @param increase whether this is an increase request; only used to word
 *        the error message
 * @return the change request for the container and the node it is allocated on
 * @throws YarnException if no RMContainer is found for the request's
 *         container id (raised as an InvalidResourceRequestException)
 */
private SchedContainerChangeRequest createSchedContainerChangeRequest(UpdateContainerRequest request, boolean increase) throws YarnException {
    ContainerId targetId = request.getContainerId();
    RMContainer target = getRMContainer(targetId);
    if (target != null) {
        SchedulerNode hostNode = getSchedulerNode(target.getAllocatedNode());
        return new SchedContainerChangeRequest(this.rmContext, hostNode, target, request.getCapability());
    }
    String direction;
    if (increase) {
        direction = "increase";
    } else {
        direction = "decrease";
    }
    throw new InvalidResourceRequestException(
        "Failed to get rmContainer for " + direction + " request, with container-id=" + targetId);
}
Also used : InvalidResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Aggregations

RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)166 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)55 Resource (org.apache.hadoop.yarn.api.records.Resource)49 Container (org.apache.hadoop.yarn.api.records.Container)48 Test (org.junit.Test)45 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)41 ArrayList (java.util.ArrayList)29 NodeId (org.apache.hadoop.yarn.api.records.NodeId)29 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)29 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)28 FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)21 RMContainerImpl (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl)18 HashMap (java.util.HashMap)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)17 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)15 Priority (org.apache.hadoop.yarn.api.records.Priority)14 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)13 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12