Search in sources :

Example 6 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class SLSCapacityScheduler method updateQueueWithNodeUpdate.

/**
 * Updates the per-queue counters for every container that the given node
 * update reports as completed.
 *
 * <p>The amount released is derived from the container's exit status:
 * <ul>
 *   <li>{@code ContainerExitStatus.SUCCESS}: the resource of the matching
 *       live container of the owning application attempt;</li>
 *   <li>{@code ContainerExitStatus.ABORTED}: the resource recorded for the
 *       container in {@code preemptionContainerMap}; that entry is then
 *       removed;</li>
 *   <li>any other status, or no match: zero memory and zero vcores.</li>
 * </ul>
 * {@code updateQueueMetrics} is invoked once per completed container, even
 * when nothing was released. Containers whose application attempt has no
 * scheduler report are skipped entirely.
 *
 * @param eventWrapper wrapped node-update event; its node supplies the
 *                     container updates to process
 */
private void updateQueueWithNodeUpdate(NodeUpdateSchedulerEventWrapper eventWrapper) {
    RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
    List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
    for (UpdatedContainerInfo info : containerList) {
        for (ContainerStatus status : info.getCompletedContainers()) {
            ContainerId containerId = status.getContainerId();
            SchedulerAppReport app = super.getSchedulerAppInfo(containerId.getApplicationAttemptId());
            if (app == null) {
                // The scheduler has no report for this attempt, so there are
                // no live containers to match against; skip this container.
                continue;
            }
            String queue = appQueueMap.get(containerId.getApplicationAttemptId());
            int releasedMemory = 0, releasedVCores = 0;
            if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
                for (RMContainer rmc : app.getLiveContainers()) {
                    // Compare by value: the id in the status report and the id
                    // held by the live container need not be the same object.
                    if (containerId.equals(rmc.getContainerId())) {
                        releasedMemory += rmc.getContainer().getResource().getMemorySize();
                        releasedVCores += rmc.getContainer().getResource().getVirtualCores();
                        break;
                    }
                }
            } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
                if (preemptionContainerMap.containsKey(containerId)) {
                    Resource preResource = preemptionContainerMap.get(containerId);
                    releasedMemory += preResource.getMemorySize();
                    releasedVCores += preResource.getVirtualCores();
                    preemptionContainerMap.remove(containerId);
                }
            }
            // update queue counters
            updateQueueMetrics(queue, releasedMemory, releasedVCores);
        }
    }
}
Also used : ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) UpdatedContainerInfo(org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo) Resource(org.apache.hadoop.yarn.api.records.Resource) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulerAppReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport)

Example 7 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class ResourceSchedulerWrapper method handle.

/**
 * Handles a scheduler event by delegating to the wrapped scheduler.
 *
 * <p>With metrics disabled this is a plain pass-through. With metrics
 * enabled the method additionally:
 * <ul>
 *   <li>wraps {@code NODE_UPDATE} events and updates queue counters for the
 *       containers they report;</li>
 *   <li>on {@code APP_ATTEMPT_REMOVED}, reports the resource of the first
 *       remaining live container of the attempt to the queue metrics;</li>
 *   <li>times the delegate call (overall and per event type) and bumps the
 *       corresponding counters;</li>
 *   <li>maintains {@code appQueueMap} on {@code APP_ADDED} and
 *       {@code APP_REMOVED}.</li>
 * </ul>
 *
 * @param schedulerEvent the event to handle
 */
@Override
public void handle(SchedulerEvent schedulerEvent) {
    // metrics off
    if (!metricsON) {
        scheduler.handle(schedulerEvent);
        return;
    }
    if (!running) {
        running = true;
    }
    // metrics on
    Timer.Context handlerTimer = null;
    Timer.Context operationTimer = null;
    try {
        if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
            NodeUpdateSchedulerEventWrapper eventWrapper = new NodeUpdateSchedulerEventWrapper((NodeUpdateSchedulerEvent) schedulerEvent);
            // From here on the wrapper is what gets timed, counted and
            // forwarded to the delegate scheduler.
            schedulerEvent = eventWrapper;
            updateQueueWithNodeUpdate(eventWrapper);
        } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
            // check if having AM Container, update resource usage information
            AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
            ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
            String queue = appQueueMap.get(appAttemptId.getApplicationId());
            SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
            // The scheduler may have no report for this attempt; in that case
            // there is nothing to account for and the event is just forwarded.
            if (app != null && !app.getLiveContainers().isEmpty()) {
                // have 0 or 1
                // should have one container which is AM container
                RMContainer rmc = app.getLiveContainers().iterator().next();
                updateQueueMetrics(queue, rmc.getContainer().getResource().getMemorySize(), rmc.getContainer().getResource().getVirtualCores());
            }
        }
        handlerTimer = schedulerHandleTimer.time();
        operationTimer = schedulerHandleTimerMap.get(schedulerEvent.getType()).time();
        scheduler.handle(schedulerEvent);
    } finally {
        // Timers are null if an exception occurred before they were started.
        if (handlerTimer != null) {
            handlerTimer.stop();
        }
        if (operationTimer != null) {
            operationTimer.stop();
        }
        schedulerHandleCounter.inc();
        schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
        if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED && schedulerEvent instanceof AppRemovedSchedulerEvent) {
            SLSRunner.decreaseRemainingApps();
            AppRemovedSchedulerEvent appRemoveEvent = (AppRemovedSchedulerEvent) schedulerEvent;
            appQueueMap.remove(appRemoveEvent.getApplicationID());
        } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED && schedulerEvent instanceof AppAddedSchedulerEvent) {
            AppAddedSchedulerEvent appAddEvent = (AppAddedSchedulerEvent) schedulerEvent;
            String queueName = appAddEvent.getQueue();
            appQueueMap.put(appAddEvent.getApplicationId(), queueName);
        }
    }
}
Also used : AppRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent) NodeUpdateSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent) Timer(com.codahale.metrics.Timer) AppAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulerAppReport(org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport)

Example 8 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class QueuePriorityContainerCandidateSelector method selectCandidates.

/**
 * Selects containers to preempt so that reserved containers of
 * higher-priority queues can be allocated.
 *
 * <p>The priority digraph is rebuilt on every call; if it is empty the
 * input map is returned untouched. Otherwise reserved containers whose
 * queue has a row in the digraph are visited in the order given by
 * {@code CONTAINER_CREATION_TIME_COMPARATOR}, and for each one that has
 * been reserved for at least {@code minTimeout} the method tries to
 * preempt enough resource on its node, or (if allowed) to move the
 * reservation.
 *
 * @param selectedCandidates containers already selected, keyed by
 *        application attempt; newly selected containers are added to it
 * @param clusterResource total cluster resource
 * @param totalPreemptedResourceAllowed remaining preemption budget; the
 *        allocated resource of every newly selected container is
 *        subtracted from it
 * @return the same {@code selectedCandidates} map
 */
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource clusterResource, Resource totalPreemptedResourceAllowed) {
    // Rebuild the digraph from the queues.
    // TODO (wangda): only do this when queue refreshed.
    priorityDigraph.clear();
    intializePriorityDigraph();
    if (priorityDigraph.isEmpty()) {
        // Nothing in the digraph: nothing to do for this selector.
        return selectedCandidates;
    }

    // Stash the arguments and reset per-run state shared with other methods.
    this.selectedCandidates = selectedCandidates;
    this.clusterResource = clusterResource;
    this.totalPreemptionAllowed = totalPreemptedResourceAllowed;
    toPreemptedFromOtherQueues.clear();
    reservedContainers = new ArrayList<>();
    // The temp scheduler-node map is only valid for one selection run.
    tempSchedulerNodeMap.clear();
    touchedNodes = new HashSet<>();

    // Collect the reserved containers whose queue has a row in the digraph.
    List<FiCaSchedulerNode> schedulerNodes = preemptionContext.getScheduler().getAllNodes();
    for (FiCaSchedulerNode schedulerNode : schedulerNodes) {
        RMContainer reserved = schedulerNode.getReservedContainer();
        if (reserved != null && priorityDigraph.containsRow(reserved.getQueueName())) {
            reservedContainers.add(reserved);
        }
    }
    Collections.sort(reservedContainers, CONTAINER_CREATION_TIME_COMPARATOR);

    long now = System.currentTimeMillis();
    for (RMContainer reserved : reservedContainers) {
        // Only act on reservations that are at least minTimeout old.
        if (now - reserved.getCreationTime() < minTimeout) {
            continue;
        }
        FiCaSchedulerNode host = preemptionContext.getScheduler().getNode(reserved.getReservedNode());
        if (host == null) {
            // The reserved node is unknown to the scheduler; ignore.
            continue;
        }

        // Nothing to do when the asking queue is already satisfied.
        String askingQueue = reserved.getQueueName();
        if (isQueueSatisfied(askingQueue, host.getPartition())) {
            continue;
        }

        List<RMContainer> victims = new ArrayList<>();
        boolean preemptable = canPreemptEnoughResourceForAsked(reserved.getReservedResource(), askingQueue, host, false, victims);
        if (!preemptable) {
            // Not enough can be preempted on this node; optionally try to
            // move the reservation instead.
            if (allowMoveReservation) {
                tryToMakeBetterReservationPlacement(reserved, schedulerNodes);
            }
            continue;
        }

        touchedNodes.add(host.getNodeID());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to preempt following containers to make reserved " + "container=" + reserved.getContainerId() + " on node=" + host.getNodeID() + " can be allocated:");
        }
        // Record how much is going to be preempted for the asking queue.
        incToPreempt(askingQueue, host.getPartition(), reserved.getReservedResource());
        for (RMContainer victim : victims) {
            if (LOG.isDebugEnabled()) {
                // NOTE(review): this logs the reserved resource of a container
                // selected for preemption, while the budget below uses its
                // allocated resource - confirm this is intended.
                LOG.debug(" --container=" + victim.getContainerId() + " resource=" + victim.getReservedResource());
            }
            ApplicationAttemptId owner = victim.getApplicationAttemptId();
            Set<RMContainer> chosen = selectedCandidates.get(owner);
            if (chosen == null) {
                chosen = new HashSet<>();
                selectedCandidates.put(owner, chosen);
            }
            chosen.add(victim);
            // Charge the victim against the remaining preemption budget.
            Resources.subtractFrom(totalPreemptedResourceAllowed, victim.getAllocatedResource());
        }
    }
    return selectedCandidates;
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ArrayList(java.util.ArrayList) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 9 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class ReservedContainerCandidatesSelector method getPreemptionCandidatesOnNode.

/**
 * Tries to find containers on the given node that can be preempted so that
 * the node's reserved container can be allocated.
 *
 * @param node node holding the reserved container
 * @param cluster total cluster resource
 * @param queueToPreemptableResourceByPartition it's a map of
 *                 &lt;queueName, &lt;partition, preemptable-resource&gt;&gt;
 * @param selectedCandidates containers already selected for preemption;
 *                 such containers still count towards the freed resource but
 *                 are not added to the returned selection
 * @param totalPreemptionAllowed preemption budget, forwarded to
 *                 {@code tryToPreemptFromQueue}
 * @param readOnly forwarded to {@code tryToPreemptFromQueue}; when
 *                 {@code true} no container is added to the returned
 *                 selection and no queue quota is reverted on failure
 * @return NodeForPreemption if it's possible to preempt containers on the node
 * to satisfy reserved resource; {@code null} if the reserved resource already
 * fits into available + killable resources, or if not enough can be preempted
 */
private NodeForPreemption getPreemptionCandidatesOnNode(FiCaSchedulerNode node, Resource cluster, Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition, Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource totalPreemptionAllowed, boolean readOnly) {
    RMContainer reservedContainer = node.getReservedContainer();
    Resource available = Resources.clone(node.getUnallocatedResource());
    Resource totalSelected = Resources.createResource(0);
    List<RMContainer> sortedRunningContainers = node.getCopiedListOfRunningContainers();
    List<RMContainer> selectedContainers = new ArrayList<>();
    Map<ContainerId, RMContainer> killableContainers = node.getKillableContainers();
    // Sort running containers in descending container-id order (presumably a
    // proxy for "most recently launched first" - confirm). The operands are
    // swapped rather than negating the result, because negating a compareTo
    // result is wrong when it is Integer.MIN_VALUE.
    Collections.sort(sortedRunningContainers, new Comparator<RMContainer>() {

        @Override
        public int compare(RMContainer o1, RMContainer o2) {
            return o2.getContainerId().compareTo(o1.getContainerId());
        }
    });
    // First check: can we preempt containers to allocate the
    // reservedContainer?
    boolean canAllocateReservedContainer = false;
    // At least, we can get available + killable resources from this node
    Resource cur = Resources.add(available, node.getTotalKillableResources());
    String partition = node.getPartition();
    // Avoid preempt any container if required <= available + killable
    if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(), cur)) {
        return null;
    }
    // Extra cost of am container preemption; stays 0 here because AM
    // containers are never selected below.
    float amPreemptionCost = 0f;
    for (RMContainer c : sortedRunningContainers) {
        String containerQueueName = c.getQueueName();
        // Skip container if it is already marked killable
        if (killableContainers.containsKey(c.getContainerId())) {
            continue;
        }
        // For safety, never select an AM container.
        if (c.isAMContainer()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Skip selecting AM container on host=" + node.getNodeID() + " AM container=" + c.getContainerId());
            }
            continue;
        }
        // Can we preempt container c?
        // Check if we have quota to preempt this container
        boolean canPreempt = tryToPreemptFromQueue(cluster, containerQueueName, partition, queueToPreemptableResourceByPartition, c.getAllocatedResource(), totalPreemptionAllowed, readOnly);
        // If we can, add to selected container, and change resource accordingly.
        if (canPreempt) {
            if (!CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c, selectedCandidates)) {
                if (!readOnly) {
                    selectedContainers.add(c);
                }
                Resources.addTo(totalSelected, c.getAllocatedResource());
            }
            // The container's resource becomes usable for the reservation
            // whether or not it had been selected before.
            Resources.addTo(cur, c.getAllocatedResource());
            if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(), cur)) {
                canAllocateReservedContainer = true;
                break;
            }
        }
    }
    if (!canAllocateReservedContainer) {
        if (!readOnly) {
            // Revert queue preemption quotas
            for (RMContainer c : selectedContainers) {
                Resource res = getPreemptableResource(c.getQueueName(), partition, queueToPreemptableResourceByPartition);
                if (null == res) {
                    // Ignore such failures.
                    continue;
                }
                Resources.addTo(res, c.getAllocatedResource());
            }
        }
        return null;
    }
    float ratio = Resources.ratio(rc, totalSelected, reservedContainer.getReservedResource());
    // Compute preemption score
    NodeForPreemption nfp = new NodeForPreemption(ratio + amPreemptionCost, node, selectedContainers);
    return nfp;
}
Also used : ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) Resource(org.apache.hadoop.yarn.api.records.Resource) ArrayList(java.util.ArrayList) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 10 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class FifoCandidatesSelector method selectCandidates.

/**
 * Selects preemption candidates queue by queue.
 *
 * <p>After computing the ideal allocation and deducting what earlier
 * selectors already picked, every leaf queue that does not have preemption
 * disabled is processed under its read lock: first its
 * ignore-partition-exclusivity containers, then the containers of its
 * applications in the order given by the queue's preemption iterator, and
 * finally the AM containers that were skipped along the way.
 *
 * @param selectedCandidates containers already selected, keyed by
 *        application attempt; passed to the helpers that record new
 *        selections
 * @param clusterResource total cluster resource
 * @param totalPreemptionAllowed preemption budget, passed to the helpers
 * @return the same {@code selectedCandidates} map
 */
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource clusterResource, Resource totalPreemptionAllowed) {
    // Calculate how much resources we need to preempt
    preemptableAmountCalculator.computeIdealAllocation(clusterResource, totalPreemptionAllowed);
    // Previous selectors (with higher priority) could have already
    // selected containers. We need to deduct preemptable resources
    // based on already selected candidates.
    CapacitySchedulerPreemptionUtils.deductPreemptableResourcesBasedSelectedCandidates(preemptionContext, selectedCandidates);
    List<RMContainer> skippedAMContainerlist = new ArrayList<>();
    // Loop all leaf queues
    for (String queueName : preemptionContext.getLeafQueueNames()) {
        // check if preemption disabled for the queue
        if (preemptionContext.getQueueByPartition(queueName, RMNodeLabelsManager.NO_LABEL).preemptionDisabled) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("skipping from queue=" + queueName + " because it's a non-preemptable queue");
            }
            continue;
        }
        // compute resToObtainByPartition considered inter-queue preemption
        LeafQueue leafQueue = preemptionContext.getQueueByPartition(queueName, RMNodeLabelsManager.NO_LABEL).leafQueue;
        Map<String, Resource> resToObtainByPartition = CapacitySchedulerPreemptionUtils.getResToObtainByPartitionForLeafQueue(preemptionContext, queueName, clusterResource);
        // Acquire the lock before entering try, so that finally only ever
        // releases a lock that is actually held.
        leafQueue.getReadLock().lock();
        try {
            // go through all ignore-partition-exclusivity containers first to make
            // sure such containers will be preemptionCandidates first
            Map<String, TreeSet<RMContainer>> ignorePartitionExclusivityContainers = leafQueue.getIgnoreExclusivityRMContainers();
            for (String partition : resToObtainByPartition.keySet()) {
                if (ignorePartitionExclusivityContainers.containsKey(partition)) {
                    TreeSet<RMContainer> rmContainers = ignorePartitionExclusivityContainers.get(partition);
                    // Walk the set in descending order so that containers that
                    // sort last are considered for preemption first.
                    for (RMContainer c : rmContainers.descendingSet()) {
                        if (CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c, selectedCandidates)) {
                            // Skip already selected containers
                            continue;
                        }
                        // The result is not needed: the loop moves on to the
                        // next container whether or not this one was taken.
                        CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain(rc, preemptionContext, resToObtainByPartition, c, clusterResource, selectedCandidates, totalPreemptionAllowed);
                    }
                }
            }
            // preempt other containers
            Resource skippedAMSize = Resource.newInstance(0, 0);
            Iterator<FiCaSchedulerApp> desc = leafQueue.getOrderingPolicy().getPreemptionIterator();
            while (desc.hasNext()) {
                FiCaSchedulerApp fc = desc.next();
                // Stop as soon as nothing is left to obtain from this queue.
                if (resToObtainByPartition.isEmpty()) {
                    break;
                }
                preemptFrom(fc, clusterResource, resToObtainByPartition, skippedAMContainerlist, skippedAMSize, selectedCandidates, totalPreemptionAllowed);
            }
            // Can try preempting AMContainers (still saving atmost
            // maxAMCapacityForThisQueue AMResource's) if more resources are
            // required to be preemptionCandidates from this Queue.
            Resource maxAMCapacityForThisQueue = Resources.multiply(Resources.multiply(clusterResource, leafQueue.getAbsoluteCapacity()), leafQueue.getMaxAMResourcePerQueuePercent());
            preemptAMContainers(clusterResource, selectedCandidates, skippedAMContainerlist, resToObtainByPartition, skippedAMSize, maxAMCapacityForThisQueue, totalPreemptionAllowed);
        } finally {
            leafQueue.getReadLock().unlock();
        }
    }
    return selectedCandidates;
}
Also used : ArrayList(java.util.ArrayList) Resource(org.apache.hadoop.yarn.api.records.Resource) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) TreeSet(java.util.TreeSet) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)

Aggregations

RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)166 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)55 Resource (org.apache.hadoop.yarn.api.records.Resource)49 Container (org.apache.hadoop.yarn.api.records.Container)48 Test (org.junit.Test)45 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)41 ArrayList (java.util.ArrayList)29 NodeId (org.apache.hadoop.yarn.api.records.NodeId)29 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)29 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)28 FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)21 RMContainerImpl (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl)18 HashMap (java.util.HashMap)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)17 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)15 Priority (org.apache.hadoop.yarn.api.records.Priority)14 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)13 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12