Search in sources :

Example 1 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class FifoCandidatesSelector method selectCandidates.

/**
 * Walks every leaf queue and selects containers to preempt, on top of the
 * candidates already chosen by earlier selectors.
 *
 * <p>For each leaf queue whose preemption is not disabled, and while that
 * queue's read lock is held, containers are considered in three passes:
 * <ol>
 *   <li>ignore-partition-exclusivity containers of the queue,</li>
 *   <li>containers of each application returned by the ordering policy's
 *       preemption iterator (via {@code preemptFrom}),</li>
 *   <li>the AM containers collected in the skipped-AM list (via
 *       {@code preemptAMContainers}).</li>
 * </ol>
 *
 * @param selectedCandidates containers already selected, keyed by application
 *     attempt; handed to the helpers that evaluate and record selections
 * @param clusterResource total cluster resource
 * @param totalPreemptionAllowed preemption budget, passed through to the
 *     helpers
 * @return the same {@code selectedCandidates} map that was passed in
 */
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource clusterResource, Resource totalPreemptionAllowed) {
    // Calculate how much resources we need to preempt
    preemptableAmountCalculator.computeIdealAllocation(clusterResource, totalPreemptionAllowed);
    // Previous selectors (with higher priority) could have already
    // selected containers. We need to deduct preemptable resources
    // based on already selected candidates.
    CapacitySchedulerPreemptionUtils.deductPreemptableResourcesBasedSelectedCandidates(preemptionContext, selectedCandidates);
    List<RMContainer> skippedAMContainerlist = new ArrayList<>();
    // Loop all leaf queues
    for (String queueName : preemptionContext.getLeafQueueNames()) {
        // check if preemption disabled for the queue
        if (preemptionContext.getQueueByPartition(queueName, RMNodeLabelsManager.NO_LABEL).preemptionDisabled) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("skipping from queue=" + queueName + " because it's a non-preemptable queue");
            }
            continue;
        }
        // compute resToObtainByPartition considered inter-queue preemption
        LeafQueue leafQueue = preemptionContext.getQueueByPartition(queueName, RMNodeLabelsManager.NO_LABEL).leafQueue;
        Map<String, Resource> resToObtainByPartition = CapacitySchedulerPreemptionUtils.getResToObtainByPartitionForLeafQueue(preemptionContext, queueName, clusterResource);
        // Acquire the lock BEFORE entering the try block: if lock() itself
        // fails, the finally clause must not attempt to release a lock that
        // this thread never obtained.
        leafQueue.getReadLock().lock();
        try {
            // go through all ignore-partition-exclusivity containers first to make
            // sure such containers will be preemptionCandidates first
            Map<String, TreeSet<RMContainer>> ignorePartitionExclusivityContainers = leafQueue.getIgnoreExclusivityRMContainers();
            for (String partition : resToObtainByPartition.keySet()) {
                if (ignorePartitionExclusivityContainers.containsKey(partition)) {
                    TreeSet<RMContainer> rmContainers = ignorePartitionExclusivityContainers.get(partition);
                    // Walk the set in descending order, so the containers at
                    // the tail of the set's ordering are considered first.
                    for (RMContainer c : rmContainers.descendingSet()) {
                        if (CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c, selectedCandidates)) {
                            // Skip already selected containers
                            continue;
                        }
                        // The boolean result is intentionally ignored: whether or
                        // not this container was taken, we move on to the next one.
                        CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain(rc, preemptionContext, resToObtainByPartition, c, clusterResource, selectedCandidates, totalPreemptionAllowed);
                    }
                }
            }
            // preempt other containers
            Resource skippedAMSize = Resource.newInstance(0, 0);
            Iterator<FiCaSchedulerApp> desc = leafQueue.getOrderingPolicy().getPreemptionIterator();
            while (desc.hasNext()) {
                FiCaSchedulerApp fc = desc.next();
                // stop as soon as nothing is left to obtain from this queue
                if (resToObtainByPartition.isEmpty()) {
                    break;
                }
                preemptFrom(fc, clusterResource, resToObtainByPartition, skippedAMContainerlist, skippedAMSize, selectedCandidates, totalPreemptionAllowed);
            }
            // Can try preempting AMContainers (still saving atmost
            // maxAMCapacityForThisQueue AMResource's) if more resources are
            // required to be preemptionCandidates from this Queue.
            Resource maxAMCapacityForThisQueue = Resources.multiply(Resources.multiply(clusterResource, leafQueue.getAbsoluteCapacity()), leafQueue.getMaxAMResourcePerQueuePercent());
            preemptAMContainers(clusterResource, selectedCandidates, skippedAMContainerlist, resToObtainByPartition, skippedAMSize, maxAMCapacityForThisQueue, totalPreemptionAllowed);
        } finally {
            leafQueue.getReadLock().unlock();
        }
    }
    return selectedCandidates;
}
Also used : ArrayList(java.util.ArrayList) Resource(org.apache.hadoop.yarn.api.records.Resource) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) TreeSet(java.util.TreeSet) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)

Example 2 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class IntraQueueCandidatesSelector method computeIntraQueuePreemptionDemand.

/**
 * Computes the intra-queue preemption demand for every under-served leaf
 * queue of every partition, delegating the per-application calculation to
 * {@code fifoPreemptionComputePlugin}.
 *
 * @param clusterResource total cluster resource
 * @param totalPreemptedResourceAllowed preemption budget, passed to the plugin
 * @param selectedCandidates containers already selected, passed to the plugin
 */
private void computeIntraQueuePreemptionDemand(Resource clusterResource, Resource totalPreemptedResourceAllowed, Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates) {
    // Step 1: demand is calculated partition by partition.
    for (String label : context.getAllPartitions()) {
        LinkedHashSet<String> underServedQueues = context.getUnderServedQueuesPerPartition(label);
        if (underServedQueues != null) {
            // Step 2: fetch the partition-wide resource once, before looking
            // at the individual queues.
            Resource partitionTotal = context.getPartitionResource(label);
            // Step 3: visit each under-served queue of this partition.
            for (String name : underServedQueues) {
                TempQueuePerPartition tempQueue = context.getQueueByPartition(name, label);
                LeafQueue leaf = tempQueue.leafQueue;
                // Entries without a leaf queue (parent queues) are skipped.
                if (leaf == null) {
                    continue;
                }
                // Step 4: reassignable resource is (used - actuallyToBePreempted);
                // it is handed to the plugin as the queue's reassignable amount.
                Resource reassignable = Resources.subtract(tempQueue.getUsed(), tempQueue.getActuallyToBePreempted());
                // Step 5: only queues whose used capacity has reached the
                // configured minimum threshold take part in intra-queue
                // preemption.
                boolean belowThreshold = leaf.getQueueCapacities().getUsedCapacity(label) < context.getMinimumThresholdForIntraQueuePreemption();
                if (belowThreshold) {
                    continue;
                }
                // Step 6: let the plugin compute the ideal allocation of the
                // queue's applications.
                fifoPreemptionComputePlugin.computeAppsIdealAllocation(clusterResource, partitionTotal, tempQueue, selectedCandidates, totalPreemptedResourceAllowed, reassignable, context.getMaxAllowableLimitForIntraQueuePreemption());
            }
        }
    }
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)

Example 3 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class ProportionalCapacityPreemptionPolicyMockFramework method mockApplications.

/**
 * Creates mocked applications from a textual description, attaches them to
 * their (already mocked) leaf queues and stubs a per-user resource limit on
 * each queue that received applications.
 *
 * <p>Applications are separated by ";"; within one application the queue
 * name and the container list are separated by a tab:
 * <pre>
 * queueName\t  // app1
 * (priority,resource,host,expression,#repeat,reserved)
 * (priority,resource,host,expression,#repeat,reserved);
 * queueName\t  // app2
 * </pre>
 *
 * @param appsConfig the application description in the format above
 */
private void mockApplications(String appsConfig) {
    HashMap<String, HashSet<String>> usersByQueue = new HashMap<String, HashSet<String>>();
    LeafQueue queue = null;
    int nextAppId = 1;
    for (String appExpr : appsConfig.split(";")) {
        String[] parts = appExpr.split("\t");
        String queueName = parts[0];

        // Identity of the mocked application; attempt number is always 1.
        ApplicationId appId = ApplicationId.newInstance(0L, nextAppId);
        ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);

        // Container lists are filled by mockContainers and then exposed
        // through the mocked application.
        List<RMContainer> live = new ArrayList<RMContainer>();
        List<RMContainer> reserved = new ArrayList<RMContainer>();

        FiCaSchedulerApp app = mock(FiCaSchedulerApp.class);
        when(app.getAMResource(anyString())).thenReturn(Resources.createResource(0, 0));
        mockContainers(parts[1], app, attemptId, queueName, reserved, live);
        LOG.debug("Application mock: queue: " + queueName + ", appId:" + appId);
        when(app.getLiveContainers()).thenReturn(live);
        when(app.getReservedContainers()).thenReturn(reserved);
        when(app.getApplicationAttemptId()).thenReturn(attemptId);
        when(app.getApplicationId()).thenReturn(appId);

        // Register the application with its leaf queue.
        queue = (LeafQueue) nameToCSQueues.get(queueName);
        queue.getApplications().add(app);
        queue.getAllApplications().add(app);

        // Remember which users submitted to which queue.
        if (!usersByQueue.containsKey(queueName)) {
            usersByQueue.put(queueName, new HashSet<String>());
        }
        usersByQueue.get(queueName).add(app.getUser());

        nextAppId++;
    }

    for (String queueName : usersByQueue.keySet()) {
        queue = (LeafQueue) nameToCSQueues.get(queueName);
        // Currently we have user-limit test support only for default label.
        Resource defaultPartitionTotal = partitionToResource.get("");
        Resource queueCapacity = Resources.multiply(defaultPartitionTotal, queue.getQueueCapacities().getAbsoluteCapacity());
        HashSet<String> queueUsers = usersByQueue.get(queue.getQueueName());
        // The queue's capacity is divided among its users, rounding up.
        Resource perUserLimit = Resources.divideAndCeil(rc, queueCapacity, queueUsers.size());
        for (String user : queueUsers) {
            when(queue.getResourceLimitForAllUsers(eq(user), any(Resource.class), anyString(), any(SchedulingMode.class))).thenReturn(perUserLimit);
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Resource(org.apache.hadoop.yarn.api.records.Resource) Matchers.anyString(org.mockito.Matchers.anyString) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) SchedulingMode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) HashSet(java.util.HashSet)

Example 4 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class ProportionalCapacityPreemptionPolicyMockFramework method mockQueueHierarchy.

/**
 * Builds a hierarchy of mocked queues from a textual description and returns
 * the queue whose name equals {@code ROOT}.
 *
 * <p>Overall format:
 * <pre>
 * root (<partition-name-1>=[guaranteed max used pending (reserved)],<partition-name-2>=..);
 * -A(...);
 * --A1(...);
 * --A2(...);
 * -B...
 * </pre>
 * ";" splits queues; there should be no empty lines and no extra spaces.
 *
 * <p>Each queue carries its capacities per partition, in the format:
 * <pre>
 * -<queueName> (<labelName1>=[guaranteed max used pending], \
 *               <labelName2>=[guaranteed max used pending])
 *              {key1=value1,key2=value2};  // Additional configs
 * </pre>
 *
 * @param queueExprs the queue description in the format above
 * @return the mocked root queue, or {@code null} if no queue is named
 *     {@code ROOT}
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private ParentQueue mockQueueHierarchy(String queueExprs) {
    String[] exprs = queueExprs.split(";");
    ParentQueue root = null;
    for (int pos = 0; pos < exprs.length; pos++) {
        String expr = exprs[pos];
        CSQueue queue;
        if (!isParent(exprs, pos)) {
            // Leaf queue: backed by a sorted set of applications.
            LeafQueue leaf = mock(LeafQueue.class);
            Comparator<FiCaSchedulerApp> appOrder = new Comparator<FiCaSchedulerApp>() {

                @Override
                public int compare(FiCaSchedulerApp left, FiCaSchedulerApp right) {
                    // Order by priority when the left one is set and they
                    // differ; otherwise fall back to the application id.
                    boolean priorityDiffers = left.getPriority() != null && !left.getPriority().equals(right.getPriority());
                    if (priorityDiffers) {
                        return left.getPriority().compareTo(right.getPriority());
                    }
                    return left.getApplicationId().compareTo(right.getApplicationId());
                }
            };
            final TreeSet<FiCaSchedulerApp> sortedApps = new TreeSet<>(appOrder);
            when(leaf.getApplications()).thenReturn(sortedApps);
            when(leaf.getAllApplications()).thenReturn(sortedApps);
            OrderingPolicy<FiCaSchedulerApp> orderingPolicy = mock(OrderingPolicy.class);
            // The preemption iterator is produced on each call, walking the
            // application set in descending order.
            when(orderingPolicy.getPreemptionIterator()).thenAnswer(new Answer() {

                @Override
                public Object answer(InvocationOnMock invocation) {
                    return sortedApps.descendingIterator();
                }
            });
            when(leaf.getOrderingPolicy()).thenReturn(orderingPolicy);
            Map<String, TreeSet<RMContainer>> ignoreExclusivity = new HashMap<>();
            when(leaf.getIgnoreExclusivityRMContainers()).thenReturn(ignoreExclusivity);
            queue = leaf;
        } else {
            // Parent queue: starts with an empty child list and a mocked
            // queue ordering policy.
            ParentQueue parent = mock(ParentQueue.class);
            queue = parent;
            List<CSQueue> childQueues = new ArrayList<CSQueue>();
            when(parent.getChildQueues()).thenReturn(childQueues);
            QueueOrderingPolicy queuePolicy = mock(QueueOrderingPolicy.class);
            when(queuePolicy.getConfigName()).thenReturn(CapacitySchedulerConfiguration.QUEUE_PRIORITY_UTILIZATION_ORDERING_POLICY);
            when(parent.getQueueOrderingPolicy()).thenReturn(queuePolicy);
        }
        // Every mocked queue hands out the read side of its own lock.
        ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
        when(queue.getReadLock()).thenReturn(rwLock.readLock());
        setupQueue(queue, expr, exprs, pos);
        if (queue.getQueueName().equals(ROOT)) {
            root = (ParentQueue) queue;
        }
    }
    return root;
}
Also used : ParentQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) QueueOrderingPolicy(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.policy.QueueOrderingPolicy) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) CSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Answer(org.mockito.stubbing.Answer) TreeSet(java.util.TreeSet) InvocationOnMock(org.mockito.invocation.InvocationOnMock) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)

Example 5 with LeafQueue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue in project hadoop by apache.

the class TestProportionalCapacityPreemptionPolicy method mockLeafQueue.

/**
 * Creates a mocked {@link LeafQueue} for queue index {@code i}, populates it
 * with mocked applications and adds it to the child list of {@code p}.
 *
 * @param p parent queue whose child list receives the new leaf
 * @param tot not read by this method
 * @param i index of this queue into the per-queue arrays
 * @param abs not read by this method
 * @param used used resource per queue
 * @param pending pending resource per queue
 * @param reserved reserved resource per queue
 * @param apps number of applications per queue
 * @param gran per-queue value forwarded to {@code mockApp}
 * @return the mocked leaf queue
 */
@SuppressWarnings("rawtypes")
LeafQueue mockLeafQueue(ParentQueue p, Resource tot, int i, Resource[] abs, Resource[] used, Resource[] pending, Resource[] reserved, int[] apps, Resource[] gran) {
    LeafQueue leaf = mock(LeafQueue.class);
    ResourceCalculator rc = mCS.getResourceCalculator();
    List<ApplicationAttemptId> attemptIds = new ArrayList<ApplicationAttemptId>();

    // Pending resource as-is when the boolean argument is false ...
    when(leaf.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), isA(String.class), eq(false)))
        .thenReturn(pending[i]);
    // ... and pending minus reserved, floored at "none", when it is true.
    when(leaf.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class), isA(String.class), eq(true)))
        .thenReturn(Resources.componentwiseMax(
            Resources.subtract(pending[i], reserved[i] == null ? Resources.none() : reserved[i]),
            Resources.none()));

    // The queue's resource usage must carry the same figures.
    ResourceUsage usage = new ResourceUsage();
    usage.setPending(pending[i]);
    usage.setUsed(used[i]);
    usage.setReserved(reserved[i]);
    when(leaf.getQueueResourceUsage()).thenReturn(usage);

    // consider moving where CapacityScheduler::comparator accessible
    Comparator<FiCaSchedulerApp> byAttemptId = new Comparator<FiCaSchedulerApp>() {

        @Override
        public int compare(FiCaSchedulerApp left, FiCaSchedulerApp right) {
            return left.getApplicationAttemptId().compareTo(right.getApplicationAttemptId());
        }
    };
    final NavigableSet<FiCaSchedulerApp> queueApps = new TreeSet<FiCaSchedulerApp>(byAttemptId);

    // applications are added in global L->R order in queues
    if (apps[i] != 0) {
        // Each application receives an equal, rounded-up share.
        Resource usedPerApp = Resources.divideAndCeil(rc, used[i], apps[i]);
        Resource pendingPerApp = Resources.divideAndCeil(rc, pending[i], apps[i]);
        Resource reservedPerApp = Resources.divideAndCeil(rc, reserved[i], apps[i]);
        for (int n = 0; n < apps[i]; ++n) {
            FiCaSchedulerApp mockedApp = mockApp(i, appAlloc, usedPerApp, pendingPerApp, reservedPerApp, gran[i]);
            queueApps.add(mockedApp);
            ++appAlloc;
            attemptIds.add(mockedApp.getApplicationAttemptId());
        }
        when(mCS.getAppsInQueue("queue" + (char) ('A' + i - 1))).thenReturn(attemptIds);
    }
    when(leaf.getApplications()).thenReturn(queueApps);

    @SuppressWarnings("unchecked") OrderingPolicy<FiCaSchedulerApp> orderingPolicy = mock(OrderingPolicy.class);
    // The preemption iterator is produced on each call, walking the
    // application set in descending order.
    when(orderingPolicy.getPreemptionIterator()).thenAnswer(new Answer() {

        @Override
        public Object answer(InvocationOnMock invocation) {
            return queueApps.descendingIterator();
        }
    });
    when(leaf.getOrderingPolicy()).thenReturn(orderingPolicy);

    // The AM resource percent is only stubbed when it is non-zero.
    if (setAMResourcePercent != 0.0f) {
        when(leaf.getMaxAMResourcePerQueuePercent()).thenReturn(setAMResourcePercent);
    }

    ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
    when(leaf.getReadLock()).thenReturn(rwLock.readLock());
    when(leaf.getPriority()).thenReturn(Priority.newInstance(0));
    p.getChildQueues().add(leaf);
    return leaf;
}
Also used : ResourceUsage(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage) ArrayList(java.util.ArrayList) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) Matchers.anyString(org.mockito.Matchers.anyString) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) LeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue) DefaultResourceCalculator(org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator) DominantResourceCalculator(org.apache.hadoop.yarn.util.resource.DominantResourceCalculator) ResourceCalculator(org.apache.hadoop.yarn.util.resource.ResourceCalculator) Answer(org.mockito.stubbing.Answer) TreeSet(java.util.TreeSet) InvocationOnMock(org.mockito.invocation.InvocationOnMock) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)

Aggregations

LeafQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue)17 Resource (org.apache.hadoop.yarn.api.records.Resource)9 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)7 ArrayList (java.util.ArrayList)5 Matchers.anyString (org.mockito.Matchers.anyString)5 TreeSet (java.util.TreeSet)4 HashMap (java.util.HashMap)3 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)3 ResourceUsage (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage)3 CSQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue)3 ParentQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue)3 DominantResourceCalculator (org.apache.hadoop.yarn.util.resource.DominantResourceCalculator)3 Test (org.junit.Test)3 InvocationOnMock (org.mockito.invocation.InvocationOnMock)3 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)2 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 QueueMetrics (org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics)2 CapacityScheduler (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler)2 CapacitySchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration)2