Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.
The class TempQueuePerPartition, method updatePreemptableExtras:
public void updatePreemptableExtras(ResourceCalculator rc) {
  // Reset untouchableExtra and preemptableExtra
  untouchableExtra = Resources.none();
  preemptableExtra = Resources.none();
  Resource extra = Resources.subtract(getUsed(), getGuaranteed());
  if (Resources.lessThan(rc, totalPartitionResource, extra, Resources.none())) {
    extra = Resources.none();
  }
  if (null == children || children.isEmpty()) {
    // If it is a leaf queue
    if (preemptionDisabled) {
      untouchableExtra = extra;
    } else {
      preemptableExtra = extra;
    }
  } else {
    // If it is a parent queue
    Resource childrensPreemptable = Resource.newInstance(0, 0);
    for (TempQueuePerPartition child : children) {
      Resources.addTo(childrensPreemptable, child.preemptableExtra);
    }
    // untouchableExtra = max(extra - childrensPreemptable, 0)
    if (Resources.greaterThanOrEqual(rc, totalPartitionResource,
        childrensPreemptable, extra)) {
      untouchableExtra = Resource.newInstance(0, 0);
    } else {
      untouchableExtra = Resources.subtract(extra, childrensPreemptable);
    }
    preemptableExtra = Resources.min(rc, totalPartitionResource,
        childrensPreemptable, extra);
  }
}
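The split between untouchableExtra and preemptableExtra is easier to see with concrete numbers. The following is a minimal, self-contained sketch of the parent-queue branch above, using plain Java longs for a single resource dimension instead of the Hadoop Resource record; all figures are hypothetical.

// Simplified model of updatePreemptableExtras for a parent queue.
public class PreemptableExtrasSketch {
  public static void main(String[] args) {
    long used = 20_000;                               // hypothetical usage (MB)
    long guaranteed = 12_000;                         // hypothetical guarantee (MB)
    long extra = Math.max(used - guaranteed, 0);      // extra = max(used - guaranteed, 0)

    // Sum of what the child queues are willing to give back.
    long[] childPreemptable = { 3_000, 2_000 };
    long childrensPreemptable = 0;
    for (long c : childPreemptable) {
      childrensPreemptable += c;
    }

    // untouchableExtra = max(extra - childrensPreemptable, 0)
    long untouchableExtra = Math.max(extra - childrensPreemptable, 0);
    // preemptableExtra = min(childrensPreemptable, extra)
    long preemptableExtra = Math.min(childrensPreemptable, extra);

    // Prints: extra=8000 untouchable=3000 preemptable=5000
    System.out.println("extra=" + extra + " untouchable=" + untouchableExtra
        + " preemptable=" + preemptableExtra);
  }
}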
Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.
The class AbstractPreemptableResourceCalculator, method computeFixpointAllocation:
/**
 * Given a set of queues, compute the fix-point distribution of unassigned
 * resources among them. As the pending requests of a queue are exhausted,
 * the queue is removed from the set and the remaining capacity is
 * redistributed among the remaining queues. The distribution is weighted by
 * guaranteed capacity, unless asked to ignoreGuarantee, in which case
 * resources are distributed uniformly.
 *
 * @param totGuarant
 *          total guaranteed resource
 * @param qAlloc
 *          list of child queues
 * @param unassigned
 *          unassigned resource to distribute
 * @param ignoreGuarantee
 *          whether to ignore per-queue guarantees
 */
protected void computeFixpointAllocation(Resource totGuarant,
    Collection<TempQueuePerPartition> qAlloc, Resource unassigned,
    boolean ignoreGuarantee) {
  // Prior to assigning the unused resources, process each queue as follows:
  // If current > guaranteed, idealAssigned = guaranteed + untouchable extra
  // Else idealAssigned = current;
  // Subtract idealAssigned resources from unassigned.
  // If the queue has all of its needs met (that is, if
  // idealAssigned >= current + pending), remove it from consideration.
  // Sort queues from most under-guaranteed to most over-guaranteed.
  TQComparator tqComparator = new TQComparator(rc, totGuarant);
  PriorityQueue<TempQueuePerPartition> orderedByNeed =
      new PriorityQueue<>(10, tqComparator);
  for (Iterator<TempQueuePerPartition> i = qAlloc.iterator(); i.hasNext();) {
    TempQueuePerPartition q = i.next();
    Resource used = q.getUsed();
    if (Resources.greaterThan(rc, totGuarant, used, q.getGuaranteed())) {
      q.idealAssigned = Resources.add(q.getGuaranteed(), q.untouchableExtra);
    } else {
      q.idealAssigned = Resources.clone(used);
    }
    Resources.subtractFrom(unassigned, q.idealAssigned);
    // If idealAssigned < (used + pending), q needs more resources, so add it
    // to the list of underserved queues, ordered by need.
    Resource curPlusPend = Resources.add(q.getUsed(), q.pending);
    if (Resources.lessThan(rc, totGuarant, q.idealAssigned, curPlusPend)) {
      orderedByNeed.add(q);
    }
  }
  // Assign cluster resources until there is no more demand, or no resources
  // are left.
  while (!orderedByNeed.isEmpty()
      && Resources.greaterThan(rc, totGuarant, unassigned, Resources.none())) {
    Resource wQassigned = Resource.newInstance(0, 0);
    // we compute normalizedGuarantees capacity based on currently active
    // queues
    resetCapacity(unassigned, orderedByNeed, ignoreGuarantee);
    // For each underserved queue (or set of queues if multiple are equally
    // underserved), offer its share of the unassigned resources based on its
    // normalized guarantee. After the offer, if the queue is not satisfied,
    // place it back in the ordered list of queues, recalculating its place
    // in the order of most under-guaranteed to most over-guaranteed. In this
    // way, the most underserved queue(s) are always given resources first.
    Collection<TempQueuePerPartition> underserved =
        getMostUnderservedQueues(orderedByNeed, tqComparator);
    for (Iterator<TempQueuePerPartition> i = underserved.iterator();
        i.hasNext();) {
      TempQueuePerPartition sub = i.next();
      Resource wQavail = Resources.multiplyAndNormalizeUp(rc, unassigned,
          sub.normalizedGuarantee, Resource.newInstance(1, 1));
      Resource wQidle = sub.offer(wQavail, rc, totGuarant,
          isReservedPreemptionCandidatesSelector);
      Resource wQdone = Resources.subtract(wQavail, wQidle);
      if (Resources.greaterThan(rc, totGuarant, wQdone, Resources.none())) {
        // The queue is still asking for more. Put it back in the priority
        // queue, recalculating its order based on need.
        orderedByNeed.add(sub);
      }
      Resources.addTo(wQassigned, wQdone);
    }
    Resources.subtractFrom(unassigned, wQassigned);
  }
  // Record every queue still in the ordered list as under-served for its
  // partition, so that intra-queue preemption can still check it for
  // imbalances.
  while (!orderedByNeed.isEmpty()) {
    TempQueuePerPartition q1 = orderedByNeed.remove();
    context.addPartitionToUnderServedQueues(q1.queueName, q1.partition);
  }
}
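The weighted fix-point distribution can be illustrated without the scheduler types. Below is a hedged, single-dimension sketch in plain Java (hypothetical queues and numbers, not the Hadoop API): each round offers every underserved queue a share of the unassigned resources proportional to its guarantee, and queues that are still needy go back in for the next round.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Simplified model of the fix-point loop: offer each underserved queue a
// share of the unassigned resources proportional to its guarantee, let it
// accept at most what it still needs, and repeat until demand or resources
// run out.
public class FixpointSketch {
  static class Q {
    final String name;
    final long guaranteed;
    long idealAssigned;
    long need;                       // pending demand beyond idealAssigned
    Q(String name, long guaranteed, long idealAssigned, long need) {
      this.name = name;
      this.guaranteed = guaranteed;
      this.idealAssigned = idealAssigned;
      this.need = need;
    }
  }

  public static void main(String[] args) {
    List<Q> underserved = new ArrayList<>(Arrays.asList(
        new Q("a", 4_000, 4_000, 3_000),
        new Q("b", 8_000, 8_000, 1_000)));
    long unassigned = 2_000;

    while (!underserved.isEmpty() && unassigned > 0) {
      long totalGuarantee =
          underserved.stream().mapToLong(q -> q.guaranteed).sum();
      long assignedThisRound = 0;
      List<Q> stillNeedy = new ArrayList<>();
      for (Q q : underserved) {
        // Offer a share proportional to the (normalized) guarantee.
        long offer = unassigned * q.guaranteed / totalGuarantee;
        long accepted = Math.min(offer, q.need);
        q.idealAssigned += accepted;
        q.need -= accepted;
        assignedThisRound += accepted;
        if (q.need > 0) {
          stillNeedy.add(q);         // put it back for the next round
        }
      }
      if (assignedThisRound == 0) {
        break;                       // nothing accepted; avoid spinning
      }
      unassigned -= assignedThisRound;
      underserved = stillNeedy;
    }

    // All 2000 unassigned units are handed out: queue "b" is fully
    // satisfied, queue "a" takes the remainder but stays under-served.
  }
}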
Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.
The class CapacitySchedulerPreemptionUtils, method tryPreemptContainerAndDeductResToObtain:
/**
 * Try to preempt a container, deducting its resources from the per-partition
 * resources still to obtain.
 *
 * @param rc
 *          resource calculator
 * @param context
 *          preemption context
 * @param resourceToObtainByPartitions
 *          map holding the resource still to obtain per partition
 * @param rmContainer
 *          container being considered
 * @param clusterResource
 *          total cluster resource
 * @param preemptMap
 *          map holding the containers selected for preemption
 * @param totalPreemptionAllowed
 *          total preemption allowed per round
 * @return whether rmContainer should be preempted; if so, its resources are
 *         deducted from <code>resourceToObtainByPartitions</code>
 */
public static boolean tryPreemptContainerAndDeductResToObtain(
    ResourceCalculator rc, CapacitySchedulerPreemptionContext context,
    Map<String, Resource> resourceToObtainByPartitions,
    RMContainer rmContainer, Resource clusterResource,
    Map<ApplicationAttemptId, Set<RMContainer>> preemptMap,
    Resource totalPreemptionAllowed) {
  ApplicationAttemptId attemptId = rmContainer.getApplicationAttemptId();
  // Never account a container's resources more than once.
  if (preemptMapContains(preemptMap, attemptId, rmContainer)) {
    return false;
  }
  String nodePartition = getPartitionByNodeId(context,
      rmContainer.getAllocatedNode());
  Resource toObtainByPartition = resourceToObtainByPartitions
      .get(nodePartition);
  if (null != toObtainByPartition
      && Resources.greaterThan(rc, clusterResource, toObtainByPartition,
          Resources.none())
      && Resources.fitsIn(rc, clusterResource,
          rmContainer.getAllocatedResource(), totalPreemptionAllowed)) {
    Resources.subtractFrom(toObtainByPartition,
        rmContainer.getAllocatedResource());
    Resources.subtractFrom(totalPreemptionAllowed,
        rmContainer.getAllocatedResource());
    // When there is nothing left to obtain for this partition, remove it
    // from the map.
    if (Resources.lessThanOrEqual(rc, clusterResource, toObtainByPartition,
        Resources.none())) {
      resourceToObtainByPartitions.remove(nodePartition);
    }
    // Add to preemptMap
    addToPreemptMap(preemptMap, attemptId, rmContainer);
    return true;
  }
  return false;
}
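The bookkeeping in this method reduces to a few map updates. Here is a hedged sketch of the same deduct-and-record pattern with plain Java types (hypothetical names such as trySelect, single-dimension sizes, no scheduler classes).

import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Simplified model of tryPreemptContainerAndDeductResToObtain: a container
// is selected only if its partition still needs resources and the global
// budget can cover it; both counters are then decremented and the container
// is recorded so it is never counted twice.
public class DeductSketch {
  static boolean trySelect(Map<String, Long> toObtainByPartition,
      Map<String, Set<String>> selected, String appId, String containerId,
      String partition, long containerSize, long[] totalPreemptionAllowed) {
    // Never account the same container twice.
    if (selected.getOrDefault(appId, Collections.emptySet())
        .contains(containerId)) {
      return false;
    }
    Long toObtain = toObtainByPartition.get(partition);
    if (toObtain == null || toObtain <= 0
        || containerSize > totalPreemptionAllowed[0]) {
      return false;
    }
    toObtainByPartition.put(partition, toObtain - containerSize);
    totalPreemptionAllowed[0] -= containerSize;
    // Once the partition needs nothing more, drop it from the map.
    if (toObtainByPartition.get(partition) <= 0) {
      toObtainByPartition.remove(partition);
    }
    selected.computeIfAbsent(appId, k -> new HashSet<>()).add(containerId);
    return true;
  }
}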
Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.
The class FifoCandidatesSelector, method selectCandidates:
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
    Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
    Resource clusterResource, Resource totalPreemptionAllowed) {
  // Calculate how many resources we need to preempt.
  preemptableAmountCalculator.computeIdealAllocation(clusterResource,
      totalPreemptionAllowed);
  // Previous selectors (with higher priority) could have already selected
  // containers. We need to deduct preemptable resources based on the
  // already selected candidates.
  CapacitySchedulerPreemptionUtils
      .deductPreemptableResourcesBasedSelectedCandidates(preemptionContext,
          selectedCandidates);
  List<RMContainer> skippedAMContainerlist = new ArrayList<>();
  // Loop over all leaf queues.
  for (String queueName : preemptionContext.getLeafQueueNames()) {
    // Check whether preemption is disabled for the queue.
    if (preemptionContext.getQueueByPartition(queueName,
        RMNodeLabelsManager.NO_LABEL).preemptionDisabled) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("skipping from queue=" + queueName
            + " because it's a non-preemptable queue");
      }
      continue;
    }
    // Compute resToObtainByPartition, taking inter-queue preemption into
    // account.
    LeafQueue leafQueue = preemptionContext.getQueueByPartition(queueName,
        RMNodeLabelsManager.NO_LABEL).leafQueue;
    Map<String, Resource> resToObtainByPartition =
        CapacitySchedulerPreemptionUtils.getResToObtainByPartitionForLeafQueue(
            preemptionContext, queueName, clusterResource);
    try {
      leafQueue.getReadLock().lock();
      // Go through all ignore-partition-exclusivity containers first, to
      // make sure such containers become preemption candidates first.
      Map<String, TreeSet<RMContainer>> ignorePartitionExclusivityContainers =
          leafQueue.getIgnoreExclusivityRMContainers();
      for (String partition : resToObtainByPartition.keySet()) {
        if (ignorePartitionExclusivityContainers.containsKey(partition)) {
          TreeSet<RMContainer> rmContainers =
              ignorePartitionExclusivityContainers.get(partition);
          // Walk the set in reverse order so that containers of
          // later-submitted applications become preemption candidates first.
          for (RMContainer c : rmContainers.descendingSet()) {
            if (CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c,
                selectedCandidates)) {
              // Skip already selected containers.
              continue;
            }
            boolean preempted = CapacitySchedulerPreemptionUtils
                .tryPreemptContainerAndDeductResToObtain(rc, preemptionContext,
                    resToObtainByPartition, c, clusterResource,
                    selectedCandidates, totalPreemptionAllowed);
            if (!preempted) {
              continue;
            }
          }
        }
      }
      // Preempt other containers.
      Resource skippedAMSize = Resource.newInstance(0, 0);
      Iterator<FiCaSchedulerApp> desc =
          leafQueue.getOrderingPolicy().getPreemptionIterator();
      while (desc.hasNext()) {
        FiCaSchedulerApp fc = desc.next();
        // resToObtainByPartition is emptied as partitions are satisfied, so
        // an empty map means no more preemption is needed.
        if (resToObtainByPartition.isEmpty()) {
          break;
        }
        preemptFrom(fc, clusterResource, resToObtainByPartition,
            skippedAMContainerlist, skippedAMSize, selectedCandidates,
            totalPreemptionAllowed);
      }
      // Can try preempting AM containers (still saving at most
      // maxAMCapacityForThisQueue of AM resources) if more resources are
      // required to become preemption candidates from this queue.
      Resource maxAMCapacityForThisQueue = Resources.multiply(
          Resources.multiply(clusterResource, leafQueue.getAbsoluteCapacity()),
          leafQueue.getMaxAMResourcePerQueuePercent());
      preemptAMContainers(clusterResource, selectedCandidates,
          skippedAMContainerlist, resToObtainByPartition, skippedAMSize,
          maxAMCapacityForThisQueue, totalPreemptionAllowed);
    } finally {
      leafQueue.getReadLock().unlock();
    }
  }
  return selectedCandidates;
}
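One detail worth calling out is the AM-container cap used in the final step: it is simply the product clusterResource * absoluteCapacity * maxAMResourcePerQueuePercent. A small sketch with hypothetical numbers:

// Hypothetical numbers illustrating maxAMCapacityForThisQueue
// (clusterResource * absoluteCapacity * maxAMResourcePerQueuePercent).
public class AmCapSketch {
  public static void main(String[] args) {
    long clusterMemoryMb = 100_000;          // hypothetical cluster memory
    double absoluteCapacity = 0.30;          // queue is guaranteed 30% of it
    double maxAMResourcePercent = 0.10;      // at most 10% of that for AMs

    long maxAmMemoryMb =
        (long) (clusterMemoryMb * absoluteCapacity * maxAMResourcePercent);
    // AM containers in this queue are protected up to 3000 MB.
    System.out.println("maxAMCapacityForThisQueue = " + maxAmMemoryMb + " MB");
  }
}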
Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.
The class FifoIntraQueuePreemptionPlugin, method createTempAppForResCalculation:
private PriorityQueue<TempAppPerPartition> createTempAppForResCalculation(
    String partition, Collection<FiCaSchedulerApp> apps,
    TAPriorityComparator taComparator) {
  PriorityQueue<TempAppPerPartition> orderedByPriority =
      new PriorityQueue<>(100, taComparator);
  // Use an internal temporary app structure to store intermediate data
  // (priority).
  for (FiCaSchedulerApp app : apps) {
    Resource used = app.getAppAttemptResourceUsage().getUsed(partition);
    Resource amUsed = null;
    if (!app.isWaitingForAMContainer()) {
      amUsed = app.getAMResource(partition);
    }
    Resource pending = app.getTotalPendingRequestsPerPartition()
        .get(partition);
    Resource reserved = app.getAppAttemptResourceUsage()
        .getReserved(partition);
    used = (used == null) ? Resources.createResource(0, 0) : used;
    amUsed = (amUsed == null) ? Resources.createResource(0, 0) : amUsed;
    pending = (pending == null) ? Resources.createResource(0, 0) : pending;
    reserved = (reserved == null) ? Resources.createResource(0, 0) : reserved;
    HashSet<String> partitions = new HashSet<String>(
        app.getAppAttemptResourceUsage().getNodePartitionsSet());
    partitions.addAll(app.getTotalPendingRequestsPerPartition().keySet());
    // Create a TempAppPerPartition for further calculation.
    TempAppPerPartition tmpApp = new TempAppPerPartition(app,
        Resources.clone(used), Resources.clone(amUsed),
        Resources.clone(reserved), Resources.clone(pending));
    // Set the ideal allocation of the app to 0.
    tmpApp.idealAssigned = Resources.createResource(0, 0);
    orderedByPriority.add(tmpApp);
  }
  return orderedByPriority;
}
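The per-app normalization and ordering can be mirrored with plain Java collections. A hedged sketch (hypothetical TempApp class and figures, not the Hadoop types): missing per-partition values default to zero, each app is wrapped in a lightweight record with idealAssigned starting at zero, and the records are ordered by priority.

import java.util.Comparator;
import java.util.PriorityQueue;

// Simplified model of createTempAppForResCalculation: normalize absent
// figures to zero, wrap each app in a temporary record, and order the
// records by application priority for later intra-queue calculations.
public class TempAppSketch {
  static class TempApp {
    final String appId;
    final int priority;
    final long used, amUsed, pending, reserved;
    long idealAssigned;
    TempApp(String appId, int priority, long used, long amUsed, long pending,
        long reserved) {
      this.appId = appId;
      this.priority = priority;
      this.used = used;
      this.amUsed = amUsed;
      this.pending = pending;
      this.reserved = reserved;
      this.idealAssigned = 0;        // ideal allocation starts at zero
    }
  }

  static long orZero(Long v) {
    return v == null ? 0 : v;        // missing per-partition figures count as 0
  }

  public static void main(String[] args) {
    PriorityQueue<TempApp> orderedByPriority =
        new PriorityQueue<>(Comparator.comparingInt((TempApp a) -> a.priority));
    // Hypothetical per-partition figures; null stands for "nothing recorded".
    orderedByPriority.add(new TempApp("app_1", 1,
        orZero(4_096L), orZero(512L), orZero(null), orZero(0L)));
    orderedByPriority.add(new TempApp("app_2", 0,
        orZero(null), orZero(null), orZero(2_048L), orZero(null)));
    // In this sketch the lower priority value comes out first: app_2.
    System.out.println(orderedByPriority.peek().appId);
  }
}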