use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
the class QueuePriorityContainerCandidateSelector method tryToMakeBetterReservationPlacement.
/**
 * Walks the given scheduler nodes and tries to move {@code reservedContainer}
 * onto each node that passes the move pre-checks and on which
 * {@link #canPreemptEnoughResourceForAsked} reports that enough resource can
 * be preempted for the reservation.
 *
 * <p>Every node that the scheduler accepts as a new home for the reservation
 * is recorded in {@code touchedNodes}.
 *
 * @param reservedContainer the reserved container to relocate
 * @param allSchedulerNodes candidate target nodes to examine, in order
 */
private void tryToMakeBetterReservationPlacement(RMContainer reservedContainer, List<FiCaSchedulerNode> allSchedulerNodes) {
    for (FiCaSchedulerNode targetNode : allSchedulerNodes) {
        // Precheck if we can move the rmContainer to the new targetNode
        if (!preChecksForMovingReservedContainerToNode(reservedContainer, targetNode)) {
            continue;
        }
        if (!canPreemptEnoughResourceForAsked(reservedContainer.getReservedResource(), reservedContainer.getQueueName(), targetNode, true, null)) {
            continue;
        }
        // Capture the current node before asking the scheduler to move the
        // reservation, so the log below can report where it came from.
        NodeId fromNode = reservedContainer.getNodeId();
        // The container can be placed on targetNode, so notify the scheduler.
        // NOTE(review): the loop keeps going after a successful move, so the
        // reservation may be moved more than once per call — confirm intended.
        if (preemptionContext.getScheduler().moveReservedContainer(reservedContainer, targetNode)) {
            LOG.info("Successfully moved reserved container=" + reservedContainer.getContainerId() + " from node=" + fromNode + " to targetNode=" + targetNode.getNodeID());
            touchedNodes.add(targetNode.getNodeID());
        }
    }
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
the class QueuePriorityContainerCandidateSelector method selectCandidates.
/**
 * Selects containers to preempt so that sufficiently old reserved containers
 * from queues present in the priority digraph can be allocated.
 *
 * <p>For each qualifying reserved container, the method checks whether enough
 * resource can be preempted on the node holding the reservation. If so, the
 * chosen containers are added to {@code selectedCandidates} and their
 * allocated resource is subtracted from {@code totalPreemptedResourceAllowed}.
 * Otherwise, when {@code allowMoveReservation} is set and the demanding queue
 * is not yet satisfied, it tries to move the reservation to another node.
 *
 * @param selectedCandidates containers already selected, keyed by application
 *        attempt; updated in place and returned
 * @param clusterResource total cluster resource, stored for use by helpers
 * @param totalPreemptedResourceAllowed remaining preemption budget; reduced
 *        in place for every newly selected container
 * @return the same {@code selectedCandidates} map that was passed in
 */
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource clusterResource, Resource totalPreemptedResourceAllowed) {
    // Initialize digraph from queues
    // TODO (wangda): only do this when queue refreshed.
    priorityDigraph.clear();
    intializePriorityDigraph();
    // Nothing to do when the priority digraph is empty: return directly.
    if (priorityDigraph.isEmpty()) {
        return selectedCandidates;
    }
    // Save parameters to be shared by other methods
    this.selectedCandidates = selectedCandidates;
    this.clusterResource = clusterResource;
    this.totalPreemptionAllowed = totalPreemptedResourceAllowed;
    toPreemptedFromOtherQueues.clear();
    reservedContainers = new ArrayList<>();
    // Clear temp-scheduler-node-map every time when doing selection of
    // containers.
    tempSchedulerNodeMap.clear();
    touchedNodes = new HashSet<>();
    // Add all reserved containers for analysis
    List<FiCaSchedulerNode> allSchedulerNodes = preemptionContext.getScheduler().getAllNodes();
    for (FiCaSchedulerNode node : allSchedulerNodes) {
        RMContainer reservedContainer = node.getReservedContainer();
        if (null != reservedContainer) {
            // Only keep reserved containers whose queue has a row in the
            // priority digraph.
            if (priorityDigraph.containsRow(reservedContainer.getQueueName())) {
                reservedContainers.add(reservedContainer);
            }
        }
    }
    // Sort reserved container by creation time
    Collections.sort(reservedContainers, CONTAINER_CREATION_TIME_COMPARATOR);
    long currentTime = System.currentTimeMillis();
    // From the beginning of the list
    for (RMContainer reservedContainer : reservedContainers) {
        // Skip reservations that have existed for less than minTimeout.
        if (currentTime - reservedContainer.getCreationTime() < minTimeout) {
            continue;
        }
        FiCaSchedulerNode node = preemptionContext.getScheduler().getNode(reservedContainer.getReservedNode());
        if (null == node) {
            // Something is wrong, ignore
            continue;
        }
        List<RMContainer> newlySelectedToBePreemptContainers = new ArrayList<>();
        // Check if we can preempt for this queue
        // We will skip if the demanding queue is already satisfied.
        String demandingQueueName = reservedContainer.getQueueName();
        boolean demandingQueueSatisfied = isQueueSatisfied(demandingQueueName, node.getPartition());
        // We will continue check if it is possible to preempt reserved container
        // from the node.
        boolean canPreempt = false;
        if (!demandingQueueSatisfied) {
            canPreempt = canPreemptEnoughResourceForAsked(reservedContainer.getReservedResource(), demandingQueueName, node, false, newlySelectedToBePreemptContainers);
        }
        // Enough resource can be freed on this node: select the containers
        // for preemption.
        if (canPreempt) {
            touchedNodes.add(node.getNodeID());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Trying to preempt following containers to make reserved " + "container=" + reservedContainer.getContainerId() + " on node=" + node.getNodeID() + " can be allocated:");
            }
            // Update to-be-preempt
            incToPreempt(demandingQueueName, node.getPartition(), reservedContainer.getReservedResource());
            for (RMContainer c : newlySelectedToBePreemptContainers) {
                if (LOG.isDebugEnabled()) {
                    // Log the allocated resource: it is the amount deducted from
                    // the preemption budget below for this same container.
                    LOG.debug(" --container=" + c.getContainerId() + " resource=" + c.getAllocatedResource());
                }
                Set<RMContainer> containers = selectedCandidates.get(c.getApplicationAttemptId());
                if (null == containers) {
                    containers = new HashSet<>();
                    selectedCandidates.put(c.getApplicationAttemptId(), containers);
                }
                containers.add(c);
                // Update totalPreemptionResourceAllowed
                Resources.subtractFrom(totalPreemptedResourceAllowed, c.getAllocatedResource());
            }
        } else if (!demandingQueueSatisfied) {
            // Cannot preempt enough on the current node; optionally try to
            // move the reservation to another node.
            if (allowMoveReservation) {
                tryToMakeBetterReservationPlacement(reservedContainer, allSchedulerNodes);
            }
        }
    }
    return selectedCandidates;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
the class ReservedContainerCandidatesSelector method getNodesForPreemption.
/**
 * Builds the list of nodes on which preemption could satisfy a reserved
 * container, ordered by ascending {@code preemptionCost}.
 *
 * <p>Only nodes that currently hold a reserved container are examined. A node
 * is included only when {@code getPreemptionCandidatesOnNode} returns a
 * non-null result for it.
 *
 * @param cluster cluster resource, forwarded to the per-node evaluation
 * @param queueToPreemptableResourceByPartition forwarded to the per-node
 *        evaluation
 * @param selectedCandidates containers already selected, forwarded to the
 *        per-node evaluation
 * @param totalPreemptionAllowed preemption budget, forwarded to the per-node
 *        evaluation
 * @return the qualifying nodes sorted by preemption cost, lowest first
 */
private List<NodeForPreemption> getNodesForPreemption(Resource cluster, Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition, Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates, Resource totalPreemptionAllowed) {
    List<NodeForPreemption> preemptableNodes = new ArrayList<>();
    for (FiCaSchedulerNode schedulerNode : preemptionContext.getScheduler().getAllNodes()) {
        // Nodes without a reservation are of no interest here.
        if (schedulerNode.getReservedContainer() == null) {
            continue;
        }
        NodeForPreemption candidate = getPreemptionCandidatesOnNode(schedulerNode, cluster, queueToPreemptableResourceByPartition, selectedCandidates, totalPreemptionAllowed, true);
        // A null result means containers on the node cannot be preempted to
        // satisfy the reserved container.
        if (candidate == null) {
            continue;
        }
        preemptableNodes.add(candidate);
    }
    // Cheapest preemption first.
    Comparator<NodeForPreemption> byPreemptionCost = new Comparator<NodeForPreemption>() {
        @Override
        public int compare(NodeForPreemption left, NodeForPreemption right) {
            return Float.compare(left.preemptionCost, right.preemptionCost);
        }
    };
    Collections.sort(preemptableNodes, byPreemptionCost);
    return preemptableNodes;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
the class AbstractCSQueue method accept.
/**
 * Decides whether this queue accepts the given commit request.
 *
 * <p>When the request is not an allocation from an existing reservation, the
 * queue's used resource for the container's partition plus the net newly
 * allocated resource (required minus released) must fit within the
 * applicable limit. If it does not, the request is rejected. If it does, the
 * decision is delegated to the parent queue when one exists.
 *
 * @param cluster the cluster resource
 * @param request the proposed allocation or reservation to validate
 * @return {@code true} if this queue (and its parent, when it was consulted)
 *         accepts the request, {@code false} otherwise
 */
@Override
public boolean accept(Resource cluster, ResourceCommitRequest<FiCaSchedulerApp, FiCaSchedulerNode> request) {
    // Do we need to check parent queue before making this decision?
    boolean checkParentQueue = false;
    ContainerAllocationProposal<FiCaSchedulerApp, FiCaSchedulerNode> allocation = request.getFirstAllocatedOrReservedContainer();
    SchedulerContainer<FiCaSchedulerApp, FiCaSchedulerNode> schedulerContainer = allocation.getAllocatedOrReservedContainer();
    // Do not check when allocating new container from a reserved container
    if (allocation.getAllocateFromReservedContainer() == null) {
        Resource required = allocation.getAllocatedOrReservedResource();
        Resource netAllocated = Resources.subtract(required, request.getTotalReleasedResource());
        // Acquire the lock before entering the try block, so the finally
        // clause never unlocks a lock that was not successfully taken.
        readLock.lock();
        try {
            String partition = schedulerContainer.getNodePartition();
            Resource maxResourceLimit;
            if (allocation.getSchedulingMode() == SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY) {
                maxResourceLimit = getQueueMaxResource(partition, cluster);
            } else {
                // In any other scheduling mode the limit is the resource of
                // the whole partition.
                maxResourceLimit = labelManager.getResourceByLabel(partition, cluster);
            }
            if (!Resources.fitsIn(resourceCalculator, cluster, Resources.add(queueUsage.getUsed(partition), netAllocated), maxResourceLimit)) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Used resource=" + queueUsage.getUsed(partition) + " exceeded maxResourceLimit of the queue =" + maxResourceLimit);
                }
                return false;
            }
        } finally {
            readLock.unlock();
        }
        // Only check parent queue when something new allocated or reserved.
        checkParentQueue = true;
    }
    if (parent != null && checkParentQueue) {
        return parent.accept(cluster, request);
    }
    return true;
}
use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
the class ParentQueue method killContainersToEnforceMaxQueueCapacity.
/**
 * Kills killable containers of the given partition, one at a time, for as
 * long as this queue's used resource in that partition is greater than its
 * absolute maximum capacity share of the partition resource.
 *
 * <p>Stops as soon as usage is no longer above the limit or no killable
 * containers remain. A container whose application attempt or node cannot be
 * resolved is skipped without being killed.
 *
 * @param partition the node partition to enforce the limit on
 * @param clusterResource the cluster resource, passed to the leaf queue when
 *        completing a killed container
 */
private void killContainersToEnforceMaxQueueCapacity(String partition, Resource clusterResource) {
    Iterator<RMContainer> killables = getKillableContainers(partition);
    if (!killables.hasNext()) {
        return;
    }
    Resource partitionResource = labelManager.getResourceByLabel(partition, null);
    Resource maxResource = Resources.multiply(partitionResource, getQueueCapacities().getAbsoluteMaximumCapacity(partition));
    do {
        // Re-evaluate usage before every kill; stop once within the limit.
        boolean overLimit = Resources.greaterThan(resourceCalculator, partitionResource, queueUsage.getUsed(partition), maxResource);
        if (!overLimit) {
            return;
        }
        RMContainer victim = killables.next();
        FiCaSchedulerApp attempt = csContext.getApplicationAttempt(victim.getContainerId().getApplicationAttemptId());
        FiCaSchedulerNode node = csContext.getNode(victim.getAllocatedNode());
        if (attempt != null && node != null) {
            LeafQueue leafQueue = attempt.getCSLeafQueue();
            leafQueue.completedContainer(clusterResource, attempt, node, victim, SchedulerUtils.createPreemptedContainerStatus(victim.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL, null, false);
            LOG.info("Killed container=" + victim.getContainerId() + " from queue=" + leafQueue.getQueueName() + " to make queue=" + this.getQueueName() + "'s max-capacity enforced");
        }
    } while (killables.hasNext());
}
Aggregations