Use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
The class SLSCapacityScheduler, method updateQueueWithNodeUpdate.
private void updateQueueWithNodeUpdate(NodeUpdateSchedulerEventWrapper eventWrapper) {
  RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode();
  List<UpdatedContainerInfo> containerList = node.getContainerUpdates();
  for (UpdatedContainerInfo info : containerList) {
    for (ContainerStatus status : info.getCompletedContainers()) {
      ContainerId containerId = status.getContainerId();
      SchedulerAppReport app = super.getSchedulerAppInfo(containerId.getApplicationAttemptId());
      if (app == null) {
        // the application is no longer tracked by the scheduler; skip this container's
        // information.
        continue;
      }
      String queue = appQueueMap.get(containerId.getApplicationAttemptId());
      int releasedMemory = 0, releasedVCores = 0;
      if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
        for (RMContainer rmc : app.getLiveContainers()) {
          // compare container ids by value, not object identity
          if (rmc.getContainerId().equals(containerId)) {
            releasedMemory += rmc.getContainer().getResource().getMemorySize();
            releasedVCores += rmc.getContainer().getResource().getVirtualCores();
            break;
          }
        }
      } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) {
        if (preemptionContainerMap.containsKey(containerId)) {
          Resource preResource = preemptionContainerMap.get(containerId);
          releasedMemory += preResource.getMemorySize();
          releasedVCores += preResource.getVirtualCores();
          preemptionContainerMap.remove(containerId);
        }
      }
      // update queue counters
      updateQueueMetrics(queue, releasedMemory, releasedVCores);
    }
  }
}
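The snippet above relies on two pieces of surrounding state that are not shown: appQueueMap, which maps application attempts to queue names, and updateQueueMetrics, which adjusts per-queue usage counters when containers are released. A minimal sketch of the kind of bookkeeping updateQueueMetrics implies is below; the class and field names are assumptions for illustration, not the actual SLS implementation.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical per-queue usage tracker; not the actual SLS implementation.
class QueueUsageTracker {
  private final Map<String, Long> queueMemoryMB = new ConcurrentHashMap<>();
  private final Map<String, Integer> queueVCores = new ConcurrentHashMap<>();

  // Called when a container finishes; mirrors updateQueueMetrics(queue, mem, vcores).
  void release(String queue, long releasedMemory, int releasedVCores) {
    queueMemoryMB.merge(queue, -releasedMemory, Long::sum);
    queueVCores.merge(queue, -releasedVCores, Integer::sum);
  }
}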
Use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
The class ResourceSchedulerWrapper, method handle.
@Override
public void handle(SchedulerEvent schedulerEvent) {
  // metrics off
  if (!metricsON) {
    scheduler.handle(schedulerEvent);
    return;
  }
  if (!running) {
    running = true;
  }
  // metrics on
  Timer.Context handlerTimer = null;
  Timer.Context operationTimer = null;
  NodeUpdateSchedulerEventWrapper eventWrapper;
  try {
    //if (schedulerEvent instanceof NodeUpdateSchedulerEvent) {
    if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE
        && schedulerEvent instanceof NodeUpdateSchedulerEvent) {
      eventWrapper = new NodeUpdateSchedulerEventWrapper((NodeUpdateSchedulerEvent) schedulerEvent);
      schedulerEvent = eventWrapper;
      updateQueueWithNodeUpdate(eventWrapper);
    } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED
        && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
      // if the attempt still has an AM container, update resource usage information
      AppAttemptRemovedSchedulerEvent appRemoveEvent = (AppAttemptRemovedSchedulerEvent) schedulerEvent;
      ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID();
      String queue = appQueueMap.get(appAttemptId.getApplicationId());
      SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
      if (!app.getLiveContainers().isEmpty()) {
        // the attempt has at most one live container left,
        // which should be the AM container
        RMContainer rmc = app.getLiveContainers().iterator().next();
        updateQueueMetrics(queue,
            rmc.getContainer().getResource().getMemorySize(),
            rmc.getContainer().getResource().getVirtualCores());
      }
    }
    handlerTimer = schedulerHandleTimer.time();
    operationTimer = schedulerHandleTimerMap.get(schedulerEvent.getType()).time();
    scheduler.handle(schedulerEvent);
  } finally {
    if (handlerTimer != null) {
      handlerTimer.stop();
    }
    if (operationTimer != null) {
      operationTimer.stop();
    }
    schedulerHandleCounter.inc();
    schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
    if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
        && schedulerEvent instanceof AppRemovedSchedulerEvent) {
      SLSRunner.decreaseRemainingApps();
      AppRemovedSchedulerEvent appRemoveEvent = (AppRemovedSchedulerEvent) schedulerEvent;
      appQueueMap.remove(appRemoveEvent.getApplicationID());
    } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
        && schedulerEvent instanceof AppAddedSchedulerEvent) {
      AppAddedSchedulerEvent appAddEvent = (AppAddedSchedulerEvent) schedulerEvent;
      String queueName = appAddEvent.getQueue();
      appQueueMap.put(appAddEvent.getApplicationId(), queueName);
    }
  }
}
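The handlerTimer/operationTimer pattern above comes from the Dropwizard (Codahale) Metrics library: Timer.time() opens a Timer.Context and stop() records the elapsed time into the timer. A standalone sketch of that pattern, with a made-up registry, metric name, and delegate, looks like this:

import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;

// Illustration of the Timer.Context pattern used in handle();
// the registry and metric name here are assumptions, not SLS code.
class TimedHandlerExample {
  private final MetricRegistry registry = new MetricRegistry();
  private final Timer handleTimer = registry.timer("scheduler.handle");

  void handleWithTiming(Runnable delegate) {
    Timer.Context ctx = handleTimer.time();
    try {
      delegate.run(); // e.g. scheduler.handle(event)
    } finally {
      ctx.stop();     // records elapsed time into the timer's histogram
    }
  }
}

Stopping the context in a finally block, as handle() does, ensures the sample is recorded even when the wrapped scheduler throws.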
Use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
The class QueuePriorityContainerCandidateSelector, method selectCandidates.
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
    Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
    Resource clusterResource, Resource totalPreemptedResourceAllowed) {
  // Initialize digraph from queues
  // TODO (wangda): only do this when queue refreshed.
  priorityDigraph.clear();
  intializePriorityDigraph();
  // If no queue priority relationship is configured, return directly.
  if (priorityDigraph.isEmpty()) {
    return selectedCandidates;
  }
  // Save parameters to be shared by other methods
  this.selectedCandidates = selectedCandidates;
  this.clusterResource = clusterResource;
  this.totalPreemptionAllowed = totalPreemptedResourceAllowed;
  toPreemptedFromOtherQueues.clear();
  reservedContainers = new ArrayList<>();
  // Clear temp-scheduler-node-map every time when doing selection of
  // containers.
  tempSchedulerNodeMap.clear();
  touchedNodes = new HashSet<>();
  // Add all reserved containers for analysis
  List<FiCaSchedulerNode> allSchedulerNodes = preemptionContext.getScheduler().getAllNodes();
  for (FiCaSchedulerNode node : allSchedulerNodes) {
    RMContainer reservedContainer = node.getReservedContainer();
    if (null != reservedContainer) {
      // only consider the container if the queue it belongs to has higher
      // priority than at least one other queue
      if (priorityDigraph.containsRow(reservedContainer.getQueueName())) {
        reservedContainers.add(reservedContainer);
      }
    }
  }
  // Sort reserved containers by creation time
  Collections.sort(reservedContainers, CONTAINER_CREATION_TIME_COMPARATOR);
  long currentTime = System.currentTimeMillis();
  // From the beginning of the list
  for (RMContainer reservedContainer : reservedContainers) {
    // only consider reservations that have waited at least minTimeout
    // and still cannot be allocated
    if (currentTime - reservedContainer.getCreationTime() < minTimeout) {
      continue;
    }
    FiCaSchedulerNode node = preemptionContext.getScheduler().getNode(reservedContainer.getReservedNode());
    if (null == node) {
      // Something is wrong, ignore
      continue;
    }
    List<RMContainer> newlySelectedToBePreemptContainers = new ArrayList<>();
    // Check if we can preempt for this queue
    // We will skip if the demanding queue is already satisfied.
    String demandingQueueName = reservedContainer.getQueueName();
    boolean demandingQueueSatisfied = isQueueSatisfied(demandingQueueName, node.getPartition());
    // If not, check whether it is possible to preempt enough resources for the
    // reserved container from the node.
    boolean canPreempt = false;
    if (!demandingQueueSatisfied) {
      canPreempt = canPreemptEnoughResourceForAsked(reservedContainer.getReservedResource(),
          demandingQueueName, node, false, newlySelectedToBePreemptContainers);
    }
    // Preempt other containers so the reserved container can be allocated
    if (canPreempt) {
      touchedNodes.add(node.getNodeID());
      if (LOG.isDebugEnabled()) {
        LOG.debug("Trying to preempt following containers to make reserved "
            + "container=" + reservedContainer.getContainerId() + " on node="
            + node.getNodeID() + " can be allocated:");
      }
      // Update to-be-preempted resources of the demanding queue
      incToPreempt(demandingQueueName, node.getPartition(), reservedContainer.getReservedResource());
      for (RMContainer c : newlySelectedToBePreemptContainers) {
        if (LOG.isDebugEnabled()) {
          LOG.debug(" --container=" + c.getContainerId() + " resource=" + c.getReservedResource());
        }
        Set<RMContainer> containers = selectedCandidates.get(c.getApplicationAttemptId());
        if (null == containers) {
          containers = new HashSet<>();
          selectedCandidates.put(c.getApplicationAttemptId(), containers);
        }
        containers.add(c);
        // Deduct from totalPreemptedResourceAllowed
        Resources.subtractFrom(totalPreemptedResourceAllowed, c.getAllocatedResource());
      }
    } else if (!demandingQueueSatisfied) {
      // Otherwise, optionally try to move the reservation to a better node
      if (allowMoveReservation) {
        tryToMakeBetterReservationPlacement(reservedContainer, allSchedulerNodes);
      }
    }
  }
  return selectedCandidates;
}
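The get-then-put-if-absent pattern used above to grow selectedCandidates can also be written with Map.computeIfAbsent, the more idiomatic form on Java 8+. A self-contained sketch follows, using plain strings in place of ApplicationAttemptId and RMContainer so it runs without the YARN classes:

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the selectedCandidates bookkeeping with computeIfAbsent;
// the key/value types here are stand-ins, not the Hadoop classes.
class CandidateMapExample {
  static <K, V> void addCandidate(Map<K, Set<V>> selected, K appAttempt, V container) {
    selected.computeIfAbsent(appAttempt, k -> new HashSet<>()).add(container);
  }

  public static void main(String[] args) {
    Map<String, Set<String>> selected = new HashMap<>();
    addCandidate(selected, "appattempt_1", "container_1");
    addCandidate(selected, "appattempt_1", "container_2");
    System.out.println(selected); // e.g. {appattempt_1=[container_1, container_2]}
  }
}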
Use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
The class ReservedContainerCandidatesSelector, method getPreemptionCandidatesOnNode.
/**
 * Try to check whether we can preempt resources for the reserved container on a given node.
 * @param node the scheduler node holding the reservation
 * @param cluster the total cluster resource
 * @param queueToPreemptableResourceByPartition it's a map of
 *                 <queueName, <partition, preemptable-resource>>
 * @param readOnly do we want to modify preemptable resource after we selected
 *                 candidates
 * @return NodeForPreemption if it's possible to preempt containers on the node
 *         to satisfy reserved resource
 */
private NodeForPreemption getPreemptionCandidatesOnNode(FiCaSchedulerNode node,
    Resource cluster,
    Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
    Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
    Resource totalPreemptionAllowed, boolean readOnly) {
  RMContainer reservedContainer = node.getReservedContainer();
  Resource available = Resources.clone(node.getUnallocatedResource());
  Resource totalSelected = Resources.createResource(0);
  List<RMContainer> sortedRunningContainers = node.getCopiedListOfRunningContainers();
  List<RMContainer> selectedContainers = new ArrayList<>();
  Map<ContainerId, RMContainer> killableContainers = node.getKillableContainers();
  // Sort running containers by launch order: we prefer to preempt the most
  // recently launched containers first
  Collections.sort(sortedRunningContainers, new Comparator<RMContainer>() {
    @Override
    public int compare(RMContainer o1, RMContainer o2) {
      return -1 * o1.getContainerId().compareTo(o2.getContainerId());
    }
  });
  // First check: can we preempt containers to allocate the
  // reservedContainer?
  boolean canAllocateReservedContainer = false;
  // At least, we can get available + killable resources from this node
  Resource cur = Resources.add(available, node.getTotalKillableResources());
  String partition = node.getPartition();
  // Avoid preempting any container if required <= available + killable
  if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(), cur)) {
    return null;
  }
  // Extra cost of AM container preemption
  float amPreemptionCost = 0f;
  for (RMContainer c : sortedRunningContainers) {
    String containerQueueName = c.getQueueName();
    // Skip container if it is already marked killable
    if (killableContainers.containsKey(c.getContainerId())) {
      continue;
    }
    // For safety, never select an AM container for preemption
    if (c.isAMContainer()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Skip selecting AM container on host=" + node.getNodeID()
            + " AM container=" + c.getContainerId());
      }
      continue;
    }
    // Can we preempt container c?
    // Check if we have quota to preempt this container
    boolean canPreempt = tryToPreemptFromQueue(cluster, containerQueueName, partition,
        queueToPreemptableResourceByPartition, c.getAllocatedResource(),
        totalPreemptionAllowed, readOnly);
    // If we can, add to selected containers, and change resources accordingly.
    if (canPreempt) {
      if (!CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c, selectedCandidates)) {
        if (!readOnly) {
          selectedContainers.add(c);
        }
        Resources.addTo(totalSelected, c.getAllocatedResource());
      }
      Resources.addTo(cur, c.getAllocatedResource());
      if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(), cur)) {
        canAllocateReservedContainer = true;
        break;
      }
    }
  }
  if (!canAllocateReservedContainer) {
    if (!readOnly) {
      // Revert queue preemption quotas
      for (RMContainer c : selectedContainers) {
        Resource res = getPreemptableResource(c.getQueueName(), partition,
            queueToPreemptableResourceByPartition);
        if (null == res) {
          // Ignore such failures.
          continue;
        }
        Resources.addTo(res, c.getAllocatedResource());
      }
    }
    return null;
  }
  float ratio = Resources.ratio(rc, totalSelected, reservedContainer.getReservedResource());
  // Compute preemption score
  NodeForPreemption nfp = new NodeForPreemption(ratio + amPreemptionCost, node, selectedContainers);
  return nfp;
}
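The anonymous comparator above negates compareTo to sort containers by descending ContainerId, so the most recently launched containers are considered for preemption first. The same descending ordering can be expressed with Comparator.reverseOrder(); the sketch below uses plain longs standing in for container ids:

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

// Descending sort, equivalent in effect to "-1 * o1.compareTo(o2)".
class DescendingSortExample {
  public static void main(String[] args) {
    List<Long> containerIds = Arrays.asList(3L, 1L, 2L);
    containerIds.sort(Comparator.reverseOrder());
    System.out.println(containerIds); // [3, 2, 1] -- most recent id first
  }
}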
Use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
The class FifoCandidatesSelector, method selectCandidates.
@Override
public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
    Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
    Resource clusterResource, Resource totalPreemptionAllowed) {
  // Calculate how much resources we need to preempt
  preemptableAmountCalculator.computeIdealAllocation(clusterResource, totalPreemptionAllowed);
  // Previous selectors (with higher priority) could have already
  // selected containers. We need to deduct preemptable resources
  // based on already selected candidates.
  CapacitySchedulerPreemptionUtils.deductPreemptableResourcesBasedSelectedCandidates(
      preemptionContext, selectedCandidates);
  List<RMContainer> skippedAMContainerlist = new ArrayList<>();
  // Loop all leaf queues
  for (String queueName : preemptionContext.getLeafQueueNames()) {
    // check if preemption is disabled for the queue
    if (preemptionContext.getQueueByPartition(queueName,
        RMNodeLabelsManager.NO_LABEL).preemptionDisabled) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("skipping from queue=" + queueName
            + " because it's a non-preemptable queue");
      }
      continue;
    }
    // compute resToObtainByPartition, taking inter-queue preemption into account
    LeafQueue leafQueue = preemptionContext.getQueueByPartition(queueName,
        RMNodeLabelsManager.NO_LABEL).leafQueue;
    Map<String, Resource> resToObtainByPartition =
        CapacitySchedulerPreemptionUtils.getResToObtainByPartitionForLeafQueue(
            preemptionContext, queueName, clusterResource);
    try {
      leafQueue.getReadLock().lock();
      // go through all ignore-partition-exclusivity containers first to make
      // sure such containers will be preemption candidates first
      Map<String, TreeSet<RMContainer>> ignorePartitionExclusivityContainers =
          leafQueue.getIgnoreExclusivityRMContainers();
      for (String partition : resToObtainByPartition.keySet()) {
        if (ignorePartitionExclusivityContainers.containsKey(partition)) {
          TreeSet<RMContainer> rmContainers = ignorePartitionExclusivityContainers.get(partition);
          // iterate in reverse order so that later-submitted
          // applications' containers become preemption candidates first.
          for (RMContainer c : rmContainers.descendingSet()) {
            if (CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c, selectedCandidates)) {
              // Skip already selected containers
              continue;
            }
            boolean preempted = CapacitySchedulerPreemptionUtils.tryPreemptContainerAndDeductResToObtain(
                rc, preemptionContext, resToObtainByPartition, c, clusterResource,
                selectedCandidates, totalPreemptionAllowed);
            if (!preempted) {
              continue;
            }
          }
        }
      }
      // preempt other containers
      Resource skippedAMSize = Resource.newInstance(0, 0);
      Iterator<FiCaSchedulerApp> desc = leafQueue.getOrderingPolicy().getPreemptionIterator();
      while (desc.hasNext()) {
        FiCaSchedulerApp fc = desc.next();
        // stop once no more preemption is needed
        if (resToObtainByPartition.isEmpty()) {
          break;
        }
        preemptFrom(fc, clusterResource, resToObtainByPartition, skippedAMContainerlist,
            skippedAMSize, selectedCandidates, totalPreemptionAllowed);
      }
      // Can try preempting AM containers (still keeping at most
      // maxAMCapacityForThisQueue of AM resources) if more resources are
      // still required from this queue.
      Resource maxAMCapacityForThisQueue = Resources.multiply(
          Resources.multiply(clusterResource, leafQueue.getAbsoluteCapacity()),
          leafQueue.getMaxAMResourcePerQueuePercent());
      preemptAMContainers(clusterResource, selectedCandidates, skippedAMContainerlist,
          resToObtainByPartition, skippedAMSize, maxAMCapacityForThisQueue,
          totalPreemptionAllowed);
    } finally {
      leafQueue.getReadLock().unlock();
    }
  }
  return selectedCandidates;
}
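The comment about previous selectors hints at how these classes are meant to be driven: each selector in a chain receives the same selectedCandidates map and the shared preemption budget, adds its own picks, and passes the map on. A hypothetical driver loop illustrating that contract is sketched below; the Selector interface and type parameters are illustrative stand-ins, not the Hadoop classes.

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

// Hypothetical contract for a chain of candidate selectors sharing one map and budget.
interface Selector<A, C, R> {
  Map<A, Set<C>> selectCandidates(Map<A, Set<C>> selected, R clusterResource, R budget);
}

class SelectorChainExample {
  static <A, C, R> Map<A, Set<C>> runChain(Iterable<Selector<A, C, R>> selectors,
                                           R clusterResource, R budget) {
    Map<A, Set<C>> selected = new HashMap<>();
    for (Selector<A, C, R> s : selectors) {
      // each selector adds to (never replaces) the shared map and deducts from the budget
      selected = s.selectCandidates(selected, clusterResource, budget);
    }
    return selected;
  }
}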