use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class ContainerUpdateContext method checkAndAddToOutstandingIncreases.
/**
* Add the container to outstanding increases.
* @param rmContainer RMContainer.
* @param schedulerNode SchedulerNode.
* @param updateRequest UpdateContainerRequest.
* @return true if updated to outstanding increases was successful.
*/
public synchronized boolean checkAndAddToOutstandingIncreases(RMContainer rmContainer, SchedulerNode schedulerNode, UpdateContainerRequest updateRequest) {
Container container = rmContainer.getContainer();
SchedulerRequestKey schedulerKey = SchedulerRequestKey.create(updateRequest, rmContainer.getAllocatedSchedulerKey());
Map<Resource, Map<NodeId, Set<ContainerId>>> resourceMap = outstandingIncreases.get(schedulerKey);
if (resourceMap == null) {
resourceMap = new HashMap<>();
outstandingIncreases.put(schedulerKey, resourceMap);
} else {
// Updating Resource for and existing increase container
if (ContainerUpdateType.INCREASE_RESOURCE == updateRequest.getContainerUpdateType()) {
cancelPreviousRequest(schedulerNode, schedulerKey);
} else {
return false;
}
}
Resource resToIncrease = getResourceToIncrease(updateRequest, rmContainer);
Map<NodeId, Set<ContainerId>> locationMap = resourceMap.get(resToIncrease);
if (locationMap == null) {
locationMap = new HashMap<>();
resourceMap.put(resToIncrease, locationMap);
}
Set<ContainerId> containerIds = locationMap.get(container.getNodeId());
if (containerIds == null) {
containerIds = new HashSet<>();
locationMap.put(container.getNodeId(), containerIds);
}
if (outstandingDecreases.containsKey(container.getId())) {
return false;
}
containerIds.add(container.getId());
if (!Resources.isNone(resToIncrease)) {
Map<SchedulerRequestKey, Map<String, ResourceRequest>> updateResReqs = new HashMap<>();
Map<String, ResourceRequest> resMap = createResourceRequests(rmContainer, schedulerNode, schedulerKey, resToIncrease);
updateResReqs.put(schedulerKey, resMap);
appSchedulingInfo.addToPlacementSets(false, updateResReqs);
}
return true;
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class LeafQueue method completedContainer.
@Override
public void completedContainer(Resource clusterResource, FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue, boolean sortQueues) {
// Update SchedulerHealth for released / preempted container
updateSchedulerHealthForCompletedContainer(rmContainer, containerStatus);
if (application != null) {
boolean removed = false;
// Careful! Locking order is important!
try {
writeLock.lock();
Container container = rmContainer.getContainer();
// So, this is, in effect, a transaction across application & node
if (rmContainer.getState() == RMContainerState.RESERVED) {
removed = application.unreserve(rmContainer.getReservedSchedulerKey(), node, rmContainer);
} else {
removed = application.containerCompleted(rmContainer, containerStatus, event, node.getPartition());
node.releaseContainer(rmContainer.getContainerId(), false);
}
// Book-keeping
if (removed) {
// Inform the ordering policy
orderingPolicy.containerReleased(application, rmContainer);
releaseResource(clusterResource, application, container.getResource(), node.getPartition(), rmContainer);
}
} finally {
writeLock.unlock();
}
if (removed) {
// Inform the parent queue _outside_ of the leaf-queue lock
getParent().completedContainer(clusterResource, application, node, rmContainer, null, event, this, sortQueues);
}
}
// Notify PreemptionManager
csContext.getPreemptionManager().removeKillableContainer(new KillableContainer(rmContainer, node.getPartition(), queueName));
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class CapacityScheduler method doneApplicationAttempt.
private void doneApplicationAttempt(ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
try {
writeLock.lock();
LOG.info("Application Attempt " + applicationAttemptId + " is done." + " finalState=" + rmAppAttemptFinalState);
FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationAttemptId.getApplicationId());
if (application == null || attempt == null) {
LOG.info("Unknown application " + applicationAttemptId + " has completed!");
return;
}
// Release all the allocated, acquired, running containers
for (RMContainer rmContainer : attempt.getLiveContainers()) {
if (keepContainers && rmContainer.getState().equals(RMContainerState.RUNNING)) {
// do not kill the running container in the case of work-preserving AM
// restart.
LOG.info("Skip killing " + rmContainer.getContainerId());
continue;
}
super.completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(rmContainer.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL);
}
// Release all reserved containers
for (RMContainer rmContainer : attempt.getReservedContainers()) {
super.completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(rmContainer.getContainerId(), "Application Complete"), RMContainerEventType.KILL);
}
// Clean up pending requests, metrics etc.
attempt.stop(rmAppAttemptFinalState);
// Inform the queue
String queueName = attempt.getQueue().getQueueName();
CSQueue queue = this.getQueue(queueName);
if (!(queue instanceof LeafQueue)) {
LOG.error("Cannot finish application " + "from non-leaf queue: " + queueName);
} else {
queue.finishApplicationAttempt(attempt, queue.getQueueName());
}
} finally {
writeLock.unlock();
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class CapacityScheduler method completedContainerInternal.
@Override
protected void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
Container container = rmContainer.getContainer();
ContainerId containerId = container.getId();
// Get the application for the finished container
FiCaSchedulerApp application = getCurrentAttemptForContainer(container.getId());
ApplicationId appId = containerId.getApplicationAttemptId().getApplicationId();
if (application == null) {
LOG.info("Container " + container + " of" + " finished application " + appId + " completed with event " + event);
return;
}
// Get the node on which the container was allocated
FiCaSchedulerNode node = getNode(container.getNodeId());
if (null == node) {
LOG.info("Container " + container + " of" + " removed node " + container.getNodeId() + " completed with event " + event);
return;
}
// Inform the queue
LeafQueue queue = (LeafQueue) application.getQueue();
queue.completedContainer(getClusterResource(), application, node, rmContainer, containerStatus, event, null, true);
}
use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.
the class CapacityScheduler method allocateContainerOnSingleNode.
/*
* Logics of allocate container on a single node (Old behavior)
*/
private CSAssignment allocateContainerOnSingleNode(PlacementSet<FiCaSchedulerNode> ps, FiCaSchedulerNode node, boolean withNodeHeartbeat) {
// driven by node heartbeat works.
if (getNode(node.getNodeID()) != node) {
LOG.error("Trying to schedule on a removed node, please double check.");
return null;
}
CSAssignment assignment;
// Assign new containers...
// 1. Check for reserved applications
// 2. Schedule if there are no reservations
RMContainer reservedContainer = node.getReservedContainer();
if (reservedContainer != null) {
FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer(reservedContainer.getContainerId());
// Try to fulfill the reservation
LOG.info("Trying to fulfill reservation for application " + reservedApplication.getApplicationId() + " on node: " + node.getNodeID());
LeafQueue queue = ((LeafQueue) reservedApplication.getQueue());
assignment = queue.assignContainers(getClusterResource(), ps, // resources, should consider labeled resources as well.
new ResourceLimits(labelManager.getResourceByLabel(RMNodeLabelsManager.NO_LABEL, getClusterResource())), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
if (assignment.isFulfilledReservation()) {
if (withNodeHeartbeat) {
// Only update SchedulerHealth in sync scheduling, existing
// Data structure of SchedulerHealth need to be updated for
// Async mode
updateSchedulerHealth(lastNodeUpdateTime, node.getNodeID(), assignment);
}
schedulerHealth.updateSchedulerFulfilledReservationCounts(1);
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.ALLOCATED_FROM_RESERVED);
} else {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
}
assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
submitResourceCommitRequest(getClusterResource(), assignment);
}
// Do not schedule if there are any reservations to fulfill on the node
if (node.getReservedContainer() != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Skipping scheduling since node " + node.getNodeID() + " is reserved by application " + node.getReservedContainer().getContainerId().getApplicationAttemptId());
}
return null;
}
// has any available or killable resource
if (calculator.computeAvailableContainers(Resources.add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation) <= 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("This node or this node partition doesn't have available or" + "killable resource");
}
return null;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Trying to schedule on node: " + node.getNodeName() + ", available: " + node.getUnallocatedResource());
}
return allocateOrReserveNewContainers(ps, withNodeHeartbeat);
}
Aggregations