Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class RegularContainerAllocator, method preCheckForPlacementSet: runs the feasibility checks (pending asks, AM-container exclusivity, node partition, headroom) that must pass before a pending request is matched to a PlacementSet. A non-null return is a terminal skip result; null means the caller may proceed to allocate.
/*
 * Pre-check if we can allocate a pending resource request
 * (given schedulerKey) to a given PlacementSet.
 * We consider things such as exclusivity, pending resource, node partition,
 * headroom, etc.
 */
private ContainerAllocation preCheckForPlacementSet(Resource clusterResource,
    PlacementSet<FiCaSchedulerNode> ps, SchedulingMode schedulingMode,
    ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey) {
  Priority priority = schedulerKey.getPriority();
  FiCaSchedulerNode node = PlacementSetUtils.getSingleNode(ps);
  PendingAsk offswitchPendingAsk = application.getPendingAsk(schedulerKey,
      ResourceRequest.ANY);
  if (offswitchPendingAsk.getCount() <= 0) {
    ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
        activitiesManager, node, application, priority,
        ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST);
    return ContainerAllocation.PRIORITY_SKIPPED;
  }
  // Required resource
  Resource required = offswitchPendingAsk.getPerAllocationResource();
  // Do we need containers at this 'priority'?
  if (application.getOutstandingAsksCount(schedulerKey) <= 0) {
    ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
        activitiesManager, node, application, priority,
        ActivityDiagnosticConstant.APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE);
    return ContainerAllocation.PRIORITY_SKIPPED;
  }
  // Avoid the pain of preempting an AM container
  if (schedulingMode == SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY) {
    if (application.isWaitingForAMContainer()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Skip allocating AM container to app_attempt="
            + application.getApplicationAttemptId()
            + ", don't allow to allocate AM container in non-exclusive mode");
      }
      application.updateAppSkipNodeDiagnostics(
          "Skipping assigning to Node in Ignore Exclusivity mode. ");
      ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
          activitiesManager, node, application, priority,
          ActivityDiagnosticConstant.SKIP_IN_IGNORE_EXCLUSIVITY_MODE);
      return ContainerAllocation.APP_SKIPPED;
    }
  }
  // If the node partition does not match the request, skip to the next
  // priority.
  if (!appInfo.acceptNodePartition(schedulerKey, node.getPartition(),
      schedulingMode)) {
    ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
        activitiesManager, node, application, priority,
        ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NODE_PARTITION_DOES_NOT_MATCH_REQUEST);
    return ContainerAllocation.PRIORITY_SKIPPED;
  }
  if (!application.getCSLeafQueue().getReservationContinueLooking()) {
    if (!shouldAllocOrReserveNewContainer(schedulerKey, required)) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("doesn't need containers based on reservation algo!");
      }
      ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
          activitiesManager, node, application, priority,
          ActivityDiagnosticConstant.DO_NOT_NEED_ALLOCATIONATTEMPTINFOS);
      return ContainerAllocation.PRIORITY_SKIPPED;
    }
  }
  if (!checkHeadroom(clusterResource, resourceLimits, required,
      ps.getPartition())) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("cannot allocate required resource=" + required
          + " because of headroom");
    }
    ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
        activitiesManager, node, application, priority,
        ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM);
    return ContainerAllocation.QUEUE_SKIPPED;
  }
  // Increase missed-non-partitioned-resource-request-opportunity.
  // This is to make sure a non-partitioned resource request prefers
  // to be allocated to non-partitioned nodes.
  int missedNonPartitionedRequestSchedulingOpportunity = 0;
  // NO_LABEL under RESPECT_EXCLUSIVITY mode
  if (StringUtils.equals(RMNodeLabelsManager.NO_LABEL,
      appInfo.getSchedulingPlacementSet(schedulerKey)
          .getPrimaryRequestedNodePartition())) {
    missedNonPartitionedRequestSchedulingOpportunity =
        application.addMissedNonPartitionedRequestSchedulingOpportunity(
            schedulerKey);
  }
  if (schedulingMode == SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY) {
    // Schedule non-partitioned requests to the non-partitioned partition
    // first.
    if (missedNonPartitionedRequestSchedulingOpportunity < rmContext
        .getScheduler().getNumClusterNodes()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Skip app_attempt=" + application.getApplicationAttemptId()
            + " priority=" + schedulerKey.getPriority()
            + " because missed-non-partitioned-resource-request"
            + " opportunity under required:" + " Now="
            + missedNonPartitionedRequestSchedulingOpportunity + " required="
            + rmContext.getScheduler().getNumClusterNodes());
      }
      ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
          activitiesManager, node, application, priority,
          ActivityDiagnosticConstant.NON_PARTITIONED_PARTITION_FIRST);
      return ContainerAllocation.APP_SKIPPED;
    }
  }
  return null;
}
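The missed-opportunity counter near the end implements a simple back-off: a request with no partition preference is skipped on this scheduling path until it has been passed over roughly once per cluster node, after which it may fall back to partitioned nodes. A minimal, self-contained sketch of the same idea (class and method names here are illustrative, not the Hadoop API):

import java.util.concurrent.atomic.AtomicInteger;

// Illustrative sketch of the missed-opportunity back-off; not Hadoop code.
class NonPartitionedRequestBackoff {
  private final AtomicInteger missedOpportunities = new AtomicInteger();

  /**
   * Record that this request was passed over and return whether the
   * scheduler should still skip it. Once the request has been skipped
   * roughly once per cluster node, every node has had a chance to serve
   * it, so stop skipping and allow partitioned nodes.
   */
  boolean shouldSkip(int numClusterNodes) {
    return missedOpportunities.incrementAndGet() < numClusterNodes;
  }

  /** Reset after a successful allocation on a non-partitioned node. */
  void reset() {
    missedOpportunities.set(0);
  }
}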
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class RegularContainerAllocator, method allocate: runs the pre-checks, then walks candidate nodes in preference order and commits to the first successful allocation or reservation.
private ContainerAllocation allocate(Resource clusterResource,
    PlacementSet<FiCaSchedulerNode> ps, SchedulingMode schedulingMode,
    ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey,
    RMContainer reservedContainer) {
  // Do checks before determining which node to allocate on.
  // Return directly if any pre-check fails.
  ContainerAllocation result;
  if (reservedContainer == null) {
    result = preCheckForPlacementSet(clusterResource, ps, schedulingMode,
        resourceLimits, schedulerKey);
    if (null != result) {
      return result;
    }
  } else {
    // Pre-check when allocating a reserved container
    if (application.getOutstandingAsksCount(schedulerKey) == 0) {
      // Release the reservation
      return new ContainerAllocation(reservedContainer, null,
          AllocationState.QUEUE_SKIPPED);
    }
  }
  SchedulingPlacementSet<FiCaSchedulerNode> schedulingPS =
      application.getAppSchedulingInfo().getSchedulingPlacementSet(
          schedulerKey);
  result = ContainerAllocation.PRIORITY_SKIPPED;
  Iterator<FiCaSchedulerNode> iter = schedulingPS.getPreferredNodeIterator(ps);
  while (iter.hasNext()) {
    FiCaSchedulerNode node = iter.next();
    result = tryAllocateOnNode(clusterResource, node, schedulingMode,
        resourceLimits, schedulerKey, reservedContainer);
    if (AllocationState.ALLOCATED == result.state
        || AllocationState.RESERVED == result.state) {
      result = doAllocation(result, node, schedulerKey, reservedContainer);
      break;
    }
  }
  return result;
}
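The loop above commits to the first node on which tryAllocateOnNode yields an allocation or a reservation, in the order produced by the preferred-node iterator. A hedged, generic sketch of that first-fit pattern (all types are stand-ins, not Hadoop classes):

import java.util.List;

// Illustrative first-fit walk over candidate nodes in preference order.
class FirstFitAllocator<N> {
  enum State { ALLOCATED, RESERVED, SKIPPED }

  interface NodeTrier<T> {
    State tryOn(T node);
  }

  /** Try each candidate node in order; commit to the first node that
   *  yields an allocation or a reservation, otherwise report SKIPPED. */
  State allocate(List<N> preferredOrder, NodeTrier<N> trier) {
    State result = State.SKIPPED;
    for (N node : preferredOrder) {
      result = trier.tryOn(node);
      if (result == State.ALLOCATED || result == State.RESERVED) {
        break; // the real code runs doAllocation(...) at this point
      }
    }
    return result;
  }
}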
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class ParentQueue, method assignContainers: checks partition access and pending demand, then delegates a single allocation pass to its child queues and aggregates the result.
@Override
public CSAssignment assignContainers(Resource clusterResource,
    PlacementSet<FiCaSchedulerNode> ps, ResourceLimits resourceLimits,
    SchedulingMode schedulingMode) {
  FiCaSchedulerNode node = PlacementSetUtils.getSingleNode(ps);
  // If our queue cannot access this node, just return
  if (schedulingMode == SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY
      && !accessibleToPartition(ps.getPartition())) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Skip this queue=" + getQueuePath()
          + ", because it is not able to access partition="
          + ps.getPartition());
    }
    ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
        getParentName(), getQueueName(), ActivityState.REJECTED,
        ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION
            + node.getPartition());
    if (rootQueue) {
      ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
          node);
    }
    return CSAssignment.NULL_ASSIGNMENT;
  }
  // Skip if the queue doesn't need more resources.
  if (!super.hasPendingResourceRequest(ps.getPartition(), clusterResource,
      schedulingMode)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Skip this queue=" + getQueuePath()
          + ", because it doesn't need more resource, schedulingMode="
          + schedulingMode.name() + " node-partition=" + ps.getPartition());
    }
    ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
        getParentName(), getQueueName(), ActivityState.SKIPPED,
        ActivityDiagnosticConstant.QUEUE_DO_NOT_NEED_MORE_RESOURCE);
    if (rootQueue) {
      ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
          node);
    }
    return CSAssignment.NULL_ASSIGNMENT;
  }
  CSAssignment assignment = new CSAssignment(Resources.createResource(0, 0),
      NodeType.NODE_LOCAL);
  while (canAssign(clusterResource, node)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Trying to assign containers to child-queue of "
          + getQueueName());
    }
    // Are we over maximum capacity for this queue? This also accounts for
    // reserved resources (continuous reservation looking).
    if (!super.canAssignToThisQueue(clusterResource, ps.getPartition(),
        resourceLimits, Resources.createResource(getMetrics().getReservedMB(),
            getMetrics().getReservedVirtualCores()), schedulingMode)) {
      ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
          getParentName(), getQueueName(), ActivityState.SKIPPED,
          ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT);
      if (rootQueue) {
        ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
            node);
      }
      break;
    }
    // Schedule
    CSAssignment assignedToChild = assignContainersToChildQueues(
        clusterResource, ps, resourceLimits, schedulingMode);
    assignment.setType(assignedToChild.getType());
    assignment.setRequestLocalityType(
        assignedToChild.getRequestLocalityType());
    assignment.setExcessReservation(assignedToChild.getExcessReservation());
    assignment.setContainersToKill(assignedToChild.getContainersToKill());
    // If a child queue assigned something, record it; otherwise we're done.
    if (Resources.greaterThan(resourceCalculator, clusterResource,
        assignedToChild.getResource(), Resources.none())) {
      ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
          getParentName(), getQueueName(), ActivityState.ACCEPTED,
          ActivityDiagnosticConstant.EMPTY);
      boolean isReserved =
          assignedToChild.getAssignmentInformation().getReservationDetails()
              != null
              && !assignedToChild.getAssignmentInformation()
                  .getReservationDetails().isEmpty();
      if (node != null && !isReserved) {
        if (rootQueue) {
          ActivitiesLogger.NODE.finishAllocatedNodeAllocation(
              activitiesManager, node,
              assignedToChild.getAssignmentInformation()
                  .getFirstAllocatedOrReservedContainerId(),
              AllocationState.ALLOCATED);
        }
      } else {
        if (rootQueue) {
          ActivitiesLogger.NODE.finishAllocatedNodeAllocation(
              activitiesManager, node,
              assignedToChild.getAssignmentInformation()
                  .getFirstAllocatedOrReservedContainerId(),
              AllocationState.RESERVED);
        }
      }
      // Track resource utilization in this pass of the scheduler
      Resources.addTo(assignment.getResource(),
          assignedToChild.getResource());
      Resources.addTo(assignment.getAssignmentInformation().getAllocated(),
          assignedToChild.getAssignmentInformation().getAllocated());
      Resources.addTo(assignment.getAssignmentInformation().getReserved(),
          assignedToChild.getAssignmentInformation().getReserved());
      assignment.getAssignmentInformation().incrAllocations(
          assignedToChild.getAssignmentInformation().getNumAllocations());
      assignment.getAssignmentInformation().incrReservations(
          assignedToChild.getAssignmentInformation().getNumReservations());
      assignment.getAssignmentInformation().getAllocationDetails().addAll(
          assignedToChild.getAssignmentInformation().getAllocationDetails());
      assignment.getAssignmentInformation().getReservationDetails().addAll(
          assignedToChild.getAssignmentInformation().getReservationDetails());
      assignment.setIncreasedAllocation(
          assignedToChild.isIncreasedAllocation());
      LOG.info("assignedContainer" + " queue=" + getQueueName()
          + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity="
          + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed()
          + " cluster=" + clusterResource);
      if (LOG.isDebugEnabled()) {
        LOG.debug("ParentQ=" + getQueueName()
            + " assignedSoFarInThisIteration=" + assignment.getResource()
            + " usedCapacity=" + getUsedCapacity()
            + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity());
      }
    } else {
      assignment.setSkippedType(assignedToChild.getSkippedType());
      ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
          getParentName(), getQueueName(), ActivityState.SKIPPED,
          ActivityDiagnosticConstant.EMPTY);
      if (rootQueue) {
        ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
            node);
      }
      break;
    }
    /*
     * Previously we could allocate more than one container per allocation
     * attempt under the root queue. That logic is no longer appropriate in
     * the global-scheduling world, so do not try to allocate more than one
     * container per attempt; let the top-level scheduler make the decision.
     */
    break;
  }
  return assignment;
}
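Note the deliberate break at the bottom of the while loop: under global scheduling the parent commits at most one child result per pass and lets the top-level scheduler decide whether to call it again. A simplified sketch of that single-pass delegation, assuming a flat memory-only resource model (all names are illustrative; the real ordering of children lives inside assignContainersToChildQueues):

import java.util.List;

// Illustrative single-pass parent-to-child delegation; not Hadoop code.
class SketchParentQueue {
  interface ChildQueue {
    long assignOnce(); // memory assigned in MB, 0 if the child skipped
  }

  private final List<ChildQueue> children;

  SketchParentQueue(List<ChildQueue> children) {
    this.children = children;
  }

  /** Ask children in order until one assigns something, then stop:
   *  one committed assignment per pass, the caller drives repetition. */
  long assignContainers(long queueHeadroomMB) {
    long assigned = 0;
    for (ChildQueue child : children) {
      if (assigned >= queueHeadroomMB) {
        break; // over this queue's limit, stop
      }
      long fromChild = child.assignOnce();
      if (fromChild > 0) {
        assigned += fromChild;
        break; // commit a single child's assignment per pass
      }
    }
    return assigned;
  }
}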
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class ParentQueue, method attachContainer: charges a container moved into this queue and propagates the charge up to the parent queue.
@Override
public void attachContainer(Resource clusterResource,
    FiCaSchedulerApp application, RMContainer rmContainer) {
  if (application != null) {
    FiCaSchedulerNode node =
        scheduler.getNode(rmContainer.getContainer().getNodeId());
    allocateResource(clusterResource,
        rmContainer.getContainer().getResource(), node.getPartition());
    LOG.info("movedContainer" + " queueMoveIn=" + getQueueName()
        + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity="
        + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed()
        + " cluster=" + clusterResource);
    // Inform the parent
    if (parent != null) {
      parent.attachContainer(clusterResource, application, rmContainer);
    }
  }
}
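The recursive parent.attachContainer call charges the moved container's resource all the way up to the root queue. A minimal sketch of that upward propagation, using a memory-only stand-in for the Resource API (names are illustrative, not the Hadoop classes):

// Illustrative sketch of charging a container up the queue hierarchy.
class SketchQueue {
  private final SketchQueue parent; // null for the root queue
  private long usedMemoryMB;

  SketchQueue(SketchQueue parent) {
    this.parent = parent;
  }

  /** Charge a moved-in container to this queue and every ancestor. */
  void attachContainer(long containerMemoryMB) {
    usedMemoryMB += containerMemoryMB;
    if (parent != null) {
      parent.attachContainer(containerMemoryMB);
    }
  }

  long getUsedMemoryMB() {
    return usedMemoryMB;
  }
}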
Use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.
The class CapacityScheduler, method addNode: registers a new node with the node tracker and label manager under the scheduler's write lock, then updates the cluster resource.
private void addNode(RMNode nodeManager) {
  // Acquire the lock before the try block so the finally clause never
  // unlocks a lock that was not successfully taken.
  writeLock.lock();
  try {
    FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager,
        usePortForNodeName, nodeManager.getNodeLabels());
    nodeTracker.addNode(schedulerNode);
    // Update this node in the node label manager
    if (labelManager != null) {
      labelManager.activateNode(nodeManager.getNodeID(),
          schedulerNode.getTotalResource());
    }
    Resource clusterResource = getClusterResource();
    getRootQueue().updateClusterResource(clusterResource,
        new ResourceLimits(clusterResource));
    LOG.info("Added node " + nodeManager.getNodeAddress()
        + " clusterResource: " + clusterResource);
    if (scheduleAsynchronously && getNumClusterNodes() == 1) {
      for (AsyncScheduleThread t : asyncSchedulerThreads) {
        t.beginSchedule();
      }
    }
  } finally {
    writeLock.unlock();
  }
}
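addNode guards the node tracker and cluster-resource update with the scheduler's write lock, acquiring it before the try block so the finally clause never releases a lock that was not taken. A small self-contained sketch of that pattern (the node tracker is replaced here by a plain list; all names are illustrative):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Illustrative sketch of lock-guarded node registration; not Hadoop code.
class SketchNodeRegistry {
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private final List<String> nodes = new ArrayList<>();

  void addNode(String nodeAddress) {
    // Lock before try: if lock() itself failed, control would never
    // reach the finally block, so we never unlock a lock we don't hold.
    lock.writeLock().lock();
    try {
      nodes.add(nodeAddress);
      // Cluster-resource updates would happen here, still under the
      // write lock, so readers never see a half-updated view.
    } finally {
      lock.writeLock().unlock();
    }
  }

  int getNumClusterNodes() {
    lock.readLock().lock();
    try {
      return nodes.size();
    } finally {
      lock.readLock().unlock();
    }
  }
}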