
Example 6 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class RegularContainerAllocator method preCheckForPlacementSet.

/*
   * Pre-check if we can allocate a pending resource request
   * (given schedulerKey) to a given PlacementSet.
   * We consider things like exclusivity, pending resources, node partition,
   * headroom, etc.
   */
private ContainerAllocation preCheckForPlacementSet(Resource clusterResource, PlacementSet<FiCaSchedulerNode> ps, SchedulingMode schedulingMode, ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey) {
    Priority priority = schedulerKey.getPriority();
    FiCaSchedulerNode node = PlacementSetUtils.getSingleNode(ps);
    PendingAsk offswitchPendingAsk = application.getPendingAsk(schedulerKey, ResourceRequest.ANY);
    if (offswitchPendingAsk.getCount() <= 0) {
        ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST);
        return ContainerAllocation.PRIORITY_SKIPPED;
    }
    // Required resource
    Resource required = offswitchPendingAsk.getPerAllocationResource();
    // Do we need containers at this 'priority'?
    if (application.getOutstandingAsksCount(schedulerKey) <= 0) {
        ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE);
        return ContainerAllocation.PRIORITY_SKIPPED;
    }
    // Avoid the pain of preempting an AM container
    if (schedulingMode == SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY) {
        if (application.isWaitingForAMContainer()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Skip allocating AM container to app_attempt=" + application.getApplicationAttemptId() + ", don't allow to allocate AM container in non-exclusive mode");
            }
            application.updateAppSkipNodeDiagnostics("Skipping assigning to Node in Ignore Exclusivity mode. ");
            ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.SKIP_IN_IGNORE_EXCLUSIVITY_MODE);
            return ContainerAllocation.APP_SKIPPED;
        }
    }
    // If the node partition does not match, jump to the next priority.
    if (!appInfo.acceptNodePartition(schedulerKey, node.getPartition(), schedulingMode)) {
        ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NODE_PARTITION_DOES_NOT_MATCH_REQUEST);
        return ContainerAllocation.PRIORITY_SKIPPED;
    }
    if (!application.getCSLeafQueue().getReservationContinueLooking()) {
        if (!shouldAllocOrReserveNewContainer(schedulerKey, required)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("doesn't need containers based on reservation algo!");
            }
            ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.DO_NOT_NEED_ALLOCATIONATTEMPTINFOS);
            return ContainerAllocation.PRIORITY_SKIPPED;
        }
    }
    if (!checkHeadroom(clusterResource, resourceLimits, required, ps.getPartition())) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("cannot allocate required resource=" + required + " because of headroom");
        }
        ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM);
        return ContainerAllocation.QUEUE_SKIPPED;
    }
    // Increase missed-non-partitioned-resource-request-opportunity.
    // This is to make sure non-partitioned-resource-request will prefer
    // to be allocated to non-partitioned nodes
    int missedNonPartitionedRequestSchedulingOpportunity = 0;
    // Only count a miss when the request targets NO_LABEL under
    // RESPECT_PARTITION_EXCLUSIVITY mode
    if (StringUtils.equals(RMNodeLabelsManager.NO_LABEL, appInfo.getSchedulingPlacementSet(schedulerKey).getPrimaryRequestedNodePartition())) {
        missedNonPartitionedRequestSchedulingOpportunity = application.addMissedNonPartitionedRequestSchedulingOpportunity(schedulerKey);
    }
    if (schedulingMode == SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY) {
        // Non-partitioned requests should be scheduled to the
        // non-partitioned partition first.
        if (missedNonPartitionedRequestSchedulingOpportunity < rmContext.getScheduler().getNumClusterNodes()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Skip app_attempt=" + application.getApplicationAttemptId() + " priority=" + schedulerKey.getPriority() + " because missed-non-partitioned-resource-request" + " opportunity under required:" + " Now=" + missedNonPartitionedRequestSchedulingOpportunity + " required=" + rmContext.getScheduler().getNumClusterNodes());
            }
            ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(activitiesManager, node, application, priority, ActivityDiagnosticConstant.NON_PARTITIONED_PARTITION_FIRST);
            return ContainerAllocation.APP_SKIPPED;
        }
    }
    return null;
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) PendingAsk(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.PendingAsk)
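
To make the control flow easier to see, here is a minimal, self-contained sketch of the early-return guard chain that preCheckForPlacementSet follows: each check either vetoes the allocation with a specific skip result or falls through to the next. All names below (PreCheckSketch, Ask, SkipResult) are hypothetical simplifications, not Hadoop APIs.

// Hypothetical sketch of the early-return guard chain: each check either
// vetoes with a specific skip result or falls through to the next check.
public class PreCheckSketch {

    enum SkipResult { NONE, PRIORITY_SKIPPED, APP_SKIPPED, QUEUE_SKIPPED }

    static class Ask {
        final int count;
        final int requiredMemoryMb;
        Ask(int count, int requiredMemoryMb) {
            this.count = count;
            this.requiredMemoryMb = requiredMemoryMb;
        }
    }

    /** Returns NONE if all checks pass, otherwise the first veto. */
    static SkipResult preCheck(Ask ask, boolean partitionMatches, int headroomMb) {
        if (ask.count <= 0) {
            return SkipResult.PRIORITY_SKIPPED;   // nothing pending at this priority
        }
        if (!partitionMatches) {
            return SkipResult.PRIORITY_SKIPPED;   // node partition doesn't match the request
        }
        if (ask.requiredMemoryMb > headroomMb) {
            return SkipResult.QUEUE_SKIPPED;      // queue has no headroom left
        }
        return SkipResult.NONE;                   // caller may proceed to allocate
    }

    public static void main(String[] args) {
        System.out.println(preCheck(new Ask(2, 1024), true, 4096));  // NONE
        System.out.println(preCheck(new Ask(0, 1024), true, 4096));  // PRIORITY_SKIPPED
        System.out.println(preCheck(new Ask(2, 8192), true, 4096));  // QUEUE_SKIPPED
    }
}

Running main prints NONE, PRIORITY_SKIPPED, QUEUE_SKIPPED, mirroring three of the veto paths above; returning null from the real method plays the role of NONE.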

Example 7 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class RegularContainerAllocator method allocate.

private ContainerAllocation allocate(Resource clusterResource, PlacementSet<FiCaSchedulerNode> ps, SchedulingMode schedulingMode, ResourceLimits resourceLimits, SchedulerRequestKey schedulerKey, RMContainer reservedContainer) {
    // Do checks before determining which node to allocate
    // Directly return if this check fails.
    ContainerAllocation result;
    if (reservedContainer == null) {
        result = preCheckForPlacementSet(clusterResource, ps, schedulingMode, resourceLimits, schedulerKey);
        if (null != result) {
            return result;
        }
    } else {
        // pre-check when allocating reserved container
        if (application.getOutstandingAsksCount(schedulerKey) == 0) {
            // Release
            return new ContainerAllocation(reservedContainer, null, AllocationState.QUEUE_SKIPPED);
        }
    }
    SchedulingPlacementSet<FiCaSchedulerNode> schedulingPS = application.getAppSchedulingInfo().getSchedulingPlacementSet(schedulerKey);
    result = ContainerAllocation.PRIORITY_SKIPPED;
    Iterator<FiCaSchedulerNode> iter = schedulingPS.getPreferredNodeIterator(ps);
    while (iter.hasNext()) {
        FiCaSchedulerNode node = iter.next();
        result = tryAllocateOnNode(clusterResource, node, schedulingMode, resourceLimits, schedulerKey, reservedContainer);
        if (AllocationState.ALLOCATED == result.state || AllocationState.RESERVED == result.state) {
            result = doAllocation(result, node, schedulerKey, reservedContainer);
            break;
        }
    }
    return result;
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)
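
The allocate method above is a first-fit scan over a preference-ordered node iterator: try each candidate node in turn and stop at the first success. A minimal sketch of that pattern, assuming Java 16+ for records; Node and tryAllocate are stand-ins, not Hadoop types.

import java.util.Iterator;
import java.util.List;
import java.util.Optional;

// Hypothetical sketch of the allocate() loop: walk a preference-ordered node
// iterator and stop at the first node that accepts the request.
public class FirstFitSketch {

    record Node(String name, int freeMemoryMb) {}

    static boolean tryAllocate(Node node, int askMb) {
        return node.freeMemoryMb() >= askMb;   // succeed if the node has room
    }

    static Optional<Node> allocate(List<Node> preferred, int askMb) {
        Iterator<Node> iter = preferred.iterator();
        while (iter.hasNext()) {
            Node node = iter.next();
            if (tryAllocate(node, askMb)) {
                return Optional.of(node);      // first fit wins, like doAllocation + break
            }
        }
        return Optional.empty();               // analogue of PRIORITY_SKIPPED
    }

    public static void main(String[] args) {
        List<Node> nodes = List.of(new Node("n1", 512), new Node("n2", 4096));
        System.out.println(allocate(nodes, 1024)); // Optional[Node[name=n2, freeMemoryMb=4096]]
    }
}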

Example 8 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class ParentQueue method assignContainers.

@Override
public CSAssignment assignContainers(Resource clusterResource, PlacementSet<FiCaSchedulerNode> ps, ResourceLimits resourceLimits, SchedulingMode schedulingMode) {
    FiCaSchedulerNode node = PlacementSetUtils.getSingleNode(ps);
    // if our queue cannot access this node, just return
    if (schedulingMode == SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY && !accessibleToPartition(ps.getPartition())) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Skip this queue=" + getQueuePath() + ", because it is not able to access partition=" + ps.getPartition());
        }
        ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.REJECTED, ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + node.getPartition());
        if (rootQueue) {
            ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node);
        }
        return CSAssignment.NULL_ASSIGNMENT;
    }
    // Skip allocation if this queue doesn't need more resources.
    if (!super.hasPendingResourceRequest(ps.getPartition(), clusterResource, schedulingMode)) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Skip this queue=" + getQueuePath() + ", because it doesn't need more resource, schedulingMode=" + schedulingMode.name() + " node-partition=" + ps.getPartition());
        }
        ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.SKIPPED, ActivityDiagnosticConstant.QUEUE_DO_NOT_NEED_MORE_RESOURCE);
        if (rootQueue) {
            ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node);
        }
        return CSAssignment.NULL_ASSIGNMENT;
    }
    CSAssignment assignment = new CSAssignment(Resources.createResource(0, 0), NodeType.NODE_LOCAL);
    while (canAssign(clusterResource, node)) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to assign containers to child-queue of " + getQueueName());
        }
        // Are we over maximum-capacity for this queue? This also considers
        // parent limits and continuous reservation looking
        if (!super.canAssignToThisQueue(clusterResource, ps.getPartition(), resourceLimits, Resources.createResource(getMetrics().getReservedMB(), getMetrics().getReservedVirtualCores()), schedulingMode)) {
            ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.SKIPPED, ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT);
            if (rootQueue) {
                ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node);
            }
            break;
        }
        // Schedule
        CSAssignment assignedToChild = assignContainersToChildQueues(clusterResource, ps, resourceLimits, schedulingMode);
        assignment.setType(assignedToChild.getType());
        assignment.setRequestLocalityType(assignedToChild.getRequestLocalityType());
        assignment.setExcessReservation(assignedToChild.getExcessReservation());
        assignment.setContainersToKill(assignedToChild.getContainersToKill());
        // Done if no child-queue assigned anything
        if (Resources.greaterThan(resourceCalculator, clusterResource, assignedToChild.getResource(), Resources.none())) {
            ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
            boolean isReserved = assignedToChild.getAssignmentInformation().getReservationDetails() != null && !assignedToChild.getAssignmentInformation().getReservationDetails().isEmpty();
            if (node != null && !isReserved) {
                if (rootQueue) {
                    ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, assignedToChild.getAssignmentInformation().getFirstAllocatedOrReservedContainerId(), AllocationState.ALLOCATED);
                }
            } else {
                if (rootQueue) {
                    ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, assignedToChild.getAssignmentInformation().getFirstAllocatedOrReservedContainerId(), AllocationState.RESERVED);
                }
            }
            // Track resource utilization in this pass of the scheduler
            Resources.addTo(assignment.getResource(), assignedToChild.getResource());
            Resources.addTo(assignment.getAssignmentInformation().getAllocated(), assignedToChild.getAssignmentInformation().getAllocated());
            Resources.addTo(assignment.getAssignmentInformation().getReserved(), assignedToChild.getAssignmentInformation().getReserved());
            assignment.getAssignmentInformation().incrAllocations(assignedToChild.getAssignmentInformation().getNumAllocations());
            assignment.getAssignmentInformation().incrReservations(assignedToChild.getAssignmentInformation().getNumReservations());
            assignment.getAssignmentInformation().getAllocationDetails().addAll(assignedToChild.getAssignmentInformation().getAllocationDetails());
            assignment.getAssignmentInformation().getReservationDetails().addAll(assignedToChild.getAssignmentInformation().getReservationDetails());
            assignment.setIncreasedAllocation(assignedToChild.isIncreasedAllocation());
            LOG.info("assignedContainer" + " queue=" + getQueueName() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" + clusterResource);
            if (LOG.isDebugEnabled()) {
                LOG.debug("ParentQ=" + getQueueName() + " assignedSoFarInThisIteration=" + assignment.getResource() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity());
            }
        } else {
            assignment.setSkippedType(assignedToChild.getSkippedType());
            ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);
            if (rootQueue) {
                ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node);
            }
            break;
        }
        /*
       * Previously we could allocate more than one container per allocation
       * pass under the root queue. That logic is no longer appropriate in
       * the global scheduling world.
       *
       * So do not try to allocate more than one container per pass here;
       * let the top-level scheduler make the decision.
       */
        break;
    }
    return assignment;
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)
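
The accounting block in the middle of assignContainers folds the child queue's assignment into the parent's running totals. A simplified, self-contained sketch of that merge step; Assignment is a hypothetical stand-in for CSAssignment plus its AssignmentInformation, not the Hadoop API.

// Hypothetical sketch of how a parent queue folds a child queue's assignment
// into its own running totals, mirroring the Resources.addTo(...) block above.
public class ParentAggregateSketch {

    static class Assignment {
        int allocatedMb;
        int reservedMb;
        int numAllocations;
    }

    /** Merge one child pass into the parent's assignment, as assignContainers does. */
    static void addTo(Assignment parent, Assignment child) {
        parent.allocatedMb += child.allocatedMb;
        parent.reservedMb += child.reservedMb;
        parent.numAllocations += child.numAllocations;
    }

    public static void main(String[] args) {
        Assignment parent = new Assignment();
        Assignment child = new Assignment();
        child.allocatedMb = 2048;
        child.numAllocations = 1;

        if (child.allocatedMb > 0) {   // "did the child assign anything?"
            addTo(parent, child);
        }
        // In global scheduling only one allocation is attempted per pass,
        // hence the unconditional break at the end of the while loop above.
        System.out.println(parent.allocatedMb + " MB, " + parent.numAllocations + " allocation(s)");
    }
}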

Example 9 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class ParentQueue method attachContainer.

@Override
public void attachContainer(Resource clusterResource, FiCaSchedulerApp application, RMContainer rmContainer) {
    if (application != null) {
        FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId());
        allocateResource(clusterResource, rmContainer.getContainer().getResource(), node.getPartition());
        LOG.info("movedContainer" + " queueMoveIn=" + getQueueName() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" + clusterResource);
        // Inform the parent
        if (parent != null) {
            parent.attachContainer(clusterResource, application, rmContainer);
        }
    }
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)
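
attachContainer books the container's resource locally and then informs its parent, so usage is reflected along the whole path up to the root queue. A minimal sketch of that upward walk, with a hypothetical Queue class in place of ParentQueue.

// Hypothetical sketch of the upward propagation in attachContainer: a queue
// books the container's resource locally, then informs its parent.
public class QueueChainSketch {

    static class Queue {
        final String name;
        final Queue parent;   // null for the root queue
        int usedMb;

        Queue(String name, Queue parent) {
            this.name = name;
            this.parent = parent;
        }

        void attachContainer(int containerMb) {
            usedMb += containerMb;   // analogue of allocateResource
            System.out.println("movedContainer queueMoveIn=" + name + " used=" + usedMb);
            if (parent != null) {
                parent.attachContainer(containerMb);   // inform the parent
            }
        }
    }

    public static void main(String[] args) {
        Queue root = new Queue("root", null);
        Queue a = new Queue("root.a", root);
        a.attachContainer(1024);   // books 1024 MB on root.a and then on root
    }
}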

Example 10 with FiCaSchedulerNode

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode in project hadoop by apache.

the class CapacityScheduler method addNode.

private void addNode(RMNode nodeManager) {
    try {
        writeLock.lock();
        FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager, usePortForNodeName, nodeManager.getNodeLabels());
        nodeTracker.addNode(schedulerNode);
        // update this node to node label manager
        if (labelManager != null) {
            labelManager.activateNode(nodeManager.getNodeID(), schedulerNode.getTotalResource());
        }
        Resource clusterResource = getClusterResource();
        getRootQueue().updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
        LOG.info("Added node " + nodeManager.getNodeAddress() + " clusterResource: " + clusterResource);
        if (scheduleAsynchronously && getNumClusterNodes() == 1) {
            for (AsyncScheduleThread t : asyncSchedulerThreads) {
                t.beginSchedule();
            }
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) Resource(org.apache.hadoop.yarn.api.records.Resource)
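
addNode mutates shared scheduler state under a write lock and releases it in a finally block. A self-contained sketch of that pattern using java.util.concurrent.locks.ReentrantReadWriteLock; the registry class and its fields are hypothetical. Note that acquiring the lock before the try block, rather than inside it as above, is the safer idiom: unlock() is then never reached if the acquisition itself fails.

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Hypothetical sketch of write-lock-guarded node registration: mutate the
// node map under the write lock and release it in finally.
public class NodeRegistrySketch {

    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
    private final Map<String, Integer> nodeMemoryMb = new HashMap<>();
    private int clusterMemoryMb;

    void addNode(String nodeId, int memoryMb) {
        lock.writeLock().lock();   // acquire before try, unlike the snippet above
        try {
            nodeMemoryMb.put(nodeId, memoryMb);   // analogue of nodeTracker.addNode
            clusterMemoryMb += memoryMb;          // analogue of updateClusterResource
            System.out.println("Added node " + nodeId + " clusterResource: " + clusterMemoryMb + " MB");
        } finally {
            lock.writeLock().unlock();
        }
    }

    public static void main(String[] args) {
        NodeRegistrySketch registry = new NodeRegistrySketch();
        registry.addNode("host1:8041", 8192);
    }
}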

Aggregations

FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 79 usages
FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 47 usages
Resource (org.apache.hadoop.yarn.api.records.Resource): 46 usages
Test (org.junit.Test): 39 usages
ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 37 usages
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 35 usages
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 34 usages
Priority (org.apache.hadoop.yarn.api.records.Priority): 34 usages
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 21 usages
ActiveUsersManager (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager): 20 usages
ArrayList (java.util.ArrayList): 14 usages
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 11 usages
ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 10 usages
SchedulerRequestKey (org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey): 9 usages
HashMap (java.util.HashMap): 8 usages
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 7 usages
AMState (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState): 7 usages
Container (org.apache.hadoop.yarn.api.records.Container): 6 usages
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 5 usages
RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext): 5 usages