Search in sources :

Example 31 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class LeafQueue method setupQueueConfigs.

/**
 * (Re)loads this leaf queue's settings from the scheduler configuration
 * while holding the queue's write lock.
 *
 * <p>After delegating to {@code super.setupQueueConfigs}, this caches the
 * cluster resource, refreshes the headroom limits, and reads the ordering
 * policy, user limits, application limits, AM resource percent, priority
 * ACLs, locality delays and default application priority for this queue's
 * path. It finishes by logging a summary of the resulting values.
 *
 * <p>{@code maxApplications} is taken from the per-queue setting; when that
 * is negative it falls back to the global per-queue maximum if that is
 * positive, otherwise to the system-wide maximum scaled by this queue's
 * absolute capacity. {@code maxApplicationsPerUser} never exceeds
 * {@code maxApplications}.
 *
 * @param clusterResource the current total cluster resource
 * @throws IOException if {@code SchedulerUtils.checkQueueLabelExpression}
 *         rejects the queue's default label expression against its
 *         accessible labels (the parent implementation may also throw;
 *         it is not visible from here)
 */
protected void setupQueueConfigs(Resource clusterResource) throws IOException {
    // Acquire the lock BEFORE entering try: if lock() itself fails, the
    // finally block must not call unlock() on a lock this thread never held
    // (that would raise IllegalMonitorStateException and mask the real error).
    writeLock.lock();
    try {
        super.setupQueueConfigs(clusterResource);
        this.lastClusterResource = clusterResource;
        this.cachedResourceLimitsForHeadroom = new ResourceLimits(clusterResource);
        // Initialize headroom info, also used for calculating application
        // master resource limits.  Since this happens during queue initialization
        // and all queues may not be realized yet, we'll use (optimistic)
        // absoluteMaxCapacity (it will be replaced with the more accurate
        // absoluteMaxAvailCapacity during headroom/userlimit/allocation events)
        setQueueResourceLimitsInfo(clusterResource);
        CapacitySchedulerConfiguration conf = csContext.getConfiguration();
        setOrderingPolicy(conf.<FiCaSchedulerApp>getAppOrderingPolicy(getQueuePath()));
        usersManager.setUserLimit(conf.getUserLimit(getQueuePath()));
        usersManager.setUserLimitFactor(conf.getUserLimitFactor(getQueuePath()));
        maxApplications = conf.getMaximumApplicationsPerQueue(getQueuePath());
        if (maxApplications < 0) {
            // No per-queue value configured: fall back to the global per-queue
            // maximum, or derive one from the system-wide maximum.
            int maxGlobalPerQueueApps = conf.getGlobalMaximumApplicationsPerQueue();
            if (maxGlobalPerQueueApps > 0) {
                maxApplications = maxGlobalPerQueueApps;
            } else {
                int maxSystemApps = conf.getMaximumSystemApplications();
                maxApplications = (int) (maxSystemApps * queueCapacities.getAbsoluteCapacity());
            }
        }
        maxApplicationsPerUser = Math.min(maxApplications, (int) (maxApplications * (usersManager.getUserLimit() / 100.0f) * usersManager.getUserLimitFactor()));
        maxAMResourcePerQueuePercent = conf.getMaximumApplicationMasterResourcePerQueuePercent(getQueuePath());
        priorityAcls = conf.getPriorityAcls(getQueuePath(), scheduler.getMaxClusterLevelAppPriority());
        if (!SchedulerUtils.checkQueueLabelExpression(this.accessibleLabels, this.defaultLabelExpression, null)) {
            throw new IOException("Invalid default label expression of " + " queue=" + getQueueName() + " doesn't have permission to access all labels " + "in default label expression. labelExpression of resource request=" + (this.defaultLabelExpression == null ? "" : this.defaultLabelExpression) + ". Queue labels=" + (getAccessibleNodeLabels() == null ? "" : StringUtils.join(getAccessibleNodeLabels().iterator(), ',')));
        }
        nodeLocalityDelay = conf.getNodeLocalityDelay();
        rackLocalityFullReset = conf.getRackLocalityFullReset();
        // re-init this since max allocation could have changed
        this.minimumAllocationFactor = Resources.ratio(resourceCalculator, Resources.subtract(maximumAllocation, minimumAllocation), maximumAllocation);
        // Flatten the ACL map and the accessible labels into strings for the
        // summary log below.
        StringBuilder aclsString = new StringBuilder();
        for (Map.Entry<AccessType, AccessControlList> e : acls.entrySet()) {
            aclsString.append(e.getKey()).append(":").append(e.getValue().getAclString());
        }
        StringBuilder labelStrBuilder = new StringBuilder();
        if (accessibleLabels != null) {
            for (String s : accessibleLabels) {
                labelStrBuilder.append(s);
                labelStrBuilder.append(",");
            }
        }
        defaultAppPriorityPerQueue = Priority.newInstance(conf.getDefaultApplicationPriorityConfPerQueue(getQueuePath()));
        LOG.info("Initializing " + queueName + "\n"
            + "capacity = " + queueCapacities.getCapacity() + " [= (float) configuredCapacity / 100 ]" + "\n"
            + "absoluteCapacity = " + queueCapacities.getAbsoluteCapacity() + " [= parentAbsoluteCapacity * capacity ]" + "\n"
            + "maxCapacity = " + queueCapacities.getMaximumCapacity() + " [= configuredMaxCapacity ]" + "\n"
            + "absoluteMaxCapacity = " + queueCapacities.getAbsoluteMaximumCapacity() + " [= 1.0 maximumCapacity undefined, " + "(parentAbsoluteMaxCapacity * maximumCapacity) / 100 otherwise ]" + "\n"
            + "userLimit = " + usersManager.getUserLimit() + " [= configuredUserLimit ]" + "\n"
            + "userLimitFactor = " + usersManager.getUserLimitFactor() + " [= configuredUserLimitFactor ]" + "\n"
            + "maxApplications = " + maxApplications + " [= configuredMaximumSystemApplicationsPerQueue or" + " (int)(configuredMaximumSystemApplications * absoluteCapacity)]" + "\n"
            + "maxApplicationsPerUser = " + maxApplicationsPerUser + " [= (int)(maxApplications * (userLimit / 100.0f) * " + "userLimitFactor) ]" + "\n"
            + "usedCapacity = " + queueCapacities.getUsedCapacity() + " [= usedResourcesMemory / " + "(clusterResourceMemory * absoluteCapacity)]" + "\n"
            + "absoluteUsedCapacity = " + absoluteUsedCapacity + " [= usedResourcesMemory / clusterResourceMemory]" + "\n"
            + "maxAMResourcePerQueuePercent = " + maxAMResourcePerQueuePercent + " [= configuredMaximumAMResourcePercent ]" + "\n"
            + "minimumAllocationFactor = " + minimumAllocationFactor + " [= (float)(maximumAllocationMemory - minimumAllocationMemory) / " + "maximumAllocationMemory ]" + "\n"
            + "maximumAllocation = " + maximumAllocation + " [= configuredMaxAllocation ]" + "\n"
            + "numContainers = " + numContainers + " [= currentNumContainers ]" + "\n"
            + "state = " + getState() + " [= configuredState ]" + "\n"
            + "acls = " + aclsString + " [= configuredAcls ]" + "\n"
            + "nodeLocalityDelay = " + nodeLocalityDelay + "\n"
            + "labels=" + labelStrBuilder.toString() + "\n"
            + "reservationsContinueLooking = " + reservationsContinueLooking + "\n"
            + "preemptionDisabled = " + getPreemptionDisabled() + "\n"
            + "defaultAppPriorityPerQueue = " + defaultAppPriorityPerQueue
            + "\npriority = " + priority);
    } finally {
        writeLock.unlock();
    }
}
Also used : AccessControlList(org.apache.hadoop.security.authorize.AccessControlList) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) IOException(java.io.IOException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AccessType(org.apache.hadoop.yarn.security.AccessType)

Example 32 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class CapacityScheduler method updateNodeLabelsAndQueueResource.

/**
 * Process node labels update.
 *
 * <p>Under the scheduler write lock, applies every node-to-labels mapping
 * carried by the event via {@code updateLabelsOnNode}, then pushes the
 * current cluster resource down the queue hierarchy from the root queue.
 *
 * @param labelUpdateEvent event holding the updated node-to-labels mapping
 */
private void updateNodeLabelsAndQueueResource(NodeLabelsUpdateSchedulerEvent labelUpdateEvent) {
    // Acquire before try so that a failed lock() never reaches the
    // unlock() in finally (which would throw IllegalMonitorStateException).
    writeLock.lock();
    try {
        for (Entry<NodeId, Set<String>> entry : labelUpdateEvent.getUpdatedNodeToLabels().entrySet()) {
            NodeId id = entry.getKey();
            Set<String> labels = entry.getValue();
            updateLabelsOnNode(id, labels);
        }
        // Re-propagate the cluster resource once, after all nodes are updated.
        Resource clusterResource = getClusterResource();
        getRootQueue().updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
    } finally {
        writeLock.unlock();
    }
}
Also used : EnumSet(java.util.EnumSet) Set(java.util.Set) SimplePlacementSet(org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SimplePlacementSet) PlacementSet(org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet) HashSet(java.util.HashSet) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource)

Example 33 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class CapacityScheduler method updateNodeAndQueueResource.

/**
 * Process resource update on a node.
 *
 * <p>Under the scheduler write lock, applies the new resource option to the
 * node via {@code updateNodeResource} and then pushes the current cluster
 * resource down the queue hierarchy from the root queue.
 *
 * @param nm the node whose resource is being updated
 * @param resourceOption the new resource option for that node
 */
private void updateNodeAndQueueResource(RMNode nm, ResourceOption resourceOption) {
    // Acquire before try so that a failed lock() never reaches the
    // unlock() in finally (which would throw IllegalMonitorStateException).
    writeLock.lock();
    try {
        updateNodeResource(nm, resourceOption);
        Resource clusterResource = getClusterResource();
        getRootQueue().updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
    } finally {
        writeLock.unlock();
    }
}
Also used : ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) Resource(org.apache.hadoop.yarn.api.records.Resource)

Example 34 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class ParentQueue method assignContainersToChildQueues.

/**
 * Offers the given placement set to each child queue in the order produced
 * by {@code sortAndGetChildrenAllocationIterator} and returns the first
 * child assignment whose resource is greater than none.
 *
 * <p>When a child is skipped because of its queue limit, that child's
 * (non-negative) headroom is deducted from the limit handed to the
 * remaining children, and the first such skipped assignment is remembered
 * as the result to return if no child allocates anything.
 *
 * @param cluster the cluster resource
 * @param ps the placement set to allocate on
 * @param limits the resource limits imposed on this parent queue
 * @param schedulingMode the scheduling mode passed through to the children
 * @return the successful child assignment, otherwise the first
 *         queue-limit-skipped assignment, otherwise
 *         {@code CSAssignment.NULL_ASSIGNMENT}
 */
private CSAssignment assignContainersToChildQueues(Resource cluster, PlacementSet<FiCaSchedulerNode> ps, ResourceLimits limits, SchedulingMode schedulingMode) {
    CSAssignment result = CSAssignment.NULL_ASSIGNMENT;
    Resource remainingLimit = limits.getLimit();
    printChildQueues();

    // Try to assign to most 'under-served' sub-queue
    Iterator<CSQueue> children = sortAndGetChildrenAllocationIterator(ps.getPartition());
    while (children.hasNext()) {
        CSQueue child = children.next();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Trying to assign to queue: " + child.getQueuePath() + " stats: " + child);
        }

        // Compute the child's limits from what is left of the parent's limit
        // before asking it to allocate.
        ResourceLimits limitsForChild = getResourceLimitsOfChild(child, cluster, remainingLimit, ps.getPartition());
        CSAssignment fromChild = child.assignContainers(cluster, ps, limitsForChild, schedulingMode);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Assigned to queue: " + child.getQueuePath() + " stats: " + child + " --> " + fromChild.getResource() + ", " + fromChild.getType());
        }

        // A child that actually allocated something ends the search.
        boolean allocated = Resources.greaterThan(resourceCalculator, cluster, fromChild.getResource(), Resources.none());
        if (allocated) {
            return fromChild;
        }

        // Only queue-limit skips affect the result and the remaining limit.
        if (fromChild.getSkippedType() != CSAssignment.SkippedType.QUEUE_LIMIT) {
            continue;
        }

        // Keep the first queue-limit skip as the fallback result.
        if (result.getSkippedType() != CSAssignment.SkippedType.QUEUE_LIMIT) {
            result = fromChild;
        }

        // The blocked child's headroom (floored at none) is no longer
        // available to its siblings.
        Resource blockedHeadroom = Resources.max(resourceCalculator, cluster, limitsForChild.getHeadroom(), Resources.none());
        if (LOG.isDebugEnabled()) {
            LOG.debug("Decrease parentLimits " + remainingLimit + " for " + this.getQueueName() + " by " + blockedHeadroom + " as childQueue=" + child.getQueueName() + " is blocked");
        }
        remainingLimit = Resources.subtract(remainingLimit, blockedHeadroom);
    }
    return result;
}
Also used : ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) Resource(org.apache.hadoop.yarn.api.records.Resource)

Example 35 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class CapacityScheduler method removeNode.

/**
 * Removes a node from the scheduler while holding the scheduler write lock.
 *
 * <p>Deactivates the node in the label manager (when one is set), completes
 * all of its running containers and its reserved container (if any) with a
 * {@code LOST_CONTAINER} status and a {@code KILL} event, removes it from the
 * node tracker, and pushes the resulting cluster resource down the queue
 * hierarchy. When asynchronous scheduling is enabled and no nodes remain,
 * the async scheduling threads are suspended.
 *
 * <p>If the node is not known to the node tracker, an error is logged and
 * nothing after the label-manager deactivation is performed.
 *
 * @param nodeInfo the node to remove
 */
private void removeNode(RMNode nodeInfo) {
    // Acquire before try so that a failed lock() never reaches the
    // unlock() in finally (which would throw IllegalMonitorStateException).
    writeLock.lock();
    try {
        // update this node to node label manager
        if (labelManager != null) {
            labelManager.deactivateNode(nodeInfo.getNodeID());
        }
        NodeId nodeId = nodeInfo.getNodeID();
        FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
        if (node == null) {
            LOG.error("Attempting to remove non-existent node " + nodeId);
            // finally still releases the lock on this early exit.
            return;
        }
        // Remove running containers
        List<RMContainer> runningContainers = node.getCopiedListOfRunningContainers();
        for (RMContainer container : runningContainers) {
            super.completedContainer(container, SchedulerUtils.createAbnormalContainerStatus(container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
        }
        // Remove reservations, if any
        RMContainer reservedContainer = node.getReservedContainer();
        if (reservedContainer != null) {
            super.completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus(reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL);
        }
        nodeTracker.removeNode(nodeId);
        // Propagate the cluster resource as it stands after the removal.
        Resource clusterResource = getClusterResource();
        getRootQueue().updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
        int numNodes = nodeTracker.nodeCount();
        // Nothing left to schedule on: pause the async scheduling threads.
        if (scheduleAsynchronously && numNodes == 0) {
            for (AsyncScheduleThread t : asyncSchedulerThreads) {
                t.suspendSchedule();
            }
        }
        LOG.info("Removed node " + nodeInfo.getNodeAddress() + " clusterResource: " + getClusterResource());
    } finally {
        writeLock.unlock();
    }
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Resource(org.apache.hadoop.yarn.api.records.Resource) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Aggregations

ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits)52 Resource (org.apache.hadoop.yarn.api.records.Resource)48 Test (org.junit.Test)40 FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)37 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)34 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)32 Priority (org.apache.hadoop.yarn.api.records.Priority)31 NodeId (org.apache.hadoop.yarn.api.records.NodeId)29 ActiveUsersManager (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager)20 HashMap (java.util.HashMap)10 RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)10 ArrayList (java.util.ArrayList)9 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)9 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)9 AMState (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState)7 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)6 PlacementSet (org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet)6 SchedulerRequestKey (org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey)6 InOrder (org.mockito.InOrder)5 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)4