Example 51 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class TestLeafQueue method testLocalityConstraints.

@Test
public void testLocalityConstraints() throws Exception {
    // Manipulate queue 'a'
    LeafQueue a = stubLeafQueue((LeafQueue) queues.get(A));
    // User
    String user_0 = "user_0";
    // Submit applications
    final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0);
    FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    a.submitApplicationAttempt(app_0, user_0);
    final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0);
    FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), spyRMContext);
    a.submitApplicationAttempt(app_1, user_0);
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app_0.getApplicationAttemptId(), app_0, app_1.getApplicationAttemptId(), app_1);
    // Setup some nodes and racks
    String host_0_0 = "127.0.0.1";
    String rack_0 = "rack_0";
    String host_0_1 = "127.0.0.2";
    FiCaSchedulerNode node_0_1 = TestUtils.getMockNode(host_0_1, rack_0, 0, 8 * GB);
    String host_1_0 = "127.0.0.3";
    String rack_1 = "rack_1";
    FiCaSchedulerNode node_1_0 = TestUtils.getMockNode(host_1_0, rack_1, 0, 8 * GB);
    String host_1_1 = "127.0.0.4";
    FiCaSchedulerNode node_1_1 = TestUtils.getMockNode(host_1_1, rack_1, 0, 8 * GB);
    Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node_0_1.getNodeID(), node_0_1, node_1_0.getNodeID(), node_1_0, node_1_1.getNodeID(), node_1_1);
    final int numNodes = 4;
    Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 1);
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
    // Setup resource-requests
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     <----
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, false >         <----
    // ANY:      < 1, 1GB, 1, false >         <----
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 8G
    // Blacklist: <host_0_0>
    Priority priority = TestUtils.createMockPriority(1);
    SchedulerRequestKey schedulerKey = toSchedulerKey(priority);
    List<ResourceRequest> app_0_requests_0 = new ArrayList<ResourceRequest>();
    app_0_requests_0.add(TestUtils.createResourceRequest(host_0_0, 1 * GB, 1, true, priority, recordFactory));
    app_0_requests_0.add(TestUtils.createResourceRequest(host_1_0, 1 * GB, 1, true, priority, recordFactory));
    app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, false, priority, recordFactory));
    // only one
    app_0_requests_0.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 1, false, priority, recordFactory));
    app_0.updateResourceRequests(app_0_requests_0);
    app_0.updateBlacklist(Collections.singletonList(host_0_0), null);
    app_0_requests_0.clear();
    //
    // Start testing...
    //
    // node_0_1  
    // Shouldn't allocate since RR(rack_0) = null && RR(ANY) = relax: false
    CSAssignment assignment = a.assignContainers(clusterResource, node_0_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyNoContainerAllocated(assignment);
    // should be 0
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     <----
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, false >         <----
    // ANY:      < 1, 1GB, 1, false >         <----
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 8G
    // Blacklist: <host_0_0>
    // node_1_1  
    // Shouldn't allocate since RR(rack_1) = relax: false
    assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyNoContainerAllocated(assignment);
    // should be 0
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    // Allow rack-locality for rack_1, but blacklist node_1_1
    app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, true, priority, recordFactory));
    app_0.updateResourceRequests(app_0_requests_0);
    app_0.updateBlacklist(Collections.singletonList(host_1_1), null);
    app_0_requests_0.clear();
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, true >         
    // ANY:      < 1, 1GB, 1, false >         
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 8G
    // Blacklist: < host_0_0 , host_1_1 >       <----
    // node_1_1  
    // Shouldn't allocate since node_1_1 is blacklisted
    assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyNoContainerAllocated(assignment);
    // should be 0
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    // Now, remove node_1_1 from blacklist, but add rack_1 to blacklist
    app_0.updateResourceRequests(app_0_requests_0);
    app_0.updateBlacklist(Collections.singletonList(rack_1), Collections.singletonList(host_1_1));
    app_0_requests_0.clear();
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, true >         
    // ANY:      < 1, 1GB, 1, false >         
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 8G
    // Blacklist: < host_0_0 , rack_1 >       <----
    // node_1_1  
    // Shouldn't allocate since rack_1 is blacklisted
    assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyNoContainerAllocated(assignment);
    // should be 0
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    // Now remove rack_1 from blacklist
    app_0.updateResourceRequests(app_0_requests_0);
    app_0.updateBlacklist(null, Collections.singletonList(rack_1));
    app_0_requests_0.clear();
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, true >         
    // ANY:      < 1, 1GB, 1, false >         
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 8G
    // Blacklist: < host_0_0 >       <----
    // Now rack_1 is no longer blacklisted and RR(rack_1) = relax: true,
    // but no container is allocated on this heartbeat; the ask stays outstanding
    assignment = a.assignContainers(clusterResource, node_1_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyNoContainerAllocated(assignment);
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    assertEquals(1, app_0.getOutstandingAsksCount(schedulerKey));
    // Now sanity-check node_local
    app_0_requests_0.add(TestUtils.createResourceRequest(rack_1, 1 * GB, 1, false, priority, recordFactory));
    // only one
    app_0_requests_0.add(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 1, false, priority, recordFactory));
    app_0.updateResourceRequests(app_0_requests_0);
    app_0_requests_0.clear();
    // resourceName: <priority, memory, #containers, relaxLocality>
    // host_0_0: < 1, 1GB, 1, true >
    // host_0_1: < null >
    // rack_0:   < null >                     
    // host_1_0: < 1, 1GB, 1, true >
    // host_1_1: < null >
    // rack_1:   < 1, 1GB, 1, false >          <----
    // ANY:      < 1, 1GB, 1, false >          <----
    // Availability:
    // host_0_0: 8G
    // host_0_1: 8G
    // host_1_0: 8G
    // host_1_1: 7G
    assignment = a.assignContainers(clusterResource, node_1_0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
    applyCSAssignment(clusterResource, assignment, a, nodes, apps);
    verifyContainerAllocated(assignment, NodeType.NODE_LOCAL);
    assertEquals(0, app_0.getSchedulingOpportunities(schedulerKey));
    assertEquals(0, app_0.getOutstandingAsksCount(schedulerKey));
}
Also used : FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) ArrayList(java.util.ArrayList) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) SchedulerRequestKey(org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ActiveUsersManager(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) Test(org.junit.Test)
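
The test above exercises the interaction between per-resource relaxLocality flags and the application blacklist. As a minimal, self-contained sketch of building the same shape of ask with the public YARN records API (the host and rack names are illustrative, and this assumes the ResourceRequest.newInstance overload that takes a relaxLocality flag):

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class LocalityConstrainedAsk {

    public static void main(String[] args) {
        Priority priority = Priority.newInstance(1);
        // 1 GB, 1 vcore per container
        Resource oneGb = Resource.newInstance(1024, 1);
        // Node-local ask: relaxLocality=true permits falling back to the
        // enclosing rack and ANY levels.
        ResourceRequest nodeLocal = ResourceRequest.newInstance(priority, "host_1_0", oneGb, 1, true);
        // relaxLocality=false on the rack and ANY levels forbids placement
        // there, effectively pinning the container to host_1_0.
        ResourceRequest rackLocal = ResourceRequest.newInstance(priority, "rack_1", oneGb, 1, false);
        ResourceRequest offSwitch = ResourceRequest.newInstance(priority, ResourceRequest.ANY, oneGb, 1, false);
        System.out.println(nodeLocal);
        System.out.println(rackLocal);
        System.out.println(offSwitch);
    }
}

This mirrors the first request set in the test: with rack_1 and ANY locked down, only a heartbeat from host_1_0 can yield an allocation, which is why node_0_1 and node_1_1 are rejected until the rack-level request is resubmitted with relaxLocality=true.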

Example 52 with ResourceLimits

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits in project hadoop by apache.

the class LeafQueue method setupQueueConfigs.

protected void setupQueueConfigs(Resource clusterResource) throws IOException {
    try {
        writeLock.lock();
        super.setupQueueConfigs(clusterResource);
        this.lastClusterResource = clusterResource;
        this.cachedResourceLimitsForHeadroom = new ResourceLimits(clusterResource);
        // Initialize headroom info, also used for calculating application
        // master resource limits.  Since this happens during queue initialization
        // and all queues may not be realized yet, we'll use (optimistic)
        // absoluteMaxCapacity (it will be replaced with the more accurate
        // absoluteMaxAvailCapacity during headroom/userlimit/allocation events)
        setQueueResourceLimitsInfo(clusterResource);
        CapacitySchedulerConfiguration conf = csContext.getConfiguration();
        setOrderingPolicy(conf.<FiCaSchedulerApp>getAppOrderingPolicy(getQueuePath()));
        usersManager.setUserLimit(conf.getUserLimit(getQueuePath()));
        usersManager.setUserLimitFactor(conf.getUserLimitFactor(getQueuePath()));
        maxApplications = conf.getMaximumApplicationsPerQueue(getQueuePath());
        if (maxApplications < 0) {
            int maxGlobalPerQueueApps = conf.getGlobalMaximumApplicationsPerQueue();
            if (maxGlobalPerQueueApps > 0) {
                maxApplications = maxGlobalPerQueueApps;
            } else {
                int maxSystemApps = conf.getMaximumSystemApplications();
                maxApplications = (int) (maxSystemApps * queueCapacities.getAbsoluteCapacity());
            }
        }
        maxApplicationsPerUser = Math.min(maxApplications, (int) (maxApplications * (usersManager.getUserLimit() / 100.0f) * usersManager.getUserLimitFactor()));
        maxAMResourcePerQueuePercent = conf.getMaximumApplicationMasterResourcePerQueuePercent(getQueuePath());
        priorityAcls = conf.getPriorityAcls(getQueuePath(), scheduler.getMaxClusterLevelAppPriority());
        if (!SchedulerUtils.checkQueueLabelExpression(this.accessibleLabels, this.defaultLabelExpression, null)) {
            throw new IOException("Invalid default label expression of " + " queue=" + getQueueName() + " doesn't have permission to access all labels " + "in default label expression. labelExpression of resource request=" + (this.defaultLabelExpression == null ? "" : this.defaultLabelExpression) + ". Queue labels=" + (getAccessibleNodeLabels() == null ? "" : StringUtils.join(getAccessibleNodeLabels().iterator(), ',')));
        }
        nodeLocalityDelay = conf.getNodeLocalityDelay();
        rackLocalityFullReset = conf.getRackLocalityFullReset();
        // re-init this since max allocation could have changed
        this.minimumAllocationFactor = Resources.ratio(resourceCalculator, Resources.subtract(maximumAllocation, minimumAllocation), maximumAllocation);
        StringBuilder aclsString = new StringBuilder();
        for (Map.Entry<AccessType, AccessControlList> e : acls.entrySet()) {
            aclsString.append(e.getKey() + ":" + e.getValue().getAclString());
        }
        StringBuilder labelStrBuilder = new StringBuilder();
        if (accessibleLabels != null) {
            for (String s : accessibleLabels) {
                labelStrBuilder.append(s);
                labelStrBuilder.append(",");
            }
        }
        defaultAppPriorityPerQueue = Priority.newInstance(conf.getDefaultApplicationPriorityConfPerQueue(getQueuePath()));
        LOG.info("Initializing " + queueName + "\n" + "capacity = " + queueCapacities.getCapacity() + " [= (float) configuredCapacity / 100 ]" + "\n" + "absoluteCapacity = " + queueCapacities.getAbsoluteCapacity() + " [= parentAbsoluteCapacity * capacity ]" + "\n" + "maxCapacity = " + queueCapacities.getMaximumCapacity() + " [= configuredMaxCapacity ]" + "\n" + "absoluteMaxCapacity = " + queueCapacities.getAbsoluteMaximumCapacity() + " [= 1.0 maximumCapacity undefined, " + "(parentAbsoluteMaxCapacity * maximumCapacity) / 100 otherwise ]" + "\n" + "userLimit = " + usersManager.getUserLimit() + " [= configuredUserLimit ]" + "\n" + "userLimitFactor = " + usersManager.getUserLimitFactor() + " [= configuredUserLimitFactor ]" + "\n" + "maxApplications = " + maxApplications + " [= configuredMaximumSystemApplicationsPerQueue or" + " (int)(configuredMaximumSystemApplications * absoluteCapacity)]" + "\n" + "maxApplicationsPerUser = " + maxApplicationsPerUser + " [= (int)(maxApplications * (userLimit / 100.0f) * " + "userLimitFactor) ]" + "\n" + "usedCapacity = " + queueCapacities.getUsedCapacity() + " [= usedResourcesMemory / " + "(clusterResourceMemory * absoluteCapacity)]" + "\n" + "absoluteUsedCapacity = " + absoluteUsedCapacity + " [= usedResourcesMemory / clusterResourceMemory]" + "\n" + "maxAMResourcePerQueuePercent = " + maxAMResourcePerQueuePercent + " [= configuredMaximumAMResourcePercent ]" + "\n" + "minimumAllocationFactor = " + minimumAllocationFactor + " [= (float)(maximumAllocationMemory - minimumAllocationMemory) / " + "maximumAllocationMemory ]" + "\n" + "maximumAllocation = " + maximumAllocation + " [= configuredMaxAllocation ]" + "\n" + "numContainers = " + numContainers + " [= currentNumContainers ]" + "\n" + "state = " + getState() + " [= configuredState ]" + "\n" + "acls = " + aclsString + " [= configuredAcls ]" + "\n" + "nodeLocalityDelay = " + nodeLocalityDelay + "\n" + "labels=" + labelStrBuilder.toString() + "\n" + "reservationsContinueLooking = " + reservationsContinueLooking + "\n" + "preemptionDisabled = " + getPreemptionDisabled() + "\n" + "defaultAppPriorityPerQueue = " + defaultAppPriorityPerQueue + "\npriority = " + priority);
    } finally {
        writeLock.unlock();
    }
}
Also used : AccessControlList(org.apache.hadoop.security.authorize.AccessControlList) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) IOException(java.io.IOException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AccessType(org.apache.hadoop.yarn.security.AccessType)
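
A detail worth isolating from setupQueueConfigs above is how maxApplications and maxApplicationsPerUser are derived. The following is a standalone sketch of that arithmetic only; the literal values are hypothetical stand-ins for what CapacitySchedulerConfiguration and QueueCapacities would return:

public class QueueLimitMath {

    public static void main(String[] args) {
        // Hypothetical configuration values (illustrative assumptions,
        // not read from any real queue).
        int maxSystemApps = 10000;      // maximum system applications
        float absoluteCapacity = 0.25f; // this queue's absolute share of the cluster
        float userLimit = 100f;         // minimum-user-limit-percent
        float userLimitFactor = 1.5f;

        // When no per-queue or global per-queue cap is configured, the queue
        // cap is the system cap scaled by the queue's absolute capacity.
        int maxApplications = (int) (maxSystemApps * absoluteCapacity);

        // The per-user cap scales the queue cap by the user-limit settings,
        // but Math.min guarantees it never exceeds the queue cap itself.
        int maxApplicationsPerUser = Math.min(maxApplications,
                (int) (maxApplications * (userLimit / 100.0f) * userLimitFactor));

        System.out.println("maxApplications = " + maxApplications);               // 2500
        System.out.println("maxApplicationsPerUser = " + maxApplicationsPerUser); // 2500
    }
}

Note that with a userLimitFactor above 1 the scaled product (3750 here) exceeds maxApplications, so the Math.min clamp is what keeps a single user from being promised more concurrent applications than the queue itself allows.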

Aggregations

ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 52 uses
Resource (org.apache.hadoop.yarn.api.records.Resource): 48 uses
Test (org.junit.Test): 40 uses
FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 37 uses
FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 34 uses
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 32 uses
Priority (org.apache.hadoop.yarn.api.records.Priority): 31 uses
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 29 uses
ActiveUsersManager (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager): 20 uses
HashMap (java.util.HashMap): 10 uses
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 10 uses
ArrayList (java.util.ArrayList): 9 uses
ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 9 uses
RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp): 9 uses
AMState (org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState): 7 uses
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 6 uses
PlacementSet (org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet): 6 uses
SchedulerRequestKey (org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey): 6 uses
InOrder (org.mockito.InOrder): 5 uses
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 4 uses