Search in sources :

Example 6 with User

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User in project hadoop by apache.

the class TestLeafQueue method testDRFUserLimits.

/**
 * Verifies user-limit accounting on leaf queue B when the dominant resource
 * calculator is in use.
 *
 * <p>Two users each submit one application to the queue with differently
 * shaped resource requests. Both nodes are filled until an assignment either
 * yields no memory or produces a reservation. The test then checks that both
 * users received resources, that the queue's absolute used capacity exceeds
 * 0.95, and that the usage ratio reported by the users manager equals
 * user_0's vcore share plus user_1's memory share of the cluster. The same
 * ratio check is repeated after the cluster grows to three nodes.
 *
 * @throws Exception if queue setup or container assignment fails
 */
@Test
public void testDRFUserLimits() throws Exception {
    setUpWithDominantResourceCalculator();
    // Mock the queue
    LeafQueue b = stubLeafQueue((LeafQueue) queues.get(B));
    // unset maxCapacity
    b.setMaxCapacity(1.0f);
    // Users
    final String user0 = "user_0";
    final String user1 = "user_1";
    // Submit applications
    final ApplicationAttemptId appAttemptId0 = TestUtils.getMockApplicationAttemptId(0, 0);
    FiCaSchedulerApp app0 = new FiCaSchedulerApp(appAttemptId0, user0, b, b.getAbstractUsersManager(), spyRMContext);
    b.submitApplicationAttempt(app0, user0);
    final ApplicationAttemptId appAttemptId2 = TestUtils.getMockApplicationAttemptId(2, 0);
    FiCaSchedulerApp app2 = new FiCaSchedulerApp(appAttemptId2, user1, b, b.getAbstractUsersManager(), spyRMContext);
    b.submitApplicationAttempt(app2, user1);
    // Setup some nodes
    // NOTE(review): the trailing arguments presumably mean 8 GB of memory and
    // 100 vcores per node, matching clusterResource below - confirm against
    // TestUtils.getMockNode.
    String host0 = "127.0.0.1";
    FiCaSchedulerNode node0 = TestUtils.getMockNode(host0, DEFAULT_RACK, 0, 8 * GB, 100);
    String host1 = "127.0.0.2";
    FiCaSchedulerNode node1 = TestUtils.getMockNode(host1, DEFAULT_RACK, 0, 8 * GB, 100);
    // Lookup tables handed to applyCSAssignment below.
    Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node0.getNodeID(), node0, node1.getNodeID(), node1);
    Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(app0.getApplicationAttemptId(), app0, app2.getApplicationAttemptId(), app2);
    int numNodes = 2;
    Resource clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 100);
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
    when(csContext.getClusterResource()).thenReturn(clusterResource);
    // Setup resource-requests so that one application is memory dominant
    // and other application is vcores dominant
    Priority priority = TestUtils.createMockPriority(1);
    app0.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 1 * GB, 40, 10, true, priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
    app2.updateResourceRequests(Collections.singletonList(TestUtils.createResourceRequest(ResourceRequest.ANY, 2 * GB, 10, 10, true, priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
    // Start testing...
    // Set user-limit
    b.setUserLimit(50);
    b.setUserLimitFactor(2);
    User queueUser0 = b.getUser(user0);
    User queueUser1 = b.getUser(user1);
    assertEquals("There should be 2 active users!", 2, b.getAbstractUsersManager().getNumActiveUsers());
    // Fill both Nodes as far as we can: keep assigning on a node until an
    // assignment carries no memory or results in a reservation.
    CSAssignment assign;
    do {
        assign = b.assignContainers(clusterResource, node0, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
        LOG.info(assign.toString());
        applyCSAssignment(clusterResource, assign, b, nodes, apps);
    } while (assign.getResource().getMemorySize() > 0 && assign.getAssignmentInformation().getNumReservations() == 0);
    do {
        assign = b.assignContainers(clusterResource, node1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
        applyCSAssignment(clusterResource, assign, b, nodes, apps);
    } while (assign.getResource().getMemorySize() > 0 && assign.getAssignmentInformation().getNumReservations() == 0);
    assertTrue("Verify user_0 got resources ", queueUser0.getUsed().getMemorySize() > 0);
    assertTrue("Verify user_1 got resources ", queueUser1.getUsed().getMemorySize() > 0);
    assertTrue("Expected AbsoluteUsedCapacity > 0.95, got: " + b.getAbsoluteUsedCapacity(), b.getAbsoluteUsedCapacity() > 0.95);
    // Verify consumedRatio is based on dominant resources: user_0 contributes
    // its vcore share of the cluster, user_1 its memory share.
    float expectedRatio = queueUser0.getUsed().getVirtualCores() / (numNodes * 100.0f) + queueUser1.getUsed().getMemorySize() / (numNodes * 8.0f * GB);
    assertEquals(expectedRatio, b.getUsersManager().getUsageRatio(""), 0.001);
    // Add another node and make sure consumedRatio is adjusted
    // accordingly.
    numNodes = 3;
    clusterResource = Resources.createResource(numNodes * (8 * GB), numNodes * 100);
    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
    root.updateClusterResource(clusterResource, new ResourceLimits(clusterResource));
    expectedRatio = queueUser0.getUsed().getVirtualCores() / (numNodes * 100.0f) + queueUser1.getUsed().getMemorySize() / (numNodes * 8.0f * GB);
    assertEquals(expectedRatio, b.getUsersManager().getUsageRatio(""), 0.001);
}
Also used : User(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) Priority(org.apache.hadoop.yarn.api.records.Priority) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) NodeId(org.apache.hadoop.yarn.api.records.NodeId) Test(org.junit.Test)

Example 7 with User

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User in project hadoop by apache.

the class LeafQueue method removeApplicationAttempt.

/**
 * Removes an application attempt from this queue's bookkeeping while holding
 * the write lock.
 *
 * <p>The application is removed from the active ordering policy; if it was
 * not there it is removed from the pending ordering policy instead. For an
 * application that was active, its AM resource for the AM's node partition
 * is subtracted from the queue usage, the user's usage and the metrics. The
 * attempt is then dropped from the attempt map, the user is told the
 * application finished, the user is removed once it has no applications
 * left, and activation of further applications is attempted.
 *
 * @param application the application attempt being removed
 * @param userName name of the user that owns the application
 */
private void removeApplicationAttempt(FiCaSchedulerApp application, String userName) {
    // Acquire the lock before entering the try block: if lock() itself failed
    // inside the try, the finally clause would call unlock() on a lock this
    // thread never acquired.
    writeLock.lock();
    try {
        // TODO, should use getUser, use this method just to avoid UT failure
        // which is caused by wrong invoking order, will fix UT separately
        User user = usersManager.getUserAndAddIfAbsent(userName);
        String partitionName = application.getAppAMNodePartitionName();
        boolean wasActive = orderingPolicy.removeSchedulableEntity(application);
        if (!wasActive) {
            // Not in the active policy, so it can only be tracked as pending.
            pendingOrderingPolicy.removeSchedulableEntity(application);
        } else {
            // An active application had its AM resource charged; give it back
            // on the queue, the user and the metrics.
            queueUsage.decAMUsed(partitionName, application.getAMResource(partitionName));
            user.getResourceUsage().decAMUsed(partitionName, application.getAMResource(partitionName));
            metrics.decAMUsed(application.getUser(), application.getAMResource(partitionName));
        }
        applicationAttemptMap.remove(application.getApplicationAttemptId());
        user.finishApplication(wasActive);
        if (user.getTotalApplications() == 0) {
            usersManager.removeUser(application.getUser());
        }
        // Check if we can activate more applications
        activateApplications();
        LOG.info("Application removed -" + " appId: " + application.getApplicationId() + " user: " + application.getUser() + " queue: " + getQueueName() + " #user-pending-applications: " + user.getPendingApplications() + " #user-active-applications: " + user.getActiveApplications() + " #queue-pending-applications: " + getNumPendingApplications() + " #queue-active-applications: " + getNumActiveApplications());
    } finally {
        writeLock.unlock();
    }
}
Also used : User(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User)

Example 8 with User

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User in project hadoop by apache.

the class LeafQueue method canAssignToUser.

/**
 * Decides whether the given user may be assigned more resources in the given
 * node partition without exceeding {@code limit}.
 *
 * <p>The amount needed to unreserve on {@code currentResourceLimits} is first
 * reset to {@code Resources.none()}. If the user's usage in the partition is
 * not greater than the limit, the method returns {@code true}. Otherwise,
 * when reservations-continue-looking is enabled and the partition is the
 * default (no-label) partition, the method checks whether usage minus the
 * application's current reservation would fit within the limit; if so it
 * records how much must be unreserved and returns {@code true}.
 *
 * @param clusterResource total cluster resource, used for comparisons
 * @param userName name of the user to check
 * @param limit the user's resource limit
 * @param application the application requesting resources
 * @param nodePartition the node partition being scheduled
 * @param currentResourceLimits limits object that receives the amount that
 *        must be unreserved, if any
 * @return {@code true} if the user can be assigned more resources (possibly
 *         after unreserving), {@code false} if the user would exceed the
 *         limit or is not known to this queue
 */
@Private
protected boolean canAssignToUser(Resource clusterResource, String userName, Resource limit, FiCaSchedulerApp application, String nodePartition, ResourceLimits currentResourceLimits) {
    // Acquire the lock before entering the try block: if lock() itself failed
    // inside the try, the finally clause would call unlock() on a lock this
    // thread never acquired.
    readLock.lock();
    try {
        User user = getUser(userName);
        currentResourceLimits.setAmountNeededUnreserve(Resources.none());
        // getUser may return null (getNumPendingApplications guards against
        // the same case); an unknown user cannot be assigned anything.
        if (user == null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("User " + userName + " has been removed!");
            }
            return false;
        }
        // The current request itself is not added to the usage here; note that
        // this is a > check, not a >= check, so usage exactly at the limit
        // still passes.
        if (Resources.greaterThan(resourceCalculator, clusterResource, user.getUsed(nodePartition), limit)) {
            // When reservations-continue-looking is enabled on the default
            // partition, see whether giving up the application's reserved
            // containers would bring the user back within the limit.
            if (this.reservationsContinueLooking && nodePartition.equals(CommonNodeLabelsManager.NO_LABEL)) {
                if (Resources.lessThanOrEqual(resourceCalculator, clusterResource, Resources.subtract(user.getUsed(), application.getCurrentReservation()), limit)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("User " + userName + " in queue " + getQueueName() + " will exceed limit based on reservations - " + " consumed: " + user.getUsed() + " reserved: " + application.getCurrentReservation() + " limit: " + limit);
                    }
                    Resource amountNeededToUnreserve = Resources.subtract(user.getUsed(nodePartition), limit);
                    // we can only acquire a new container if we unreserve first to
                    // respect user-limit
                    currentResourceLimits.setAmountNeededUnreserve(amountNeededToUnreserve);
                    return true;
                }
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("User " + userName + " in queue " + getQueueName() + " will exceed limit - " + " consumed: " + user.getUsed(nodePartition) + " limit: " + limit);
            }
            return false;
        }
        return true;
    } finally {
        readLock.unlock();
    }
}
Also used : User(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User) Resource(org.apache.hadoop.yarn.api.records.Resource) Private(org.apache.hadoop.classification.InterfaceAudience.Private)

Example 9 with User

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User in project hadoop by apache.

the class LeafQueue method validateSubmitApplication.

/**
 * Checks whether a new application may be submitted to this queue and then
 * delegates the same check to the parent queue.
 *
 * <p>Under the write lock the submission is rejected when the queue state is
 * not {@code RUNNING}, when the queue already holds its maximum number of
 * applications, or when the user already holds the per-user maximum. The
 * parent queue is consulted after the lock has been released.
 *
 * @param applicationId id of the application being submitted
 * @param userName name of the submitting user
 * @param queue queue name, passed through to the parent queue's validation
 * @throws AccessControlException if this queue or the parent queue rejects
 *         the submission
 */
public void validateSubmitApplication(ApplicationId applicationId, String userName, String queue) throws AccessControlException {
    // Acquire the lock before entering the try block: if lock() itself failed
    // inside the try, the finally clause would call unlock() on a lock this
    // thread never acquired.
    writeLock.lock();
    try {
        // Check if the queue is accepting jobs
        if (getState() != QueueState.RUNNING) {
            String msg = "Queue " + getQueuePath() + " is STOPPED. Cannot accept submission of application: " + applicationId;
            LOG.info(msg);
            throw new AccessControlException(msg);
        }
        // Check submission limits for queues
        if (getNumApplications() >= getMaxApplications()) {
            String msg = "Queue " + getQueuePath() + " already has " + getNumApplications() + " applications," + " cannot accept submission of application: " + applicationId;
            LOG.info(msg);
            throw new AccessControlException(msg);
        }
        // Check submission limits for the user on this queue
        User user = usersManager.getUserAndAddIfAbsent(userName);
        if (user.getTotalApplications() >= getMaxApplicationsPerUser()) {
            String msg = "Queue " + getQueuePath() + " already has " + user.getTotalApplications() + " applications from user " + userName + " cannot accept submission of application: " + applicationId;
            LOG.info(msg);
            throw new AccessControlException(msg);
        }
    } finally {
        writeLock.unlock();
    }
    // The parent check runs outside this queue's write lock; a rejection is
    // logged here and rethrown unchanged.
    try {
        getParent().validateSubmitApplication(applicationId, userName, queue);
    } catch (AccessControlException ace) {
        LOG.info("Failed to submit application to parent-queue: " + getParent().getQueuePath(), ace);
        throw ace;
    }
}
Also used : User(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User) AccessControlException(org.apache.hadoop.security.AccessControlException)

Example 10 with User

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User in project hadoop by apache.

the class LeafQueue method getNumPendingApplications.

/**
 * Returns the number of pending applications of the given user in this
 * queue, read under the read lock.
 *
 * @param user name of the user to look up
 * @return the user's pending application count, or {@code 0} if the user is
 *         not known to this queue
 */
@Private
public int getNumPendingApplications(String user) {
    // Acquire the lock before entering the try block: if lock() itself failed
    // inside the try, the finally clause would call unlock() on a lock this
    // thread never acquired.
    readLock.lock();
    try {
        User u = getUser(user);
        if (null == u) {
            return 0;
        }
        return u.getPendingApplications();
    } finally {
        readLock.unlock();
    }
}
Also used : User(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User) Private(org.apache.hadoop.classification.InterfaceAudience.Private)

Aggregations

User (org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UsersManager.User): 12, Resource (org.apache.hadoop.yarn.api.records.Resource): 5, Private (org.apache.hadoop.classification.InterfaceAudience.Private): 3, FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 3, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2, RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 2, AccessControlException (org.apache.hadoop.security.AccessControlException): 1, ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 1, ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 1, NodeId (org.apache.hadoop.yarn.api.records.NodeId): 1, Priority (org.apache.hadoop.yarn.api.records.Priority): 1, ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 1, FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 1, Lock (org.apache.hadoop.yarn.server.utils.Lock): 1, NoLock (org.apache.hadoop.yarn.server.utils.Lock.NoLock): 1, Test (org.junit.Test): 1