Search in sources :

Example 1 with Queue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue in project hadoop by apache.

the class TestFairSchedulerPlanFollower method assertReservationQueueDoesNotExist.

/**
 * Asserts that looking up the reservation queue by the string form of the
 * given reservation id yields {@code null}.
 *
 * @param r the reservation id whose queue is looked up
 */
@Override
protected void assertReservationQueueDoesNotExist(ReservationId r) {
    assertNull(getReservationQueue(r.toString()));
}
Also used : Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue) FSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue) FSLeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue)

Example 2 with Queue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue in project hadoop by apache.

the class TestLeafQueue method testApplicationQueuePercent.

/**
 * Checks the queue and cluster usage percentages reported by an application
 * attempt when the same resource usage is charged against three queues of
 * decreasing capacity.
 */
@Test
public void testApplicationQueuePercent() throws Exception {
    // The mocked scheduler reports a cluster resource of (10 * 1024, 10).
    final Resource clusterResource = Resource.newInstance(10 * 1024, 10);
    final CapacityScheduler mockScheduler = mock(CapacityScheduler.class);
    when(mockScheduler.getClusterResource()).thenReturn(clusterResource);
    when(mockScheduler.getResourceCalculator()).thenReturn(new DefaultResourceCalculator());

    final ApplicationAttemptId attemptId = createAppAttemptId(0, 0);

    // RM context wired to the mocked scheduler and node-label manager.
    final RMContext context = mock(RMContext.class);
    when(context.getEpoch()).thenReturn(3L);
    when(context.getScheduler()).thenReturn(mockScheduler);
    when(context.getRMApps()).thenReturn(new ConcurrentHashMap<ApplicationId, RMApp>());
    final RMNodeLabelsManager labelsManager = mock(RMNodeLabelsManager.class);
    when(labelsManager.getResourceByLabel(any(), any())).thenReturn(clusterResource);
    when(context.getNodeLabelManager()).thenReturn(labelsManager);

    // Case 1: queue "test" takes 100% of the cluster, so both its capacity
    // and its absolute capacity are 1.0f.
    final Queue fullQueue = createQueue("test", null, 1.0f, 1.0f);
    final String userName = "user1";
    final FiCaSchedulerApp appInFullQueue = new FiCaSchedulerApp(attemptId, userName, fullQueue, fullQueue.getAbstractUsersManager(), context);
    // The usage charged to every application attempt in this test.
    final Resource usage = Resource.newInstance(1536, 2);
    appInFullQueue.getAppAttemptResourceUsage().incUsed(usage);
    // 1536 of 10240 is 15% of the queue and 15% of the cluster.
    assertEquals(15.0f, appInFullQueue.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
    assertEquals(15.0f, appInFullQueue.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

    // Case 2: queue "test2" is a child of root with 50% of root's capacity;
    // being a direct child of root, its absolute capacity is 50% as well.
    final Queue halfQueue = createQueue("test2", null, 0.5f, 0.5f);
    final FiCaSchedulerApp appInHalfQueue = new FiCaSchedulerApp(attemptId, userName, halfQueue, halfQueue.getAbstractUsersManager(), context);
    appInHalfQueue.getAppAttemptResourceUsage().incUsed(usage);
    // 1536 is 30% of "test2" while still being 15% of the cluster.
    assertEquals(30.0f, appInHalfQueue.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
    assertEquals(15.0f, appInHalfQueue.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

    // Case 3: queue "test2.1" is 50% of "test2", which is 50% of the cluster,
    // giving a capacity of 50% and an absolute capacity of 25%.
    final AbstractCSQueue quarterQueue = createQueue("test2.1", halfQueue, 0.5f, 0.25f);
    final FiCaSchedulerApp appInQuarterQueue = new FiCaSchedulerApp(attemptId, userName, quarterQueue, quarterQueue.getAbstractUsersManager(), context);
    appInQuarterQueue.getAppAttemptResourceUsage().incUsed(usage);
    // 1536 is 60% of "test2.1" while still being 15% of the cluster.
    assertEquals(60.0f, appInQuarterQueue.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
    assertEquals(15.0f, appInQuarterQueue.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
}
Also used : RMContext(org.apache.hadoop.yarn.server.resourcemanager.RMContext) RMApp(org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp) Resource(org.apache.hadoop.yarn.api.records.Resource) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) DefaultResourceCalculator(org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue) RMNodeLabelsManager(org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager) Test(org.junit.Test)

Example 3 with Queue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue in project hadoop by apache.

the class AbstractSchedulerPlanFollower method synchronizePlan.

/**
 * Synchronizes the reservation queues under the plan's queue with the
 * reservations the plan reports for the current time, rounded up to the next
 * multiple of the plan step.
 *
 * <p>The method returns without doing anything if the plan queue cannot be
 * found. Otherwise it creates the default reservation queue, cleans up queues
 * whose reservation is no longer current, adds queues for new reservations,
 * sets the entitlement of every current reservation queue, hands the remaining
 * capacity to the default reservation queue and finally asks the plan to
 * archive completed reservations. Failures in the individual steps are logged
 * and do not abort the iteration.
 *
 * @param plan the plan to synchronize the scheduler queues with
 * @param shouldReplan consulted only when
 *     {@code arePlanResourcesLessThanReservations} reports true: if
 *     {@code true} the plan's replanner is invoked, otherwise each
 *     reservation's resources are recomputed through
 *     {@code calculateReservationToPlanProportion} before its target capacity
 *     is derived
 */
@Override
public synchronized void synchronizePlan(Plan plan, boolean shouldReplan) {
    String planQueueName = plan.getQueueName();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running plan follower edit policy for plan: {}", planQueueName);
    }
    // align with plan step: round "now" up to the next multiple of the step
    long step = plan.getStep();
    long now = clock.getTime();
    if (now % step != 0) {
        now += step - (now % step);
    }
    Queue planQueue = getPlanQueue(planQueueName);
    if (planQueue == null) {
        return;
    }
    // first we publish to the plan the current availability of resources
    Resource clusterResources = scheduler.getClusterResource();
    Resource planResources = getPlanResources(plan, planQueue, clusterResources);
    Set<ReservationAllocation> currentReservations = plan.getReservationsAtTime(now);
    Set<String> curReservationNames = new HashSet<String>();
    Resource reservedResources = Resource.newInstance(0, 0);
    int numRes = getReservedResources(now, currentReservations, curReservationNames, reservedResources);
    // create the default reservation queue if it doesn't exist
    String defReservationId = getReservationIdFromQueueName(planQueueName) + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
    String defReservationQueue = getReservationQueueName(planQueueName, defReservationId);
    createDefaultReservationQueue(planQueueName, planQueue, defReservationId);
    curReservationNames.add(defReservationId);
    // if the resources dedicated to this plan have shrunk, invoke the replanner
    boolean shouldResize = false;
    if (arePlanResourcesLessThanReservations(plan.getResourceCalculator(), clusterResources, planResources, reservedResources)) {
        if (shouldReplan) {
            try {
                plan.getReplanner().plan(plan, null);
            } catch (PlanningException e) {
                LOG.warn("Exception while trying to replan: {}", planQueueName, e);
            }
        } else {
            shouldResize = true;
        }
    }
    // identify the reservations that have expired and new reservations that
    // have to be activated
    List<? extends Queue> resQueues = getChildReservationQueues(planQueue);
    Set<String> expired = new HashSet<String>();
    for (Queue resQueue : resQueues) {
        String resQueueName = resQueue.getQueueName();
        String reservationId = getReservationIdFromQueueName(resQueueName);
        if (curReservationNames.contains(reservationId)) {
            // the reservation already has a queue, so no new reservation queue
            // is needed; after this loop curReservationNames only holds the
            // reservations that still need one
            curReservationNames.remove(reservationId);
        } else {
            // the reservation has terminated, mark for cleanup
            expired.add(reservationId);
        }
    }
    // garbage collect expired reservations
    cleanupExpiredQueues(planQueueName, plan.getMoveOnExpiry(), expired, defReservationQueue);
    // Add new reservations and update existing ones
    float totalAssignedCapacity = 0f;
    if (currentReservations != null) {
        // first release all excess capacity in default queue
        try {
            setQueueEntitlement(planQueueName, defReservationQueue, 0f, 1.0f);
        } catch (YarnException e) {
            LOG.warn("Exception while trying to release default queue capacity for plan: {}", planQueueName, e);
        }
        // sort allocations from the one giving up the most resources to the
        // one asking for the most, to avoid order-of-operation errors that
        // temporarily violate the 100% capacity bound
        List<ReservationAllocation> sortedAllocations = sortByDelta(new ArrayList<ReservationAllocation>(currentReservations), now, plan);
        for (ReservationAllocation res : sortedAllocations) {
            String currResId = res.getReservationId().toString();
            if (curReservationNames.contains(currResId)) {
                addReservationQueue(planQueueName, planQueue, currResId);
            }
            Resource capToAssign = res.getResourcesAtTime(now);
            float targetCapacity = 0f;
            if (planResources.getMemorySize() > 0 && planResources.getVirtualCores() > 0) {
                if (shouldResize) {
                    capToAssign = calculateReservationToPlanProportion(plan.getResourceCalculator(), planResources, reservedResources, capToAssign);
                }
                targetCapacity = calculateReservationToPlanRatio(plan.getResourceCalculator(), clusterResources, planResources, capToAssign);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Assigning capacity of {} to queue {} with target capacity {}", capToAssign, currResId, targetCapacity);
            }
            // set maxCapacity to 100% unless the job requires gang, in which
            // case we stick to capacity (as running early/before is likely a
            // waste of resources)
            float maxCapacity = 1.0f;
            if (res.containsGangs()) {
                maxCapacity = targetCapacity;
            }
            try {
                setQueueEntitlement(planQueueName, currResId, targetCapacity, maxCapacity);
            } catch (YarnException e) {
                // one placeholder per argument so that both the reservation id
                // and the plan name appear in the message
                LOG.warn("Exception while trying to size reservation {} for plan: {}", currResId, planQueueName, e);
            }
            totalAssignedCapacity += targetCapacity;
        }
    }
    // compute the default queue capacity
    float defQCap = 1.0f - totalAssignedCapacity;
    if (LOG.isDebugEnabled()) {
        LOG.debug("PlanFollowerEditPolicyTask: total Plan Capacity: {} " + "currReservation: {} default-queue capacity: {}", planResources, numRes, defQCap);
    }
    // set the default queue to eat-up all remaining capacity
    try {
        setQueueEntitlement(planQueueName, defReservationQueue, defQCap, 1.0f);
    } catch (YarnException e) {
        LOG.warn("Exception while trying to reclaim default queue capacity for plan: {}", planQueueName, e);
    }
    // garbage collect finished reservations from plan
    try {
        plan.archiveCompletedReservations(now);
    } catch (PlanningException e) {
        LOG.error("Exception in archiving completed reservations: ", e);
    }
    LOG.info("Finished iteration of plan follower edit policy for plan: {}", planQueueName);
// Extension: update plan with app states,
// useful to support smart replanning
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException) Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue) HashSet(java.util.HashSet)

Example 4 with Queue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue in project hadoop by apache.

the class TestFairSchedulerPlanFollower method assertReservationQueueExists.

/**
 * Asserts that looking up the reservation queue by the string form of the
 * given reservation id yields a non-null queue.
 *
 * @param r the reservation id whose queue is looked up
 */
@Override
protected void assertReservationQueueExists(ReservationId r) {
    assertNotNull(getReservationQueue(r.toString()));
}
Also used : Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue) FSQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue) FSLeafQueue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue)

Example 5 with Queue

use of org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue in project hadoop by apache.

the class TestSchedulerPlanFollowerBase method testPlanFollower.

/**
 * Drives a plan follower through a series of mocked clock times (0, 3, 10,
 * 11, 12 and 16) and, after each run, checks which reservation queues exist
 * and how many applications the default queue and r1's queue hold.
 *
 * @param isMove forwarded to the {@code InMemoryPlan} constructor; when
 *     {@code true} the test expects the application submitted to r1's queue
 *     to be counted in the default queue once the clock reaches 10, otherwise
 *     it expects the default queue to stay empty and removes the attempt with
 *     state {@code KILLED}
 */
protected void testPlanFollower(boolean isMove) throws PlanningException, InterruptedException, AccessControlException {
    // Initialize plan based on move flag
    plan = new InMemoryPlan(scheduler.getRootQueueMetrics(), policy, mAgent, scheduler.getClusterResource(), 1L, res, scheduler.getMinimumResourceCapability(), maxAlloc, "dedicated", null, isMove, context);
    // add a few reservations to the plan
    long ts = System.currentTimeMillis();
    // r1: allocation generated from f1 with (0, 0 + f1.length) -- presumably
    // its start and end times; confirm against InMemoryReservationAllocation
    ReservationId r1 = ReservationId.newInstance(ts, 1);
    int[] f1 = { 10, 10, 10, 10, 10 };
    ReservationDefinition rDef = ReservationSystemTestUtil.createSimpleReservationDefinition(0, 0 + f1.length + 1, f1.length);
    assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r1, rDef, "u3", "dedicated", 0, 0 + f1.length, ReservationSystemTestUtil.generateAllocation(0L, 1L, f1), res, minAlloc), false));
    // r2: same f1 allocation, with (3, 3 + f1.length)
    ReservationId r2 = ReservationId.newInstance(ts, 2);
    assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r2, rDef, "u3", "dedicated", 3, 3 + f1.length, ReservationSystemTestUtil.generateAllocation(3L, 1L, f1), res, minAlloc), false));
    // r3: allocation generated from f2, with (10, 10 + f2.length)
    ReservationId r3 = ReservationId.newInstance(ts, 3);
    int[] f2 = { 0, 10, 20, 10, 0 };
    assertTrue(plan.toString(), plan.addReservation(new InMemoryReservationAllocation(r3, rDef, "u4", "dedicated", 10, 10 + f2.length, ReservationSystemTestUtil.generateAllocation(10L, 1L, f2), res, minAlloc), false));
    // first run with the mocked clock at 0: r1's queue must exist afterwards
    AbstractSchedulerPlanFollower planFollower = createPlanFollower();
    when(mClock.getTime()).thenReturn(0L);
    planFollower.run();
    Queue q = getReservationQueue(r1.toString());
    assertReservationQueueExists(r1);
    // submit an app to r1
    String user_0 = "test-user";
    ApplicationId appId = ApplicationId.newInstance(0, 1);
    ApplicationAttemptId appAttemptId_0 = ApplicationAttemptId.newInstance(appId, 0);
    AppAddedSchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appId, q.getQueueName(), user_0);
    scheduler.handle(addAppEvent);
    AppAttemptAddedSchedulerEvent appAttemptAddedEvent = new AppAttemptAddedSchedulerEvent(appAttemptId_0, false);
    scheduler.handle(appAttemptAddedEvent);
    // initial default reservation queue should have no apps
    Queue defQ = getDefaultQueue();
    Assert.assertEquals(0, getNumberOfApplications(defQ));
    // the two numeric arguments are presumably the expected capacity and
    // maximum capacity of the queue -- confirm against the overload
    assertReservationQueueExists(r1, 0.1, 0.1);
    Assert.assertEquals(1, getNumberOfApplications(q));
    assertReservationQueueDoesNotExist(r2);
    assertReservationQueueDoesNotExist(r3);
    // clock at 3: r2's queue is now expected alongside r1's, r3's still absent
    when(mClock.getTime()).thenReturn(3L);
    planFollower.run();
    Assert.assertEquals(0, getNumberOfApplications(defQ));
    assertReservationQueueExists(r1, 0.1, 0.1);
    Assert.assertEquals(1, getNumberOfApplications(q));
    assertReservationQueueExists(r2, 0.1, 0.1);
    assertReservationQueueDoesNotExist(r3);
    // clock at 10: r2's queue is expected to be gone and r3's to exist; what
    // is expected of r1's queue and its app depends on isMove
    when(mClock.getTime()).thenReturn(10L);
    planFollower.run();
    q = getReservationQueue(r1.toString());
    if (isMove) {
        // app should have been moved to default reservation queue
        Assert.assertEquals(1, getNumberOfApplications(defQ));
        assertNull(q);
    } else {
        // app should be killed
        Assert.assertEquals(0, getNumberOfApplications(defQ));
        // r1's queue is still expected to exist here; the attempt is then
        // removed from the scheduler with state KILLED
        assertNotNull(q);
        AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent = new AppAttemptRemovedSchedulerEvent(appAttemptId_0, RMAppAttemptState.KILLED, false);
        scheduler.handle(appAttemptRemovedEvent);
    }
    assertReservationQueueDoesNotExist(r2);
    assertReservationQueueExists(r3, 0, 1.0);
    // clock at 11: r1's queue must be gone in both modes
    when(mClock.getTime()).thenReturn(11L);
    planFollower.run();
    if (isMove) {
        // app should have been moved to default reservation queue
        Assert.assertEquals(1, getNumberOfApplications(defQ));
    } else {
        // app should be killed
        Assert.assertEquals(0, getNumberOfApplications(defQ));
    }
    assertReservationQueueDoesNotExist(r1);
    assertReservationQueueDoesNotExist(r2);
    assertReservationQueueExists(r3, 0.1, 0.1);
    // clock at 12: only r3's queue remains, now checked against 0.2
    when(mClock.getTime()).thenReturn(12L);
    planFollower.run();
    assertReservationQueueDoesNotExist(r1);
    assertReservationQueueDoesNotExist(r2);
    assertReservationQueueExists(r3, 0.2, 0.2);
    // clock at 16: no reservation queue is expected to remain; finish by
    // verifying the default queue through verifyCapacity
    when(mClock.getTime()).thenReturn(16L);
    planFollower.run();
    assertReservationQueueDoesNotExist(r1);
    assertReservationQueueDoesNotExist(r2);
    assertReservationQueueDoesNotExist(r3);
    verifyCapacity(defQ);
}
Also used : AppAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent) ReservationDefinition(org.apache.hadoop.yarn.api.records.ReservationDefinition) ApplicationAttemptId(org.apache.hadoop.yarn.api.records.ApplicationAttemptId) ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) AppAttemptRemovedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent) AppAttemptAddedSchedulerEvent(org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId) Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue)

Aggregations

Queue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue)5 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)2 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)2 Resource (org.apache.hadoop.yarn.api.records.Resource)2 FSLeafQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue)2 FSQueue (org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue)2 HashSet (java.util.HashSet)1 ReservationDefinition (org.apache.hadoop.yarn.api.records.ReservationDefinition)1 ReservationId (org.apache.hadoop.yarn.api.records.ReservationId)1 YarnException (org.apache.hadoop.yarn.exceptions.YarnException)1 RMContext (org.apache.hadoop.yarn.server.resourcemanager.RMContext)1 RMNodeLabelsManager (org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager)1 PlanningException (org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException)1 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)1 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)1 AppAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent)1 AppAttemptAddedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent)1 AppAttemptRemovedSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent)1 DefaultResourceCalculator (org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator)1 Test (org.junit.Test)1