Example 1 with RLESparseResourceAllocation

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.

the class StageAllocatorGreedy method computeStageAllocation.

/**
 * Greedily places the gangs of one reservation stage, scanning backward in
 * time from {@code stageDeadline} toward {@code stageEarliestStart} in
 * increments of the plan step.
 *
 * NOTE(review): this listing appears to have lost its closing braces (and
 * possibly other lines) during extraction; the block structure described in
 * the comments below is inferred from indentation -- confirm against the
 * upstream file before relying on it.
 *
 * @param plan source of the total capacity, the step, the resource
 *          calculator and the resources available over time
 * @param planLoads not referenced in the visible body of this method
 * @param planModifications tentative allocations; subtracted from the
 *          available resources up front, extended with every interval placed
 *          here, and rolled back if the stage cannot be fully placed
 * @param rr the stage request (capability, concurrency, number of containers
 *          and duration are read from it)
 * @param stageEarliestStart lower bound of the time window to allocate in
 * @param stageDeadline upper bound of the time window to allocate in
 * @param user forwarded to {@code plan.getAvailableResourceOverTime}
 * @param oldId forwarded to {@code plan.getAvailableResourceOverTime}
 * @return the placed intervals with their resources, or {@code null} if not
 *         every gang could be placed
 * @throws PlanningException declared by the signature; no explicit throw is
 *           visible in this body
 */
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) throws PlanningException {
    Resource totalCapacity = plan.getTotalCapacity();
    Map<ReservationInterval, Resource> allocationRequests = new HashMap<ReservationInterval, Resource>();
    // compute the gang as a resource and get the duration
    Resource gang = Resources.multiply(rr.getCapability(), rr.getConcurrency());
    long dur = rr.getDuration();
    long step = plan.getStep();
    // ceil the duration to the next multiple of the plan step
    if (dur % step != 0) {
        dur += (step - (dur % step));
    // we know for sure that this division has no remainder (part of the
    // contract with the user, validated before)
    int gangsToPlace = rr.getNumContainers() / rr.getConcurrency();
    int maxGang = 0;
    // resources available in [stageEarliestStart, stageDeadline), minus the
    // tentative allocations already recorded in planModifications
    // NOTE(review): netAvailable is computed once here and is not refreshed
    // after planModifications.addInterval(...) inside the loop below -- verify
    // that overlapping windows cannot double-count capacity.
    RLESparseResourceAllocation netAvailable = plan.getAvailableResourceOverTime(user, oldId, stageEarliestStart, stageDeadline);
    netAvailable = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), netAvailable, planModifications, RLEOperator.subtract, stageEarliestStart, stageDeadline);
    // loop trying to place gangs until none are left, or until the remaining
    // window is shorter than dur (i.e. we are considering an invalid range of
    // times)
    while (gangsToPlace > 0 && stageDeadline - dur >= stageEarliestStart) {
        // as we run along we remember how many gangs we can fit, and what
        // was the most constraining moment in time (we will restart just
        // after that to place the next batch)
        maxGang = gangsToPlace;
        long minPoint = stageDeadline;
        int curMaxGang = maxGang;
        // move backward, one plan step at a time, over [stageDeadline - dur,
        // stageDeadline); stop early once no gang fits at all
        for (long t = stageDeadline - plan.getStep(); t >= stageDeadline - dur && maxGang > 0; t = t - plan.getStep()) {
            Resource netAvailableRes = netAvailable.getCapacityAtTime(t);
            // compute maximum number of gangs we could fit
            curMaxGang = (int) Math.floor(Resources.divide(plan.getResourceCalculator(), totalCapacity, netAvailableRes, gang));
            // pick the minimum between available resources in this instant, and how
            // many gangs we have to place
            curMaxGang = Math.min(gangsToPlace, curMaxGang);
            // remember the minimum and where it occurred (useful for next
            // attempts); because of "<=" and the backward scan, ties resolve to
            // the earliest such instant
            if (curMaxGang <= maxGang) {
                maxGang = curMaxGang;
                minPoint = t;
        // if at least one gang fits over the whole window, record the allocation
        // and decrement gangsToPlace
        if (maxGang > 0) {
            gangsToPlace -= maxGang;
            ReservationInterval reservationInt = new ReservationInterval(stageDeadline - dur, stageDeadline);
            Resource reservationRes = Resources.multiply(rr.getCapability(), rr.getConcurrency() * maxGang);
            // remember occupied space (plan is read-only till we find a plausible
            // allocation for the entire request). This is needed since we might be
            // placing other ReservationRequest within the same
            // ReservationDefinition,
            // and we must avoid double-counting the available resources
            planModifications.addInterval(reservationInt, reservationRes);
            allocationRequests.put(reservationInt, reservationRes);
        // reset our new starting point (curDeadline) to the most constraining
        // point so far, we will look "left" of that to find more places where
        // to schedule gangs (for sure nothing on the "right" of this point can
        // fit a full gang.
        stageDeadline = minPoint;
    // if no gangs are left to place we succeed and return the allocation
    if (gangsToPlace == 0) {
        return allocationRequests;
    } else {
        // not every gang could be placed: undo the intervals this call added to
        // planModifications (the surviving comment fragment says this is needed
        // for ANY)
        for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocationRequests.entrySet()) {
            planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
        // and return null to signal failure in this allocation
        return null;
Also used : RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) HashMap(java.util.HashMap) Resource(org.apache.hadoop.yarn.api.records.Resource) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) Map(java.util.Map) HashMap(java.util.HashMap)

Example 2 with RLESparseResourceAllocation

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.

the class StageAllocatorGreedyRLE method computeStageAllocation.

/**
 * Greedily places the gangs of one reservation stage by iterating over the
 * run-length-encoded availability of the plan, either left-to-right or
 * right-to-left depending on {@code allocateLeft} (referenced here but not
 * declared inside this method).
 *
 * NOTE(review): this listing appears to have lost its closing braces, the
 * bodies of some {@code if} statements and part of at least one expression
 * during extraction; the affected lines are flagged below. Confirm against
 * the upstream file.
 *
 * @param plan source of the total capacity, the step, the resource
 *          calculator and the resources available over time
 * @param planLoads not referenced in the visible body of this method
 * @param planModifications tentative allocations; subtracted from the
 *          available resources up front, passed to {@code trackProgress},
 *          and rolled back if the stage cannot be fully placed
 * @param rr the stage request (capability, concurrency, number of containers
 *          and duration are read from it)
 * @param stageEarliestStart lower bound of the time window; advanced inside
 *          the loop when allocating left-to-right
 * @param stageDeadline upper bound of the time window; moved back inside the
 *          loop when allocating right-to-left
 * @param user forwarded to {@code plan.getAvailableResourceOverTime}
 * @param oldId forwarded to {@code plan.getAvailableResourceOverTime}
 * @return the placed intervals with their resources, or {@code null} if the
 *         window is too short or not every gang could be placed
 * @throws PlanningException declared by the signature; no explicit throw is
 *           visible in this body
 */
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) throws PlanningException {
    // abort early if the interval is not satisfiable
    if (stageEarliestStart + rr.getDuration() > stageDeadline) {
        return null;
    Map<ReservationInterval, Resource> allocationRequests = new HashMap<ReservationInterval, Resource>();
    Resource totalCapacity = plan.getTotalCapacity();
    // compute the gang as a resource and get the duration
    Resource sizeOfGang = Resources.multiply(rr.getCapability(), rr.getConcurrency());
    long dur = rr.getDuration();
    long step = plan.getStep();
    // ceil the duration to the next multiple of the plan step
    if (dur % step != 0) {
        dur += (step - (dur % step));
    // we know for sure that this division has no remainder (part of the
    // contract with the user, validated before)
    int gangsToPlace = rr.getNumContainers() / rr.getConcurrency();
    // get available resources from plan
    RLESparseResourceAllocation netRLERes = plan.getAvailableResourceOverTime(user, oldId, stageEarliestStart, stageDeadline);
    // remove plan modifications
    netRLERes = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), totalCapacity, netRLERes, planModifications, RLEOperator.subtract, stageEarliestStart, stageDeadline);
    // loop trying to place gangs until none are left, or until the remaining
    // window is shorter than dur (i.e. we are considering an invalid range of
    // times)
    while (gangsToPlace > 0 && stageEarliestStart + dur <= stageDeadline) {
        // as we run along we remember how many gangs we can fit, and what
        // was the most constraining moment in time (we will restart just
        // after that to place the next batch)
        int maxGang = gangsToPlace;
        // stays -1 if the inner loop below never records a constraining point
        long minPoint = -1;
        // focus our attention to a time-range under consideration
        NavigableMap<Long, Resource> partialMap = netRLERes.getRangeOverlapping(stageEarliestStart, stageDeadline).getCumulative();
        // revert the map for right-to-left allocation
        if (!allocateLeft) {
            partialMap = partialMap.descendingMap();
        Iterator<Entry<Long, Resource>> netIt = partialMap.entrySet().iterator();
        long oldT = stageDeadline;
        // walk the entries of the current range; together with the outside loop
        // this covers the whole interval
        while (maxGang > 0 && netIt.hasNext()) {
            long t;
            Resource curAvailRes;
            // NOTE(review): the right-hand side of this assignment is missing in
            // this listing -- presumably the next element of netIt; confirm.
            Entry<Long, Resource> e =;
            if (allocateLeft) {
                // never look before the start of the window
                t = Math.max(e.getKey(), stageEarliestStart);
                curAvailRes = e.getValue();
            } else {
                // right-to-left: evaluate the previously seen key and remember the
                // current one for the next iteration
                t = oldT;
                oldT = e.getKey();
                //attention: higher means lower, because we reversed the map direction
                // NOTE(review): NavigableMap.higherEntry returns null when there is
                // no such entry, which would make getValue() throw before the null
                // check on curAvailRes below is reached -- confirm this cannot occur.
                curAvailRes = partialMap.higherEntry(t).getValue();
            // check exit/skip conditions/
            // NOTE(review): the bodies of the next two if statements are missing in
            // this listing (presumably a skip and an exit of the loop); confirm.
            if (curAvailRes == null) {
                //skip undefined regions (should not happen beside borders)
            if (exitCondition(t, stageEarliestStart, stageDeadline, dur)) {
            // compute maximum number of gangs we could fit
            int curMaxGang = (int) Math.floor(Resources.divide(plan.getResourceCalculator(), totalCapacity, curAvailRes, sizeOfGang));
            // never count more gangs than are still left to place
            curMaxGang = Math.min(gangsToPlace, curMaxGang);
            // remember the minimum and where it occurred (useful for next
            // attempts); because of "<=", ties resolve to the latest visited entry
            if (curMaxGang <= maxGang) {
                maxGang = curMaxGang;
                minPoint = t;
        // update data structures that retain the progress made so far
        gangsToPlace = trackProgress(planModifications, rr, stageEarliestStart, stageDeadline, allocationRequests, dur, gangsToPlace, maxGang);
        // reset the next range of time-intervals to deal with
        if (allocateLeft) {
            // no entry past minPoint means we reached the end of this allocation:
            // advance by a full duration; otherwise advance to the next entry, but
            // by at most one duration
            if (partialMap.higherKey(minPoint) == null) {
                stageEarliestStart = stageEarliestStart + dur;
            } else {
                stageEarliestStart = Math.min(partialMap.higherKey(minPoint), stageEarliestStart + dur);
        } else {
            // same as above moving right-to-left
            if (partialMap.higherKey(minPoint) == null) {
                stageDeadline = stageDeadline - dur;
            } else {
                stageDeadline = Math.max(partialMap.higherKey(minPoint), stageDeadline - dur);
    // if no gangs are left to place we succeed and return the allocation
    if (gangsToPlace == 0) {
        return allocationRequests;
    } else {
        // not every gang could be placed: undo the intervals recorded in
        // allocationRequests from planModifications (the surviving comment
        // fragment says this is needed for ANY)
        for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocationRequests.entrySet()) {
            planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
        // and return null to signal failure in this allocation
        return null;
Also used : HashMap(java.util.HashMap) Resource(org.apache.hadoop.yarn.api.records.Resource) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) Entry(java.util.Map.Entry) RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) HashMap(java.util.HashMap) NavigableMap(java.util.NavigableMap) Map(java.util.Map)

Example 3 with RLESparseResourceAllocation

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.

the class IterativePlanner method computeJobAllocation.

/**
 * Computes the allocation of a whole reservation by allocating its stages
 * one at a time and accumulating the resulting intervals.
 *
 * NOTE(review): this listing appears to have lost its closing braces, the
 * bodies of some {@code if} statements and part of one assignment during
 * extraction; the affected lines are flagged below. {@code allocateLeft},
 * {@code jobType} and {@code step} are referenced but not declared inside
 * this method. Confirm against the upstream file.
 *
 * @param plan the plan the reservation is fitted into
 * @param reservationId identifier forwarded to {@code initialize} and to
 *          {@code computeStageAllocation}
 * @param reservation the definition whose arrival, deadline and stages are
 *          read
 * @param user forwarded to {@code computeStageAllocation}
 * @return the accumulated allocation of all stages that were placed
 * @throws PlanningException if a stage cannot be allocated, if the ORDER /
 *           ORDER_NO_GAP check below fails, or if the resulting allocation
 *           is empty
 */
public RLESparseResourceAllocation computeJobAllocation(Plan plan, ReservationId reservationId, ReservationDefinition reservation, String user) throws PlanningException {
    // Initialize
    initialize(plan, reservationId, reservation);
    // Create the allocations data structure
    RLESparseResourceAllocation allocations = new RLESparseResourceAllocation(plan.getResourceCalculator());
    StageProvider stageProvider = new StageProvider(allocateLeft, reservation);
    // Current stage
    ReservationRequest currentReservationStage;
    // Stage deadlines
    long stageDeadline = stepRoundDown(reservation.getDeadline(), step);
    // -1 marks "no successor/arrival computed yet"
    long successorStartingTime = -1;
    long predecessorEndTime = stepRoundDown(reservation.getArrival(), step);
    long stageArrivalTime = -1;
    // Iterate the stages in the order given by the stage provider
    // (presumably reversed when allocating right-to-left -- confirm)
    while (stageProvider.hasNext()) {
        // Get current stage
        // NOTE(review): the right-hand side of this assignment is missing in this
        // listing -- presumably the next stage from stageProvider; confirm.
        currentReservationStage =;
        // Validate that the ReservationRequest respects basic constraints
        validateInputStage(plan, currentReservationStage);
        if (allocateLeft) {
            // left-to-right: a stage may start where its predecessor ended
            stageArrivalTime = predecessorEndTime;
        } else {
            // right-to-left: start from the job arrival; ordered jobs compute a
            // tighter earliest start, which is then rounded up to the step and
            // clamped so that it is never before the job arrival
            stageArrivalTime = reservation.getArrival();
            if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
                stageArrivalTime = computeEarliestStartingTime(plan, reservation, stageProvider.getCurrentIndex(), currentReservationStage, stageDeadline);
            stageArrivalTime = stepRoundUp(stageArrivalTime, step);
            stageArrivalTime = Math.max(stageArrivalTime, reservation.getArrival());
        // Compute the allocation of a single stage
        Map<ReservationInterval, Resource> curAlloc = computeStageAllocation(plan, currentReservationStage, stageArrivalTime, stageDeadline, user, reservationId);
        // If we did not find an allocation for the stage, fail
        // (unless it's an ANY job, then we simply continue).
        if (curAlloc == null) {
            // If it's an ANY job, we can move to the next possible request
            // NOTE(review): the body of this if is missing in this listing.
            if (jobType == ReservationRequestInterpreter.R_ANY) {
            // Otherwise, the job cannot be allocated
            throw new PlanningException("The request cannot be satisfied");
        // Get the start & end time of the current allocation
        Long stageStartTime = findEarliestTime(curAlloc);
        Long stageEndTime = findLatestTime(curAlloc);
        // If we did find an allocation for the stage, add it
        for (Entry<ReservationInterval, Resource> entry : curAlloc.entrySet()) {
            allocations.addInterval(entry.getKey(), entry.getValue());
        // If this is an ANY clause, we have finished
        // NOTE(review): the body of this if is missing in this listing.
        if (jobType == ReservationRequestInterpreter.R_ANY) {
        // If ORDER job, set the stageDeadline of the next stage to be processed
        if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
            // note that the test is different left-to-right and right-to-left
            // NOTE(review): "&&" binds tighter than "||", so this condition reads
            // (NO_GAP && successor known && gap detected) || !isNonPreemptiveAllocation;
            // the second alternative therefore also applies to plain R_ORDER jobs
            // even though the message mentions ORDER_NO_GAP -- confirm intent.
            if (jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP && successorStartingTime != -1 && ((allocateLeft && predecessorEndTime < stageStartTime) || (!allocateLeft && (stageEndTime < successorStartingTime))) || (!isNonPreemptiveAllocation(curAlloc))) {
                throw new PlanningException("The allocation found does not respect ORDER_NO_GAP");
            if (allocateLeft) {
                // Store the stageEndTime as the lower bound for the next stage
                predecessorEndTime = stageEndTime;
            } else {
                // Store the stageStartTime and set the new stageDeadline
                successorStartingTime = stageStartTime;
                stageDeadline = stageStartTime;
    // If the allocation is empty, return an error
    if (allocations.isEmpty()) {
        throw new PlanningException("The request cannot be satisfied");
    return allocations;
Also used : RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) ReservationRequest(org.apache.hadoop.yarn.api.records.ReservationRequest) Resource(org.apache.hadoop.yarn.api.records.Resource) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException)

Example 4 with RLESparseResourceAllocation

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.

the class PlanningAlgorithm method allocateUser.

  /**
   * Performs the actual allocation for a ReservationDefinition within a Plan.
   * @param reservationId the identifier of the reservation
   * @param user the user who owns the reservation
   * @param plan the Plan to which the reservation must be fitted
   * @param contract encapsulates the resources required by the user for his
   *          session
   * @param oldReservation the existing reservation (null if none)
   * @return whether the allocateUser function was successful or not
   * @throws PlanningException if the session cannot be fitted into the plan
   * @throws ContractValidationException
   */
// Adjusts the contract, computes the job allocation, pads it to the
// step-rounded [arrival, deadline) window, wraps it in an
// InMemoryReservationAllocation and adds it to (or updates it in) the plan.
// NOTE(review): this listing appears to have lost its closing braces and the
// tail of the constructor call during extraction; confirm against upstream.
protected boolean allocateUser(ReservationId reservationId, String user, Plan plan, ReservationDefinition contract, ReservationAllocation oldReservation) throws PlanningException, ContractValidationException {
    // Adjust the ResourceDefinition to account for system "imperfections"
    // (e.g., scheduling delays for large containers).
    ReservationDefinition adjustedContract = adjustContract(plan, contract);
    // Compute the job allocation
    RLESparseResourceAllocation allocation = computeJobAllocation(plan, reservationId, adjustedContract, user);
    // If no job allocation was found, fail
    if (allocation == null) {
        throw new PlanningException("The planning algorithm could not find a valid allocation" + " for your request");
    // Translate the allocation to a map (with zero paddings)
    long step = plan.getStep();
    // both bounds are rounded up to a multiple of the plan step
    long jobArrival = stepRoundUp(adjustedContract.getArrival(), step);
    long jobDeadline = stepRoundUp(adjustedContract.getDeadline(), step);
    Map<ReservationInterval, Resource> mapAllocations = allocationsToPaddedMap(allocation, jobArrival, jobDeadline);
    // Create the reservation
    ReservationAllocation capReservation = new // constructor call follows
    InMemoryReservationAllocation(// arguments, each labelled on its own line
    reservationId, // ID
    adjustedContract, // Contract
    user, // User name
    plan.getQueueName(), // Queue name
    findEarliestTime(mapAllocations), // Earliest start time
    findLatestTime(mapAllocations), // Latest end time
    mapAllocations, // Allocations
    plan.getResourceCalculator(), // Resource calculator
    // NOTE(review): the original listing labelled one more argument here
    // ("Minimum allocation"), but that argument and the closing ");" are
    // missing from this listing -- confirm against upstream.
    // Add (or update) the reservation allocation
    if (oldReservation != null) {
        // an existing reservation was supplied: replace it
        return plan.updateReservation(capReservation);
    } else {
        return plan.addReservation(capReservation, false);
Also used : RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) InMemoryReservationAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.InMemoryReservationAllocation) ReservationDefinition(org.apache.hadoop.yarn.api.records.ReservationDefinition) Resource(org.apache.hadoop.yarn.api.records.Resource) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) InMemoryReservationAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.InMemoryReservationAllocation) ReservationAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationAllocation)

Example 5 with RLESparseResourceAllocation

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.

the class StageAllocatorLowCostAligned method computeStageAllocation.

// computeJobAllocation()
/**
 * Places the gangs of one reservation stage into duration-long intervals
 * aligned to {@code stageDeadline}, repeatedly picking the first interval of
 * a set ordered by total cost.
 *
 * NOTE(review): this listing appears to have lost its closing braces, the
 * bodies of several {@code if} statements, parts of the comparator and the
 * statements that add to / remove from the sorted set during extraction; the
 * affected lines are flagged below. {@code smoothnessFactor} is referenced
 * but not declared inside this method. Confirm against the upstream file.
 *
 * @param plan source of the resource calculator, total capacity and step
 * @param planLoads forwarded to {@code getDurationInterval}
 * @param planModifications tentative allocations; extended with every
 *          interval placed here and rolled back if the stage cannot be fully
 *          placed
 * @param rr the stage request (capability, concurrency, number of containers
 *          and duration are read from it)
 * @param stageEarliestStart lower bound of the time window to allocate in
 * @param stageDeadline upper bound of the time window to allocate in
 * @param user not referenced in the visible body of this method
 * @param oldId not referenced in the visible body of this method
 * @return the placed intervals with their resources, or {@code null} if the
 *         window is shorter than one duration or gangs remain unplaced
 */
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) {
    // Initialize
    ResourceCalculator resCalc = plan.getResourceCalculator();
    Resource capacity = plan.getTotalCapacity();
    long step = plan.getStep();
    // Create allocationRequests
    RLESparseResourceAllocation allocationRequests = new RLESparseResourceAllocation(plan.getResourceCalculator());
    // Initialize parameters
    long duration = stepRoundUp(rr.getDuration(), step);
    // number of whole durations that fit in the window (integer division)
    // NOTE(review): a zero duration would make this division throw -- confirm
    // that stepRoundUp / earlier validation rules it out.
    int windowSizeInDurations = (int) ((stageDeadline - stageEarliestStart) / duration);
    int totalGangs = rr.getNumContainers() / rr.getConcurrency();
    int numContainersPerGang = rr.getConcurrency();
    Resource gang = Resources.multiply(rr.getCapability(), numContainersPerGang);
    // Set maxGangsPerUnit: at least 1, otherwise the floor of the gangs per
    // duration-sized unit, further divided by smoothnessFactor
    // NOTE(review): windowSizeInDurations is used here before the "<= 0" check
    // below; the division is done in double, so a zero window does not throw.
    int maxGangsPerUnit = (int) Math.max(Math.floor(((double) totalGangs) / windowSizeInDurations), 1);
    maxGangsPerUnit = Math.max(maxGangsPerUnit / smoothnessFactor, 1);
    // If window size is too small, return null
    if (windowSizeInDurations <= 0) {
        return null;
    // Initialize tree sorted by costs
    TreeSet<DurationInterval> durationIntervalsSortedByCost = new TreeSet<DurationInterval>(new Comparator<DurationInterval>() {

        public int compare(DurationInterval val1, DurationInterval val2) {
            // NOTE(review): both comparison expressions below are truncated in this
            // listing; what remains suggests ordering by total cost first and, on
            // ties, by end time with the sign flipped -- confirm against upstream.
            int cmp =, val2.getTotalCost());
            if (cmp != 0) {
                return cmp;
            return (-1) *, val2.getEndTime());
    // Add durationIntervals that end at (endTime - n*duration) for some n.
    for (long intervalEnd = stageDeadline; intervalEnd >= stageEarliestStart + duration; intervalEnd -= duration) {
        long intervalStart = intervalEnd - duration;
        // Get duration interval [intervalStart,intervalEnd)
        DurationInterval durationInterval = getDurationInterval(intervalStart, intervalEnd, planLoads, planModifications, capacity, resCalc, step);
        // If the interval can fit a gang, add it to the tree
        // NOTE(review): the body of this if is missing in this listing.
        if (durationInterval.canAllocate(gang, capacity, resCalc)) {
    // Allocate
    int remainingGangs = totalGangs;
    while (remainingGangs > 0) {
        // If no durationInterval can fit a gang, break and return null
        // NOTE(review): the body of this if is missing in this listing.
        if (durationIntervalsSortedByCost.isEmpty()) {
        // Get best duration interval
        DurationInterval bestDurationInterval = durationIntervalsSortedByCost.first();
        // allocate at most maxGangsPerUnit gangs, no more than remain, and no
        // more than the chosen interval can fit
        int numGangsToAllocate = Math.min(maxGangsPerUnit, remainingGangs);
        numGangsToAllocate = Math.min(numGangsToAllocate, bestDurationInterval.numCanFit(gang, capacity, resCalc));
        // Add it
        remainingGangs -= numGangsToAllocate;
        ReservationInterval reservationInt = new ReservationInterval(bestDurationInterval.getStartTime(), bestDurationInterval.getEndTime());
        Resource reservationRes = Resources.multiply(rr.getCapability(), rr.getConcurrency() * numGangsToAllocate);
        planModifications.addInterval(reservationInt, reservationRes);
        allocationRequests.addInterval(reservationInt, reservationRes);
        // Remove from tree
        // NOTE(review): the statement this comment refers to is missing in this
        // listing.
        // Get updated interval (recomputed now that planModifications changed)
        DurationInterval updatedDurationInterval = getDurationInterval(bestDurationInterval.getStartTime(), bestDurationInterval.getStartTime() + duration, planLoads, planModifications, capacity, resCalc, step);
        // Add to tree, if possible
        // NOTE(review): the body of this if is missing in this listing.
        if (updatedDurationInterval.canAllocate(gang, capacity, resCalc)) {
    // Get the final allocation
    Map<ReservationInterval, Resource> allocations = allocationRequests.toIntervalMap();
    // If no gangs are left to place we succeed and return the allocation
    if (remainingGangs <= 0) {
        return allocations;
    } else {
        // We remove unwanted side-effect from planModifications (needed for ANY).
        for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocations.entrySet()) {
            planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
        // Return null to signal failure in this allocation
        return null;
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) ResourceCalculator(org.apache.hadoop.yarn.util.resource.ResourceCalculator) RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) TreeSet(java.util.TreeSet) Map(java.util.Map)


