Examples with PlanningException - org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException

Example 16 with PlanningException

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.

the class ClientRMService method deleteReservation.

@Override
public ReservationDeleteResponse deleteReservation(ReservationDeleteRequest request) throws YarnException, IOException {
    // Check if reservation system is enabled
    checkReservationSytem(AuditConstants.DELETE_RESERVATION_REQUEST);
    ReservationDeleteResponse response = recordFactory.newRecordInstance(ReservationDeleteResponse.class);
    // Validate the input
    Plan plan = rValidator.validateReservationDeleteRequest(reservationSystem, request);
    ReservationId reservationId = request.getReservationId();
    String queueName = reservationSystem.getQueueForReservation(reservationId);
    // Check ACLs
    String user = checkReservationACLs(queueName, AuditConstants.DELETE_RESERVATION_REQUEST, reservationId);
    // Try to update the reservation using default agent
    try {
        boolean result = plan.getReservationAgent().deleteReservation(reservationId, user, plan);
        if (!result) {
            String errMsg = "Could not delete reservation: " + reservationId;
            RMAuditLogger.logFailure(user, AuditConstants.DELETE_RESERVATION_REQUEST, errMsg, "ClientRMService", errMsg);
            throw RPCUtil.getRemoteException(errMsg);
        }
    } catch (PlanningException e) {
        RMAuditLogger.logFailure(user, AuditConstants.DELETE_RESERVATION_REQUEST, e.getMessage(), "ClientRMService", "Unable to delete the reservation: " + reservationId);
        throw RPCUtil.getRemoteException(e);
    }
    RMAuditLogger.logSuccess(user, AuditConstants.DELETE_RESERVATION_REQUEST, "ClientRMService: " + reservationId);
    return response;
}

Also used : ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException) ReservationDeleteResponse(org.apache.hadoop.yarn.api.protocolrecords.ReservationDeleteResponse) Plan(org.apache.hadoop.yarn.server.resourcemanager.reservation.Plan)

Example 17 with PlanningException

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.

the class AbstractSchedulerPlanFollower method synchronizePlan.

@Override
public synchronized void synchronizePlan(Plan plan, boolean shouldReplan) {
    String planQueueName = plan.getQueueName();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running plan follower edit policy for plan: " + planQueueName);
    }
    // align with plan step
    long step = plan.getStep();
    long now = clock.getTime();
    if (now % step != 0) {
        now += step - (now % step);
    }
    Queue planQueue = getPlanQueue(planQueueName);
    if (planQueue == null)
        return;
    // first we publish to the plan the current availability of resources
    Resource clusterResources = scheduler.getClusterResource();
    Resource planResources = getPlanResources(plan, planQueue, clusterResources);
    Set<ReservationAllocation> currentReservations = plan.getReservationsAtTime(now);
    Set<String> curReservationNames = new HashSet<String>();
    Resource reservedResources = Resource.newInstance(0, 0);
    int numRes = getReservedResources(now, currentReservations, curReservationNames, reservedResources);
    // create the default reservation queue if it doesnt exist
    String defReservationId = getReservationIdFromQueueName(planQueueName) + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
    String defReservationQueue = getReservationQueueName(planQueueName, defReservationId);
    createDefaultReservationQueue(planQueueName, planQueue, defReservationId);
    curReservationNames.add(defReservationId);
    // if the resources dedicated to this plan has shrunk invoke replanner
    boolean shouldResize = false;
    if (arePlanResourcesLessThanReservations(plan.getResourceCalculator(), clusterResources, planResources, reservedResources)) {
        if (shouldReplan) {
            try {
                plan.getReplanner().plan(plan, null);
            } catch (PlanningException e) {
                LOG.warn("Exception while trying to replan: {}", planQueueName, e);
            }
        } else {
            shouldResize = true;
        }
    }
    // identify the reservations that have expired and new reservations that
    // have to be activated
    List<? extends Queue> resQueues = getChildReservationQueues(planQueue);
    Set<String> expired = new HashSet<String>();
    for (Queue resQueue : resQueues) {
        String resQueueName = resQueue.getQueueName();
        String reservationId = getReservationIdFromQueueName(resQueueName);
        if (curReservationNames.contains(reservationId)) {
            // it is already existing reservation, so needed not create new
            // reservation queue
            curReservationNames.remove(reservationId);
        } else {
            // the reservation has termination, mark for cleanup
            expired.add(reservationId);
        }
    }
    // garbage collect expired reservations
    cleanupExpiredQueues(planQueueName, plan.getMoveOnExpiry(), expired, defReservationQueue);
    // Add new reservations and update existing ones
    float totalAssignedCapacity = 0f;
    if (currentReservations != null) {
        // first release all excess capacity in default queue
        try {
            setQueueEntitlement(planQueueName, defReservationQueue, 0f, 1.0f);
        } catch (YarnException e) {
            LOG.warn("Exception while trying to release default queue capacity for plan: {}", planQueueName, e);
        }
        // sort allocations from the one giving up the most resources, to the
        // one asking for the most avoid order-of-operation errors that
        // temporarily violate 100% capacity bound
        List<ReservationAllocation> sortedAllocations = sortByDelta(new ArrayList<ReservationAllocation>(currentReservations), now, plan);
        for (ReservationAllocation res : sortedAllocations) {
            String currResId = res.getReservationId().toString();
            if (curReservationNames.contains(currResId)) {
                addReservationQueue(planQueueName, planQueue, currResId);
            }
            Resource capToAssign = res.getResourcesAtTime(now);
            float targetCapacity = 0f;
            if (planResources.getMemorySize() > 0 && planResources.getVirtualCores() > 0) {
                if (shouldResize) {
                    capToAssign = calculateReservationToPlanProportion(plan.getResourceCalculator(), planResources, reservedResources, capToAssign);
                }
                targetCapacity = calculateReservationToPlanRatio(plan.getResourceCalculator(), clusterResources, planResources, capToAssign);
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug("Assigning capacity of {} to queue {} with target capacity {}", capToAssign, currResId, targetCapacity);
            }
            // set maxCapacity to 100% unless the job requires gang, in which
            // case we stick to capacity (as running early/before is likely a
            // waste of resources)
            float maxCapacity = 1.0f;
            if (res.containsGangs()) {
                maxCapacity = targetCapacity;
            }
            try {
                setQueueEntitlement(planQueueName, currResId, targetCapacity, maxCapacity);
            } catch (YarnException e) {
                LOG.warn("Exception while trying to size reservation for plan: {}", currResId, planQueueName, e);
            }
            totalAssignedCapacity += targetCapacity;
        }
    }
    // compute the default queue capacity
    float defQCap = 1.0f - totalAssignedCapacity;
    if (LOG.isDebugEnabled()) {
        LOG.debug("PlanFollowerEditPolicyTask: total Plan Capacity: {} " + "currReservation: {} default-queue capacity: {}", planResources, numRes, defQCap);
    }
    // set the default queue to eat-up all remaining capacity
    try {
        setQueueEntitlement(planQueueName, defReservationQueue, defQCap, 1.0f);
    } catch (YarnException e) {
        LOG.warn("Exception while trying to reclaim default queue capacity for plan: {}", planQueueName, e);
    }
    // garbage collect finished reservations from plan
    try {
        plan.archiveCompletedReservations(now);
    } catch (PlanningException e) {
        LOG.error("Exception in archiving completed reservations: ", e);
    }
    LOG.info("Finished iteration of plan follower edit policy for plan: " + planQueueName);
// Extension: update plan with app states,
// useful to support smart replanning
}

Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) YarnException(org.apache.hadoop.yarn.exceptions.YarnException) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException) Queue(org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue) HashSet(java.util.HashSet)

Example 18 with PlanningException

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.

the class CapacityOverTimePolicy method validate.

/**
   * The validation algorithm walks over the RLE encoded allocation and
   * checks that for all transition points (when the start or end of the
   * checking window encounters a value in the RLE). At this point it
   * checkes whether the integral computed exceeds the quota limit. Note that
   * this might not find the exact time of a violation, but if a violation
   * exists it will find it. The advantage is a much lower number of checks
   * as compared to time-slot by time-slot checks.
   *
   * @param plan the plan to validate against
   * @param reservation the reservation allocation to test.
   * @throws PlanningException if the validation fails.
   */
@Override
public void validate(Plan plan, ReservationAllocation reservation) throws PlanningException {
    // cluster limits, and 3) maxInst (via override of available)
    try {
        super.validate(plan, reservation);
    } catch (PlanningException p) {
        //wrap it in proper quota exception
        throw new PlanningQuotaException(p);
    }
    //---- check for integral violations of capacity --------
    // Gather a view of what to check (curr allocation of user, minus old
    // version of this reservation, plus new version)
    RLESparseResourceAllocation consumptionForUserOverTime = plan.getConsumptionForUserOverTime(reservation.getUser(), reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
    ReservationAllocation old = plan.getReservationById(reservation.getReservationId());
    if (old != null) {
        consumptionForUserOverTime = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), consumptionForUserOverTime, old.getResourcesOverTime(), RLEOperator.add, reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
    }
    RLESparseResourceAllocation resRLE = reservation.getResourcesOverTime();
    RLESparseResourceAllocation toCheck = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), consumptionForUserOverTime, resRLE, RLEOperator.add, Long.MIN_VALUE, Long.MAX_VALUE);
    NavigableMap<Long, Resource> integralUp = new TreeMap<>();
    NavigableMap<Long, Resource> integralDown = new TreeMap<>();
    long prevTime = toCheck.getEarliestStartTime();
    IntegralResource prevResource = new IntegralResource(0L, 0L);
    IntegralResource runningTot = new IntegralResource(0L, 0L);
    // add intermediate points
    Map<Long, Resource> temp = new TreeMap<>();
    for (Map.Entry<Long, Resource> pointToCheck : toCheck.getCumulative().entrySet()) {
        Long timeToCheck = pointToCheck.getKey();
        Resource resourceToCheck = pointToCheck.getValue();
        Long nextPoint = toCheck.getCumulative().higherKey(timeToCheck);
        if (nextPoint == null || toCheck.getCumulative().get(nextPoint) == null) {
            continue;
        }
        for (int i = 1; i <= (nextPoint - timeToCheck) / validWindow; i++) {
            temp.put(timeToCheck + (i * validWindow), resourceToCheck);
        }
    }
    temp.putAll(toCheck.getCumulative());
    // compute point-wise integral for the up-fronts and down-fronts
    for (Map.Entry<Long, Resource> currPoint : temp.entrySet()) {
        Long currTime = currPoint.getKey();
        Resource currResource = currPoint.getValue();
        //add to running total current contribution
        prevResource.multiplyBy(currTime - prevTime);
        runningTot.add(prevResource);
        integralUp.put(currTime, normalizeToResource(runningTot, validWindow));
        integralDown.put(currTime + validWindow, normalizeToResource(runningTot, validWindow));
        if (currResource != null) {
            prevResource.memory = currResource.getMemorySize();
            prevResource.vcores = currResource.getVirtualCores();
        } else {
            prevResource.memory = 0L;
            prevResource.vcores = 0L;
        }
        prevTime = currTime;
    }
    // compute final integral as delta of up minus down transitions
    RLESparseResourceAllocation intUp = new RLESparseResourceAllocation(integralUp, plan.getResourceCalculator());
    RLESparseResourceAllocation intDown = new RLESparseResourceAllocation(integralDown, plan.getResourceCalculator());
    RLESparseResourceAllocation integral = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), intUp, intDown, RLEOperator.subtract, Long.MIN_VALUE, Long.MAX_VALUE);
    // define over-time integral limit
    // note: this is aligned with the normalization done above
    NavigableMap<Long, Resource> tlimit = new TreeMap<>();
    Resource maxAvgRes = Resources.multiply(plan.getTotalCapacity(), maxAvg);
    tlimit.put(toCheck.getEarliestStartTime() - validWindow, maxAvgRes);
    RLESparseResourceAllocation targetLimit = new RLESparseResourceAllocation(tlimit, plan.getResourceCalculator());
    // compare using merge() limit with integral
    try {
        RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), targetLimit, integral, RLEOperator.subtractTestNonNegative, reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
    } catch (PlanningException p) {
        throw new PlanningQuotaException("Integral (avg over time) quota capacity " + maxAvg + " over a window of " + validWindow / 1000 + " seconds, " + " would be exceeded by accepting reservation: " + reservation.getReservationId(), p);
    }
}

Also used : PlanningQuotaException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningQuotaException) Resource(org.apache.hadoop.yarn.api.records.Resource) TreeMap(java.util.TreeMap) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException) TreeMap(java.util.TreeMap) Map(java.util.Map) NavigableMap(java.util.NavigableMap)

Example 19 with PlanningException

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.

the class InMemoryPlan method updateReservation.

@Override
public boolean updateReservation(ReservationAllocation reservation) throws PlanningException {
    writeLock.lock();
    boolean result = false;
    try {
        ReservationId resId = reservation.getReservationId();
        ReservationAllocation currReservation = getReservationById(resId);
        if (currReservation == null) {
            String errMsg = "The specified Reservation with ID " + resId + " does not exist in the plan";
            LOG.error(errMsg);
            throw new IllegalArgumentException(errMsg);
        }
        // validate if we can accept this reservation, throws exception if
        // validation fails
        policy.validate(this, reservation);
        if (!removeReservation(currReservation)) {
            LOG.error("Unable to replace reservation: {} from plan.", reservation.getReservationId());
            return result;
        }
        try {
            result = addReservation(reservation, false);
        } catch (PlanningException e) {
            LOG.error("Unable to update reservation: {} from plan due to {}.", reservation.getReservationId(), e.getMessage());
        }
        if (result) {
            LOG.info("Successfully updated reservation: {} in plan.", reservation.getReservationId());
            return result;
        } else {
            // rollback delete
            addReservation(currReservation, false);
            LOG.info("Rollbacked update reservation: {} from plan.", reservation.getReservationId());
            return result;
        }
    } finally {
        writeLock.unlock();
    }
}

Also used : ReservationId(org.apache.hadoop.yarn.api.records.ReservationId) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException)

Example 20 with PlanningException

use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.

the class IterativePlanner method computeJobAllocation.

@Override
public RLESparseResourceAllocation computeJobAllocation(Plan plan, ReservationId reservationId, ReservationDefinition reservation, String user) throws PlanningException {
    // Initialize
    initialize(plan, reservationId, reservation);
    // Create the allocations data structure
    RLESparseResourceAllocation allocations = new RLESparseResourceAllocation(plan.getResourceCalculator());
    StageProvider stageProvider = new StageProvider(allocateLeft, reservation);
    // Current stage
    ReservationRequest currentReservationStage;
    // Stage deadlines
    long stageDeadline = stepRoundDown(reservation.getDeadline(), step);
    long successorStartingTime = -1;
    long predecessorEndTime = stepRoundDown(reservation.getArrival(), step);
    long stageArrivalTime = -1;
    // Iterate the stages in reverse order
    while (stageProvider.hasNext()) {
        // Get current stage
        currentReservationStage = stageProvider.next();
        // Validate that the ReservationRequest respects basic constraints
        validateInputStage(plan, currentReservationStage);
        if (allocateLeft) {
            stageArrivalTime = predecessorEndTime;
        } else {
            stageArrivalTime = reservation.getArrival();
            if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
                stageArrivalTime = computeEarliestStartingTime(plan, reservation, stageProvider.getCurrentIndex(), currentReservationStage, stageDeadline);
            }
            stageArrivalTime = stepRoundUp(stageArrivalTime, step);
            stageArrivalTime = Math.max(stageArrivalTime, reservation.getArrival());
        }
        // Compute the allocation of a single stage
        Map<ReservationInterval, Resource> curAlloc = computeStageAllocation(plan, currentReservationStage, stageArrivalTime, stageDeadline, user, reservationId);
        // (unless it's an ANY job, then we simply continue).
        if (curAlloc == null) {
            // If it's an ANY job, we can move to the next possible request
            if (jobType == ReservationRequestInterpreter.R_ANY) {
                continue;
            }
            // Otherwise, the job cannot be allocated
            throw new PlanningException("The request cannot be satisfied");
        }
        // Get the start & end time of the current allocation
        Long stageStartTime = findEarliestTime(curAlloc);
        Long stageEndTime = findLatestTime(curAlloc);
        // If we did find an allocation for the stage, add it
        for (Entry<ReservationInterval, Resource> entry : curAlloc.entrySet()) {
            allocations.addInterval(entry.getKey(), entry.getValue());
        }
        // If this is an ANY clause, we have finished
        if (jobType == ReservationRequestInterpreter.R_ANY) {
            break;
        }
        // If ORDER job, set the stageDeadline of the next stage to be processed
        if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
            // note that the test is different left-to-right and right-to-left
            if (jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP && successorStartingTime != -1 && ((allocateLeft && predecessorEndTime < stageStartTime) || (!allocateLeft && (stageEndTime < successorStartingTime))) || (!isNonPreemptiveAllocation(curAlloc))) {
                throw new PlanningException("The allocation found does not respect ORDER_NO_GAP");
            }
            if (allocateLeft) {
                // Store the stageStartTime and set the new stageDeadline
                predecessorEndTime = stageEndTime;
            } else {
                // Store the stageStartTime and set the new stageDeadline
                successorStartingTime = stageStartTime;
                stageDeadline = stageStartTime;
            }
        }
    }
    // If the allocation is empty, return an error
    if (allocations.isEmpty()) {
        throw new PlanningException("The request cannot be satisfied");
    }
    return allocations;
}

Also used : RLESparseResourceAllocation(org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation) ReservationRequest(org.apache.hadoop.yarn.api.records.ReservationRequest) Resource(org.apache.hadoop.yarn.api.records.Resource) ReservationInterval(org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval) PlanningException(org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException)

Aggregations

PlanningException (org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException)35 ReservationId (org.apache.hadoop.yarn.api.records.ReservationId)27 Test (org.junit.Test)24 ReservationDefinition (org.apache.hadoop.yarn.api.records.ReservationDefinition)12 Resource (org.apache.hadoop.yarn.api.records.Resource)8 ReservationRequest (org.apache.hadoop.yarn.api.records.ReservationRequest)5 ArrayList (java.util.ArrayList)4 ReservationRequests (org.apache.hadoop.yarn.api.records.ReservationRequests)4 ReservationDefinitionPBImpl (org.apache.hadoop.yarn.api.records.impl.pb.ReservationDefinitionPBImpl)4 ReservationRequestsPBImpl (org.apache.hadoop.yarn.api.records.impl.pb.ReservationRequestsPBImpl)4 TreeMap (java.util.TreeMap)3 InMemoryReservationAllocation (org.apache.hadoop.yarn.server.resourcemanager.reservation.InMemoryReservationAllocation)3 Plan (org.apache.hadoop.yarn.server.resourcemanager.reservation.Plan)3 ReservationAllocation (org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationAllocation)3 RLESparseResourceAllocation (org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation)2 ReservationInterval (org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationInterval)2 ReservationSchedulerConfiguration (org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSchedulerConfiguration)2 HashSet (java.util.HashSet)1 Map (java.util.Map)1 NavigableMap (java.util.NavigableMap)1