use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.
the class ClientRMService method deleteReservation.
/**
 * Deletes the reservation named in the request from the plan it belongs to.
 *
 * <p>Steps, in order: verify the reservation system via
 * {@code checkReservationSytem}, validate the request (which also yields the
 * plan), resolve the queue of the reservation, check ACLs, and finally ask
 * the plan's reservation agent to delete the reservation. The outcome is
 * recorded through {@code RMAuditLogger} in both the failure and the success
 * case.
 *
 * @param request the delete request; its reservation id selects the
 *          reservation to remove
 * @return the response record created through {@code recordFactory}
 * @throws YarnException the exception built by
 *           {@code RPCUtil.getRemoteException} when the agent reports that
 *           nothing was deleted or raises a {@code PlanningException}
 * @throws IOException declared by the overridden method; this body does not
 *           throw it directly
 */
@Override
public ReservationDeleteResponse deleteReservation(ReservationDeleteRequest request) throws YarnException, IOException {
  // The reservation system must be available before anything else is done.
  checkReservationSytem(AuditConstants.DELETE_RESERVATION_REQUEST);
  final ReservationDeleteResponse deleteResponse =
      recordFactory.newRecordInstance(ReservationDeleteResponse.class);

  // Validating the request hands back the plan that will be modified.
  final Plan targetPlan =
      rValidator.validateReservationDeleteRequest(reservationSystem, request);
  final ReservationId idToDelete = request.getReservationId();
  final String owningQueue = reservationSystem.getQueueForReservation(idToDelete);

  // The ACL check returns the user name used for the deletion and the audit log.
  final String caller = checkReservationACLs(
      owningQueue, AuditConstants.DELETE_RESERVATION_REQUEST, idToDelete);

  // Delegate the actual deletion to the reservation agent of the plan.
  try {
    final boolean deleted =
        targetPlan.getReservationAgent().deleteReservation(idToDelete, caller, targetPlan);
    if (!deleted) {
      final String failure = "Could not delete reservation: " + idToDelete;
      RMAuditLogger.logFailure(
          caller, AuditConstants.DELETE_RESERVATION_REQUEST, failure, "ClientRMService", failure);
      throw RPCUtil.getRemoteException(failure);
    }
  } catch (PlanningException planningFailure) {
    RMAuditLogger.logFailure(
        caller,
        AuditConstants.DELETE_RESERVATION_REQUEST,
        planningFailure.getMessage(),
        "ClientRMService",
        "Unable to delete the reservation: " + idToDelete);
    throw RPCUtil.getRemoteException(planningFailure);
  }

  RMAuditLogger.logSuccess(
      caller, AuditConstants.DELETE_RESERVATION_REQUEST, "ClientRMService: " + idToDelete);
  return deleteResponse;
}
use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.
the class AbstractSchedulerPlanFollower method synchronizePlan.
/**
 * Synchronizes the reservation queues under a plan queue with the
 * reservations the plan reports for the current, step-aligned time.
 *
 * <p>The method performs the following steps in order:
 * <ol>
 *   <li>rounds the current clock time up to the next multiple of the plan
 *       step;</li>
 *   <li>looks up the plan queue and returns immediately if it is missing;</li>
 *   <li>collects the reservations active at that time and makes sure the
 *       default reservation queue exists;</li>
 *   <li>if the plan resources are less than the reserved resources, either
 *       invokes the plan's replanner ({@code shouldReplan == true}) or marks
 *       the reservations for proportional resizing;</li>
 *   <li>cleans up queues whose reservation is no longer active;</li>
 *   <li>creates queues for new reservations and sets the entitlement of
 *       every active reservation;</li>
 *   <li>gives the remaining capacity to the default queue and asks the plan
 *       to archive completed reservations.</li>
 * </ol>
 *
 * <p>{@code YarnException}s and {@code PlanningException}s raised by the
 * individual steps are logged and do not abort the iteration.
 *
 * @param plan the plan to synchronize the scheduler queues with
 * @param shouldReplan whether the replanner should be invoked when the plan
 *          resources are less than the current reservations; when
 *          {@code false} the reservations are resized instead
 */
@Override
public synchronized void synchronizePlan(Plan plan, boolean shouldReplan) {
  String planQueueName = plan.getQueueName();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Running plan follower edit policy for plan: {}", planQueueName);
  }
  // align with plan step: round "now" up to the next multiple of the step
  long step = plan.getStep();
  long now = clock.getTime();
  if (now % step != 0) {
    now += step - (now % step);
  }
  Queue planQueue = getPlanQueue(planQueueName);
  if (planQueue == null) {
    return;
  }
  // first we publish to the plan the current availability of resources
  Resource clusterResources = scheduler.getClusterResource();
  Resource planResources = getPlanResources(plan, planQueue, clusterResources);
  Set<ReservationAllocation> currentReservations = plan.getReservationsAtTime(now);
  Set<String> curReservationNames = new HashSet<String>();
  Resource reservedResources = Resource.newInstance(0, 0);
  int numRes = getReservedResources(now, currentReservations, curReservationNames, reservedResources);
  // create the default reservation queue if it doesn't exist
  String defReservationId = getReservationIdFromQueueName(planQueueName) + ReservationConstants.DEFAULT_QUEUE_SUFFIX;
  String defReservationQueue = getReservationQueueName(planQueueName, defReservationId);
  createDefaultReservationQueue(planQueueName, planQueue, defReservationId);
  curReservationNames.add(defReservationId);
  // if the resources dedicated to this plan have shrunk, invoke the
  // replanner, or fall back to resizing when replanning is not requested
  boolean shouldResize = false;
  if (arePlanResourcesLessThanReservations(plan.getResourceCalculator(), clusterResources, planResources, reservedResources)) {
    if (shouldReplan) {
      try {
        plan.getReplanner().plan(plan, null);
      } catch (PlanningException e) {
        LOG.warn("Exception while trying to replan: {}", planQueueName, e);
      }
    } else {
      shouldResize = true;
    }
  }
  // identify the reservations that have expired and new reservations that
  // have to be activated
  List<? extends Queue> resQueues = getChildReservationQueues(planQueue);
  Set<String> expired = new HashSet<String>();
  for (Queue resQueue : resQueues) {
    String resQueueName = resQueue.getQueueName();
    String reservationId = getReservationIdFromQueueName(resQueueName);
    if (curReservationNames.contains(reservationId)) {
      // the reservation already has a queue, so no new queue is needed;
      // after this loop curReservationNames holds only the new reservations
      curReservationNames.remove(reservationId);
    } else {
      // the reservation has terminated, mark its queue for cleanup
      expired.add(reservationId);
    }
  }
  // garbage collect expired reservations
  cleanupExpiredQueues(planQueueName, plan.getMoveOnExpiry(), expired, defReservationQueue);
  // Add new reservations and update existing ones
  float totalAssignedCapacity = 0f;
  if (currentReservations != null) {
    // first release all excess capacity in default queue
    try {
      setQueueEntitlement(planQueueName, defReservationQueue, 0f, 1.0f);
    } catch (YarnException e) {
      LOG.warn("Exception while trying to release default queue capacity for plan: {}", planQueueName, e);
    }
    // sort allocations from the one giving up the most resources, to the
    // one asking for the most, to avoid order-of-operation errors that
    // temporarily violate the 100% capacity bound
    List<ReservationAllocation> sortedAllocations = sortByDelta(new ArrayList<ReservationAllocation>(currentReservations), now, plan);
    for (ReservationAllocation res : sortedAllocations) {
      String currResId = res.getReservationId().toString();
      if (curReservationNames.contains(currResId)) {
        addReservationQueue(planQueueName, planQueue, currResId);
      }
      Resource capToAssign = res.getResourcesAtTime(now);
      float targetCapacity = 0f;
      // the target capacity stays at 0 when the plan has no memory or no vcores
      if (planResources.getMemorySize() > 0 && planResources.getVirtualCores() > 0) {
        if (shouldResize) {
          capToAssign = calculateReservationToPlanProportion(plan.getResourceCalculator(), planResources, reservedResources, capToAssign);
        }
        targetCapacity = calculateReservationToPlanRatio(plan.getResourceCalculator(), clusterResources, planResources, capToAssign);
      }
      if (LOG.isDebugEnabled()) {
        LOG.debug("Assigning capacity of {} to queue {} with target capacity {}", capToAssign, currResId, targetCapacity);
      }
      // set maxCapacity to 100% unless the job requires gang, in which
      // case we stick to capacity (as running early/before is likely a
      // waste of resources)
      float maxCapacity = 1.0f;
      if (res.containsGangs()) {
        maxCapacity = targetCapacity;
      }
      try {
        setQueueEntitlement(planQueueName, currResId, targetCapacity, maxCapacity);
      } catch (YarnException e) {
        // one placeholder per non-throwable argument, so that both the
        // reservation id and the plan name end up in the message
        LOG.warn("Exception while trying to size reservation {} for plan: {}", currResId, planQueueName, e);
      }
      totalAssignedCapacity += targetCapacity;
    }
  }
  // compute the default queue capacity
  float defQCap = 1.0f - totalAssignedCapacity;
  if (LOG.isDebugEnabled()) {
    LOG.debug("PlanFollowerEditPolicyTask: total Plan Capacity: {} " + "currReservation: {} default-queue capacity: {}", planResources, numRes, defQCap);
  }
  // set the default queue to eat-up all remaining capacity
  try {
    setQueueEntitlement(planQueueName, defReservationQueue, defQCap, 1.0f);
  } catch (YarnException e) {
    LOG.warn("Exception while trying to reclaim default queue capacity for plan: {}", planQueueName, e);
  }
  // garbage collect finished reservations from plan
  try {
    plan.archiveCompletedReservations(now);
  } catch (PlanningException e) {
    LOG.error("Exception in archiving completed reservations: ", e);
  }
  LOG.info("Finished iteration of plan follower edit policy for plan: {}", planQueueName);
  // Extension: update plan with app states,
  // useful to support smart replanning
}
use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.
the class CapacityOverTimePolicy method validate.
/**
* Validates a reservation against the parent policy and against an
* average-over-time ("integral") capacity limit.
*
* The validation algorithm walks over the RLE encoded allocation and
* checks all transition points (when the start or end of the checking
* window encounters a value in the RLE). At each such point it checks
* whether the integral computed exceeds the quota limit. Note that
* this might not find the exact time of a violation, but if a violation
* exists it will find it. The advantage is a much lower number of checks
* as compared to time-slot by time-slot checks.
*
* @param plan the plan to validate against
* @param reservation the reservation allocation to test.
* @throws PlanningException if the validation fails; both failure paths in
*           this method raise it as a PlanningQuotaException.
*/
@Override
public void validate(Plan plan, ReservationAllocation reservation) throws PlanningException {
// Run the checks of the parent policy first.
// NOTE(review): the original comment here is truncated; the surviving
// fragment reads "cluster limits, and 3) maxInst (via override of
// available)" -- confirm the full list of checks done by the superclass.
try {
super.validate(plan, reservation);
} catch (PlanningException p) {
// wrap it in proper quota exception
throw new PlanningQuotaException(p);
}
//---- check for integral violations of capacity --------
// Gather a view of what to check (curr allocation of user, minus old
// version of this reservation, plus new version)
RLESparseResourceAllocation consumptionForUserOverTime = plan.getConsumptionForUserOverTime(reservation.getUser(), reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
ReservationAllocation old = plan.getReservationById(reservation.getReservationId());
if (old != null) {
// NOTE(review): the comment above says the old version of the reservation
// is to be taken out ("minus old version"), but this merge uses
// RLEOperator.add -- confirm which one is intended.
consumptionForUserOverTime = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), consumptionForUserOverTime, old.getResourcesOverTime(), RLEOperator.add, reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
}
// add the allocation of the reservation under test to the user's consumption
RLESparseResourceAllocation resRLE = reservation.getResourcesOverTime();
RLESparseResourceAllocation toCheck = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), consumptionForUserOverTime, resRLE, RLEOperator.add, Long.MIN_VALUE, Long.MAX_VALUE);
// integralUp holds the normalized running total at each point in time,
// integralDown the same value shifted forward by validWindow
NavigableMap<Long, Resource> integralUp = new TreeMap<>();
NavigableMap<Long, Resource> integralDown = new TreeMap<>();
long prevTime = toCheck.getEarliestStartTime();
IntegralResource prevResource = new IntegralResource(0L, 0L);
IntegralResource runningTot = new IntegralResource(0L, 0L);
// add intermediate points: between two consecutive entries of the
// cumulative map, insert one extra point every validWindow, carrying the
// value of the earlier entry
Map<Long, Resource> temp = new TreeMap<>();
for (Map.Entry<Long, Resource> pointToCheck : toCheck.getCumulative().entrySet()) {
Long timeToCheck = pointToCheck.getKey();
Resource resourceToCheck = pointToCheck.getValue();
Long nextPoint = toCheck.getCumulative().higherKey(timeToCheck);
// nothing to interpolate after the last entry, or when the next entry
// maps to null
if (nextPoint == null || toCheck.getCumulative().get(nextPoint) == null) {
continue;
}
for (int i = 1; i <= (nextPoint - timeToCheck) / validWindow; i++) {
temp.put(timeToCheck + (i * validWindow), resourceToCheck);
}
}
// the original points are put last, so they overwrite any interpolated
// point that falls on the same key
temp.putAll(toCheck.getCumulative());
// compute point-wise integral for the up-fronts and down-fronts
for (Map.Entry<Long, Resource> currPoint : temp.entrySet()) {
Long currTime = currPoint.getKey();
Resource currResource = currPoint.getValue();
// add to running total current contribution: the previous value times
// the time elapsed since the previous point
prevResource.multiplyBy(currTime - prevTime);
runningTot.add(prevResource);
integralUp.put(currTime, normalizeToResource(runningTot, validWindow));
integralDown.put(currTime + validWindow, normalizeToResource(runningTot, validWindow));
// remember the value at this point for the next step; a null entry
// counts as zero resources
if (currResource != null) {
prevResource.memory = currResource.getMemorySize();
prevResource.vcores = currResource.getVirtualCores();
} else {
prevResource.memory = 0L;
prevResource.vcores = 0L;
}
prevTime = currTime;
}
// compute final integral as delta of up minus down transitions
RLESparseResourceAllocation intUp = new RLESparseResourceAllocation(integralUp, plan.getResourceCalculator());
RLESparseResourceAllocation intDown = new RLESparseResourceAllocation(integralDown, plan.getResourceCalculator());
RLESparseResourceAllocation integral = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), intUp, intDown, RLEOperator.subtract, Long.MIN_VALUE, Long.MAX_VALUE);
// define over-time integral limit: a single entry of
// (total capacity * maxAvg) placed validWindow before the earliest start
// note: this is aligned with the normalization done above
NavigableMap<Long, Resource> tlimit = new TreeMap<>();
Resource maxAvgRes = Resources.multiply(plan.getTotalCapacity(), maxAvg);
tlimit.put(toCheck.getEarliestStartTime() - validWindow, maxAvgRes);
RLESparseResourceAllocation targetLimit = new RLESparseResourceAllocation(tlimit, plan.getResourceCalculator());
// compare using merge() limit with integral; the merged result is
// discarded, only a PlanningException raised by the merge matters
// (presumably raised by subtractTestNonNegative when limit - integral
// would become negative -- confirm against RLEOperator)
try {
RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), targetLimit, integral, RLEOperator.subtractTestNonNegative, reservation.getStartTime() - validWindow, reservation.getEndTime() + validWindow);
} catch (PlanningException p) {
throw new PlanningQuotaException("Integral (avg over time) quota capacity " + maxAvg + " over a window of " + validWindow / 1000 + " seconds, " + " would be exceeded by accepting reservation: " + reservation.getReservationId(), p);
}
}
use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.
the class InMemoryPlan method updateReservation.
/**
 * Replaces an existing reservation in this plan with the given version.
 *
 * <p>While holding the write lock, the method looks up the reservation
 * currently stored under the same id, validates the new version against the
 * policy, removes the stored one and adds the new one. If adding the new
 * version fails, the previously stored reservation is added back.
 *
 * @param reservation the new version of the reservation
 * @return {@code true} if the new version was added; {@code false} if the
 *         stored reservation could not be removed or the update was rolled
 *         back
 * @throws IllegalArgumentException if no reservation with the same id
 *           exists in the plan
 * @throws PlanningException if the policy validation fails or the rollback
 *           add raises it
 */
@Override
public boolean updateReservation(ReservationAllocation reservation) throws PlanningException {
  writeLock.lock();
  try {
    final ReservationId targetId = reservation.getReservationId();
    final ReservationAllocation existing = getReservationById(targetId);
    if (existing == null) {
      final String errMsg =
          "The specified Reservation with ID " + targetId + " does not exist in the plan";
      LOG.error(errMsg);
      throw new IllegalArgumentException(errMsg);
    }

    // The policy throws if the new version cannot be accepted.
    policy.validate(this, reservation);

    if (!removeReservation(existing)) {
      LOG.error("Unable to replace reservation: {} from plan.", reservation.getReservationId());
      return false;
    }

    // A PlanningException while adding is logged and treated as "not added".
    boolean added = false;
    try {
      added = addReservation(reservation, false);
    } catch (PlanningException e) {
      LOG.error("Unable to update reservation: {} from plan due to {}.", reservation.getReservationId(), e.getMessage());
    }

    if (!added) {
      // Roll back the delete by restoring the previously stored reservation.
      addReservation(existing, false);
      LOG.info("Rollbacked update reservation: {} from plan.", reservation.getReservationId());
      return false;
    }

    LOG.info("Successfully updated reservation: {} in plan.", reservation.getReservationId());
    return true;
  } finally {
    writeLock.unlock();
  }
}
use of org.apache.hadoop.yarn.server.resourcemanager.reservation.exceptions.PlanningException in project hadoop by apache.
the class IterativePlanner method computeJobAllocation.
/**
* Computes the allocation of a whole reservation by allocating its stages
* one at a time, in the order handed out by the StageProvider, and
* collecting the per-stage allocations into one RLE allocation.
*
* @param plan the plan to allocate against
* @param reservationId the id of the reservation being planned
* @param reservation the reservation definition (arrival, deadline, stages)
* @param user the user, passed through to the per-stage allocation
* @return the allocation covering all stages that were allocated
* @throws PlanningException if a stage cannot be allocated (and the job is
*           not an ANY job), if the ORDER_NO_GAP check fails, or if the
*           resulting allocation is empty
*/
@Override
public RLESparseResourceAllocation computeJobAllocation(Plan plan, ReservationId reservationId, ReservationDefinition reservation, String user) throws PlanningException {
// Initialize
initialize(plan, reservationId, reservation);
// Create the allocations data structure
RLESparseResourceAllocation allocations = new RLESparseResourceAllocation(plan.getResourceCalculator());
StageProvider stageProvider = new StageProvider(allocateLeft, reservation);
// Current stage
ReservationRequest currentReservationStage;
// Stage deadlines
long stageDeadline = stepRoundDown(reservation.getDeadline(), step);
// -1 means "no successor stage allocated yet" (see the != -1 test below)
long successorStartingTime = -1;
long predecessorEndTime = stepRoundDown(reservation.getArrival(), step);
// placeholder; always overwritten at the top of each loop iteration
long stageArrivalTime = -1;
// Iterate the stages in the order given by the StageProvider
// (presumably reverse order when allocateLeft is false -- confirm
// against StageProvider)
while (stageProvider.hasNext()) {
// Get current stage
currentReservationStage = stageProvider.next();
// Validate that the ReservationRequest respects basic constraints
validateInputStage(plan, currentReservationStage);
if (allocateLeft) {
// a stage starts where the previously allocated stage ended
stageArrivalTime = predecessorEndTime;
} else {
stageArrivalTime = reservation.getArrival();
if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
stageArrivalTime = computeEarliestStartingTime(plan, reservation, stageProvider.getCurrentIndex(), currentReservationStage, stageDeadline);
}
// round up to the step, but never start before the reservation arrival
stageArrivalTime = stepRoundUp(stageArrivalTime, step);
stageArrivalTime = Math.max(stageArrivalTime, reservation.getArrival());
}
// Compute the allocation of a single stage
Map<ReservationInterval, Resource> curAlloc = computeStageAllocation(plan, currentReservationStage, stageArrivalTime, stageDeadline, user, reservationId);
// A null result means no allocation was found for this stage: fail
// (unless it's an ANY job, then we simply continue).
if (curAlloc == null) {
// If it's an ANY job, we can move to the next possible request
if (jobType == ReservationRequestInterpreter.R_ANY) {
continue;
}
// Otherwise, the job cannot be allocated
throw new PlanningException("The request cannot be satisfied");
}
// Get the start & end time of the current allocation
// NOTE(review): these are boxed Longs that are unboxed in the comparisons
// below; assumes the helpers never return null -- confirm
Long stageStartTime = findEarliestTime(curAlloc);
Long stageEndTime = findLatestTime(curAlloc);
// If we did find an allocation for the stage, add it
for (Entry<ReservationInterval, Resource> entry : curAlloc.entrySet()) {
allocations.addInterval(entry.getKey(), entry.getValue());
}
// If this is an ANY clause, we have finished
if (jobType == ReservationRequestInterpreter.R_ANY) {
break;
}
// If ORDER job, set the stageDeadline of the next stage to be processed
if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
// note that the test is different left-to-right and right-to-left
// NOTE(review): && binds tighter than ||, so this reads as
// (NO_GAP && successor known && gap found) || !isNonPreemptiveAllocation;
// the last operand therefore also applies to plain R_ORDER jobs, which
// then fail with an ORDER_NO_GAP message -- confirm this is intended.
// NOTE(review): successorStartingTime is a local that is only assigned
// in the !allocateLeft branch below, so when allocateLeft is true the
// gap test (predecessorEndTime < stageStartTime) is never reached,
// provided allocateLeft does not change during the loop -- confirm.
if (jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP && successorStartingTime != -1 && ((allocateLeft && predecessorEndTime < stageStartTime) || (!allocateLeft && (stageEndTime < successorStartingTime))) || (!isNonPreemptiveAllocation(curAlloc))) {
throw new PlanningException("The allocation found does not respect ORDER_NO_GAP");
}
if (allocateLeft) {
// Store the stageEndTime: the next stage may not start before it
predecessorEndTime = stageEndTime;
} else {
// Store the stageStartTime and set the new stageDeadline
successorStartingTime = stageStartTime;
stageDeadline = stageStartTime;
}
}
}
// If the allocation is empty, return an error
if (allocations.isEmpty()) {
throw new PlanningException("The request cannot be satisfied");
}
return allocations;
}
Aggregations