Use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.
The class StageAllocatorGreedy, method computeStageAllocation.
@Override
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) throws PlanningException {
Resource totalCapacity = plan.getTotalCapacity();
Map<ReservationInterval, Resource> allocationRequests = new HashMap<ReservationInterval, Resource>();
// compute the gang as a resource and get the duration
Resource gang = Resources.multiply(rr.getCapability(), rr.getConcurrency());
long dur = rr.getDuration();
long step = plan.getStep();
// ceil the duration to the next multiple of the plan step
if (dur % step != 0) {
dur += (step - (dur % step));
}
// we know for sure that this division has no remainder (part of the
// contract with the user, validated beforehand)
int gangsToPlace = rr.getNumContainers() / rr.getConcurrency();
int maxGang = 0;
RLESparseResourceAllocation netAvailable = plan.getAvailableResourceOverTime(user, oldId, stageEarliestStart, stageDeadline);
netAvailable = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), plan.getTotalCapacity(), netAvailable, planModifications, RLEOperator.subtract, stageEarliestStart, stageDeadline);
// loop trying to place gangs until we are done, or we are considering
// an invalid range of times
while (gangsToPlace > 0 && stageDeadline - dur >= stageEarliestStart) {
// as we run along we remember how many gangs we can fit, and what
// was the most constraining moment in time (we will restart just
// after that to place the next batch)
maxGang = gangsToPlace;
long minPoint = stageDeadline;
int curMaxGang = maxGang;
// move backward
for (long t = stageDeadline - plan.getStep(); t >= stageDeadline - dur && maxGang > 0; t = t - plan.getStep()) {
Resource netAvailableRes = netAvailable.getCapacityAtTime(t);
// compute maximum number of gangs we could fit
curMaxGang = (int) Math.floor(Resources.divide(plan.getResourceCalculator(), totalCapacity, netAvailableRes, gang));
// pick the minimum between available resources in this instant, and how
// many gangs we have to place
curMaxGang = Math.min(gangsToPlace, curMaxGang);
// compare with the previous max, and remember *where* we found
// the minimum (useful for next attempts)
if (curMaxGang <= maxGang) {
maxGang = curMaxGang;
minPoint = t;
}
}
// if we were able to place any gangs, record this and decrement
// gangsToPlace
if (maxGang > 0) {
gangsToPlace -= maxGang;
ReservationInterval reservationInt = new ReservationInterval(stageDeadline - dur, stageDeadline);
Resource reservationRes = Resources.multiply(rr.getCapability(), rr.getConcurrency() * maxGang);
// remember occupied space (plan is read-only till we find a plausible
// allocation for the entire request). This is needed since we might be
// placing other ReservationRequests within the same
// ReservationDefinition, and we must avoid double-counting the
// available resources
planModifications.addInterval(reservationInt, reservationRes);
allocationRequests.put(reservationInt, reservationRes);
}
// reset our new starting point (stageDeadline) to the most constraining
// point so far; we will look "left" of that to find more places where
// to schedule gangs (for sure nothing on the "right" of this point can
// fit a full gang).
stageDeadline = minPoint;
}
// if no gangs are left to place we succeed and return the allocation
if (gangsToPlace == 0) {
return allocationRequests;
} else {
// If we are here we did not manage to satisfy this request, so we must
// remove the unwanted side-effects from planModifications (needed
// for ANY).
for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocationRequests.entrySet()) {
planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
}
// and return null to signal failure in this allocation
return null;
}
}
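The duration-rounding and gang-sizing arithmetic at the top of this method is easy to check in isolation. A minimal, self-contained sketch with made-up step and request values (no YARN types involved):

public class GangMath {
  public static void main(String[] args) {
    long step = 1000L; // plan step, e.g. one second
    long dur = 2500L;  // requested stage duration
    // ceil the duration to the next multiple of the plan step
    if (dur % step != 0) {
      dur += (step - (dur % step));
    }
    System.out.println(dur); // 3000

    int numContainers = 10;
    int concurrency = 2; // containers per gang
    // exact division is part of the contract, validated beforehand
    int gangsToPlace = numContainers / concurrency;
    System.out.println(gangsToPlace); // 5
  }
}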
Use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.
The class StageAllocatorGreedyRLE, method computeStageAllocation.
@Override
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) throws PlanningException {
// abort early if the interval is not satisfiable
if (stageEarliestStart + rr.getDuration() > stageDeadline) {
return null;
}
Map<ReservationInterval, Resource> allocationRequests = new HashMap<ReservationInterval, Resource>();
Resource totalCapacity = plan.getTotalCapacity();
// compute the gang as a resource and get the duration
Resource sizeOfGang = Resources.multiply(rr.getCapability(), rr.getConcurrency());
long dur = rr.getDuration();
long step = plan.getStep();
// ceil the duration to the next multiple of the plan step
if (dur % step != 0) {
dur += (step - (dur % step));
}
// we know for sure that this division has no remainder (part of the
// contract with the user, validated beforehand)
int gangsToPlace = rr.getNumContainers() / rr.getConcurrency();
// get available resources from plan
RLESparseResourceAllocation netRLERes = plan.getAvailableResourceOverTime(user, oldId, stageEarliestStart, stageDeadline);
// remove plan modifications
netRLERes = RLESparseResourceAllocation.merge(plan.getResourceCalculator(), totalCapacity, netRLERes, planModifications, RLEOperator.subtract, stageEarliestStart, stageDeadline);
// loop trying to place gangs until we are done, or we are considering
// an invalid range of times
while (gangsToPlace > 0 && stageEarliestStart + dur <= stageDeadline) {
// as we run along we remember how many gangs we can fit, and what
// was the most constraining moment in time (we will restart just
// after that to place the next batch)
int maxGang = gangsToPlace;
long minPoint = -1;
// focus our attention on the time-range under consideration
NavigableMap<Long, Resource> partialMap = netRLERes.getRangeOverlapping(stageEarliestStart, stageDeadline).getCumulative();
// reverse the map for right-to-left allocation
if (!allocateLeft) {
partialMap = partialMap.descendingMap();
}
Iterator<Entry<Long, Resource>> netIt = partialMap.entrySet().iterator();
long oldT = stageDeadline;
// inner loop: scan the current range one RLE interval at a time (the
// restart past the most constraining point is handled by the outer loop)
while (maxGang > 0 && netIt.hasNext()) {
long t;
Resource curAvailRes;
Entry<Long, Resource> e = netIt.next();
if (allocateLeft) {
t = Math.max(e.getKey(), stageEarliestStart);
curAvailRes = e.getValue();
} else {
t = oldT;
oldT = e.getKey();
// attention: higher means lower, because we reversed the map direction
curAvailRes = partialMap.higherEntry(t).getValue();
}
// check exit/skip conditions
if (curAvailRes == null) {
// skip undefined regions (should not happen except at the borders)
continue;
}
if (exitCondition(t, stageEarliestStart, stageDeadline, dur)) {
break;
}
// compute maximum number of gangs we could fit
int curMaxGang = (int) Math.floor(Resources.divide(plan.getResourceCalculator(), totalCapacity, curAvailRes, sizeOfGang));
curMaxGang = Math.min(gangsToPlace, curMaxGang);
// compare with the previous max, and remember *where* we found
// the minimum (useful for next attempts)
if (curMaxGang <= maxGang) {
maxGang = curMaxGang;
minPoint = t;
}
}
// update data structures that retain the progress made so far
gangsToPlace = trackProgress(planModifications, rr, stageEarliestStart, stageDeadline, allocationRequests, dur, gangsToPlace, maxGang);
// reset the next range of time-intervals to deal with
if (allocateLeft) {
// move the earliest start to the next point after minPoint, capped at the
// end of this allocation
if (partialMap.higherKey(minPoint) == null) {
stageEarliestStart = stageEarliestStart + dur;
} else {
stageEarliestStart = Math.min(partialMap.higherKey(minPoint), stageEarliestStart + dur);
}
} else {
// same as above moving right-to-left
if (partialMap.higherKey(minPoint) == null) {
stageDeadline = stageDeadline - dur;
} else {
stageDeadline = Math.max(partialMap.higherKey(minPoint), stageDeadline - dur);
}
}
}
// if no gangs are left to place we succeed and return the allocation
if (gangsToPlace == 0) {
return allocationRequests;
} else {
// If we are here we did not manage to satisfy this request, so we must
// remove the unwanted side-effects from planModifications (needed
// for ANY).
for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocationRequests.entrySet()) {
planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
}
// and return null to signal failure in this allocation
return null;
}
}
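The right-to-left branch above leans on a NavigableMap subtlety: once the cumulative map is reversed with descendingMap(), higherEntry(t) yields the entry that starts strictly earlier in time, which is why the code notes that "higher means lower". A standalone sketch with toy capacities (plain TreeMap, no YARN types):

import java.util.NavigableMap;
import java.util.TreeMap;

public class DescendingScan {
  public static void main(String[] args) {
    NavigableMap<Long, Integer> rle = new TreeMap<>();
    rle.put(0L, 100); // 100 units available from t=0
    rle.put(10L, 60); // drops to 60 at t=10
    rle.put(20L, 80); // rises to 80 at t=20

    NavigableMap<Long, Integer> reversed = rle.descendingMap();
    // in the reversed view, "higher" than t=20 is the entry that starts
    // strictly before it in time, i.e. the segment beginning at t=10
    System.out.println(reversed.higherEntry(20L)); // prints 10=60
  }
}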
Use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.
The class IterativePlanner, method computeJobAllocation.
@Override
public RLESparseResourceAllocation computeJobAllocation(Plan plan, ReservationId reservationId, ReservationDefinition reservation, String user) throws PlanningException {
// Initialize
initialize(plan, reservationId, reservation);
// Create the allocations data structure
RLESparseResourceAllocation allocations = new RLESparseResourceAllocation(plan.getResourceCalculator());
StageProvider stageProvider = new StageProvider(allocateLeft, reservation);
// Current stage
ReservationRequest currentReservationStage;
// Stage deadlines
long stageDeadline = stepRoundDown(reservation.getDeadline(), step);
long successorStartingTime = -1;
long predecessorEndTime = stepRoundDown(reservation.getArrival(), step);
long stageArrivalTime = -1;
// Iterate the stages in reverse order
while (stageProvider.hasNext()) {
// Get current stage
currentReservationStage = stageProvider.next();
// Validate that the ReservationRequest respects basic constraints
validateInputStage(plan, currentReservationStage);
if (allocateLeft) {
stageArrivalTime = predecessorEndTime;
} else {
stageArrivalTime = reservation.getArrival();
if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
stageArrivalTime = computeEarliestStartingTime(plan, reservation, stageProvider.getCurrentIndex(), currentReservationStage, stageDeadline);
}
stageArrivalTime = stepRoundUp(stageArrivalTime, step);
stageArrivalTime = Math.max(stageArrivalTime, reservation.getArrival());
}
// Compute the allocation of a single stage
Map<ReservationInterval, Resource> curAlloc = computeStageAllocation(plan, currentReservationStage, stageArrivalTime, stageDeadline, user, reservationId);
// If we did not find an allocation for this stage, the job fails
// (unless it's an ANY job, then we simply continue).
if (curAlloc == null) {
// If it's an ANY job, we can move to the next possible request
if (jobType == ReservationRequestInterpreter.R_ANY) {
continue;
}
// Otherwise, the job cannot be allocated
throw new PlanningException("The request cannot be satisfied");
}
// Get the start & end time of the current allocation
Long stageStartTime = findEarliestTime(curAlloc);
Long stageEndTime = findLatestTime(curAlloc);
// If we did find an allocation for the stage, add it
for (Entry<ReservationInterval, Resource> entry : curAlloc.entrySet()) {
allocations.addInterval(entry.getKey(), entry.getValue());
}
// If this is an ANY clause, we have finished
if (jobType == ReservationRequestInterpreter.R_ANY) {
break;
}
// If ORDER job, set the stageDeadline of the next stage to be processed
if (jobType == ReservationRequestInterpreter.R_ORDER || jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP) {
// note that the test is different left-to-right and right-to-left
if (jobType == ReservationRequestInterpreter.R_ORDER_NO_GAP && successorStartingTime != -1 && ((allocateLeft && predecessorEndTime < stageStartTime) || (!allocateLeft && (stageEndTime < successorStartingTime))) || (!isNonPreemptiveAllocation(curAlloc))) {
throw new PlanningException("The allocation found does not respect ORDER_NO_GAP");
}
if (allocateLeft) {
// Store the stageStartTime and set the new stageDeadline
predecessorEndTime = stageEndTime;
} else {
// Store the stageStartTime and set the new stageDeadline
successorStartingTime = stageStartTime;
stageDeadline = stageStartTime;
}
}
}
// If the allocation is empty, return an error
if (allocations.isEmpty()) {
throw new PlanningException("The request cannot be satisfied");
}
return allocations;
}
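For ORDER jobs processed right-to-left, each placed stage's start time becomes the deadline of the stage handled next. A toy sketch of that chaining, assuming every stage lands flush against its deadline (made-up durations; the real start times come from the stage allocator):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class OrderChaining {
  public static void main(String[] args) {
    List<Long> stageDurations = new ArrayList<>(Arrays.asList(300L, 200L, 100L));
    Collections.reverse(stageDurations); // iterate the stages in reverse order

    long stageDeadline = 1000L; // job deadline
    for (long dur : stageDurations) {
      long stageStartTime = stageDeadline - dur; // pretend the allocation is flush
      System.out.println("[" + stageStartTime + ", " + stageDeadline + ")");
      stageDeadline = stageStartTime; // this start caps the next stage
    }
    // prints [900, 1000), [700, 900), [400, 700)
  }
}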
Use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.
The class PlanningAlgorithm, method allocateUser.
/**
* Performs the actual allocation for a ReservationDefinition within a Plan.
*
* @param reservationId the identifier of the reservation
* @param user the user who owns the reservation
* @param plan the Plan to which the reservation must be fitted
* @param contract encapsulates the resources required by the user for his
* session
* @param oldReservation the existing reservation (null if none)
* @return whether the allocateUser function was successful or not
*
* @throws PlanningException if the session cannot be fitted into the plan
* @throws ContractValidationException if the contract fails validation
*/
protected boolean allocateUser(ReservationId reservationId, String user, Plan plan, ReservationDefinition contract, ReservationAllocation oldReservation) throws PlanningException, ContractValidationException {
// Adjust the ResourceDefinition to account for system "imperfections"
// (e.g., scheduling delays for large containers).
ReservationDefinition adjustedContract = adjustContract(plan, contract);
// Compute the job allocation
RLESparseResourceAllocation allocation = computeJobAllocation(plan, reservationId, adjustedContract, user);
// If no job allocation was found, fail
if (allocation == null) {
throw new PlanningException("The planning algorithm could not find a valid allocation" + " for your request");
}
// Translate the allocation to a map (with zero paddings)
long step = plan.getStep();
long jobArrival = stepRoundUp(adjustedContract.getArrival(), step);
long jobDeadline = stepRoundUp(adjustedContract.getDeadline(), step);
Map<ReservationInterval, Resource> mapAllocations = allocationsToPaddedMap(allocation, jobArrival, jobDeadline);
// Create the reservation
ReservationAllocation capReservation = new InMemoryReservationAllocation(
reservationId, // ID
adjustedContract, // Contract
user, // User name
plan.getQueueName(), // Queue name
findEarliestTime(mapAllocations), // Earliest start time
findLatestTime(mapAllocations), // Latest end time
mapAllocations, // Allocations
plan.getResourceCalculator(), // Resource calculator
plan.getMinimumAllocation()); // Minimum allocation
// Add (or update) the reservation allocation
if (oldReservation != null) {
return plan.updateReservation(capReservation);
} else {
return plan.addReservation(capReservation, false);
}
}
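allocateUser (and IterativePlanner above) snap arrival and deadline to the plan step via stepRoundUp and stepRoundDown. A hedged sketch of the standard integer idiom such helpers implement; the actual Hadoop bodies are not shown here and may differ in detail:

public class StepRounding {
  // round t down to the nearest multiple of step
  static long stepRoundDown(long t, long step) {
    return (t / step) * step;
  }

  // round t up to the nearest multiple of step
  static long stepRoundUp(long t, long step) {
    return ((t + step - 1) / step) * step;
  }

  public static void main(String[] args) {
    System.out.println(stepRoundDown(2500L, 1000L)); // 2000
    System.out.println(stepRoundUp(2500L, 1000L));   // 3000
    System.out.println(stepRoundUp(3000L, 1000L));   // 3000 (already aligned)
  }
}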
Use of org.apache.hadoop.yarn.server.resourcemanager.reservation.RLESparseResourceAllocation in project hadoop by apache.
The class StageAllocatorLowCostAligned, method computeStageAllocation.
// computeStageAllocation()
@Override
public Map<ReservationInterval, Resource> computeStageAllocation(Plan plan, Map<Long, Resource> planLoads, RLESparseResourceAllocation planModifications, ReservationRequest rr, long stageEarliestStart, long stageDeadline, String user, ReservationId oldId) {
// Initialize
ResourceCalculator resCalc = plan.getResourceCalculator();
Resource capacity = plan.getTotalCapacity();
long step = plan.getStep();
// Create allocationRequests
RLESparseResourceAllocation allocationRequests = new RLESparseResourceAllocation(plan.getResourceCalculator());
// Initialize parameters
long duration = stepRoundUp(rr.getDuration(), step);
int windowSizeInDurations = (int) ((stageDeadline - stageEarliestStart) / duration);
int totalGangs = rr.getNumContainers() / rr.getConcurrency();
int numContainersPerGang = rr.getConcurrency();
Resource gang = Resources.multiply(rr.getCapability(), numContainersPerGang);
// Set maxGangsPerUnit
int maxGangsPerUnit = (int) Math.max(Math.floor(((double) totalGangs) / windowSizeInDurations), 1);
maxGangsPerUnit = Math.max(maxGangsPerUnit / smoothnessFactor, 1);
// If window size is too small, return null
if (windowSizeInDurations <= 0) {
return null;
}
// Initialize tree sorted by costs
TreeSet<DurationInterval> durationIntervalsSortedByCost = new TreeSet<DurationInterval>(new Comparator<DurationInterval>() {
@Override
public int compare(DurationInterval val1, DurationInterval val2) {
int cmp = Double.compare(val1.getTotalCost(), val2.getTotalCost());
if (cmp != 0) {
return cmp;
}
return (-1) * Long.compare(val1.getEndTime(), val2.getEndTime());
}
});
// Add durationIntervals that end at (stageDeadline - n*duration) for some n.
for (long intervalEnd = stageDeadline; intervalEnd >= stageEarliestStart + duration; intervalEnd -= duration) {
long intervalStart = intervalEnd - duration;
// Get duration interval [intervalStart,intervalEnd)
DurationInterval durationInterval = getDurationInterval(intervalStart, intervalEnd, planLoads, planModifications, capacity, resCalc, step);
// If the interval can fit a gang, add it to the tree
if (durationInterval.canAllocate(gang, capacity, resCalc)) {
durationIntervalsSortedByCost.add(durationInterval);
}
}
// Allocate
int remainingGangs = totalGangs;
while (remainingGangs > 0) {
// If no durationInterval can fit a gang, break and return null
if (durationIntervalsSortedByCost.isEmpty()) {
break;
}
// Get best duration interval
DurationInterval bestDurationInterval = durationIntervalsSortedByCost.first();
int numGangsToAllocate = Math.min(maxGangsPerUnit, remainingGangs);
numGangsToAllocate = Math.min(numGangsToAllocate, bestDurationInterval.numCanFit(gang, capacity, resCalc));
// Add it
remainingGangs -= numGangsToAllocate;
ReservationInterval reservationInt = new ReservationInterval(bestDurationInterval.getStartTime(), bestDurationInterval.getEndTime());
Resource reservationRes = Resources.multiply(rr.getCapability(), rr.getConcurrency() * numGangsToAllocate);
planModifications.addInterval(reservationInt, reservationRes);
allocationRequests.addInterval(reservationInt, reservationRes);
// Remove from tree
durationIntervalsSortedByCost.remove(bestDurationInterval);
// Get updated interval
DurationInterval updatedDurationInterval = getDurationInterval(bestDurationInterval.getStartTime(), bestDurationInterval.getStartTime() + duration, planLoads, planModifications, capacity, resCalc, step);
// Add to tree, if possible
if (updatedDurationInterval.canAllocate(gang, capacity, resCalc)) {
durationIntervalsSortedByCost.add(updatedDurationInterval);
}
}
// Get the final allocation
Map<ReservationInterval, Resource> allocations = allocationRequests.toIntervalMap();
// If no gangs are left to place we succeed and return the allocation
if (remainingGangs <= 0) {
return allocations;
} else {
// We remove the unwanted side-effects from planModifications (needed for ANY).
for (Map.Entry<ReservationInterval, Resource> tempAllocation : allocations.entrySet()) {
planModifications.removeInterval(tempAllocation.getKey(), tempAllocation.getValue());
}
// Return null to signal failure in this allocation
return null;
}
}
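The TreeSet above keeps candidate intervals ordered by ascending total cost, breaking ties toward the later end time, so first() always returns the cheapest (and, among equals, latest-ending) interval. A toy illustration of that comparator, with a hypothetical ToyInterval standing in for DurationInterval:

import java.util.Comparator;
import java.util.TreeSet;

public class CostOrderedTree {
  static final class ToyInterval {
    final long end;
    final double cost;
    ToyInterval(long end, double cost) { this.end = end; this.cost = cost; }
    @Override
    public String toString() { return "end=" + end + " cost=" + cost; }
  }

  public static void main(String[] args) {
    TreeSet<ToyInterval> byCost = new TreeSet<>(new Comparator<ToyInterval>() {
      @Override
      public int compare(ToyInterval a, ToyInterval b) {
        int cmp = Double.compare(a.cost, b.cost);
        if (cmp != 0) {
          return cmp;
        }
        return (-1) * Long.compare(a.end, b.end); // later end ranks first
      }
    });
    byCost.add(new ToyInterval(100, 2.0));
    byCost.add(new ToyInterval(200, 1.0));
    byCost.add(new ToyInterval(300, 1.0));
    System.out.println(byCost.first()); // end=300 cost=1.0
  }
}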