use of org.apache.hadoop.yarn.api.records.Priority in project tez by apache.
the class YarnTaskSchedulerService method preemptIfNeeded.
boolean preemptIfNeeded() {
if (preemptionPercentage == 0) {
// turned off
return true;
}
ContainerId[] preemptedContainers = null;
int numPendingRequestsToService = 0;
synchronized (this) {
Resource freeResources = amRmClient.getAvailableResources();
if (LOG.isDebugEnabled()) {
LOG.debug(constructPreemptionPeriodicLog(freeResources));
} else {
if (numHeartbeats % 50 == 1) {
LOG.info(constructPreemptionPeriodicLog(freeResources));
}
}
assert freeResources.getMemory() >= 0;
CookieContainerRequest highestPriRequest = null;
int numHighestPriRequests = 0;
for (CookieContainerRequest request : taskRequests.values()) {
if (highestPriRequest == null) {
highestPriRequest = request;
numHighestPriRequests = 1;
} else if (isHigherPriority(request.getPriority(), highestPriRequest.getPriority())) {
highestPriRequest = request;
numHighestPriRequests = 1;
} else if (request.getPriority().equals(highestPriRequest.getPriority())) {
numHighestPriRequests++;
}
}
if (highestPriRequest == null) {
// nothing pending
resetHighestWaitingPriority(null);
return true;
}
// reset the wait time when waiting priority changes to prevent carry over of the value
if (highestWaitingRequestPriority == null || !highestPriRequest.getPriority().equals(highestWaitingRequestPriority)) {
resetHighestWaitingPriority(highestPriRequest.getPriority());
}
long currTime = System.currentTimeMillis();
if (highestWaitingRequestWaitStartTime == 0) {
highestWaitingRequestWaitStartTime = currTime;
}
boolean preemptionWaitDeadlineCrossed = (currTime - highestWaitingRequestWaitStartTime) > preemptionMaxWaitTime;
if (!preemptionWaitDeadlineCrossed && fitsIn(highestPriRequest.getCapability(), freeResources)) {
if (LOG.isDebugEnabled()) {
LOG.debug(highestPriRequest + " fits in free resources");
} else {
if (numHeartbeats % 50 == 1) {
LOG.info(highestPriRequest + " fits in free resources");
}
}
return true;
}
if (preemptionWaitDeadlineCrossed) {
// check if anything lower priority is running - priority inversion
// this check could have been done earlier but in the common case
// this would be unnecessary since there are usually requests pending
// in the normal case without priority inversion. So do this expensive
// iteration now
boolean lowerPriRunning = false;
for (Map.Entry<Object, Container> entry : taskAllocations.entrySet()) {
HeldContainer heldContainer = heldContainers.get(entry.getValue().getId());
CookieContainerRequest lastTaskInfo = heldContainer.getLastTaskInfo();
Priority taskPriority = lastTaskInfo.getPriority();
Object signature = lastTaskInfo.getCookie().getContainerSignature();
if (isHigherPriority(highestPriRequest.getPriority(), taskPriority)) {
// lower priority task is running
if (containerSignatureMatcher.isExactMatch(highestPriRequest.getCookie().getContainerSignature(), signature)) {
// exact match with different priorities
continue;
}
lowerPriRunning = true;
break;
}
}
if (!lowerPriRunning) {
// nothing lower priority running
// normal case of many pending requests without priority inversion
resetHighestWaitingPriority(null);
return true;
}
LOG.info("Preemption deadline crossed at pri: " + highestPriRequest.getPriority() + " numRequests: " + numHighestPriRequests + ". " + constructPreemptionPeriodicLog(freeResources));
}
// highest priority request will not fit in existing free resources
// free up some more
// TODO this is subject to error wrt RM resource normalization
numPendingRequestsToService = scaleDownByPreemptionPercentage(numHighestPriRequests, preemptionPercentage);
if (numPendingRequestsToService < 1) {
// nothing to preempt - reset preemption last heartbeat
return true;
}
if (LOG.isDebugEnabled()) {
LOG.debug("Trying to service " + numPendingRequestsToService + " out of total " + numHighestPriRequests + " pending requests at pri: " + highestPriRequest.getPriority());
}
for (int i = 0; i < numPendingRequestsToService; ++i) {
// This request must have been considered for matching with all existing
// containers when request was made.
Container lowestPriNewContainer = null;
// Check whether an unused new container can be released to free up resources first
for (HeldContainer heldContainer : delayedContainerManager.delayedContainers) {
if (!heldContainer.isNew()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Reused container exists. Wait for assignment loop to release it. " + heldContainer.getContainer().getId());
}
return true;
}
if (heldContainer.geNumAssignmentAttempts() < 3) {
// we haven't tried to assign this container at node/rack/ANY
if (LOG.isDebugEnabled()) {
LOG.debug("Brand new container. Wait for assignment loop to match it. " + heldContainer.getContainer().getId());
}
return true;
}
Container container = heldContainer.getContainer();
if (lowestPriNewContainer == null || isHigherPriority(lowestPriNewContainer.getPriority(), container.getPriority())) {
// there is a lower priority new container
lowestPriNewContainer = container;
}
}
if (lowestPriNewContainer != null) {
LOG.info("Preempting new container: " + lowestPriNewContainer.getId() + " with priority: " + lowestPriNewContainer.getPriority() + " to free resource for request: " + highestPriRequest + " . Current free resources: " + freeResources);
numPendingRequestsToService--;
releaseUnassignedContainers(Collections.singletonList(lowestPriNewContainer));
// We are returning an unused resource back to the RM. The RM thinks it
// has serviced our initial request and will not re-allocate this back
// to us anymore. So we need to ask for this again. If there is no
// outstanding request at that priority then it's fine to not ask again.
// See TEZ-915 for more details
maybeRescheduleContainerAtPriority(lowestPriNewContainer.getPriority());
// come back and free more new containers if needed
continue;
}
}
if (numPendingRequestsToService < 1) {
return true;
}
// this assert is a no-op in production but helps catch invalid assumptions during testing
assert delayedContainerManager.delayedContainers.isEmpty();
if (!delayedContainerManager.delayedContainers.isEmpty()) {
LOG.warn("Expected delayed containers to be empty. " + constructPreemptionPeriodicLog(freeResources));
}
Priority preemptedTaskPriority = null;
int numEntriesAtPreemptedPriority = 0;
for (Map.Entry<Object, Container> entry : taskAllocations.entrySet()) {
HeldContainer heldContainer = heldContainers.get(entry.getValue().getId());
CookieContainerRequest lastTaskInfo = heldContainer.getLastTaskInfo();
Priority taskPriority = lastTaskInfo.getPriority();
Object signature = lastTaskInfo.getCookie().getContainerSignature();
if (!isHigherPriority(highestPriRequest.getPriority(), taskPriority)) {
// higher or same priority
continue;
}
if (containerSignatureMatcher.isExactMatch(highestPriRequest.getCookie().getContainerSignature(), signature)) {
// exact match with different priorities
continue;
}
if (preemptedTaskPriority == null || !isHigherPriority(taskPriority, preemptedTaskPriority)) {
// keep the lower priority
if (taskPriority.equals(preemptedTaskPriority)) {
numEntriesAtPreemptedPriority++;
} else {
// this is at a lower priority than existing
numEntriesAtPreemptedPriority = 1;
}
preemptedTaskPriority = taskPriority;
}
}
if (preemptedTaskPriority != null) {
int newNumPendingRequestsToService = scaleDownByPreemptionPercentage(Math.min(numEntriesAtPreemptedPriority, numHighestPriRequests), preemptionPercentage);
numPendingRequestsToService = Math.min(newNumPendingRequestsToService, numPendingRequestsToService);
if (numPendingRequestsToService < 1) {
return true;
}
// wait for enough heartbeats since this request became active for preemption
if ((numHeartbeats - heartbeatAtLastPreemption) < numHeartbeatsBetweenPreemptions) {
// not enough heartbeats yet - do not advance the last-preemption heartbeat count
return false;
}
LOG.info("Trying to service " + numPendingRequestsToService + " out of total " + numHighestPriRequests + " pending requests at pri: " + highestPriRequest.getPriority() + " by preempting from " + numEntriesAtPreemptedPriority + " running tasks at priority: " + preemptedTaskPriority);
// found something to preempt. get others of the same priority
preemptedContainers = new ContainerId[numPendingRequestsToService];
int currIndex = 0;
for (Map.Entry<Object, Container> entry : taskAllocations.entrySet()) {
HeldContainer heldContainer = heldContainers.get(entry.getValue().getId());
CookieContainerRequest lastTaskInfo = heldContainer.getLastTaskInfo();
Priority taskPriority = lastTaskInfo.getPriority();
Container container = entry.getValue();
if (preemptedTaskPriority.equals(taskPriority)) {
// taskAllocations map will iterate from oldest to newest assigned containers
// keep the N newest containersIds with the matching priority
preemptedContainers[currIndex++ % numPendingRequestsToService] = container.getId();
}
}
// the app client will be notified after the container is killed
// and we get its completed container status
}
}
// upcall outside locks
if (preemptedContainers != null) {
for (int i = 0; i < numPendingRequestsToService; ++i) {
ContainerId cId = preemptedContainers[i];
if (cId != null) {
LOG.info("Preempting container: " + cId + " currently allocated to a task.");
getContext().preemptContainer(cId);
}
}
}
return true;
}
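The preemption logic above leans on an isHigherPriority helper that is not part of this excerpt. A minimal sketch, assuming the Tez convention that numerically lower Priority values are more urgent (the actual helper may differ):
private boolean isHigherPriority(Priority lhs, Priority rhs) {
  // smaller numeric value means higher scheduling priority
  return lhs.getPriority() < rhs.getPriority();
}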
use of org.apache.hadoop.yarn.api.records.Priority in project tez by apache.
the class YarnTaskSchedulerService method assignDelayedContainer.
/**
 * Try to assign a re-used (held) container to pending task requests
 * @param heldContainer the held container to be assigned to tasks
 * @return map of assigned task requests to containers, or null if the AM state does not permit assignment
 */
private synchronized Map<CookieContainerRequest, Container> assignDelayedContainer(HeldContainer heldContainer) {
AMState state = getContext().getAMState();
boolean isNew = heldContainer.isNew();
if (LOG.isDebugEnabled()) {
LOG.debug("Trying to assign a delayed container" + ", containerId=" + heldContainer.getContainer().getId() + ", nextScheduleTime=" + heldContainer.getNextScheduleTime() + ", containerExpiryTime=" + heldContainer.getContainerExpiryTime() + ", AMState=" + state + ", matchLevel=" + heldContainer.getLocalityMatchLevel() + ", taskRequestsCount=" + taskRequests.size() + ", heldContainers=" + heldContainers.size() + ", delayedContainers=" + delayedContainerManager.delayedContainers.size() + ", isNew=" + isNew);
}
if (state.equals(AMState.IDLE) || taskRequests.isEmpty()) {
// Compute min held containers.
if (getContext().isSession() && sessionNumMinHeldContainers > 0 && sessionMinHeldContainers.isEmpty()) {
// session mode and need to hold onto containers and not done so already
determineMinHeldContainers();
}
heldContainer.resetLocalityMatchLevel();
long currentTime = System.currentTimeMillis();
boolean releaseContainer = false;
if (isNew || (heldContainer.getContainerExpiryTime() <= currentTime && idleContainerTimeoutMin != -1)) {
// new container is possibly a spurious race condition allocation.
if (getContext().isSession() && sessionMinHeldContainers.contains(heldContainer.getContainer().getId())) {
// There are no outstanding requests, so it's safe to hold new containers.
// We may have received more containers than necessary and some are unused.
// In session mode, when the container is in the set of chosen min held
// containers, increase the idle container expiry time to keep it consistent
// with the rest of the code.
heldContainer.setContainerExpiryTime(getHeldContainerExpireTime(currentTime));
} else {
releaseContainer = true;
}
}
if (releaseContainer) {
LOG.info("No taskRequests. Container's idle timeout delay expired or is new. " + "Releasing container" + ", containerId=" + heldContainer.getContainer().getId() + ", containerExpiryTime=" + heldContainer.getContainerExpiryTime() + ", idleTimeout=" + idleContainerTimeoutMin + ", taskRequestsCount=" + taskRequests.size() + ", heldContainers=" + heldContainers.size() + ", delayedContainers=" + delayedContainerManager.delayedContainers.size() + ", isNew=" + isNew);
releaseUnassignedContainers(Collections.singletonList((heldContainer.getContainer())));
} else {
// no outstanding work and container idle timeout not expired
if (LOG.isDebugEnabled()) {
LOG.debug("Holding onto idle container with no work. CId: " + heldContainer.getContainer().getId() + " with expiry: " + heldContainer.getContainerExpiryTime() + " currentTime: " + currentTime + " next look: " + (currentTime + localitySchedulingDelay));
}
// put back and wait for new requests until expiry
heldContainer.resetLocalityMatchLevel();
delayedContainerManager.addDelayedContainer(heldContainer.getContainer(), currentTime + localitySchedulingDelay);
}
} else if (state.equals(AMState.RUNNING_APP)) {
// clear min held containers since we need to allocate to tasks
if (!sessionMinHeldContainers.isEmpty()) {
// update the expire time of min held containers so that they are
// not released immediately, when new requests come in, if they come in
// just before these containers are about to expire (race condition)
long currentTime = System.currentTimeMillis();
for (ContainerId minHeldCId : sessionMinHeldContainers) {
HeldContainer minHeldContainer = heldContainers.get(minHeldCId);
if (minHeldContainer != null) {
// check in case it got removed because of external reasons
minHeldContainer.setContainerExpiryTime(getHeldContainerExpireTime(currentTime));
}
}
sessionMinHeldContainers.clear();
}
HeldContainer.LocalityMatchLevel localityMatchLevel = heldContainer.getLocalityMatchLevel();
Map<CookieContainerRequest, Container> assignedContainers = new HashMap<CookieContainerRequest, Container>();
Container containerToAssign = heldContainer.container;
heldContainer.incrementAssignmentAttempts();
// always try node local matches for other levels
if (isNew || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NEW) || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NODE) || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.RACK) || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NON_LOCAL)) {
assignReUsedContainerWithLocation(containerToAssign, NODE_LOCAL_ASSIGNER, assignedContainers, true);
if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
LOG.debug("Failed to assign tasks to delayed container using node" + ", containerId=" + heldContainer.getContainer().getId());
}
}
// if scheduling delay is 0, match at RACK allowed without a sleep
if (assignedContainers.isEmpty()) {
if ((reuseRackLocal || isNew) && (localitySchedulingDelay == 0 || (localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.RACK) || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NON_LOCAL)))) {
assignReUsedContainerWithLocation(containerToAssign, RACK_LOCAL_ASSIGNER, assignedContainers, false);
if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
LOG.debug("Failed to assign tasks to delayed container using rack" + ", containerId=" + heldContainer.getContainer().getId());
}
}
}
// if scheduling delay is 0, match at NON-LOCAL allowed without a sleep
if (assignedContainers.isEmpty()) {
if ((reuseNonLocal || isNew) && (localitySchedulingDelay == 0 || localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NON_LOCAL))) {
assignReUsedContainerWithLocation(containerToAssign, NON_LOCAL_ASSIGNER, assignedContainers, false);
if (LOG.isDebugEnabled() && assignedContainers.isEmpty()) {
LOG.debug("Failed to assign tasks to delayed container using non-local" + ", containerId=" + heldContainer.getContainer().getId());
}
}
}
if (assignedContainers.isEmpty()) {
long currentTime = System.currentTimeMillis();
// get new containers from YARN to match the pending request
if (!isNew && heldContainer.getContainerExpiryTime() <= currentTime && idleContainerTimeoutMin != -1) {
LOG.info("Container's idle timeout expired. Releasing container" + ", containerId=" + heldContainer.container.getId() + ", containerExpiryTime=" + heldContainer.getContainerExpiryTime() + ", idleTimeoutMin=" + idleContainerTimeoutMin);
releaseUnassignedContainers(Lists.newArrayList(heldContainer.container));
} else {
// Let's decide if this container has hit the end of the road
// EOL true if container's match level is NON-LOCAL
boolean hitFinalMatchLevel = localityMatchLevel.equals(HeldContainer.LocalityMatchLevel.NON_LOCAL);
if (!hitFinalMatchLevel) {
// EOL also true if locality delay is 0
// or rack-local or non-local is disabled
heldContainer.incrementLocalityMatchLevel();
if (localitySchedulingDelay == 0 || (!reuseRackLocal || (!reuseNonLocal && heldContainer.getLocalityMatchLevel().equals(HeldContainer.LocalityMatchLevel.NON_LOCAL)))) {
hitFinalMatchLevel = true;
}
// a new container with a non-zero locality delay is still sent for matching at all locality levels, so it should not be short-circuited into release here
if (localitySchedulingDelay > 0 && isNew) {
hitFinalMatchLevel = false;
}
}
if (hitFinalMatchLevel) {
boolean safeToRelease = true;
Priority topPendingPriority = amRmClient.getTopPriority();
Priority containerPriority = heldContainer.container.getPriority();
if (isNew && topPendingPriority != null && containerPriority.compareTo(topPendingPriority) < 0) {
// this container is of lower priority and given to us by the RM for
// a task that will be matched after the current top priority. Keep
// this container for those pending tasks since the RM is not going
// to give this container to us again
safeToRelease = false;
}
// release if there are tasks or this is not a session
if (safeToRelease && (!taskRequests.isEmpty() || !getContext().isSession())) {
LOG.info("Releasing held container as either there are pending but " + " unmatched requests or this is not a session" + ", containerId=" + heldContainer.container.getId() + ", pendingTasks=" + taskRequests.size() + ", isSession=" + getContext().isSession() + ". isNew=" + isNew);
releaseUnassignedContainers(Lists.newArrayList(heldContainer.container));
} else {
// if no tasks, treat this like an idle session
heldContainer.resetLocalityMatchLevel();
delayedContainerManager.addDelayedContainer(heldContainer.getContainer(), currentTime + localitySchedulingDelay);
}
} else {
// Schedule delay container to match at a later try
delayedContainerManager.addDelayedContainer(heldContainer.getContainer(), currentTime + localitySchedulingDelay);
}
}
} else if (LOG.isDebugEnabled()) {
LOG.debug("Delayed container assignment successful" + ", containerId=" + heldContainer.getContainer().getId());
}
return assignedContainers;
} else {
// ignore all other cases?
LOG.warn("Received a request to assign re-used containers when AM was " + " in state: " + state + ". Ignoring request and releasing container" + ": " + heldContainer.getContainer().getId());
releaseUnassignedContainers(Lists.newArrayList(heldContainer.container));
}
return null;
}
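The safeToRelease check above compares containerPriority.compareTo(topPendingPriority) < 0, relying on Priority's natural ordering, in which (for the YARN versions this code targets) the numerically smaller value compares as the greater, i.e. more urgent, priority. A small illustrative snippet of that ordering, using the standard Priority.newInstance factory:
import org.apache.hadoop.yarn.api.records.Priority;

public class PriorityOrderingDemo {
  public static void main(String[] args) {
    Priority urgent = Priority.newInstance(1);    // smaller value, more urgent
    Priority relaxed = Priority.newInstance(10);  // larger value, less urgent
    // prints a positive number: the more urgent priority compares as greater
    System.out.println(urgent.compareTo(relaxed));
    // prints a negative number, matching the safeToRelease check above
    System.out.println(relaxed.compareTo(urgent));
  }
}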
use of org.apache.hadoop.yarn.api.records.Priority in project drill by apache.
the class ContainerRequestSpec method makeRequest.
/**
 * Create a YARN ContainerRequest object from the information in this object.
 *
 * @return a YARN container request carrying this specification's capability, locality constraints and priority
 */
public ContainerRequest makeRequest() {
assert memoryMb != 0;
Priority priorityRec = Records.newRecord(Priority.class);
priorityRec.setPriority(priority);
Resource capability = Records.newRecord(Resource.class);
capability.setMemory(memoryMb);
capability.setVirtualCores(vCores);
DoYUtil.callSetDiskIfExists(capability, disks);
boolean relaxLocality = true;
String[] nodeArr = null;
if (!hosts.isEmpty()) {
nodeArr = new String[hosts.size()];
hosts.toArray(nodeArr);
relaxLocality = false;
}
String[] rackArr = null;
if (!racks.isEmpty()) {
rackArr = new String[racks.size()];
racks.toArray(rackArr);
relaxLocality = false;
}
String nodeExpr = null;
if (!DoYUtil.isBlank(nodeLabelExpr)) {
nodeExpr = nodeLabelExpr;
LOG.info("Requesting a container using node expression: " + nodeExpr);
}
return new ContainerRequest(capability, nodeArr, rackArr, priorityRec, relaxLocality, nodeExpr);
}
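The ContainerRequest built here is ultimately handed to YARN's AM-RM client. A hypothetical usage sketch (field values are illustrative and the client setup is simplified; Drill's actual wiring lives elsewhere):
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

// Illustrative only: assumes a ContainerRequestSpec populated elsewhere.
ContainerRequestSpec spec = new ContainerRequestSpec();
spec.priority = 1;       // hypothetical values
spec.memoryMb = 2048;
spec.vCores = 1;

AMRMClient<ContainerRequest> amRmClient = AMRMClient.createAMRMClient();
amRmClient.init(new YarnConfiguration());
amRmClient.start();
// a real AM registers with the RM first, then asks for containers:
amRmClient.addContainerRequest(spec.makeRequest());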
use of org.apache.hadoop.yarn.api.records.Priority in project apex-core by apache.
the class InlineAM method run.
public boolean run() throws Exception {
LOG.info("Starting Client");
// Connect to ResourceManager
rmClient.start();
try {
// Get a new application id
YarnClientApplication newApp = rmClient.createApplication();
ApplicationId appId = newApp.getNewApplicationResponse().getApplicationId();
// Create launch context for app master
LOG.info("Setting up application submission context for ASM");
ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
// set the application id
appContext.setApplicationId(appId);
// set the application name
appContext.setApplicationName(appName);
// Set the priority for the application master
Priority pri = Records.newRecord(Priority.class);
pri.setPriority(amPriority);
appContext.setPriority(pri);
// Set the queue to which this application is to be submitted in the RM
appContext.setQueue(amQueue);
// Set up the container launch context for the application master
ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
appContext.setAMContainerSpec(amContainer);
// unmanaged AM
appContext.setUnmanagedAM(true);
LOG.info("Setting unmanaged AM");
// Submit the application to the applications manager
LOG.info("Submitting application to ASM");
rmClient.submitApplication(appContext);
// Monitor the application to wait for launch state
ApplicationReport appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.ACCEPTED));
ApplicationAttemptId attemptId = appReport.getCurrentApplicationAttemptId();
LOG.info("Launching application with id: " + attemptId);
// launch AM
runAM(attemptId);
// Monitor the application for end state
appReport = monitorApplication(appId, EnumSet.of(YarnApplicationState.KILLED, YarnApplicationState.FAILED, YarnApplicationState.FINISHED));
YarnApplicationState appState = appReport.getYarnApplicationState();
FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus();
LOG.info("App ended with state: " + appReport.getYarnApplicationState() + " and status: " + appStatus);
boolean success;
if (YarnApplicationState.FINISHED == appState && FinalApplicationStatus.SUCCEEDED == appStatus) {
LOG.info("Application has completed successfully.");
success = true;
} else {
LOG.info("Application did finished unsuccessfully." + " YarnState=" + appState.toString() + ", FinalStatus=" + appStatus.toString());
success = false;
}
return success;
} finally {
rmClient.stop();
}
}
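Side note: the Records.newRecord(Priority.class) plus setPriority pattern used above predates the static factory available in later YARN releases; on those releases the same record can be built more compactly (a sketch, not a change to the apex-core code):
// equivalent, more concise construction where Priority.newInstance is available
Priority pri = Priority.newInstance(amPriority);
appContext.setPriority(pri);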
use of org.apache.hadoop.yarn.api.records.Priority in project flink by apache.
the class YarnResourceManagerDriver method requestResource.
@Override
public CompletableFuture<YarnWorkerNode> requestResource(TaskExecutorProcessSpec taskExecutorProcessSpec) {
checkInitialized();
final CompletableFuture<YarnWorkerNode> requestResourceFuture = new CompletableFuture<>();
final Optional<TaskExecutorProcessSpecContainerResourcePriorityAdapter.PriorityAndResource> priorityAndResourceOpt = taskExecutorProcessSpecContainerResourcePriorityAdapter.getPriorityAndResource(taskExecutorProcessSpec);
if (!priorityAndResourceOpt.isPresent()) {
requestResourceFuture.completeExceptionally(new ResourceManagerException(String.format("Could not compute the container Resource from the given TaskExecutorProcessSpec %s. " + "This usually indicates the requested resource is larger than Yarn's max container resource limit.", taskExecutorProcessSpec)));
} else {
final Priority priority = priorityAndResourceOpt.get().getPriority();
final Resource resource = priorityAndResourceOpt.get().getResource();
resourceManagerClient.addContainerRequest(ContainerRequestReflector.INSTANCE.getContainerRequest(resource, priority, taskManagerNodeLabel));
// make sure we transmit the request fast and receive fast news of granted allocations
resourceManagerClient.setHeartbeatInterval(containerRequestHeartbeatIntervalMillis);
requestResourceFutures.computeIfAbsent(taskExecutorProcessSpec, ignore -> new LinkedList<>()).add(requestResourceFuture);
log.info("Requesting new TaskExecutor container with resource {}, priority {}.", taskExecutorProcessSpec, priority);
}
return requestResourceFuture;
}
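The adapter consulted here exists because YARN reports granted containers by their Priority rather than by the originating request, so the driver needs a one-to-one mapping between each distinct TaskExecutorProcessSpec and a dedicated Priority. A simplified, hypothetical sketch of that bookkeeping (not Flink's actual TaskExecutorProcessSpecContainerResourcePriorityAdapter):
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.Priority;

// Hypothetical helper: hands out one unique Priority per distinct spec so that
// containers granted at that priority can be matched back to their spec.
final class SpecPriorityRegistry<S> {
  private final Map<S, Priority> specToPriority = new HashMap<>();
  private final Map<Priority, S> priorityToSpec = new HashMap<>();
  private int nextPriority = 0;

  synchronized Priority priorityFor(S spec) {
    return specToPriority.computeIfAbsent(spec, s -> {
      Priority p = Priority.newInstance(nextPriority++);
      priorityToSpec.put(p, s);
      return p;
    });
  }

  synchronized S specFor(Priority priority) {
    return priorityToSpec.get(priority);
  }
}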