
Example 6 with Resource

Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.

The class RMContainerRequestor, method applyRequestLimits:

private void applyRequestLimits() {
    Iterator<ResourceRequest> iter = requestLimits.values().iterator();
    while (iter.hasNext()) {
        ResourceRequest reqLimit = iter.next();
        int limit = reqLimit.getNumContainers();
        Map<String, Map<Resource, ResourceRequest>> remoteRequests = remoteRequestsTable.get(reqLimit.getPriority());
        Map<Resource, ResourceRequest> reqMap = (remoteRequests != null) ? remoteRequests.get(ResourceRequest.ANY) : null;
        ResourceRequest req = (reqMap != null) ? reqMap.get(reqLimit.getCapability()) : null;
        if (req == null) {
            continue;
        }
        // update an existing ask or send a new one if updating
        if (ask.remove(req) || requestLimitsToUpdate.contains(req)) {
            ResourceRequest newReq = req.getNumContainers() > limit ? reqLimit : req;
            ask.add(newReq);
            LOG.info("Applying ask limit of " + newReq.getNumContainers() + " for priority:" + reqLimit.getPriority() + " and capability:" + reqLimit.getCapability());
        }
        if (limit == Integer.MAX_VALUE) {
            iter.remove();
        }
    }
    requestLimitsToUpdate.clear();
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) TreeMap(java.util.TreeMap)
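
The capping above simply swaps an oversized outstanding ask for the limit request. A minimal, self-contained sketch of the same rule, assuming the standard YARN record factories (Priority.newInstance, Resource.newInstance, ResourceRequest.newInstance) and an illustrative limit of 10 containers:

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class AskLimitSketch {
    public static void main(String[] args) {
        Priority pri = Priority.newInstance(20);       // illustrative priority
        Resource cap = Resource.newInstance(1024, 1);  // 1 GB, 1 vcore
        // outstanding ask for 50 containers, but the limit allows only 10
        ResourceRequest ask = ResourceRequest.newInstance(pri, ResourceRequest.ANY, cap, 50);
        ResourceRequest limit = ResourceRequest.newInstance(pri, ResourceRequest.ANY, cap, 10);
        // same rule as applyRequestLimits(): send whichever request is smaller
        ResourceRequest toSend = ask.getNumContainers() > limit.getNumContainers() ? limit : ask;
        System.out.println("asking RM for " + toSend.getNumContainers() + " containers");
    }
}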

Example 7 with Resource

Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.

The class CheckpointAMPreemptionPolicy, method preempt:

@Override
public void preempt(Context ctxt, PreemptionMessage preemptionRequests) {
    if (preemptionRequests != null) {
        // handling non-negotiable preemption
        StrictPreemptionContract cStrict = preemptionRequests.getStrictContract();
        if (cStrict != null && cStrict.getContainers() != null && cStrict.getContainers().size() > 0) {
            LOG.info("strict preemption :" + preemptionRequests.getStrictContract().getContainers().size() + " containers to kill");
            // handle strict preemptions. These containers are non-negotiable
            for (PreemptionContainer c : preemptionRequests.getStrictContract().getContainers()) {
                ContainerId reqCont = c.getId();
                TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont);
                if (reqTask != null) {
                    // ignore requests for preempting containers running maps
                    if (org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE.equals(reqTask.getTaskId().getTaskType())) {
                        toBePreempted.add(reqTask);
                        LOG.info("preempting " + reqCont + " running task:" + reqTask);
                    } else {
                        LOG.info("NOT preempting " + reqCont + " running task:" + reqTask);
                    }
                }
            }
        }
        // handling negotiable preemption
        PreemptionContract cNegot = preemptionRequests.getContract();
        if (cNegot != null && cNegot.getResourceRequest() != null && cNegot.getResourceRequest().size() > 0 && cNegot.getContainers() != null && cNegot.getContainers().size() > 0) {
            LOG.info("negotiable preemption :" + preemptionRequests.getContract().getResourceRequest().size() + " resourceReq, " + preemptionRequests.getContract().getContainers().size() + " containers");
            // handle fungible preemption. Here we only look at the total amount of
            // resources to be preempted and pick enough of our containers to
            // satisfy that. We only support checkpointing for reducers for now.
            List<PreemptionResourceRequest> reqResources = preemptionRequests.getContract().getResourceRequest();
            // compute the total amount of pending preemptions (to be discounted
            // from current request)
            int pendingPreemptionRam = 0;
            int pendingPreemptionCores = 0;
            for (Resource r : pendingFlexiblePreemptions.values()) {
                pendingPreemptionRam += r.getMemorySize();
                pendingPreemptionCores += r.getVirtualCores();
            }
            // discount preemption request based on currently pending preemption
            for (PreemptionResourceRequest rr : reqResources) {
                ResourceRequest reqRsrc = rr.getResourceRequest();
                if (!ResourceRequest.ANY.equals(reqRsrc.getResourceName())) {
                    // For now, only respond to aggregate requests and ignore locality
                    continue;
                }
                LOG.info("ResourceRequest:" + reqRsrc);
                int reqCont = reqRsrc.getNumContainers();
                long reqMem = reqRsrc.getCapability().getMemorySize();
                long totalMemoryToRelease = reqCont * reqMem;
                int reqCores = reqRsrc.getCapability().getVirtualCores();
                int totalCoresToRelease = reqCont * reqCores;
                if (pendingPreemptionRam > 0) {
                    // may go negative, in which case there is nothing left to release
                    totalMemoryToRelease -= pendingPreemptionRam;
                    // decrement pending resources; if zero or negative it will be
                    // ignored while processing the next PreemptionResourceRequest
                    pendingPreemptionRam -= totalMemoryToRelease;
                }
                if (pendingPreemptionCores > 0) {
                    totalCoresToRelease -= pendingPreemptionCores;
                    pendingPreemptionCores -= totalCoresToRelease;
                }
                // reverse order of allocation (for now)
                List<Container> listOfCont = ctxt.getContainers(TaskType.REDUCE);
                Collections.sort(listOfCont, new Comparator<Container>() {

                    @Override
                    public int compare(final Container o1, final Container o2) {
                        return o2.getId().compareTo(o1.getId());
                    }
                });
                // preempt reducers first
                for (Container cont : listOfCont) {
                    if (totalMemoryToRelease <= 0 && totalCoresToRelease <= 0) {
                        break;
                    }
                    TaskAttemptId reduceId = ctxt.getTaskAttempt(cont.getId());
                    int cMem = (int) cont.getResource().getMemorySize();
                    int cCores = cont.getResource().getVirtualCores();
                    if (!toBePreempted.contains(reduceId)) {
                        totalMemoryToRelease -= cMem;
                        totalCoresToRelease -= cCores;
                        toBePreempted.add(reduceId);
                        pendingFlexiblePreemptions.put(reduceId, cont.getResource());
                    }
                    LOG.info("ResourceRequest:" + reqRsrc + " satisfied preempting " + reduceId);
                }
            // if maps were preemptable we would add them to toBePreempted here as well
            }
        }
    }
}
Also used : TaskAttemptId(org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId) Resource(org.apache.hadoop.yarn.api.records.Resource) PreemptionContainer(org.apache.hadoop.yarn.api.records.PreemptionContainer) PreemptionResourceRequest(org.apache.hadoop.yarn.api.records.PreemptionResourceRequest) StrictPreemptionContract(org.apache.hadoop.yarn.api.records.StrictPreemptionContract) PreemptionContainer(org.apache.hadoop.yarn.api.records.PreemptionContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) PreemptionResourceRequest(org.apache.hadoop.yarn.api.records.PreemptionResourceRequest) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) PreemptionContract(org.apache.hadoop.yarn.api.records.PreemptionContract) StrictPreemptionContract(org.apache.hadoop.yarn.api.records.StrictPreemptionContract)
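
The negotiable branch only looks at the aggregate memory and cores requested: it discounts preemptions that are already pending and then walks the reducers in reverse container-id order until the remainder is covered. A stripped-down sketch of just that arithmetic, with made-up numbers standing in for the YARN records:

public class PreemptionDiscountSketch {
    public static void main(String[] args) {
        // the RM asks to free 4 containers of 2048 MB / 2 vcores each
        long totalMemoryToRelease = 4 * 2048L;
        int totalCoresToRelease = 4 * 2;
        // one container worth of preemption is still pending from a previous round
        long pendingPreemptionRam = 2048;
        int pendingPreemptionCores = 2;
        if (pendingPreemptionRam > 0) {
            totalMemoryToRelease -= pendingPreemptionRam;   // 8192 -> 6144
        }
        if (pendingPreemptionCores > 0) {
            totalCoresToRelease -= pendingPreemptionCores;  // 8 -> 6
        }
        // pick reducers (2048 MB / 2 vcores each) until both remainders are covered
        int preempted = 0;
        while (totalMemoryToRelease > 0 || totalCoresToRelease > 0) {
            totalMemoryToRelease -= 2048;
            totalCoresToRelease -= 2;
            preempted++;
        }
        System.out.println("would preempt " + preempted + " reducers"); // prints 3
    }
}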

Example 8 with Resource

Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.

The class RMContainerAllocator, method handleMapContainerRequest:

@SuppressWarnings({ "unchecked" })
private void handleMapContainerRequest(ContainerRequestEvent reqEvent) {
    assert (reqEvent.getAttemptID().getTaskId().getTaskType().equals(TaskType.MAP));
    Resource supportedMaxContainerCapability = getMaxContainerCapability();
    JobId jobId = getJob().getID();
    if (mapResourceRequest.equals(Resources.none())) {
        mapResourceRequest = reqEvent.getCapability();
        eventHandler.handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent(org.apache.hadoop.mapreduce.TaskType.MAP, mapResourceRequest.getMemorySize())));
        LOG.info("mapResourceRequest:" + mapResourceRequest);
    }
    boolean mapContainerRequestAccepted = true;
    if (mapResourceRequest.getMemorySize() > supportedMaxContainerCapability.getMemorySize() || mapResourceRequest.getVirtualCores() > supportedMaxContainerCapability.getVirtualCores()) {
        mapContainerRequestAccepted = false;
    }
    if (mapContainerRequestAccepted) {
        // set the resources
        reqEvent.getCapability().setMemorySize(mapResourceRequest.getMemorySize());
        reqEvent.getCapability().setVirtualCores(mapResourceRequest.getVirtualCores());
        //maps are immediately scheduled
        scheduledRequests.addMap(reqEvent);
    } else {
        String diagMsg = "The required MAP capability is more than the " + "supported max container capability in the cluster. Killing" + " the Job. mapResourceRequest: " + mapResourceRequest + " maxContainerCapability:" + supportedMaxContainerCapability;
        LOG.info(diagMsg);
        eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
        eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
    }
}
Also used : NormalizedResourceEvent(org.apache.hadoop.mapreduce.jobhistory.NormalizedResourceEvent) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) Resource(org.apache.hadoop.yarn.api.records.Resource) JobHistoryEvent(org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent) JobDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)
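
The accept-or-kill decision above is a per-dimension comparison of the map request against the cluster's maximum container allocation. A minimal sketch of that check, using an assumed 8 GB / 4 vcore cluster maximum purely for illustration:

import org.apache.hadoop.yarn.api.records.Resource;

public class MapCapabilityCheckSketch {
    public static void main(String[] args) {
        Resource maxContainerCapability = Resource.newInstance(8192, 4);  // assumed cluster max
        Resource mapResourceRequest = Resource.newInstance(10240, 2);     // what the map task asked for

        boolean accepted =
            mapResourceRequest.getMemorySize() <= maxContainerCapability.getMemorySize()
                && mapResourceRequest.getVirtualCores() <= maxContainerCapability.getVirtualCores();

        if (!accepted) {
            // RMContainerAllocator would emit a diagnostic and a JOB_KILL event here
            System.out.println("MAP capability " + mapResourceRequest
                + " exceeds max container capability " + maxContainerCapability);
        }
    }
}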

Example 9 with Resource

Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.

The class RMContainerAllocator, method scheduleReduces:

@Private
public void scheduleReduces(int totalMaps, int completedMaps, int scheduledMaps, int scheduledReduces, int assignedMaps, int assignedReduces, Resource mapResourceReqt, Resource reduceResourceReqt, int numPendingReduces, float maxReduceRampupLimit, float reduceSlowStart) {
    if (numPendingReduces == 0) {
        return;
    }
    // get available resources for this job
    Resource headRoom = getAvailableResources();
    LOG.info("Recalculating schedule, headroom=" + headRoom);
    //check for slow start
    if (!getIsReduceStarted()) {
        //not set yet
        int completedMapsForReduceSlowstart = (int) Math.ceil(reduceSlowStart * totalMaps);
        if (completedMaps < completedMapsForReduceSlowstart) {
            LOG.info("Reduce slow start threshold not met. " + "completedMapsForReduceSlowstart " + completedMapsForReduceSlowstart);
            return;
        } else {
            LOG.info("Reduce slow start threshold reached. Scheduling reduces.");
            setIsReduceStarted(true);
        }
    }
    // if all maps are assigned, then ramp up all reduces irrespective of the
    // headroom
    if (scheduledMaps == 0 && numPendingReduces > 0) {
        LOG.info("All maps assigned. " + "Ramping up all remaining reduces:" + numPendingReduces);
        scheduleAllReduces();
        return;
    }
    float completedMapPercent = 0f;
    if (totalMaps != 0) {
        //support for 0 maps
        completedMapPercent = (float) completedMaps / totalMaps;
    } else {
        completedMapPercent = 1;
    }
    Resource netScheduledMapResource = Resources.multiply(mapResourceReqt, (scheduledMaps + assignedMaps));
    Resource netScheduledReduceResource = Resources.multiply(reduceResourceReqt, (scheduledReduces + assignedReduces));
    Resource finalMapResourceLimit;
    Resource finalReduceResourceLimit;
    // ramp up the reduces based on completed map percentage
    Resource totalResourceLimit = getResourceLimit();
    Resource idealReduceResourceLimit = Resources.multiply(totalResourceLimit, Math.min(completedMapPercent, maxReduceRampupLimit));
    Resource ideaMapResourceLimit = Resources.subtract(totalResourceLimit, idealReduceResourceLimit);
    // check whether the maps already have enough resources; if so, give the
    // free map capacity to the reduces. Even when container counts match,
    // there may be unused resources in one dimension.
    if (ResourceCalculatorUtils.computeAvailableContainers(ideaMapResourceLimit, mapResourceReqt, getSchedulerResourceTypes()) >= (scheduledMaps + assignedMaps)) {
        // enough resources given to maps, give the remaining to reduces
        Resource unusedMapResourceLimit = Resources.subtract(ideaMapResourceLimit, netScheduledMapResource);
        finalReduceResourceLimit = Resources.add(idealReduceResourceLimit, unusedMapResourceLimit);
        finalMapResourceLimit = Resources.subtract(totalResourceLimit, finalReduceResourceLimit);
    } else {
        finalMapResourceLimit = ideaMapResourceLimit;
        finalReduceResourceLimit = idealReduceResourceLimit;
    }
    LOG.info("completedMapPercent " + completedMapPercent + " totalResourceLimit:" + totalResourceLimit + " finalMapResourceLimit:" + finalMapResourceLimit + " finalReduceResourceLimit:" + finalReduceResourceLimit + " netScheduledMapResource:" + netScheduledMapResource + " netScheduledReduceResource:" + netScheduledReduceResource);
    int rampUp = ResourceCalculatorUtils.computeAvailableContainers(Resources.subtract(finalReduceResourceLimit, netScheduledReduceResource), reduceResourceReqt, getSchedulerResourceTypes());
    if (rampUp > 0) {
        rampUp = Math.min(rampUp, numPendingReduces);
        LOG.info("Ramping up " + rampUp);
        rampUpReduces(rampUp);
    } else if (rampUp < 0) {
        int rampDown = -1 * rampUp;
        rampDown = Math.min(rampDown, scheduledReduces);
        LOG.info("Ramping down " + rampDown);
        rampDownReduces(rampDown);
    }
}
Also used : Resource(org.apache.hadoop.yarn.api.records.Resource) Private(org.apache.hadoop.classification.InterfaceAudience.Private)
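
The slow-start gate in scheduleReduces comes down to one line of arithmetic: reduces are held back until ceil(reduceSlowStart * totalMaps) maps have completed. A small worked example, assuming the commonly used slow-start factor of 0.05 (mapreduce.job.reduce.slowstart.completedmaps):

public class ReduceSlowStartSketch {
    public static void main(String[] args) {
        int totalMaps = 1000;
        int completedMaps = 37;
        float reduceSlowStart = 0.05f;  // mapreduce.job.reduce.slowstart.completedmaps

        // same formula as scheduleReduces()
        int completedMapsForReduceSlowstart = (int) Math.ceil(reduceSlowStart * totalMaps);

        if (completedMaps < completedMapsForReduceSlowstart) {
            System.out.println("hold reduces: need " + completedMapsForReduceSlowstart
                + " completed maps, have " + completedMaps);  // need 50, have 37
        } else {
            System.out.println("start ramping up reduces");
        }
    }
}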

Example 10 with Resource

Use of org.apache.hadoop.yarn.api.records.Resource in project hadoop by apache.

The class RMContainerAllocator, method getResources:

@SuppressWarnings("unchecked")
private List<Container> getResources() throws Exception {
    applyConcurrentTaskLimits();
    // will be null the first time
    Resource headRoom = Resources.clone(getAvailableResources());
    AllocateResponse response;
    /*
     * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
     * milliseconds before aborting. During this interval, AM will still try
     * to contact the RM.
     */
    try {
        response = makeRemoteRequest();
        // Reset retry count if no exception occurred.
        retrystartTime = System.currentTimeMillis();
    } catch (ApplicationAttemptNotFoundException e) {
        // This can happen if the RM has been restarted. If it is in that state,
        // this application must clean itself up.
        eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
        throw new RMContainerAllocationException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e);
    } catch (ApplicationMasterNotRegisteredException e) {
        LOG.info("ApplicationMaster is out of sync with ResourceManager," + " hence resync and send outstanding requests.");
        // RM may have restarted, re-register with RM.
        lastResponseID = 0;
        register();
        addOutstandingRequestOnResync();
        return null;
    } catch (InvalidLabelResourceRequestException e) {
        // An InvalidLabelResourceRequestException means the requested node label
        // is not accessible, so kill the job in this case.
        String diagMsg = "Requested node-label-expression is invalid: " + StringUtils.stringifyException(e);
        LOG.info(diagMsg);
        JobId jobId = this.getJob().getID();
        eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
        eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
        throw e;
    } catch (Exception e) {
        // This can happen when the connection to the RM has gone down. Keep
        // re-trying until the retryInterval has expired.
        if (System.currentTimeMillis() - retrystartTime >= retryInterval) {
            LOG.error("Could not contact RM after " + retryInterval + " milliseconds.");
            eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
            throw new RMContainerAllocationException("Could not contact RM after " + retryInterval + " milliseconds.");
        }
        // continue to attempt to contact the RM.
        throw e;
    }
    Resource newHeadRoom = getAvailableResources();
    List<Container> newContainers = response.getAllocatedContainers();
    // Setting NMTokens
    if (response.getNMTokens() != null) {
        for (NMToken nmToken : response.getNMTokens()) {
            NMTokenCache.setNMToken(nmToken.getNodeId().toString(), nmToken.getToken());
        }
    }
    // Setting AMRMToken
    if (response.getAMRMToken() != null) {
        updateAMRMToken(response.getAMRMToken());
    }
    List<ContainerStatus> finishedContainers = response.getCompletedContainersStatuses();
    // propagate preemption requests
    final PreemptionMessage preemptReq = response.getPreemptionMessage();
    if (preemptReq != null) {
        preemptionPolicy.preempt(new PreemptionContext(assignedRequests), preemptReq);
    }
    if (newContainers.size() + finishedContainers.size() > 0 || !headRoom.equals(newHeadRoom)) {
        //something changed
        recalculateReduceSchedule = true;
        if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) {
            LOG.debug("headroom=" + newHeadRoom);
        }
    }
    if (LOG.isDebugEnabled()) {
        for (Container cont : newContainers) {
            LOG.debug("Received new Container :" + cont);
        }
    }
    //Called on each allocation. Will know about newly blacklisted/added hosts.
    computeIgnoreBlacklisting();
    handleUpdatedNodes(response);
    handleJobPriorityChange(response);
    // handle receiving the timeline collector address for this app
    String collectorAddr = response.getCollectorAddr();
    MRAppMaster.RunningAppContext appContext = (MRAppMaster.RunningAppContext) this.getContext();
    if (collectorAddr != null && !collectorAddr.isEmpty() && appContext.getTimelineV2Client() != null) {
        appContext.getTimelineV2Client().setTimelineServiceAddress(response.getCollectorAddr());
    }
    for (ContainerStatus cont : finishedContainers) {
        processFinishedContainer(cont);
    }
    return newContainers;
}
Also used : PreemptionMessage(org.apache.hadoop.yarn.api.records.PreemptionMessage) MRAppMaster(org.apache.hadoop.mapreduce.v2.app.MRAppMaster) NMToken(org.apache.hadoop.yarn.api.records.NMToken) Resource(org.apache.hadoop.yarn.api.records.Resource) JobDiagnosticsUpdateEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) InvalidLabelResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException) IOException(java.io.IOException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) YarnRuntimeException(org.apache.hadoop.yarn.exceptions.YarnRuntimeException) ApplicationAttemptNotFoundException(org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException) AllocateResponse(org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse) ApplicationMasterNotRegisteredException(org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException) Container(org.apache.hadoop.yarn.api.records.Container) ContainerStatus(org.apache.hadoop.yarn.api.records.ContainerStatus) JobEvent(org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent) InvalidLabelResourceRequestException(org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException) JobId(org.apache.hadoop.mapreduce.v2.api.records.JobId)
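
The generic catch block above only gives up once the configured wait interval has elapsed since the last successful heartbeat; inside that window the exception is rethrown and the next heartbeat simply tries again. A bare-bones sketch of that timeout bookkeeping, with an assumed 6-minute window and a stubbed-out remote call (both hypothetical, for illustration only):

import java.io.IOException;
import java.util.Collections;
import java.util.List;

public class RmRetryWindowSketch {
    private static final long RETRY_INTERVAL_MS = 6 * 60 * 1000L;  // illustrative value only
    private long retryStartTime = System.currentTimeMillis();

    List<Object> heartbeat() throws Exception {
        try {
            makeRemoteRequestStub();                      // stand-in for the real allocate call
            retryStartTime = System.currentTimeMillis();  // reset the window on success
            return Collections.emptyList();
        } catch (Exception e) {
            if (System.currentTimeMillis() - retryStartTime >= RETRY_INTERVAL_MS) {
                // window exhausted: the real allocator signals JOB_AM_REBOOT and aborts
                throw new RuntimeException("Could not contact RM within retry window", e);
            }
            throw e;  // still inside the window: let the next heartbeat attempt retry
        }
    }

    private Object makeRemoteRequestStub() throws IOException {
        throw new IOException("RM unreachable");  // hypothetical failure for illustration
    }
}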

Aggregations

Resource (org.apache.hadoop.yarn.api.records.Resource): 498 usages
Test (org.junit.Test): 190 usages
NodeId (org.apache.hadoop.yarn.api.records.NodeId): 89 usages
ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId): 82 usages
Priority (org.apache.hadoop.yarn.api.records.Priority): 79 usages
ContainerId (org.apache.hadoop.yarn.api.records.ContainerId): 67 usages
HashMap (java.util.HashMap): 62 usages
ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId): 56 usages
RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer): 55 usages
FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp): 53 usages
ArrayList (java.util.ArrayList): 49 usages
ResourceLimits (org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits): 48 usages
FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode): 45 usages
YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration): 43 usages
ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest): 42 usages
Container (org.apache.hadoop.yarn.api.records.Container): 41 usages
Configuration (org.apache.hadoop.conf.Configuration): 34 usages
LocalResource (org.apache.hadoop.yarn.api.records.LocalResource): 32 usages
IOException (java.io.IOException): 31 usages
Map (java.util.Map): 29 usages