Search in sources :

Example 21 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class ContainerUpdateContext method checkAndAddToOutstandingIncreases.

/**
   * Tracks a container update as an outstanding increase, indexed by
   * scheduler key, then by the resource to increase, then by node.
   *
   * <p>If an entry already exists for the scheduler key, the previous request
   * is cancelled when the update type is INCREASE_RESOURCE; for any other
   * update type the call is rejected. The call is also rejected when the
   * container already has an outstanding decrease. When the resource to
   * increase is not "none", matching resource requests are created and handed
   * to the application's scheduling info.
   *
   * @param rmContainer RMContainer.
   * @param schedulerNode SchedulerNode.
   * @param updateRequest UpdateContainerRequest.
   * @return true if the container was added to the outstanding increases,
   *         false if the request was rejected.
   */
public synchronized boolean checkAndAddToOutstandingIncreases(RMContainer rmContainer, SchedulerNode schedulerNode, UpdateContainerRequest updateRequest) {
    final Container target = rmContainer.getContainer();
    final SchedulerRequestKey requestKey = SchedulerRequestKey.create(updateRequest, rmContainer.getAllocatedSchedulerKey());

    Map<Resource, Map<NodeId, Set<ContainerId>>> byResource = outstandingIncreases.get(requestKey);
    if (byResource != null) {
        // Something is already tracked under this key: only a resource
        // increase may supersede it, everything else is refused.
        if (ContainerUpdateType.INCREASE_RESOURCE != updateRequest.getContainerUpdateType()) {
            return false;
        }
        cancelPreviousRequest(schedulerNode, requestKey);
    } else {
        byResource = new HashMap<>();
        outstandingIncreases.put(requestKey, byResource);
    }

    final Resource increase = getResourceToIncrease(updateRequest, rmContainer);
    Map<NodeId, Set<ContainerId>> byNode = byResource.get(increase);
    if (byNode == null) {
        byNode = new HashMap<>();
        byResource.put(increase, byNode);
    }

    Set<ContainerId> pendingIds = byNode.get(target.getNodeId());
    if (pendingIds == null) {
        pendingIds = new HashSet<>();
        byNode.put(target.getNodeId(), pendingIds);
    }

    // NOTE: this check intentionally stays after the bookkeeping above, so
    // the (possibly empty) index entries exist even when the call is refused.
    if (outstandingDecreases.containsKey(target.getId())) {
        return false;
    }
    pendingIds.add(target.getId());

    if (Resources.isNone(increase)) {
        // Nothing extra to ask the scheduler for.
        return true;
    }
    Map<String, ResourceRequest> requestsForKey = createResourceRequests(rmContainer, schedulerNode, requestKey, increase);
    Map<SchedulerRequestKey, Map<String, ResourceRequest>> updateResReqs = new HashMap<>();
    updateResReqs.put(requestKey, requestsForKey);
    appSchedulingInfo.addToPlacementSets(false, updateResReqs);
    return true;
}
Also used : SchedulingPlacementSet(org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet) Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Resource(org.apache.hadoop.yarn.api.records.Resource) SchedulerRequestKey(org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) NodeId(org.apache.hadoop.yarn.api.records.NodeId) ResourceRequest(org.apache.hadoop.yarn.api.records.ResourceRequest) HashMap(java.util.HashMap) Map(java.util.Map)

Example 22 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class LeafQueue method completedContainer.

/**
 * Handles a completed (or unreserved) container for this queue.
 *
 * <p>Scheduler health is updated first. If {@code application} is non-null,
 * the container is removed from the application and node while holding the
 * queue write lock; on successful removal the ordering policy is informed and
 * the queue's resources are released. The parent queue is then notified
 * outside of the lock (with a {@code null} container status). Finally the
 * preemption manager is told to drop the container from its killable set,
 * regardless of whether an application was supplied.
 *
 * @param clusterResource cluster resource passed through to resource release
 *                        and to the parent queue
 * @param application application owning the container; when {@code null} only
 *                    the health update and preemption notification happen
 * @param node node the container is on
 * @param rmContainer the completed container
 * @param containerStatus status forwarded to the application
 * @param event event forwarded to the application and to the parent queue
 * @param childQueue not read by this method
 * @param sortQueues forwarded unchanged to the parent queue
 */
@Override
public void completedContainer(Resource clusterResource, FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue, boolean sortQueues) {
    // Update SchedulerHealth for released / preempted container
    updateSchedulerHealthForCompletedContainer(rmContainer, containerStatus);
    if (application != null) {
        boolean removed = false;
        // Careful! Locking order is important!
        // Acquire the lock before entering the try block: if acquisition
        // fails, the finally clause must not unlock a lock we do not hold.
        writeLock.lock();
        try {
            Container container = rmContainer.getContainer();
            // So, this is, in effect, a transaction across application & node
            if (rmContainer.getState() == RMContainerState.RESERVED) {
                removed = application.unreserve(rmContainer.getReservedSchedulerKey(), node, rmContainer);
            } else {
                removed = application.containerCompleted(rmContainer, containerStatus, event, node.getPartition());
                node.releaseContainer(rmContainer.getContainerId(), false);
            }
            // Book-keeping
            if (removed) {
                // Inform the ordering policy
                orderingPolicy.containerReleased(application, rmContainer);
                releaseResource(clusterResource, application, container.getResource(), node.getPartition(), rmContainer);
            }
        } finally {
            writeLock.unlock();
        }
        if (removed) {
            // Inform the parent queue _outside_ of the leaf-queue lock
            getParent().completedContainer(clusterResource, application, node, rmContainer, null, event, this, sortQueues);
        }
    }
    // Notify PreemptionManager
    csContext.getPreemptionManager().removeKillableContainer(new KillableContainer(rmContainer, node.getPartition(), queueName));
}
Also used : KillableContainer(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.KillableContainer) SchedulerContainer(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.SchedulerContainer) KillableContainer(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.KillableContainer) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container)

Example 23 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class CapacityScheduler method doneApplicationAttempt.

/**
 * Cleans up scheduler state for a finished application attempt.
 *
 * <p>Under the scheduler write lock: completes every live container of the
 * attempt with a KILL event (optionally skipping RUNNING ones), completes all
 * reserved containers, stops the attempt, and informs its queue. If either
 * the application or the attempt is unknown, the method only logs and
 * returns.
 *
 * @param applicationAttemptId id of the attempt that is done
 * @param rmAppAttemptFinalState final state passed to {@code attempt.stop}
 * @param keepContainers when true, containers in state RUNNING are left
 *                       alone instead of being killed
 */
private void doneApplicationAttempt(ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
    // Acquire the lock before entering the try block: if acquisition fails,
    // the finally clause must not unlock a lock we do not hold.
    writeLock.lock();
    try {
        LOG.info("Application Attempt " + applicationAttemptId + " is done." + " finalState=" + rmAppAttemptFinalState);
        FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
        SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationAttemptId.getApplicationId());
        if (application == null || attempt == null) {
            LOG.info("Unknown application " + applicationAttemptId + " has completed!");
            return;
        }
        // Release all the allocated, acquired, running containers
        for (RMContainer rmContainer : attempt.getLiveContainers()) {
            if (keepContainers && rmContainer.getState().equals(RMContainerState.RUNNING)) {
                // do not kill the running container in the case of work-preserving AM
                // restart.
                LOG.info("Skip killing " + rmContainer.getContainerId());
                continue;
            }
            super.completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(rmContainer.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL);
        }
        // Release all reserved containers
        for (RMContainer rmContainer : attempt.getReservedContainers()) {
            super.completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(rmContainer.getContainerId(), "Application Complete"), RMContainerEventType.KILL);
        }
        // Clean up pending requests, metrics etc.
        attempt.stop(rmAppAttemptFinalState);
        // Inform the queue
        String queueName = attempt.getQueue().getQueueName();
        CSQueue queue = this.getQueue(queueName);
        if (!(queue instanceof LeafQueue)) {
            // Also covers a null lookup result, since instanceof is false for null.
            LOG.error("Cannot finish application " + "from non-leaf queue: " + queueName);
        } else {
            queue.finishApplicationAttempt(attempt, queue.getQueueName());
        }
    } finally {
        writeLock.unlock();
    }
}
Also used : FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Example 24 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class CapacityScheduler method completedContainerInternal.

/**
 * Routes a completed container to the leaf queue of its application.
 *
 * <p>The call is a logged no-op when no current attempt is found for the
 * container, or when the node the container was allocated on is no longer
 * known to the scheduler.
 *
 * @param rmContainer the completed container
 * @param containerStatus status forwarded to the queue
 * @param event event forwarded to the queue
 */
@Override
protected void completedContainerInternal(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) {
    final Container container = rmContainer.getContainer();
    final ContainerId containerId = container.getId();

    // Resolve the attempt that currently owns the finished container
    final FiCaSchedulerApp attempt = getCurrentAttemptForContainer(container.getId());
    final ApplicationId appId = containerId.getApplicationAttemptId().getApplicationId();
    if (attempt == null) {
        LOG.info("Container " + container + " of" + " finished application " + appId + " completed with event " + event);
        return;
    }

    // Resolve the node the container was allocated on
    final FiCaSchedulerNode hostNode = getNode(container.getNodeId());
    if (hostNode == null) {
        LOG.info("Container " + container + " of" + " removed node " + container.getNodeId() + " completed with event " + event);
        return;
    }

    // Hand over to the owning queue; no child queue, and queues get re-sorted
    final LeafQueue owningQueue = (LeafQueue) attempt.getQueue();
    owningQueue.completedContainer(getClusterResource(), attempt, hostNode, rmContainer, containerStatus, event, null, true);
}
Also used : RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer) Container(org.apache.hadoop.yarn.api.records.Container) SchedulerContainer(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.SchedulerContainer) KillableContainer(org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.KillableContainer) FiCaSchedulerNode(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode) ContainerId(org.apache.hadoop.yarn.api.records.ContainerId) FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ApplicationId(org.apache.hadoop.yarn.api.records.ApplicationId)

Example 25 with RMContainer

use of org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer in project hadoop by apache.

the class CapacityScheduler method allocateContainerOnSingleNode.

/*
   * Logics of allocate container on a single node (Old behavior)
   *
   * Steps, in order:
   *  1. Bail out if the given node is not the instance currently registered
   *     with the scheduler.
   *  2. If the node holds a reserved container, try to fulfill that
   *     reservation through the reserving application's leaf queue and submit
   *     the resulting assignment.
   *  3. If the node still has a reservation, or has no available/killable
   *     resource for a minimum allocation, return null.
   *  4. Otherwise delegate to allocateOrReserveNewContainers.
   *
   * Returns null on every early exit, otherwise the result of
   * allocateOrReserveNewContainers.
   */
private CSAssignment allocateContainerOnSingleNode(PlacementSet<FiCaSchedulerNode> ps, FiCaSchedulerNode node, boolean withNodeHeartbeat) {
    // The node must still be the instance the scheduler knows under this id;
    // otherwise we would be scheduling on a removed node.
    if (getNode(node.getNodeID()) != node) {
        LOG.error("Trying to schedule on a removed node, please double check.");
        return null;
    }
    CSAssignment assignment;
    // Assign new containers...
    // 1. Check for reserved applications
    // 2. Schedule if there are no reservations
    RMContainer reservedContainer = node.getReservedContainer();
    if (reservedContainer != null) {
        FiCaSchedulerApp reservedApplication = getCurrentAttemptForContainer(reservedContainer.getContainerId());
        // The attempt lookup can come back empty (the completed-container path
        // checks for this too); without this guard the dereference below
        // would throw a NullPointerException.
        if (reservedApplication == null) {
            LOG.error("Trying to schedule for a finished app, please double check. nodeId=" + node.getNodeID() + " container=" + reservedContainer.getContainerId());
            return null;
        }
        // Try to fulfill the reservation
        LOG.info("Trying to fulfill reservation for application " + reservedApplication.getApplicationId() + " on node: " + node.getNodeID());
        LeafQueue queue = ((LeafQueue) reservedApplication.getQueue());
        // NOTE(review): the limit is built from the NO_LABEL partition only;
        // the original comment says labeled resources should be considered too.
        assignment = queue.assignContainers(getClusterResource(), ps,
        new ResourceLimits(labelManager.getResourceByLabel(RMNodeLabelsManager.NO_LABEL, getClusterResource())), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
        if (assignment.isFulfilledReservation()) {
            if (withNodeHeartbeat) {
                // Only update SchedulerHealth in sync scheduling, existing
                // Data structure of SchedulerHealth need to be updated for
                // Async mode
                updateSchedulerHealth(lastNodeUpdateTime, node.getNodeID(), assignment);
            }
            schedulerHealth.updateSchedulerFulfilledReservationCounts(1);
            ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
            ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.ALLOCATED_FROM_RESERVED);
        } else {
            ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
            ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
        }
        assignment.setSchedulingMode(SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
        submitResourceCommitRequest(getClusterResource(), assignment);
    }
    // Do not schedule if there are any reservations to fulfill on the node
    if (node.getReservedContainer() != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Skipping scheduling since node " + node.getNodeID() + " is reserved by application " + node.getReservedContainer().getContainerId().getApplicationAttemptId());
        }
        return null;
    }
    // Only continue if unallocated plus killable resource on the node can fit
    // at least one minimum allocation
    if (calculator.computeAvailableContainers(Resources.add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation) <= 0) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("This node or this node partition doesn't have available or " + "killable resource");
        }
        return null;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Trying to schedule on node: " + node.getNodeName() + ", available: " + node.getUnallocatedResource());
    }
    return allocateOrReserveNewContainers(ps, withNodeHeartbeat);
}
Also used : FiCaSchedulerApp(org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp) ResourceLimits(org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits) RMContainer(org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)

Aggregations

RMContainer (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer)166 ContainerId (org.apache.hadoop.yarn.api.records.ContainerId)55 Resource (org.apache.hadoop.yarn.api.records.Resource)49 Container (org.apache.hadoop.yarn.api.records.Container)48 Test (org.junit.Test)45 ApplicationAttemptId (org.apache.hadoop.yarn.api.records.ApplicationAttemptId)41 ArrayList (java.util.ArrayList)29 NodeId (org.apache.hadoop.yarn.api.records.NodeId)29 FiCaSchedulerApp (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp)29 RMApp (org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp)28 FiCaSchedulerNode (org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode)21 RMContainerImpl (org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl)18 HashMap (java.util.HashMap)17 ResourceRequest (org.apache.hadoop.yarn.api.records.ResourceRequest)17 RMNode (org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode)17 NodeUpdateSchedulerEvent (org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent)17 ApplicationId (org.apache.hadoop.yarn.api.records.ApplicationId)15 Priority (org.apache.hadoop.yarn.api.records.Priority)14 MockRM (org.apache.hadoop.yarn.server.resourcemanager.MockRM)13 YarnConfiguration (org.apache.hadoop.yarn.conf.YarnConfiguration)12