Search in sources :

Example 1 with ObjectResourcesItem

use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.

the class NodeSorter method createClusterSummarizedResources.

/**
 * Builds a cluster-level resource summary containing one {@link ObjectResourcesItem} per rack.
 *
 * <p>For every rack in {@code networkTopography}, the available and the total resources of each node found on the
 * rack's hosts are accumulated into the rack's item, which is then added to the summary.
 *
 * @return a summary named "Cluster" with one item per rack of {@code networkTopography}
 */
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary clusterResourcesSummary = new ObjectResourcesSummary("Cluster");
    // This is the first time so initialize the resources.
    for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
        String rackId = entry.getKey();
        List<String> nodeHosts = entry.getValue();
        ObjectResourcesItem rack = new ObjectResourcesItem(rackId);
        for (String nodeHost : nodeHosts) {
            for (RasNode node : hostnameToNodes(nodeHost)) {
                rack.availableResources.add(node.getTotalAvailableResources());
                // Total capacity comes from getTotalResources(); adding the available resources here
                // would make the rack's "total" merely mirror its "available" figure.
                rack.totalResources.add(node.getTotalResources());
            }
        }
        clusterResourcesSummary.addObjectResourcesItem(rack);
    }
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ]", clusterResourcesSummary.getAvailableResourcesOverall(), clusterResourcesSummary.getTotalResourcesOverall());
    return clusterResourcesSummary;
}
Also used : ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) RasNode(org.apache.storm.scheduler.resource.RasNode) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with ObjectResourcesItem

use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.

the class NodeSorter method sortObjectResourcesCommon.

/**
 * Sort objects (nodes or racks) by the following criteria, applied in order.
 *
 * <ol>
 * <li>
 *     The number of executors of the topology being scheduled that are already on the object (node or rack),
 *     in descending order. The reasoning to sort based on this criterion is so we schedule the rest of a topology on
 *     the same object (node or rack) as the existing executors of the topology.
 * </li>
 *
 * <li>
 *     The subordinate/subservient resource availability percentage of the object, in descending order. We calculate
 *     the resource availability percentage by dividing the resource availability of the object (node or rack) by the
 *     resource availability of the entire rack or cluster, depending on whether the object references a node or a rack.
 *     How this differs from the DefaultResourceAwareStrategy is that the percentage boosts the node or rack if a
 *     resource is requested by the executor that the sorting is being done for and pulls it down if it is not.
 *     By doing this calculation, objects (node or rack) that have exhausted or have little of one of the resources
 *     will be ranked after objects that have more balanced resource availability, and objects that have resources
 *     that are not requested will be ranked lower. So we will be less likely to pick an object that has a lot of one
 *     resource but a low amount of another, or that has a lot of resources that are not requested by the executor.
 *     This is similar to the logic used in
 *     {@link #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)}.
 * </li>
 *
 * <li>
 *     A tie between two objects with the same average resource availability is broken by {@code minResourcePercent},
 *     with the larger value ranked first. This comparison was used in
 *     {@link #sortObjectResourcesDefault(ObjectResourcesSummary, ExistingScheduleFunc)}
 *     but here it is made subservient to the modified resource availability used in
 *     {@link #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)}.
 *     In this method {@code minResourcePercent} is computed before the affinity adjustment is applied to the
 *     object's available resources, while {@code avgResourcePercent} is computed after it.
 * </li>
 *
 * <li>
 *     Any remaining tie is broken by the natural ordering of the object ids.
 * </li>
 * </ol>
 *
 * @param allResources         contains all individual ObjectResources as well as cumulative stats
 * @param exec                 executor for which the sorting is done; when null no affinity adjustment is applied
 * @param existingScheduleFunc a function to get existing executors already scheduled on this object
 * @return a sorted list of ObjectResources
 */
private List<ObjectResourcesItem> sortObjectResourcesCommon(final ObjectResourcesSummary allResources, final ExecutorDetails exec, final ExistingScheduleFunc existingScheduleFunc) {
    // Work on a summary constructed from allResources; the percentages below are written onto its items
    ObjectResourcesSummary affinityBasedAllResources = new ObjectResourcesSummary(allResources);
    final NormalizedResourceOffer availableResourcesOverall = allResources.getAvailableResourcesOverall();
    final NormalizedResourceRequest requestedResources = (exec != null) ? topologyDetails.getTotalResources(exec) : null;
    affinityBasedAllResources.getObjectResources().forEach(x -> {
        x.minResourcePercent = availableResourcesOverall.calculateMinPercentageUsedBy(x.availableResources);
        if (requestedResources != null) {
            // negate unrequested resources
            x.availableResources.updateForRareResourceAffinity(requestedResources);
        }
        x.avgResourcePercent = availableResourcesOverall.calculateAveragePercentageUsedBy(x.availableResources);
        LOG.trace("for {}: minResourcePercent={}, avgResourcePercent={}, numExistingSchedule={}", x.id, x.minResourcePercent, x.avgResourcePercent, existingScheduleFunc.getNumExistingSchedule(x.id));
    });
    // Every numeric criterion is descending: the larger value sorts first (returns -1)
    Comparator<ObjectResourcesItem> comparator = (o1, o2) -> {
        int execsScheduled1 = existingScheduleFunc.getNumExistingSchedule(o1.id);
        int execsScheduled2 = existingScheduleFunc.getNumExistingSchedule(o2.id);
        if (execsScheduled1 > execsScheduled2) {
            return -1;
        } else if (execsScheduled1 < execsScheduled2) {
            return 1;
        }
        double o1Avg = o1.avgResourcePercent;
        double o2Avg = o2.avgResourcePercent;
        if (o1Avg > o2Avg) {
            return -1;
        } else if (o1Avg < o2Avg) {
            return 1;
        }
        if (o1.minResourcePercent > o2.minResourcePercent) {
            return -1;
        } else if (o1.minResourcePercent < o2.minResourcePercent) {
            return 1;
        }
        // Final tie-break on id
        return o1.id.compareTo(o2.id);
    };
    // Parameterized list (no raw type), populated with the adjusted items and then sorted in place
    List<ObjectResourcesItem> sortedObjectResources = new ArrayList<>(affinityBasedAllResources.getObjectResources());
    sortedObjectResources.sort(comparator);
    LOG.debug("Sorted Object Resources: {}", sortedObjectResources);
    return sortedObjectResources;
}
Also used : NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) RasNode(org.apache.storm.scheduler.resource.RasNode) LoggerFactory(org.slf4j.LoggerFactory) NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) HashMap(java.util.HashMap) RasNodes(org.apache.storm.scheduler.resource.RasNodes) BaseResourceAwareStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.BaseResourceAwareStrategy) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) NoSuchElementException(java.util.NoSuchElementException) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) DNSToSwitchMapping(org.apache.storm.networktopography.DNSToSwitchMapping) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) Collection(java.util.Collection) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) Set(java.util.Set) Collectors(java.util.stream.Collectors) Cluster(org.apache.storm.scheduler.Cluster) List(java.util.List) Stream(java.util.stream.Stream) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) Config(org.apache.storm.Config) Comparator(java.util.Comparator) Collections(java.util.Collections) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) ArrayList(java.util.ArrayList)

Example 3 with ObjectResourcesItem

use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.

the class NodeSorterHostProximity method sortObjectResourcesCommon.

/**
 * Sort objects (nodes or racks) by the following criteria, applied in order.
 *
 * <ol>
 * <li>
 *     The number of executors of the topology being scheduled that are already on the object (node or rack),
 *     in descending order. The reasoning to sort based on this criterion is so we schedule the rest of a topology on
 *     the same object (node or rack) as the existing executors of the topology.
 * </li>
 *
 * <li>
 *     The subordinate/subservient resource availability percentage of the object, in descending order. We calculate
 *     the resource availability percentage by dividing the resource availability of the object (node or rack) by the
 *     resource availability of the entire rack or cluster, depending on whether the object references a node or a rack.
 *     How this differs from the DefaultResourceAwareStrategy is that the percentage boosts the node or rack if a
 *     resource is requested by the executor that the sorting is being done for and pulls it down if it is not.
 *     By doing this calculation, objects (node or rack) that have exhausted or have little of one of the resources
 *     will be ranked after objects that have more balanced resource availability, and objects that have resources
 *     that are not requested will be ranked lower. So we will be less likely to pick an object that has a lot of one
 *     resource but a low amount of another, or that has a lot of resources that are not requested by the executor.
 *     This is similar to the logic used in
 *     {@link #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)}.
 * </li>
 *
 * <li>
 *     A tie between two objects with the same average resource availability is broken by {@code minResourcePercent},
 *     with the larger value ranked first. This comparison was used in
 *     {@link #sortObjectResourcesDefault(ObjectResourcesSummary, ExistingScheduleFunc)}
 *     but here it is made subservient to the modified resource availability used in
 *     {@link #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)}.
 *     In this method both {@code minResourcePercent} and {@code avgResourcePercent} are computed after the affinity
 *     adjustment has been applied to the object's available resources.
 * </li>
 *
 * <li>
 *     Any remaining tie is broken by the natural ordering of the object ids.
 * </li>
 * </ol>
 *
 * @param allResources         contains all individual ObjectResources as well as cumulative stats
 * @param exec                 executor for which the sorting is done; when null no affinity adjustment is applied
 * @param existingScheduleFunc a function to get existing executors already scheduled on this object
 * @return an {@link Iterable} of sorted {@link ObjectResourcesItem}
 */
private Iterable<ObjectResourcesItem> sortObjectResourcesCommon(final ObjectResourcesSummary allResources, final ExecutorDetails exec, final ExistingScheduleFunc existingScheduleFunc) {
    // Work on a summary constructed from allResources; the percentages below are written onto its items
    ObjectResourcesSummary affinityBasedAllResources = new ObjectResourcesSummary(allResources);
    final NormalizedResourceOffer availableResourcesOverall = allResources.getAvailableResourcesOverall();
    final NormalizedResourceRequest requestedResources = (exec != null) ? topologyDetails.getTotalResources(exec) : null;
    affinityBasedAllResources.getObjectResources().forEach(x -> {
        if (requestedResources != null) {
            // negate unrequested resources
            x.availableResources.updateForRareResourceAffinity(requestedResources);
        }
        x.minResourcePercent = availableResourcesOverall.calculateMinPercentageUsedBy(x.availableResources);
        x.avgResourcePercent = availableResourcesOverall.calculateAveragePercentageUsedBy(x.availableResources);
        LOG.trace("for {}: minResourcePercent={}, avgResourcePercent={}, numExistingSchedule={}", x.id, x.minResourcePercent, x.avgResourcePercent, existingScheduleFunc.getNumExistingSchedule(x.id));
    });
    // Every numeric criterion is descending: the larger value sorts first (returns -1)
    Comparator<ObjectResourcesItem> comparator = (o1, o2) -> {
        int execsScheduled1 = existingScheduleFunc.getNumExistingSchedule(o1.id);
        int execsScheduled2 = existingScheduleFunc.getNumExistingSchedule(o2.id);
        if (execsScheduled1 > execsScheduled2) {
            return -1;
        } else if (execsScheduled1 < execsScheduled2) {
            return 1;
        }
        double o1Avg = o1.avgResourcePercent;
        double o2Avg = o2.avgResourcePercent;
        if (o1Avg > o2Avg) {
            return -1;
        } else if (o1Avg < o2Avg) {
            return 1;
        }
        if (o1.minResourcePercent > o2.minResourcePercent) {
            return -1;
        } else if (o1.minResourcePercent < o2.minResourcePercent) {
            return 1;
        }
        // Final tie-break on id; the TreeSet below treats items that compare as 0 as duplicates,
        // so only items with equal ids can collapse into one entry
        return o1.id.compareTo(o2.id);
    };
    // Parameterized TreeSet (no raw type) keeps the items ordered by the comparator
    TreeSet<ObjectResourcesItem> sortedObjectResources = new TreeSet<>(comparator);
    sortedObjectResources.addAll(affinityBasedAllResources.getObjectResources());
    LOG.debug("Sorted Object Resources: {}", sortedObjectResources);
    return sortedObjectResources;
}
Also used : NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) RasNode(org.apache.storm.scheduler.resource.RasNode) LoggerFactory(org.slf4j.LoggerFactory) NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) HashMap(java.util.HashMap) RasNodes(org.apache.storm.scheduler.resource.RasNodes) BaseResourceAwareStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.BaseResourceAwareStrategy) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) VisibleForTesting(org.apache.storm.shade.com.google.common.annotations.VisibleForTesting) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) NoSuchElementException(java.util.NoSuchElementException) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) DNSToSwitchMapping(org.apache.storm.networktopography.DNSToSwitchMapping) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) Collection(java.util.Collection) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) Set(java.util.Set) Collectors(java.util.stream.Collectors) Cluster(org.apache.storm.scheduler.Cluster) List(java.util.List) Stream(java.util.stream.Stream) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) Config(org.apache.storm.Config) Comparator(java.util.Comparator) Collections(java.util.Collections) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) TreeSet(java.util.TreeSet) 
ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem)

Example 4 with ObjectResourcesItem

use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.

the class NodeSorterHostProximity method sortNodes.

/**
 * Sorts the given nodes of one host for the placement of an executor.
 *
 * <p>Each node is wrapped in an {@link ObjectResourcesItem} holding its available and total resources, the items are
 * collected in a host-level {@link ObjectResourcesSummary}, and the ordering itself is delegated to
 * {@code sortObjectResources}. The intended ordering is:
 *
 * <p>1) the number of executors of the topology being scheduled that are already on the node, in descending order, so
 * that the rest of a topology is scheduled on the same node as its existing executors.
 *
 * <p>2) the subordinate/subservient resource availability percentage of a node, in descending order. The percentage
 * is the resource availability of the node divided by the resource availability of the whole summary the node belongs
 * to. Nodes that have exhausted or have little of one of the resources are ranked after nodes with more balanced
 * resource availability, so we are less likely to pick a node that has a lot of one resource but a low amount of
 * another.
 *
 * @param availRasNodes  a list of all the nodes we want to sort
 * @param exec           the executor that is passed on to {@code sortObjectResources}
 * @param hostId         the host-id that availRasNodes are a part of; only used in the debug log message here
 * @param scheduledCount counters of already scheduled executors, looked up by the id handed to the schedule
 *                       function; an id without an entry counts as 0
 * @return an {@link Iterable} of sorted {@link ObjectResourcesItem} for nodes.
 */
private Iterable<ObjectResourcesItem> sortNodes(List<RasNode> availRasNodes, ExecutorDetails exec, String hostId, Map<String, AtomicInteger> scheduledCount) {
    ObjectResourcesSummary hostResourcesSummary = new ObjectResourcesSummary("HOST");
    for (RasNode rasNode : availRasNodes) {
        // min/avg percentages start at 0
        ObjectResourcesItem nodeItem = new ObjectResourcesItem(rasNode.getId(), rasNode.getTotalAvailableResources(), rasNode.getTotalResources(), 0, 0);
        hostResourcesSummary.addObjectResourcesItem(nodeItem);
    }
    LOG.debug("Host {}: Overall Avail [ {} ] Total [ {} ]", hostId, hostResourcesSummary.getAvailableResourcesOverall(), hostResourcesSummary.getTotalResourcesOverall());
    return sortObjectResources(hostResourcesSummary, exec, (superId) -> {
        AtomicInteger alreadyScheduled = scheduledCount.get(superId);
        return (alreadyScheduled != null) ? alreadyScheduled.get() : 0;
    });
}
Also used : ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem)

Example 5 with ObjectResourcesItem

use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.

the class NodeSorterHostProximity method createClusterSummarizedResources.

/**
 * Builds a cluster-level resource summary containing one {@link ObjectResourcesItem} per rack that has hosts.
 *
 * <p>Racks in {@code rackIdToHosts} whose host collection is null or empty are logged and skipped. For every other
 * rack, the available and the total resources of each node found on the rack's hosts are accumulated into the rack's
 * item, which is then added to the summary.
 *
 * @return a summary named "Cluster" with one item per non-empty rack
 */
private ObjectResourcesSummary createClusterSummarizedResources() {
    ObjectResourcesSummary clusterResourcesSummary = new ObjectResourcesSummary("Cluster");
    rackIdToHosts.forEach((rackId, hostIds) -> {
        if (hostIds == null || hostIds.isEmpty()) {
            LOG.info("Ignoring Rack {} since it has no hosts", rackId);
        } else {
            ObjectResourcesItem rack = new ObjectResourcesItem(rackId);
            for (String hostId : hostIds) {
                for (RasNode node : hostnameToNodes(hostId)) {
                    rack.availableResources.add(node.getTotalAvailableResources());
                    rack.totalResources.add(node.getTotalResources());
                }
            }
            clusterResourcesSummary.addObjectResourcesItem(rack);
        }
    });
    // The loop above tolerates a null host collection, so the host count must too; otherwise the
    // log statement itself would throw a NullPointerException for such a rack.
    LOG.debug("Cluster Overall Avail [ {} ] Total [ {} ], rackCnt={}, hostCnt={}", clusterResourcesSummary.getAvailableResourcesOverall(), clusterResourcesSummary.getTotalResourcesOverall(), clusterResourcesSummary.getObjectResources().size(), rackIdToHosts.values().stream().mapToInt(x -> x == null ? 0 : x.size()).sum());
    return clusterResourcesSummary;
}
Also used : ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) RasNode(org.apache.storm.scheduler.resource.RasNode)

Aggregations

ObjectResourcesItem (org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem)15 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)13 ArrayList (java.util.ArrayList)11 HashMap (java.util.HashMap)11 List (java.util.List)11 Map (java.util.Map)11 ObjectResourcesSummary (org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary)11 Collection (java.util.Collection)10 Collections (java.util.Collections)10 HashSet (java.util.HashSet)10 Iterator (java.util.Iterator)10 Set (java.util.Set)10 Collectors (java.util.stream.Collectors)10 Config (org.apache.storm.Config)10 DNSToSwitchMapping (org.apache.storm.networktopography.DNSToSwitchMapping)10 Cluster (org.apache.storm.scheduler.Cluster)10 ExecutorDetails (org.apache.storm.scheduler.ExecutorDetails)10 SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment)10 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)10 WorkerSlot (org.apache.storm.scheduler.WorkerSlot)10