Search in sources :

Example 1 with NormalizedResourceRequest

use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest in project storm by apache.

the class NodeSorter method sortObjectResourcesCommon.

/**
 * Sort objects (nodes or racks) for scheduling an executor. The comparator applies the following
 * criteria in order; each later criterion only breaks ties left by the earlier ones.
 *
 * <ol>
 * <li>
 *     The number of executors of the topology already scheduled on the object, as reported by
 *     {@code existingScheduleFunc}, in descending order. The reasoning is so we schedule the rest of a
 *     topology on the same object (node or rack) as the existing executors of the topology.
 * </li>
 * <li>
 *     {@code avgResourcePercent} in descending order. It is computed against the overall available
 *     resources of {@code allResources} <em>after</em> the object's available resources have been adjusted
 *     with {@code updateForRareResourceAffinity} for the resources requested by {@code exec}. The adjustment
 *     is intended to favor objects whose resources are requested by the executor and to pull down objects
 *     holding resources that are not requested; its exact effect is defined by
 *     {@code NormalizedResourceOffer}.
 * </li>
 * <li>
 *     {@code minResourcePercent} in descending order. In this method it is computed <em>before</em> the
 *     affinity adjustment, i.e. from the object's unmodified available resources.
 * </li>
 * <li>
 *     The object id in ascending natural order, so the result is deterministic when everything else ties.
 * </li>
 * </ol>
 *
 * <p>The percentages are written onto the items of a copy of {@code allResources} created with the
 * {@code ObjectResourcesSummary} copy constructor, and the returned list holds the items of that copy.
 *
 * @param allResources         contains all individual ObjectResources as well as cumulative stats
 * @param exec                 executor for which the sorting is done; may be null, in which case the
 *                             affinity adjustment is skipped
 * @param existingScheduleFunc a function to get existing executors already scheduled on this object
 * @return a sorted list of ObjectResources
 * @see #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)
 * @see #sortObjectResourcesDefault(ObjectResourcesSummary, ExistingScheduleFunc)
 */
private List<ObjectResourcesItem> sortObjectResourcesCommon(final ObjectResourcesSummary allResources, final ExecutorDetails exec, final ExistingScheduleFunc existingScheduleFunc) {
    // Copy and modify allResources
    ObjectResourcesSummary affinityBasedAllResources = new ObjectResourcesSummary(allResources);
    final NormalizedResourceOffer availableResourcesOverall = allResources.getAvailableResourcesOverall();
    final NormalizedResourceRequest requestedResources = (exec != null) ? topologyDetails.getTotalResources(exec) : null;
    affinityBasedAllResources.getObjectResources().forEach(x -> {
        // Statement order matters: the minimum is taken before the affinity adjustment, the average after it.
        x.minResourcePercent = availableResourcesOverall.calculateMinPercentageUsedBy(x.availableResources);
        if (requestedResources != null) {
            // negate unrequested resources
            x.availableResources.updateForRareResourceAffinity(requestedResources);
        }
        x.avgResourcePercent = availableResourcesOverall.calculateAveragePercentageUsedBy(x.availableResources);
        LOG.trace("for {}: minResourcePercent={}, avgResourcePercent={}, numExistingSchedule={}", x.id, x.minResourcePercent, x.avgResourcePercent, existingScheduleFunc.getNumExistingSchedule(x.id));
    });
    final Comparator<ObjectResourcesItem> comparator = (o1, o2) -> {
        // 1. More executors of this topology already scheduled on the object sorts first.
        int execsScheduled1 = existingScheduleFunc.getNumExistingSchedule(o1.id);
        int execsScheduled2 = existingScheduleFunc.getNumExistingSchedule(o2.id);
        if (execsScheduled1 != execsScheduled2) {
            return Integer.compare(execsScheduled2, execsScheduled1);
        }
        // 2. Higher average percentage sorts first. Plain </> comparisons are kept on purpose
        //    (rather than Double.compare) so NaN and signed-zero handling stays as before.
        if (o1.avgResourcePercent > o2.avgResourcePercent) {
            return -1;
        } else if (o1.avgResourcePercent < o2.avgResourcePercent) {
            return 1;
        }
        // 3. Higher minimum percentage sorts first.
        if (o1.minResourcePercent > o2.minResourcePercent) {
            return -1;
        } else if (o1.minResourcePercent < o2.minResourcePercent) {
            return 1;
        }
        // 4. Final deterministic tie-break on the id.
        return o1.id.compareTo(o2.id);
    };
    // Typed copy of the adjusted items (previously a raw ArrayList filled with addAll).
    List<ObjectResourcesItem> sortedObjectResources = new ArrayList<>(affinityBasedAllResources.getObjectResources());
    sortedObjectResources.sort(comparator);
    LOG.debug("Sorted Object Resources: {}", sortedObjectResources);
    return sortedObjectResources;
}
Also used : NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) RasNode(org.apache.storm.scheduler.resource.RasNode) LoggerFactory(org.slf4j.LoggerFactory) NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) HashMap(java.util.HashMap) RasNodes(org.apache.storm.scheduler.resource.RasNodes) BaseResourceAwareStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.BaseResourceAwareStrategy) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) NoSuchElementException(java.util.NoSuchElementException) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) DNSToSwitchMapping(org.apache.storm.networktopography.DNSToSwitchMapping) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) Collection(java.util.Collection) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) Set(java.util.Set) Collectors(java.util.stream.Collectors) Cluster(org.apache.storm.scheduler.Cluster) List(java.util.List) Stream(java.util.stream.Stream) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) Config(org.apache.storm.Config) Comparator(java.util.Comparator) Collections(java.util.Collections) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) ArrayList(java.util.ArrayList)

Example 2 with NormalizedResourceRequest

use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest in project storm by apache.

the class NodeSorterHostProximity method sortObjectResourcesCommon.

/**
 * Sort objects (nodes or racks) for scheduling an executor. The comparator applies the following
 * criteria in order; each later criterion only breaks ties left by the earlier ones.
 *
 * <ol>
 * <li>
 *     The number of executors of the topology already scheduled on the object, as reported by
 *     {@code existingScheduleFunc}, in descending order. The reasoning is so we schedule the rest of a
 *     topology on the same object (node or rack) as the existing executors of the topology.
 * </li>
 * <li>
 *     {@code avgResourcePercent} in descending order. It is computed against the overall available
 *     resources of {@code allResources} after the object's available resources have been adjusted with
 *     {@code updateForRareResourceAffinity} for the resources requested by {@code exec}. The adjustment
 *     is intended to favor objects whose resources are requested by the executor and to pull down objects
 *     holding resources that are not requested; its exact effect is defined by
 *     {@code NormalizedResourceOffer}.
 * </li>
 * <li>
 *     {@code minResourcePercent} in descending order. In this method it is computed from the same
 *     affinity-adjusted available resources as the average.
 * </li>
 * <li>
 *     The object id in ascending natural order, so the result is deterministic when everything else ties.
 * </li>
 * </ol>
 *
 * <p>The percentages are written onto the items of a copy of {@code allResources} created with the
 * {@code ObjectResourcesSummary} copy constructor. The result is a {@link TreeSet} ordered by the
 * comparator above, so items for which the comparator returns 0 (which requires equal ids) are kept once.
 *
 * @param allResources         contains all individual ObjectResources as well as cumulative stats
 * @param exec                 executor for which the sorting is done; may be null, in which case the
 *                             affinity adjustment is skipped
 * @param existingScheduleFunc a function to get existing executors already scheduled on this object
 * @return an {@link Iterable} of sorted {@link ObjectResourcesItem}
 * @see #sortObjectResourcesGeneric(ObjectResourcesSummary, ExecutorDetails, ExistingScheduleFunc)
 * @see #sortObjectResourcesDefault(ObjectResourcesSummary, ExistingScheduleFunc)
 */
private Iterable<ObjectResourcesItem> sortObjectResourcesCommon(final ObjectResourcesSummary allResources, final ExecutorDetails exec, final ExistingScheduleFunc existingScheduleFunc) {
    // Copy and modify allResources
    ObjectResourcesSummary affinityBasedAllResources = new ObjectResourcesSummary(allResources);
    final NormalizedResourceOffer availableResourcesOverall = allResources.getAvailableResourcesOverall();
    final NormalizedResourceRequest requestedResources = (exec != null) ? topologyDetails.getTotalResources(exec) : null;
    affinityBasedAllResources.getObjectResources().forEach(x -> {
        if (requestedResources != null) {
            // negate unrequested resources
            x.availableResources.updateForRareResourceAffinity(requestedResources);
        }
        // Both percentages are taken after the affinity adjustment above.
        x.minResourcePercent = availableResourcesOverall.calculateMinPercentageUsedBy(x.availableResources);
        x.avgResourcePercent = availableResourcesOverall.calculateAveragePercentageUsedBy(x.availableResources);
        LOG.trace("for {}: minResourcePercent={}, avgResourcePercent={}, numExistingSchedule={}", x.id, x.minResourcePercent, x.avgResourcePercent, existingScheduleFunc.getNumExistingSchedule(x.id));
    });
    final Comparator<ObjectResourcesItem> comparator = (o1, o2) -> {
        // 1. More executors of this topology already scheduled on the object sorts first.
        int execsScheduled1 = existingScheduleFunc.getNumExistingSchedule(o1.id);
        int execsScheduled2 = existingScheduleFunc.getNumExistingSchedule(o2.id);
        if (execsScheduled1 != execsScheduled2) {
            return Integer.compare(execsScheduled2, execsScheduled1);
        }
        // 2. Higher average percentage sorts first. Plain </> comparisons are kept on purpose
        //    (rather than Double.compare) so NaN and signed-zero handling stays as before.
        if (o1.avgResourcePercent > o2.avgResourcePercent) {
            return -1;
        } else if (o1.avgResourcePercent < o2.avgResourcePercent) {
            return 1;
        }
        // 3. Higher minimum percentage sorts first.
        if (o1.minResourcePercent > o2.minResourcePercent) {
            return -1;
        } else if (o1.minResourcePercent < o2.minResourcePercent) {
            return 1;
        }
        // 4. Final deterministic tie-break on the id.
        return o1.id.compareTo(o2.id);
    };
    // Diamond operator instead of the raw TreeSet so the element type is checked by the compiler.
    TreeSet<ObjectResourcesItem> sortedObjectResources = new TreeSet<>(comparator);
    sortedObjectResources.addAll(affinityBasedAllResources.getObjectResources());
    LOG.debug("Sorted Object Resources: {}", sortedObjectResources);
    return sortedObjectResources;
}
Also used : NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) RasNode(org.apache.storm.scheduler.resource.RasNode) LoggerFactory(org.slf4j.LoggerFactory) NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) HashMap(java.util.HashMap) RasNodes(org.apache.storm.scheduler.resource.RasNodes) BaseResourceAwareStrategy(org.apache.storm.scheduler.resource.strategies.scheduling.BaseResourceAwareStrategy) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) VisibleForTesting(org.apache.storm.shade.com.google.common.annotations.VisibleForTesting) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) NoSuchElementException(java.util.NoSuchElementException) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) DNSToSwitchMapping(org.apache.storm.networktopography.DNSToSwitchMapping) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) Collection(java.util.Collection) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) Set(java.util.Set) Collectors(java.util.stream.Collectors) Cluster(org.apache.storm.scheduler.Cluster) List(java.util.List) Stream(java.util.stream.Stream) ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem) Config(org.apache.storm.Config) Comparator(java.util.Comparator) Collections(java.util.Collections) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) ObjectResourcesSummary(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesSummary) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) TreeSet(java.util.TreeSet) 
ObjectResourcesItem(org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem)

Example 3 with NormalizedResourceRequest

use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest in project storm by apache.

the class RasNode method couldEverFit.

/**
 * Checks whether the given executor has any chance of fitting on this node.
 *
 * <p>A node that is not alive can never host the executor. Otherwise the node's total available
 * resources are asked whether they could fit the executor's total requested resources, taking the
 * cluster's minimum worker CPU into account.
 *
 * @param exec the executor to schedule
 * @param td the topology the executor is a part of
 * @return true if the executor might fit (this is not a guarantee that it will), false if there is
 *     no way it would ever fit.
 */
public boolean couldEverFit(ExecutorDetails exec, TopologyDetails td) {
    if (isAlive) {
        final NormalizedResourceOffer nodeOffer = getTotalAvailableResources();
        final NormalizedResourceRequest execRequest = td.getTotalResources(exec);
        return nodeOffer.couldFit(cluster.getMinWorkerCpu(), execRequest);
    }
    // Dead nodes cannot host anything.
    return false;
}
Also used : NormalizedResourceOffer(org.apache.storm.scheduler.resource.normalization.NormalizedResourceOffer) NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest)

Example 4 with NormalizedResourceRequest

use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest in project storm by apache.

the class ResourceUtils method getSpoutsResources.

/**
 * Builds the resource request of every spout declared in the topology.
 *
 * @param topology     the topology whose spouts are inspected
 * @param topologyConf the topology configuration handed to each {@code NormalizedResourceRequest}
 * @return a new mutable map from spout id to its resource request; empty when the topology
 *     reports no spouts map
 */
public static Map<String, NormalizedResourceRequest> getSpoutsResources(StormTopology topology, Map<String, Object> topologyConf) {
    final Map<String, NormalizedResourceRequest> requestsBySpout = new HashMap<>();
    if (topology.get_spouts() == null) {
        return requestsBySpout;
    }
    for (Map.Entry<String, SpoutSpec> entry : topology.get_spouts().entrySet()) {
        final SpoutSpec spec = entry.getValue();
        final String spoutId = entry.getKey();
        final NormalizedResourceRequest request = new NormalizedResourceRequest(spec.get_common(), topologyConf, spoutId);
        // Guarded so the json conf is only fetched when trace logging is on.
        if (LOG.isTraceEnabled()) {
            LOG.trace("Turned {} into {}", spec.get_common().get_json_conf(), request);
        }
        requestsBySpout.put(spoutId, request);
    }
    return requestsBySpout;
}
Also used : NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) SpoutSpec(org.apache.storm.generated.SpoutSpec) HashMap(java.util.HashMap) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with NormalizedResourceRequest

use of org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest in project storm by apache.

the class TopologyDetails method initResourceList.

/**
 * Rebuilds {@code resourceList}, the mapping from each executor to its resource request.
 *
 * <p>One {@code NormalizedResourceRequest} is created per bolt and per spout of the topology and
 * shared by all executors that {@code executorToComponent} maps to that component. Executors that
 * end up without an entry get a default request via {@code addDefaultResforExec}.
 */
private void initResourceList() {
    this.resourceList = new HashMap<>();
    // Request per component id, built once so executors can be matched in a single pass
    // instead of rescanning every executor for every component.
    final Map<String, NormalizedResourceRequest> componentResources = new HashMap<>();
    // Extract bolt resource info
    if (topology.get_bolts() != null) {
        for (Map.Entry<String, Bolt> bolt : topology.get_bolts().entrySet()) {
            // the json_conf is populated by TopologyBuilder (e.g. boltDeclarer.setMemoryLoad)
            componentResources.put(bolt.getKey(), new NormalizedResourceRequest(bolt.getValue().get_common(), topologyConf, bolt.getKey()));
        }
    }
    // Extract spout resource info; processed after bolts so a spout entry wins on an id clash,
    // matching the previous bolt-then-spout assignment order.
    if (topology.get_spouts() != null) {
        for (Map.Entry<String, SpoutSpec> spout : topology.get_spouts().entrySet()) {
            componentResources.put(spout.getKey(), new NormalizedResourceRequest(spout.getValue().get_common(), topologyConf, spout.getKey()));
        }
    } else {
        LOG.warn("Topology {} does not seem to have any spouts!", topologyId);
    }
    // Only touch executorToComponent when there is something to assign, as before.
    if (!componentResources.isEmpty()) {
        for (Map.Entry<ExecutorDetails, String> anExecutorToComponent : executorToComponent.entrySet()) {
            NormalizedResourceRequest topologyResources = componentResources.get(anExecutorToComponent.getValue());
            if (topologyResources != null) {
                resourceList.put(anExecutorToComponent.getKey(), topologyResources);
            }
        }
    }
    // Executors whose component is neither a bolt nor a spout of the topology
    // (presumably system tasks, most specifically __acker tasks) get the default resources.
    for (ExecutorDetails exec : getExecutors()) {
        if (!resourceList.containsKey(exec)) {
            addDefaultResforExec(exec);
            // Logged after the default is assigned: before, resourceList.get(exec) was evaluated
            // while the executor had no entry, so the last argument was always null.
            LOG.debug("Scheduling component: {} executor: {} with resource requirement as {} {}", getExecutorToComponent().get(exec), exec, topologyConf.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB), resourceList.get(exec));
        }
    }
}
Also used : NormalizedResourceRequest(org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest) SpoutSpec(org.apache.storm.generated.SpoutSpec) Bolt(org.apache.storm.generated.Bolt) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

NormalizedResourceRequest (org.apache.storm.scheduler.resource.normalization.NormalizedResourceRequest)21 HashMap (java.util.HashMap)12 Map (java.util.Map)10 HashSet (java.util.HashSet)9 ArrayList (java.util.ArrayList)8 List (java.util.List)8 Set (java.util.Set)8 Collection (java.util.Collection)7 Config (org.apache.storm.Config)7 SchedulerAssignment (org.apache.storm.scheduler.SchedulerAssignment)7 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)7 Logger (org.slf4j.Logger)7 LoggerFactory (org.slf4j.LoggerFactory)7 Collections (java.util.Collections)6 Iterator (java.util.Iterator)6 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)6 Collectors (java.util.stream.Collectors)6 DNSToSwitchMapping (org.apache.storm.networktopography.DNSToSwitchMapping)6 Cluster (org.apache.storm.scheduler.Cluster)6 ExecutorDetails (org.apache.storm.scheduler.ExecutorDetails)6