Search in sources :

Example 1 with Component

use of org.apache.storm.scheduler.Component in project storm by apache.

Class ExecSorterByConnectionCount, method sortExecutors.

/**
 * Build a scheduling order for executors driven by how many connections their components make.
 *
 * <p>Components are first ordered by {@code sortComponents}. For each component in that order,
 * its neighbors (children and parents) are ordered by {@code sortNeighbors}. Executors are then
 * emitted round-robin: one from the current component followed by one from each neighbor, repeated
 * until a full round yields nothing. Finally, any unassigned executor that was not emitted by the
 * loop (for example system executors, which are absent from the user component map) is appended.
 *
 * @param unassignedExecutors an unmodifiable set of executors that need to be scheduled.
 * @return a list of executors in sorted order for scheduling.
 */
public List<ExecutorDetails> sortExecutors(Set<ExecutorDetails> unassignedExecutors) {
    // user components only - system components are not part of this map
    Map<String, Component> componentMap = topologyDetails.getUserTopolgyComponents();

    // per component: the executors of that component which still have to be scheduled
    Map<String, Queue<ExecutorDetails>> compToExecsToSchedule = new HashMap<>();
    for (Component component : componentMap.values()) {
        Queue<ExecutorDetails> pendingExecs = new LinkedList<>();
        for (ExecutorDetails exec : component.getExecs()) {
            if (unassignedExecutors.contains(exec)) {
                pendingExecs.add(exec);
            }
        }
        compToExecsToSchedule.put(component.getId(), pendingExecs);
    }

    // LinkedHashSet keeps insertion order and silently drops repeats
    LinkedHashSet<ExecutorDetails> orderedExecutorSet = new LinkedHashSet<>();

    Set<Component> sortedComponents = sortComponents(componentMap);
    sortedComponents.addAll(componentMap.values());
    for (Component currComp : sortedComponents) {
        // children first, then parents; a key seen twice keeps its first position
        Map<String, Component> neighbors = new HashMap<>();
        for (String childId : currComp.getChildren()) {
            neighbors.put(childId, componentMap.get(childId));
        }
        for (String parentId : currComp.getParents()) {
            neighbors.put(parentId, componentMap.get(parentId));
        }
        Set<Component> sortedNeighbors = sortNeighbors(currComp, neighbors);
        Queue<ExecutorDetails> ownQueue = compToExecsToSchedule.get(currComp.getId());

        // keep making rounds for as long as the previous round emitted at least one executor
        boolean emittedInRound = true;
        while (emittedInRound) {
            emittedInRound = false;
            if (!ownQueue.isEmpty()) {
                orderedExecutorSet.add(ownQueue.poll());
                emittedInRound = true;
            }
            for (Component neighborComp : sortedNeighbors) {
                Queue<ExecutorDetails> neighborQueue = compToExecsToSchedule.get(neighborComp.getId());
                if (neighborQueue.isEmpty()) {
                    continue;
                }
                orderedExecutorSet.add(neighborQueue.poll());
                emittedInRound = true;
            }
        }
    }

    // whatever has not been emitted yet (which may be system executors) goes to the end
    orderedExecutorSet.addAll(unassignedExecutors);
    return new LinkedList<>(orderedExecutorSet);
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) LinkedList(java.util.LinkedList) Component(org.apache.storm.scheduler.Component) Queue(java.util.Queue)

Example 2 with Component

use of org.apache.storm.scheduler.Component in project storm by apache.

Class ExecSorterByProximity, method topologicalSortComponents.

/**
 * Sort components topologically, following parent-to-child edges.
 *
 * <p>In every round the component with the lowest index (position in the key set of
 * {@code componentMap}) that has no unprocessed parent is emitted. Components that can never
 * be emitted this way (their in-degree never drops to zero) are appended at the end and a
 * warning is logged.
 *
 * @param componentMap The map of component Id to Component Object.
 * @return The sorted components
 */
private List<Component> topologicalSortComponents(final Map<String, Component> componentMap) {
    final int total = componentMap.size();
    List<String> componentIds = new ArrayList<>(componentMap.keySet());

    // position of every component id inside componentIds
    Map<String, Integer> compIdToIndex = new HashMap<>();
    int nextIndex = 0;
    for (String id : componentIds) {
        compIdToIndex.put(id, nextIndex++);
    }

    // in-degree = number of components listing this one as a child
    int[] inDegree = new int[total];
    for (String parentId : componentIds) {
        for (String childId : componentMap.get(parentId).getChildren()) {
            inDegree[compIdToIndex.get(childId)]++;
        }
    }

    LinkedHashSet<Component> sortedComponentsSet = new LinkedHashSet<>();
    boolean[] visited = new boolean[total];
    for (int round = 0; round < total; round++) {
        // lowest-index component that is still unvisited and has no remaining incoming edge
        int ready = -1;
        for (int i = 0; i < total && ready < 0; i++) {
            if (!visited[i] && inDegree[i] == 0) {
                ready = i;
            }
        }
        if (ready < 0) {
            // nothing is ready and nothing will change in later rounds either
            break;
        }
        Component comp = componentMap.get(componentIds.get(ready));
        sortedComponentsSet.add(comp);
        visited[ready] = true;
        for (String childId : comp.getChildren()) {
            inDegree[compIdToIndex.get(childId)] -= 1;
        }
    }

    // components left over could not be visited; report them and append them to the result
    if (sortedComponentsSet.size() != componentMap.size()) {
        String unvisitedComponentIds = componentMap.entrySet().stream()
            .filter(entry -> !sortedComponentsSet.contains(entry.getValue()))
            .map(Map.Entry::getKey)
            .collect(Collectors.joining(","));
        LOG.warn("topologicalSortComponents for topology {} detected possible loop(s) involving components {}, "
                + "appending them to the end of the sorted component list",
            topologyDetails.getId(), unvisitedComponentIds);
        sortedComponentsSet.addAll(componentMap.values());
    }
    return new ArrayList<>(sortedComponentsSet);
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Logger(org.slf4j.Logger) Component(org.apache.storm.scheduler.Component) Grouping(org.apache.storm.generated.Grouping) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) ArrayList(java.util.ArrayList) List(java.util.List) Map(java.util.Map) Queue(java.util.Queue) LinkedList(java.util.LinkedList) LinkedHashSet(java.util.LinkedHashSet) ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Component(org.apache.storm.scheduler.Component)

Example 3 with Component

use of org.apache.storm.scheduler.Component in project storm by apache.

Class ExecSorterByProximity, method takeExecutors.

/**
 * Take unscheduled executors from the current component and, recursively, from its downstream
 * components.
 *
 * <p>One executor is removed from the current component's queue. Then, for every child returned by
 * {@code getSortedChildren} that still has unscheduled executors:
 * <ul>
 *   <li>if the edge from the current component to the child is a shuffle grouping, the number of
 *       recursive takes is max(1, child's unscheduled count / current component's unscheduled count),
 *       using integer division and the current component's count from before its own executor
 *       was removed;</li>
 *   <li>otherwise exactly one recursive take is made.</li>
 * </ul>
 * The queues in {@code compToExecsToSchedule} are mutated as executors are taken.
 *
 * @param currComp The current component.
 * @param componentMap The map from component Id to component object.
 * @param compToExecsToSchedule The map from component Id to unscheduled executors.
 * @return The executors to schedule in order.
 */
private List<ExecutorDetails> takeExecutors(Component currComp, final Map<String, Component> componentMap, final Map<String, Queue<ExecutorDetails>> compToExecsToSchedule) {
    List<ExecutorDetails> taken = new ArrayList<>();
    Queue<ExecutorDetails> ownQueue = compToExecsToSchedule.get(currComp.getId());
    // snapshot before polling: this is the denominator for the shuffle ratio below
    final int ownPending = ownQueue.size();
    if (ownPending == 0) {
        // defensive only; callers are not expected to hit this
        return taken;
    }
    taken.add(ownQueue.poll());

    for (String childId : getSortedChildren(currComp, componentMap)) {
        Component child = componentMap.get(childId);
        // sized now, i.e. after earlier siblings may already have drained part of it
        int childPending = compToExecsToSchedule.get(childId).size();
        if (childPending == 0) {
            continue;
        }
        // shuffle grouping: proportional share (truncated, at least one); anything else: one
        int quota = hasShuffleGroupingFromParentToChild(currComp, child)
            ? Math.max(1, childPending / ownPending)
            : 1;
        for (int remaining = quota; remaining > 0; remaining--) {
            taken.addAll(takeExecutors(child, componentMap, compToExecsToSchedule));
        }
    }
    return taken;
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) ArrayList(java.util.ArrayList) Component(org.apache.storm.scheduler.Component)

Example 4 with Component

use of org.apache.storm.scheduler.Component in project storm by apache.

Class ExecSorterByProximity, method getSortedChildren.

/**
 * Return the ids of the children of {@code component} in a deterministic order.
 *
 * <p>Children that are NOT connected from {@code component} via shuffle grouping come first,
 * children connected via shuffle grouping come last. Within each of the two groups the ids are
 * ordered by their natural {@link String} order.
 *
 * <p>The comparator is a proper total order: it returns 0 only for equal ids and is antisymmetric.
 * An earlier form returned -1 for every pair whose first argument was a non-shuffle child, which
 * violates the {@link java.util.Comparator} contract and made the relative order of non-shuffle
 * children depend on the iteration order of the input set.
 *
 * @param component the parent component whose children are sorted.
 * @param componentMap the map from component id to component object, used to look up each child.
 * @return a sorted set of child component ids.
 */
private Set<String> getSortedChildren(Component component, final Map<String, Component> componentMap) {
    Set<String> children = component.getChildren();
    Set<String> sortedChildren = new TreeSet<>((o1, o2) -> {
        Component child1 = componentMap.get(o1);
        Component child2 = componentMap.get(o2);
        boolean child1IsShuffle = hasShuffleGroupingFromParentToChild(component, child1);
        boolean child2IsShuffle = hasShuffleGroupingFromParentToChild(component, child2);
        // false sorts before true, so non-shuffle children precede shuffle children
        int byGrouping = Boolean.compare(child1IsShuffle, child2IsShuffle);
        if (byGrouping != 0) {
            return byGrouping;
        }
        // same grouping class: fall back to the id so the order is total and stable
        return o1.compareTo(o2);
    });
    sortedChildren.addAll(children);
    return sortedChildren;
}
Also used : TreeSet(java.util.TreeSet) Component(org.apache.storm.scheduler.Component)

Example 5 with Component

use of org.apache.storm.scheduler.Component in project storm by apache.

Class ExecSorterByProximity, method sortExecutors.

/**
 * Order executors by network proximity needs.
 *
 * <p>The user components are sorted with {@code topologicalSortComponents}. For each component in
 * that order, {@code takeExecutors} is invoked once per executor the component had pending when
 * its turn started, and the executors it returns are appended to the result. Finally, any
 * unassigned executor not emitted so far (which may be a system executor) is added at the end.
 *
 * @param unassignedExecutors an unmodifiable set of executors that need to be scheduled.
 * @return a list of executors in sorted order for scheduling.
 */
public List<ExecutorDetails> sortExecutors(Set<ExecutorDetails> unassignedExecutors) {
    // user components only - system components are not part of this map
    Map<String, Component> componentMap = topologyDetails.getUserTopolgyComponents();

    // per component: the executors of that component which still have to be scheduled
    Map<String, Queue<ExecutorDetails>> compToExecsToSchedule = new HashMap<>();
    for (Component component : componentMap.values()) {
        Queue<ExecutorDetails> pendingExecs = new LinkedList<>();
        for (ExecutorDetails exec : component.getExecs()) {
            if (unassignedExecutors.contains(exec)) {
                pendingExecs.add(exec);
            }
        }
        compToExecsToSchedule.put(component.getId(), pendingExecs);
    }

    // LinkedHashSet keeps insertion order and silently drops repeats
    LinkedHashSet<ExecutorDetails> orderedExecutorSet = new LinkedHashSet<>();
    for (Component currComp : topologicalSortComponents(componentMap)) {
        // the count is fixed up front; takeExecutors shrinks the queue while we iterate
        int remaining = compToExecsToSchedule.get(currComp.getId()).size();
        while (remaining > 0) {
            orderedExecutorSet.addAll(takeExecutors(currComp, componentMap, compToExecsToSchedule));
            remaining--;
        }
    }

    // whatever has not been emitted yet (which may be system executors) goes to the end
    orderedExecutorSet.addAll(unassignedExecutors);
    return new LinkedList<>(orderedExecutorSet);
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) LinkedHashSet(java.util.LinkedHashSet) HashMap(java.util.HashMap) LinkedList(java.util.LinkedList) Component(org.apache.storm.scheduler.Component) Queue(java.util.Queue)

Aggregations

Component (org.apache.storm.scheduler.Component)5 ExecutorDetails (org.apache.storm.scheduler.ExecutorDetails)4 HashMap (java.util.HashMap)3 LinkedHashSet (java.util.LinkedHashSet)3 LinkedList (java.util.LinkedList)3 Queue (java.util.Queue)3 ArrayList (java.util.ArrayList)2 TreeSet (java.util.TreeSet)2 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 GlobalStreamId (org.apache.storm.generated.GlobalStreamId)1 Grouping (org.apache.storm.generated.Grouping)1 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)1 Logger (org.slf4j.Logger)1 LoggerFactory (org.slf4j.LoggerFactory)1