Use of org.apache.storm.scheduler.Component in the Apache Storm project.
Class ExecSorterByConnectionCount, method sortExecutors.
/**
 * Produce a scheduling order for executors driven by component connectivity.
 *
 * <p>Components are visited in the order returned by {@code sortComponents}. For each visited component,
 * its neighbors (children and parents) are ordered by {@code sortNeighbors}; then, round after round, one
 * pending executor is taken from the component itself followed by one from each neighbor, until a full
 * round takes nothing. Any unassigned executor that was never picked up this way (for example a system
 * executor, which is not part of the user component map) is appended at the end.
 *
 * @param unassignedExecutors an unmodifiable set of executors that need to be scheduled.
 * @return a list of executors in sorted order for scheduling.
 */
public List<ExecutorDetails> sortExecutors(Set<ExecutorDetails> unassignedExecutors) {
    // user components only - system components are excluded from this map
    Map<String, Component> userComponents = topologyDetails.getUserTopolgyComponents();

    // component id -> FIFO of that component's executors which still need a slot in the ordering
    Map<String, Queue<ExecutorDetails>> pendingByComponent = new HashMap<>();
    for (Component comp : userComponents.values()) {
        Queue<ExecutorDetails> pending = new LinkedList<>();
        for (ExecutorDetails exec : comp.getExecs()) {
            if (unassignedExecutors.contains(exec)) {
                pending.add(exec);
            }
        }
        pendingByComponent.put(comp.getId(), pending);
    }

    Set<Component> componentsInOrder = sortComponents(userComponents);
    componentsInOrder.addAll(userComponents.values());

    // insertion-ordered, so the iteration order of this set is the final scheduling order
    LinkedHashSet<ExecutorDetails> ordered = new LinkedHashSet<>();
    for (Component current : componentsInOrder) {
        Map<String, Component> adjacent = new HashMap<>();
        for (String neighborId : Sets.union(current.getChildren(), current.getParents())) {
            adjacent.put(neighborId, userComponents.get(neighborId));
        }
        Set<Component> neighborsInOrder = sortNeighbors(current, adjacent);
        Queue<ExecutorDetails> currentPending = pendingByComponent.get(current.getId());

        // keep doing rounds as long as the previous round managed to take at least one executor
        boolean progressed = true;
        while (progressed) {
            progressed = false;
            if (!currentPending.isEmpty()) {
                ordered.add(currentPending.poll());
                progressed = true;
            }
            for (Component neighbor : neighborsInOrder) {
                Queue<ExecutorDetails> neighborPending = pendingByComponent.get(neighbor.getId());
                if (neighborPending.isEmpty()) {
                    continue;
                }
                ordered.add(neighborPending.poll());
                progressed = true;
            }
        }
    }

    // whatever was not reached above (possibly system executors) goes last
    ordered.addAll(unassignedExecutors);
    return new LinkedList<>(ordered);
}
Use of org.apache.storm.scheduler.Component in the Apache Storm project.
Class ExecSorterByProximity, method topologicalSortComponents.
/**
 * Sort components topologically, parents before children.
 *
 * <p>On every round the lowest-indexed component whose in-degree has dropped to zero is emitted and the
 * in-degree of each of its children is decremented. Components that never reach in-degree zero (which
 * indicates a loop in the data flow) are appended after the sorted ones and a warning is logged.
 *
 * @param componentMap The map of component Id to Component Object.
 * @return The sorted components
 */
private List<Component> topologicalSortComponents(final Map<String, Component> componentMap) {
    final int total = componentMap.size();
    boolean[] emitted = new boolean[total];
    int[] pendingParents = new int[total];

    // fix an index for every component id so the arrays above can be addressed by id
    List<String> ids = new ArrayList<>(componentMap.keySet());
    Map<String, Integer> indexOf = new HashMap<>();
    for (int idx = 0; idx < ids.size(); idx++) {
        indexOf.put(ids.get(idx), idx);
    }

    // count incoming edges per component
    for (int idx = 0; idx < total; idx++) {
        Component parent = componentMap.get(ids.get(idx));
        for (String childId : parent.getChildren()) {
            pendingParents[indexOf.get(childId)] += 1;
        }
    }

    LinkedHashSet<Component> sorted = new LinkedHashSet<>();
    // at most one component is emitted per round, so `total` rounds are enough
    for (int round = 0; round < total; round++) {
        int ready = -1;
        for (int idx = 0; idx < total; idx++) {
            if (!emitted[idx] && pendingParents[idx] == 0) {
                ready = idx;
                break;
            }
        }
        if (ready < 0) {
            // nothing is ready and nothing will change any more: the rest is part of a loop
            break;
        }
        Component comp = componentMap.get(ids.get(ready));
        sorted.add(comp);
        emitted[ready] = true;
        for (String childId : comp.getChildren()) {
            pendingParents[indexOf.get(childId)]--;
        }
    }

    // add back components that could not be visited and warn about the loop in the component data flow
    if (sorted.size() != componentMap.size()) {
        String unvisitedComponentIds = componentMap.entrySet().stream()
            .filter(entry -> !sorted.contains(entry.getValue()))
            .map(Map.Entry::getKey)
            .collect(Collectors.joining(","));
        LOG.warn("topologicalSortComponents for topology {} detected possible loop(s) involving components {}, "
                + "appending them to the end of the sorted component list",
            topologyDetails.getId(), unvisitedComponentIds);
        sorted.addAll(componentMap.values());
    }
    return new ArrayList<>(sorted);
}
Use of org.apache.storm.scheduler.Component in the Apache Storm project.
Class ExecSorterByProximity, method takeExecutors.
/**
 * Take unscheduled executors from the current component and, recursively, from its downstream components.
 *
 * <p>One executor is taken from the current component first. Then, for each child in the order given by
 * {@code getSortedChildren} that still has unscheduled executors, this method recurses into the child a
 * number of times:
 * <ul>
 *   <li>if {@code hasShuffleGroupingFromParentToChild} holds for the pair, the max of 1 and
 *       (child's unscheduled executors / current component's unscheduled executors), using integer
 *       division and the counts as they were before anything was taken in this call;</li>
 *   <li>otherwise, exactly once.</li>
 * </ul>
 *
 * @param currComp The current component.
 * @param componentMap The map from component Id to component object.
 * @param compToExecsToSchedule The map from component Id to unscheduled executors.
 * @return The executors to schedule in order.
 */
private List<ExecutorDetails> takeExecutors(Component currComp, final Map<String, Component> componentMap,
                                            final Map<String, Queue<ExecutorDetails>> compToExecsToSchedule) {
    List<ExecutorDetails> taken = new ArrayList<>();
    Queue<ExecutorDetails> ownQueue = compToExecsToSchedule.get(currComp.getId());

    // snapshot taken before polling: it is the divisor of the shuffle ratio below
    final int ownPending = ownQueue.size();
    if (ownPending == 0) {
        // defensive only; callers are not expected to hit this
        return taken;
    }
    taken.add(ownQueue.poll());

    for (String childId : getSortedChildren(currComp, componentMap)) {
        Queue<ExecutorDetails> childQueue = compToExecsToSchedule.get(childId);
        int childPending = childQueue.size();
        if (childPending == 0) {
            continue;
        }
        Component child = componentMap.get(childId);
        // shuffle grouping: proportional share (truncated, at least one); anything else: one-by-one
        int quota = hasShuffleGroupingFromParentToChild(currComp, child)
            ? Math.max(1, childPending / ownPending)
            : 1;
        for (int remaining = quota; remaining > 0; remaining--) {
            taken.addAll(takeExecutors(child, componentMap, compToExecsToSchedule));
        }
    }
    return taken;
}
Use of org.apache.storm.scheduler.Component in the Apache Storm project.
Class ExecSorterByProximity, method getSortedChildren.
/**
 * Return the ids of the direct children of a component in a deterministic order.
 *
 * <p>Children for which {@code hasShuffleGroupingFromParentToChild(component, child)} is false come first,
 * followed by those for which it is true. Within each of the two groups the ids are in their natural
 * {@link String} order.
 *
 * @param component The parent component whose children are ordered.
 * @param componentMap The map from component Id to component object, used to resolve child ids.
 * @return The child ids as a sorted set.
 */
private Set<String> getSortedChildren(Component component, final Map<String, Component> componentMap) {
    Set<String> sortedChildren = new TreeSet<>((o1, o2) -> {
        boolean child1IsShuffle = hasShuffleGroupingFromParentToChild(component, componentMap.get(o1));
        boolean child2IsShuffle = hasShuffleGroupingFromParentToChild(component, componentMap.get(o2));
        if (child1IsShuffle == child2IsShuffle) {
            // Same group (both shuffle or both non-shuffle): fall back to the id. Comparing by id here,
            // rather than returning a constant, keeps the comparator antisymmetric and makes
            // compare(x, x) == 0, as the Comparator contract used by TreeSet requires.
            return o1.compareTo(o2);
        }
        // different groups: the shuffle-grouped child sorts after the other one
        return child1IsShuffle ? 1 : -1;
    });
    sortedChildren.addAll(component.getChildren());
    return sortedChildren;
}
Use of org.apache.storm.scheduler.Component in the Apache Storm project.
Class ExecSorterByProximity, method sortExecutors.
/**
 * Produce a scheduling order for executors driven by network proximity needs.
 *
 * <p>User components are visited in the order returned by {@code topologicalSortComponents}. For each
 * component, {@code takeExecutors} is invoked once per executor that component had pending when it was
 * reached; each invocation contributes executors of the component and of its downstream components.
 * Any unassigned executor not picked up this way (for example a system executor, which is not part of
 * the user component map) is appended at the end.
 *
 * @param unassignedExecutors an unmodifiable set of executors that need to be scheduled.
 * @return a list of executors in sorted order for scheduling.
 */
public List<ExecutorDetails> sortExecutors(Set<ExecutorDetails> unassignedExecutors) {
    // user components only - system components are excluded from this map
    Map<String, Component> userComponents = topologyDetails.getUserTopolgyComponents();

    // component id -> FIFO of that component's executors which still need a slot in the ordering
    Map<String, Queue<ExecutorDetails>> pendingByComponent = new HashMap<>();
    for (Component comp : userComponents.values()) {
        Queue<ExecutorDetails> pending = new LinkedList<>();
        for (ExecutorDetails exec : comp.getExecs()) {
            if (unassignedExecutors.contains(exec)) {
                pending.add(exec);
            }
        }
        pendingByComponent.put(comp.getId(), pending);
    }

    // insertion-ordered, so the iteration order of this set is the final scheduling order
    LinkedHashSet<ExecutorDetails> ordered = new LinkedHashSet<>();
    for (Component comp : topologicalSortComponents(userComponents)) {
        // read once up front: takeExecutors drains the queues while the rounds are running
        int rounds = pendingByComponent.get(comp.getId()).size();
        for (int round = 0; round < rounds; round++) {
            ordered.addAll(takeExecutors(comp, userComponents, pendingByComponent));
        }
    }

    // whatever was not reached above (possibly system executors) goes last
    ordered.addAll(unassignedExecutors);
    return new LinkedList<>(ordered);
}
Aggregations