use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.
the class TestNodeSorterHostProximity method testAntiAffinityWithMultipleTopologies.
/**
* Schedule two topologies, once with special resources and another without.
* There are enough special resources to hold one topology with special resource ("my.gpu").
* If the sort order is incorrect, scheduling will not succeed.
*/
@Test
public void testAntiAffinityWithMultipleTopologies() {
INimbus iNimbus = new INimbusTest();
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>());
HashMap<String, Double> extraResources = new HashMap<>();
extraResources.put("my.gpu", 1.0);
supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources));
Config config = new Config();
config.putAll(createGrasClusterConfig(88, 775, 25, null, null));
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails tdSimple = genTopology("topology-simple", config, 1, 5, 100, 300, 0, 0, "user", 8192);
// Schedule the simple topology first
Topologies topologies = new Topologies(tdSimple);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
{
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdSimple);
for (ExecutorDetails exec : tdSimple.getExecutors()) {
nodeSorter.prepare(exec);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = StreamSupport.stream(sortedRacks.spliterator(), false).map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
Assert.assertEquals(rackSummaries + "\nFirst rack sorted", "rack-000", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted", "rack-001", sortedRacks.get(1).id);
}
}
scheduler.schedule(topologies, cluster);
TopologyBuilder builder = topologyBuilder(1, 5, 100, 300);
builder.setBolt("gpu-bolt", new TestBolt(), 40).addResource("my.gpu", 1.0).shuffleGrouping("spout-0");
TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, builder.createTopology(), 0, 0, "user", 8192);
// Now schedule GPU but with the simple topology in place.
topologies = new Topologies(tdSimple, tdGpu);
cluster = new Cluster(cluster, topologies);
{
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdGpu);
for (ExecutorDetails exec : tdGpu.getExecutors()) {
String comp = tdGpu.getComponentFromExecutor(exec);
nodeSorter.prepare(exec);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = sortedRacks.stream().map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
if (comp.equals("gpu-bolt")) {
Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-001", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-000", sortedRacks.get(1).id);
} else {
Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-000", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-001", sortedRacks.get(1).id);
}
}
}
scheduler.schedule(topologies, cluster);
Map<String, SchedulerAssignment> assignments = new TreeMap<>(cluster.getAssignments());
assertEquals(2, assignments.size());
Map<String, Map<String, AtomicLong>> topoPerRackCount = new HashMap<>();
for (Map.Entry<String, SchedulerAssignment> entry : assignments.entrySet()) {
SchedulerAssignment sa = entry.getValue();
Map<String, AtomicLong> slotsPerRack = new TreeMap<>();
for (WorkerSlot slot : sa.getSlots()) {
String nodeId = slot.getNodeId();
String rack = supervisorIdToRackName(nodeId);
slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet();
}
LOG.info("{} => {}", entry.getKey(), slotsPerRack);
topoPerRackCount.put(entry.getKey(), slotsPerRack);
}
Map<String, AtomicLong> simpleCount = topoPerRackCount.get("topology-simple-0");
assertNotNull(simpleCount);
// Because the simple topology was scheduled first we want to be sure that it didn't put anything on
// the GPU nodes.
// Only 1 rack is in use
assertEquals(1, simpleCount.size());
// r001 is the second rack with GPUs
assertFalse(simpleCount.containsKey("r001"));
// r000 is the first rack with no GPUs
assertTrue(simpleCount.containsKey("r000"));
// We don't really care too much about the scheduling of topology-gpu-0, because it was scheduled.
}
use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.
the class TestNodeSorterHostProximity method testMultipleRacksWithFavoritism.
/**
* Test whether strategy will choose correct rack.
*/
@Test
public void testMultipleRacksWithFavoritism() {
final Map<String, SupervisorDetails> supMap = new HashMap<>();
final int numRacks = 1;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 4;
final int numZonesPerHost = 2;
int rackStartNum = 0;
int supStartNum = 0;
final Map<String, SupervisorDetails> supMapRack0 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 400, 8000, Collections.emptyMap(), 1.0);
// generate another rack of supervisors with less resources
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack1 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 200, 4000, Collections.emptyMap(), 1.0);
// generate some supervisors that are depleted of one resource
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack2 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 0, 8000, Collections.emptyMap(), 1.0);
// generate some that has a lot of memory but little of cpu
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack3 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 10, 8000 * 2 + 4000, Collections.emptyMap(), 1.0);
// generate some that has a lot of cpu but little of memory
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack4 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 400 + 200 + 10, 1000, Collections.emptyMap(), 1.0);
supMap.putAll(supMapRack0);
supMap.putAll(supMapRack1);
supMap.putAll(supMapRack2);
supMap.putAll(supMapRack3);
supMap.putAll(supMapRack4);
Config config = createClusterConfig(100, 500, 500, null);
config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
INimbus iNimbus = new INimbusTest();
// create test DNSToSwitchMapping plugin
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4);
Config t1Conf = new Config();
t1Conf.putAll(config);
final List<String> t1FavoredHostNames = Arrays.asList("host-41", "host-42", "host-43");
t1Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, t1FavoredHostNames);
final List<String> t1UnfavoredHostIds = Arrays.asList("host-1", "host-2", "host-3");
t1Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, t1UnfavoredHostIds);
// generate topologies
TopologyDetails topo1 = genTopology("topo-1", t1Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
Config t2Conf = new Config();
t2Conf.putAll(config);
t2Conf.put(Config.TOPOLOGY_SCHEDULER_FAVORED_NODES, Arrays.asList("host-31", "host-32", "host-33"));
t2Conf.put(Config.TOPOLOGY_SCHEDULER_UNFAVORED_NODES, Arrays.asList("host-11", "host-12", "host-13"));
TopologyDetails topo2 = genTopology("topo-2", t2Conf, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
Topologies topologies = new Topologies(topo1, topo2);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
List<String> supHostnames = new LinkedList<>();
for (SupervisorDetails sup : supMap.values()) {
supHostnames.add(sup.getHost());
}
Map<String, List<String>> rackToHosts = testDNSToSwitchMapping.getRackToHosts();
cluster.setNetworkTopography(rackToHosts);
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, topo1, BaseResourceAwareStrategy.NodeSortType.DEFAULT_RAS);
nodeSorter.prepare(null);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = sortedRacks.stream().map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
// Ranked first since rack-000 has the most balanced set of resources
Assert.assertEquals("rack-000 should be ordered first", "rack-000", it.next().id);
// Ranked second since rack-1 has a balanced set of resources but less than rack-0
Assert.assertEquals("rack-001 should be ordered second", "rack-001", it.next().id);
// Ranked third since rack-4 has a lot of cpu but not a lot of memory
Assert.assertEquals("rack-004 should be ordered third", "rack-004", it.next().id);
// Ranked fourth since rack-3 has alot of memory but not cpu
Assert.assertEquals("rack-003 should be ordered fourth", "rack-003", it.next().id);
// Ranked last since rack-2 has not cpu resources
Assert.assertEquals("rack-00s2 should be ordered fifth", "rack-002", it.next().id);
}
use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.
the class TestNodeSorterHostProximity method testMultipleRacksOrderedByCapacity.
/**
* Racks should be returned in order of decreasing capacity.
*/
@Test
public void testMultipleRacksOrderedByCapacity() {
final Map<String, SupervisorDetails> supMap = new HashMap<>();
final int numRacks = 1;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 4;
final int numZonesPerHost = 1;
final double numaResourceMultiplier = 1.0;
int rackStartNum = 0;
int supStartNum = 0;
final Map<String, SupervisorDetails> supMapRack0 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 600, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier);
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack1 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 500, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier);
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack2 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 400, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier);
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack3 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 300, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier);
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack4 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 200, 8000 - rackStartNum, Collections.emptyMap(), numaResourceMultiplier);
// too small to hold topology
supStartNum += numSupersPerRack;
final Map<String, SupervisorDetails> supMapRack5 = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum++, supStartNum, 100, 8000 - rackStartNum, Collections.singletonMap("gpu.count", 0.0), numaResourceMultiplier);
supMap.putAll(supMapRack0);
supMap.putAll(supMapRack1);
supMap.putAll(supMapRack2);
supMap.putAll(supMapRack3);
supMap.putAll(supMapRack4);
supMap.putAll(supMapRack5);
Config config = createClusterConfig(100, 500, 500, null);
config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
INimbus iNimbus = new INimbusTest();
// create test DNSToSwitchMapping plugin
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMapRack0, supMapRack1, supMapRack2, supMapRack3, supMapRack4, supMapRack5);
// generate topologies
TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
TopologyDetails topo2 = genTopology("topo-2", config, 8, 0, 2, 0, CURRENT_TIME - 2, 10, "user");
Topologies topologies = new Topologies(topo1, topo2);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, topo1);
nodeSorter.prepare(null);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = sortedRacks.stream().map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
NormalizedResourceRequest topoResourceRequest = topo1.getApproximateTotalResources();
String topoRequest = String.format("Topo %s, approx-requested-resources %s", topo1.getId(), topoResourceRequest.toString());
Iterator<ObjectResourcesItem> it = sortedRacks.iterator();
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nRack-000 should be ordered first since it has the largest capacity", "rack-000", it.next().id);
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nrack-001 should be ordered second since it smaller than rack-000", "rack-001", it.next().id);
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nrack-002 should be ordered third since it is smaller than rack-001", "rack-002", it.next().id);
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nrack-003 should be ordered fourth since it since it is smaller than rack-002", "rack-003", it.next().id);
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nrack-004 should be ordered fifth since it since it is smaller than rack-003", "rack-004", it.next().id);
Assert.assertEquals(topoRequest + "\n\t" + rackSummaries + "\nrack-005 should be ordered last since it since it is has smallest capacity", "rack-005", it.next().id);
}
use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.
the class NodeSorterHostProximity method sortHosts.
/**
* Nodes are sorted by two criteria.
*
* <p>1) the number executors of the topology that needs to be scheduled is already on the node in
* descending order. The reasoning to sort based on criterion 1 is so we schedule the rest of a topology on the same node as the
* existing executors of the topology.
*
* <p>2) the subordinate/subservient resource availability percentage of a node in descending
* order We calculate the resource availability percentage by dividing the resource availability that have exhausted or little of one of
* the resources mentioned above will be ranked after on the node by the resource availability of the entire rack By doing this
* calculation, nodes nodes that have more balanced resource availability. So we will be less likely to pick a node that have a lot of
* one resource but a low amount of another.
*
* @param availHosts a collection of all the hosts we want to sort
* @param rackId the rack id availNodes are a part of
* @return an iterable of sorted hosts.
*/
private Iterable<ObjectResourcesItem> sortHosts(Collection<String> availHosts, ExecutorDetails exec, String rackId, Map<String, AtomicInteger> scheduledCount) {
ObjectResourcesSummary rackResourcesSummary = new ObjectResourcesSummary("RACK");
availHosts.forEach(h -> {
ObjectResourcesItem hostItem = new ObjectResourcesItem(h);
for (RasNode x : hostnameToNodes.get(h)) {
hostItem.add(new ObjectResourcesItem(x.getId(), x.getTotalAvailableResources(), x.getTotalResources(), 0, 0));
}
rackResourcesSummary.addObjectResourcesItem(hostItem);
});
LOG.debug("Rack {}: Overall Avail [ {} ] Total [ {} ]", rackId, rackResourcesSummary.getAvailableResourcesOverall(), rackResourcesSummary.getTotalResourcesOverall());
return sortObjectResources(rackResourcesSummary, exec, (hostId) -> {
AtomicInteger count = scheduledCount.get(hostId);
if (count == null) {
return 0;
}
return count.get();
});
}
use of org.apache.storm.scheduler.resource.strategies.scheduling.ObjectResourcesItem in project storm by apache.
the class NodeSorterHostProximity method sortObjectResourcesGeneric.
/**
* Sort objects by the following two criteria.
*
* <li>the number executors of the topology that needs to be scheduled is already on the
* object (node or rack) in descending order. The reasoning to sort based on criterion 1 is so we schedule the rest
* of a topology on the same object (node or rack) as the existing executors of the topology.</li>
*
* <li>the subordinate/subservient resource availability percentage of a rack in descending order We calculate the
* resource availability percentage by dividing the resource availability of the object (node or rack) by the
* resource availability of the entire rack or cluster depending on if object references a node or a rack.
* How this differs from the DefaultResourceAwareStrategy is that the percentage boosts the node or rack if it is
* requested by the executor that the sorting is being done for and pulls it down if it is not.
* By doing this calculation, objects (node or rack) that have exhausted or little of one of the resources mentioned
* above will be ranked after racks that have more balanced resource availability and nodes or racks that have
* resources that are not requested will be ranked below . So we will be less likely to pick a rack that
* have a lot of one resource but a low amount of another and have a lot of resources that are not requested by the executor.</li>
*
* @param allResources contains all individual ObjectResources as well as cumulative stats
* @param exec executor for which the sorting is done
* @param existingScheduleFunc a function to get existing executors already scheduled on this object
* @return an {@link Iterable} of sorted {@link ObjectResourcesItem}
*/
@Deprecated
private Iterable<ObjectResourcesItem> sortObjectResourcesGeneric(final ObjectResourcesSummary allResources, ExecutorDetails exec, final ExistingScheduleFunc existingScheduleFunc) {
ObjectResourcesSummary affinityBasedAllResources = new ObjectResourcesSummary(allResources);
final NormalizedResourceOffer availableResourcesOverall = allResources.getAvailableResourcesOverall();
final NormalizedResourceRequest requestedResources = (exec != null) ? topologyDetails.getTotalResources(exec) : null;
affinityBasedAllResources.getObjectResources().forEach(x -> {
if (requestedResources != null) {
// negate unrequested resources
x.availableResources.updateForRareResourceAffinity(requestedResources);
}
x.minResourcePercent = availableResourcesOverall.calculateMinPercentageUsedBy(x.availableResources);
x.avgResourcePercent = availableResourcesOverall.calculateAveragePercentageUsedBy(x.availableResources);
LOG.trace("for {}: minResourcePercent={}, avgResourcePercent={}, numExistingSchedule={}", x.id, x.minResourcePercent, x.avgResourcePercent, existingScheduleFunc.getNumExistingSchedule(x.id));
});
Comparator<ObjectResourcesItem> comparator = (o1, o2) -> {
int execsScheduled1 = existingScheduleFunc.getNumExistingSchedule(o1.id);
int execsScheduled2 = existingScheduleFunc.getNumExistingSchedule(o2.id);
if (execsScheduled1 > execsScheduled2) {
return -1;
} else if (execsScheduled1 < execsScheduled2) {
return 1;
}
double o1Avg = o1.avgResourcePercent;
double o2Avg = o2.avgResourcePercent;
if (o1Avg > o2Avg) {
return -1;
} else if (o1Avg < o2Avg) {
return 1;
}
return o1.id.compareTo(o2.id);
};
TreeSet<ObjectResourcesItem> sortedObjectResources = new TreeSet<>(comparator);
sortedObjectResources.addAll(affinityBasedAllResources.getObjectResources());
LOG.debug("Sorted Object Resources: {}", sortedObjectResources);
return sortedObjectResources;
}
Aggregations