Use of org.apache.storm.scheduler.IScheduler in project storm by apache.
The class Nimbus, method makeScheduler.
private static IScheduler makeScheduler(Map<String, Object> conf, INimbus inimbus) {
    String schedClass = (String) conf.get(DaemonConfig.STORM_SCHEDULER);
    IScheduler scheduler = inimbus == null ? null : inimbus.getForcedScheduler();
    if (scheduler != null) {
        LOG.info("Using forced scheduler from INimbus {} {}", scheduler.getClass(), scheduler);
    } else if (schedClass != null) {
        LOG.info("Using custom scheduler: {}", schedClass);
        scheduler = ReflectionUtils.newInstance(schedClass);
    } else {
        LOG.info("Using default scheduler");
        scheduler = new DefaultScheduler();
    }
    return scheduler;
}
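The three branches above cover a forced scheduler supplied by the INimbus implementation, a custom class named under the storm.scheduler key (DaemonConfig.STORM_SCHEDULER), and the DefaultScheduler fallback. To exercise the custom-class branch, a class only needs to implement IScheduler. The NoOpScheduler below is a hypothetical minimal sketch (the class name and its do-nothing policy are illustrative, not part of Storm); prepare/schedule match the signatures used throughout the snippets on this page, while the config() body and the Cluster calls inside schedule() are assumptions based on the public scheduler API.

import java.util.Collections;
import java.util.Map;
import org.apache.storm.metric.StormMetricsRegistry;
import org.apache.storm.scheduler.Cluster;
import org.apache.storm.scheduler.IScheduler;
import org.apache.storm.scheduler.Topologies;
import org.apache.storm.scheduler.TopologyDetails;

// Hypothetical example: a scheduler that never assigns slots, it only reports status.
// Activated via "storm.scheduler: com.example.NoOpScheduler" in storm.yaml.
public class NoOpScheduler implements IScheduler {

    @Override
    public void prepare(Map<String, Object> conf, StormMetricsRegistry metricsRegistry) {
        // One-time setup; the full daemon conf is available here.
    }

    @Override
    public void schedule(Topologies topologies, Cluster cluster) {
        for (TopologyDetails td : cluster.needsSchedulingTopologies()) {
            // A real scheduler would pick slots from cluster.getAvailableSlots()
            // and call cluster.assign(slot, td.getId(), executors).
            cluster.setStatus(td.getId(), "NoOpScheduler deliberately assigns nothing");
        }
    }

    @Override
    public Map<String, Map<String, Double>> config() {
        // Per-user resource guarantees consumed by the resource-aware scheduler; none here.
        return Collections.emptyMap();
    }

    public void cleanup() {
        // No resources to release in this sketch.
    }
}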
Use of org.apache.storm.scheduler.IScheduler in project storm by apache.
The class TestNodeSorterHostProximity, method testAntiAffinityWithMultipleTopologies.
/**
 * Schedule two topologies, one with special resources and another without.
 * There are only enough special resources to hold the one topology that requests the special resource ("my.gpu").
 * If the sort order is incorrect, scheduling will not succeed.
 */
@Test
public void testAntiAffinityWithMultipleTopologies() {
    INimbus iNimbus = new INimbusTest();
    Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>());
    HashMap<String, Double> extraResources = new HashMap<>();
    extraResources.put("my.gpu", 1.0);
    supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources));
    Config config = new Config();
    config.putAll(createGrasClusterConfig(88, 775, 25, null, null));
    IScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(config, new StormMetricsRegistry());
    TopologyDetails tdSimple = genTopology("topology-simple", config, 1, 5, 100, 300, 0, 0, "user", 8192);
    // Schedule the simple topology first
    Topologies topologies = new Topologies(tdSimple);
    Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
    {
        NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdSimple);
        for (ExecutorDetails exec : tdSimple.getExecutors()) {
            nodeSorter.prepare(exec);
            List<ObjectResourcesItem> sortedRacks = StreamSupport
                    .stream(nodeSorter.getSortedRacks().spliterator(), false)
                    .collect(Collectors.toList());
            String rackSummaries = sortedRacks.stream()
                    .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f",
                            x.id,
                            nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(),
                            x.minResourcePercent,
                            x.avgResourcePercent,
                            x.availableResources.getTotalCpu(),
                            x.availableResources.getTotalMemoryMb()))
                    .collect(Collectors.joining("\n\t"));
            NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
            String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
            Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
            Assert.assertEquals(rackSummaries + "\nFirst rack sorted", "rack-000", sortedRacks.get(0).id);
            Assert.assertEquals(rackSummaries + "\nSecond rack sorted", "rack-001", sortedRacks.get(1).id);
        }
    }
    scheduler.schedule(topologies, cluster);
    TopologyBuilder builder = topologyBuilder(1, 5, 100, 300);
    builder.setBolt("gpu-bolt", new TestBolt(), 40).addResource("my.gpu", 1.0).shuffleGrouping("spout-0");
    TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, builder.createTopology(), 0, 0, "user", 8192);
    // Now schedule GPU but with the simple topology in place.
    topologies = new Topologies(tdSimple, tdGpu);
    cluster = new Cluster(cluster, topologies);
    {
        NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdGpu);
        for (ExecutorDetails exec : tdGpu.getExecutors()) {
            String comp = tdGpu.getComponentFromExecutor(exec);
            nodeSorter.prepare(exec);
            List<ObjectResourcesItem> sortedRacks = StreamSupport
                    .stream(nodeSorter.getSortedRacks().spliterator(), false)
                    .collect(Collectors.toList());
            String rackSummaries = sortedRacks.stream()
                    .map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f",
                            x.id,
                            nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(),
                            x.minResourcePercent,
                            x.avgResourcePercent,
                            x.availableResources.getTotalCpu(),
                            x.availableResources.getTotalMemoryMb()))
                    .collect(Collectors.joining("\n\t"));
            NormalizedResourceRequest topoResourceRequest = tdGpu.getApproximateTotalResources();
            String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdGpu.getId(), topoResourceRequest.toString());
            Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
            if (comp.equals("gpu-bolt")) {
                Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-001", sortedRacks.get(0).id);
                Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-000", sortedRacks.get(1).id);
            } else {
                Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-000", sortedRacks.get(0).id);
                Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-001", sortedRacks.get(1).id);
            }
        }
    }
    scheduler.schedule(topologies, cluster);
    Map<String, SchedulerAssignment> assignments = new TreeMap<>(cluster.getAssignments());
    assertEquals(2, assignments.size());
    Map<String, Map<String, AtomicLong>> topoPerRackCount = new HashMap<>();
    for (Map.Entry<String, SchedulerAssignment> entry : assignments.entrySet()) {
        SchedulerAssignment sa = entry.getValue();
        Map<String, AtomicLong> slotsPerRack = new TreeMap<>();
        for (WorkerSlot slot : sa.getSlots()) {
            String nodeId = slot.getNodeId();
            String rack = supervisorIdToRackName(nodeId);
            slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet();
        }
        LOG.info("{} => {}", entry.getKey(), slotsPerRack);
        topoPerRackCount.put(entry.getKey(), slotsPerRack);
    }
    Map<String, AtomicLong> simpleCount = topoPerRackCount.get("topology-simple-0");
    assertNotNull(simpleCount);
    // Because the simple topology was scheduled first we want to be sure that it didn't put anything on
    // the GPU nodes.
    // Only 1 rack is in use
    assertEquals(1, simpleCount.size());
    // r001 is the second rack with GPUs
    assertFalse(simpleCount.containsKey("r001"));
    // r000 is the first rack with no GPUs
    assertTrue(simpleCount.containsKey("r000"));
    // We don't really care too much about the scheduling of topology-gpu-0, because it was scheduled.
}
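The anti-affinity effect above hinges on the generic resource "my.gpu": only the supervisors created with extraResources advertise it (outside a test harness this corresponds to the supervisor.resources.map setting in storm.yaml), and only gpu-bolt requests it. The topology-side declaration is the builder call already shown in the test; a minimal standalone sketch follows, where TestWordSpout is Storm's stock testing spout and TestBolt is the test helper used above.

// Request one "my.gpu" unit per gpu-bolt executor; the resource-aware scheduler
// will only place those executors on supervisors that advertise "my.gpu".
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout-0", new TestWordSpout(), 1);
builder.setBolt("gpu-bolt", new TestBolt(), 40)
        .addResource("my.gpu", 1.0)
        .shuffleGrouping("spout-0");
// Executors without the resource request remain free to land on either rack,
// which is exactly what the rack-ordering assertions in the test verify.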
Use of org.apache.storm.scheduler.IScheduler in project storm by apache.
The class TestNodeSorterHostProximity, method testFillUpRackAndSpilloverToNextRack.
/**
 * If the topology is too large for one rack, it should be partially scheduled onto the next rack (and next rack only).
 */
@Test
public void testFillUpRackAndSpilloverToNextRack() {
    INimbus iNimbus = new INimbusTest();
    double compPcore = 100;
    double compOnHeap = 775;
    double compOffHeap = 25;
    int topo1NumSpouts = 1;
    int topo1NumBolts = 5;
    int topo1SpoutParallelism = 100;
    int topo1BoltParallelism = 200;
    final int numRacks = 3;
    final int numSupersPerRack = 10;
    final int numPortsPerSuper = 6;
    final int numZonesPerHost = 1;
    final double numaResourceMultiplier = 1.0;
    int rackStartNum = 0;
    int supStartNum = 0;
    // not enough for topo1
    long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism) * 4 / 5;
    long compPerSuper = compPerRack / numSupersPerRack;
    double cpuPerSuper = compPcore * compPerSuper;
    double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
    double topo1MaxHeapSize = memPerSuper;
    final String topoName1 = "topology1";
    Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper,
            rackStartNum, supStartNum, cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
    TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
    Config config = new Config();
    config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
    config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
    IScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(config, new StormMetricsRegistry());
    TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
    // Schedule the topo1 topology and ensure it fits on 2 racks
    Topologies topologies = new Topologies(td1);
    Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
    cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
    scheduler.schedule(topologies, cluster);
    Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
    assertEquals("Racks for topology=" + td1.getId() + " is " + assignedRacks, 2, assignedRacks.size());
}
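The spillover follows directly from the sizing arithmetic at the top of the test. A self-contained re-computation (all numbers copied from the test body; the class name is just for illustration) shows why exactly two racks end up assigned:

// Worked example of the capacity math above.
public class SpilloverMath {
    public static void main(String[] args) {
        int executors = 1 * 100 + 5 * 200;      // topo1 requests 1100 executors
        long compPerRack = executors * 4 / 5;   // 880: one rack is deliberately too small
        long compPerSuper = compPerRack / 10;   // 88 executor-equivalents per supervisor
        double cpuPerSuper = 100 * compPerSuper;         // 8800.0 (% CPU per supervisor)
        double memPerSuper = (775 + 25) * compPerSuper;  // 70400.0 MB per supervisor
        // One rack holds capacity for 880 executors; the remaining 220 must spill
        // onto a second rack, hence assignedRacks.size() == 2.
        System.out.printf("executors=%d rackCapacity=%d spill=%d cpu/super=%.1f mem/super=%.1f%n",
                executors, compPerRack, executors - compPerRack, cpuPerSuper, memPerSuper);
    }
}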
Use of org.apache.storm.scheduler.IScheduler in project storm by apache.
The class TestNodeSorterHostProximity, method testPreferRackWithTopoExecutors.
/**
 * A rack with low resources should be used to schedule an executor if it has other executors for the same topology.
 * <li>Schedule topo1 on one rack</li>
 * <li>unassign some executors</li>
 * <li>schedule another topology to partially fill up rack1</li>
 * <li>Add another rack and schedule topology 1 remaining executors again</li>
 * <li>scheduling should utilize all resources on rack1 before trying the next rack</li>
 */
@Test
public void testPreferRackWithTopoExecutors() {
    INimbus iNimbus = new INimbusTest();
    double compPcore = 100;
    double compOnHeap = 775;
    double compOffHeap = 25;
    int topo1NumSpouts = 1;
    int topo1NumBolts = 5;
    int topo1SpoutParallelism = 100;
    int topo1BoltParallelism = 200;
    int topo2NumSpouts = 1;
    int topo2NumBolts = 5;
    int topo2SpoutParallelism = 10;
    int topo2BoltParallelism = 20;
    final int numRacks = 3;
    final int numSupersPerRack = 10;
    final int numPortsPerSuper = 6;
    final int numZonesPerHost = 1;
    final double numaResourceMultiplier = 1.0;
    int rackStartNum = 0;
    int supStartNum = 0;
    // enough for topo1 but not topo1+topo2
    long compPerRack = topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism
            + topo2NumSpouts * topo2SpoutParallelism;
    long compPerSuper = compPerRack / numSupersPerRack;
    double cpuPerSuper = compPcore * compPerSuper;
    double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
    double topo1MaxHeapSize = memPerSuper;
    double topo2MaxHeapSize = memPerSuper;
    final String topoName1 = "topology1";
    final String topoName2 = "topology2";
    Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper,
            rackStartNum, supStartNum, cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
    TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
    Config config = new Config();
    config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
    config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
    IScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(config, new StormMetricsRegistry());
    TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
    // Schedule the topo1 topology and ensure it fits on 1 rack
    Topologies topologies = new Topologies(td1);
    Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
    cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
    scheduler.schedule(topologies, cluster);
    Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
    assertEquals("Racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
    TopologyBuilder builder = topologyBuilder(topo2NumSpouts, topo2NumBolts, topo2SpoutParallelism, topo2BoltParallelism);
    TopologyDetails td2 = topoToTopologyDetails(topoName2, config, builder.createTopology(), 0, 0, "user", topo2MaxHeapSize);
    // Now schedule topo2, with topo1 already in place.
    topologies = new Topologies(td1, td2);
    cluster = new Cluster(cluster, topologies);
    scheduler.schedule(topologies, cluster);
    assignedRacks = cluster.getAssignedRacks(td1.getId(), td2.getId());
    assertEquals("Racks for topologies=" + td1.getId() + "/" + td2.getId() + " is " + assignedRacks, 2, assignedRacks.size());
    // topo2 gets scheduled on its own rack because it is empty and available
    assignedRacks = cluster.getAssignedRacks(td2.getId());
    assertEquals("Racks for topologies=" + td2.getId() + " is " + assignedRacks, 1, assignedRacks.size());
    // Now unassign topo2 and expect only one rack to be in use; then free some slots and reschedule some topo1 executors.
    cluster.unassign(td2.getId());
    assignedRacks = cluster.getAssignedRacks(td2.getId());
    assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td2.getId() + " is " + assignedRacks, 0, assignedRacks.size());
    assignedRacks = cluster.getAssignedRacks(td1.getId());
    assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
    assertFalse("Topology " + td1.getId() + " should be fully assigned before freeing slots", cluster.needsSchedulingRas(td1));
    freeSomeWorkerSlots(cluster);
    assertTrue("Topology " + td1.getId() + " should need scheduling after freeing slots", cluster.needsSchedulingRas(td1));
    // then reschedule executors
    scheduler.schedule(topologies, cluster);
    // only one rack should be in use by topology1
    assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("After reassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
}
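The body of freeSomeWorkerSlots is not reproduced in this excerpt. A plausible reconstruction follows, under the assumption that the helper frees a portion of each topology's occupied slots through the public Cluster API; Cluster.getAssignments, SchedulerAssignment.getSlots, and Cluster.freeSlots are all used elsewhere in Storm's scheduler API, but the every-other-slot policy here is an assumption, not the helper's actual code.

// Hypothetical reconstruction: free every other assigned slot so that
// cluster.needsSchedulingRas(td1) flips to true without unassigning everything.
private static void freeSomeWorkerSlots(Cluster cluster) {
    Map<String, SchedulerAssignment> assignments = cluster.getAssignments();
    for (SchedulerAssignment assignment : assignments.values()) {
        List<WorkerSlot> slots = new ArrayList<>(assignment.getSlots());
        List<WorkerSlot> slotsToFree = new ArrayList<>();
        for (int i = 0; i < slots.size(); i += 2) {
            slotsToFree.add(slots.get(i));   // take every other slot
        }
        cluster.freeSlots(slotsToFree);
    }
}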
Use of org.apache.storm.scheduler.IScheduler in project storm by apache.
The class TestLargeCluster, method testLargeCluster.
/**
 * Create a large cluster, read topologies and configuration from the resource directory, and schedule.
 *
 * @throws Exception upon error.
 */
@Test
public void testLargeCluster() throws Exception {
    for (TEST_CLUSTER_NAME testClusterName : TEST_CLUSTER_NAME.values()) {
        LOG.info("********************************************");
        LOG.info("testLargeCluster: Start Processing cluster {}", testClusterName.getClusterName());
        String resourcePath = testClusterName.getResourcePath();
        Map<String, SupervisorDetails> supervisors = createSupervisors(testClusterName, 0);
        TopologyDetails[] topoDetailsArray = createTopoDetailsArray(resourcePath, false);
        Assert.assertTrue("No topologies found for cluster " + testClusterName.getClusterName(), topoDetailsArray.length > 0);
        Topologies topologies = new Topologies(topoDetailsArray);
        Config confWithDefaultStrategy = new Config();
        confWithDefaultStrategy.putAll(topoDetailsArray[0].getConf());
        confWithDefaultStrategy.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, DefaultResourceAwareStrategy.class.getName());
        confWithDefaultStrategy.put(Config.STORM_NETWORK_TOPOGRAPHY_PLUGIN, TestUtilsForResourceAwareScheduler.GenSupervisorsDnsToSwitchMapping.class.getName());
        INimbus iNimbus = new INimbusTest();
        Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supervisors, new HashMap<>(), topologies, confWithDefaultStrategy);
        scheduler = new ResourceAwareScheduler();
        List<Class> classesToDebug = Arrays.asList(DefaultResourceAwareStrategy.class, GenericResourceAwareStrategy.class,
                ResourceAwareScheduler.class, Cluster.class);
        // switch to Level.DEBUG for verbose output, otherwise Level.INFO
        Level logLevel = Level.INFO;
        classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), logLevel));
        long startTime = System.currentTimeMillis();
        scheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry());
        scheduler.schedule(topologies, cluster);
        long endTime = System.currentTimeMillis();
        LOG.info("Cluster={} Scheduling Time: {} topologies in {} seconds",
                testClusterName.getClusterName(), topoDetailsArray.length, (endTime - startTime) / 1000.0);
        for (TopologyDetails td : topoDetailsArray) {
            TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, td.getName());
        }
        // Remove each topology and reschedule it
        for (int i = 0; i < topoDetailsArray.length; i++) {
            startTime = System.currentTimeMillis();
            TopologyDetails topoDetails = topoDetailsArray[i];
            cluster.unassign(topoDetails.getId());
            LOG.info("Cluster={}, ({}) Removed topology {}", testClusterName.getClusterName(), i, topoDetails.getName());
            IScheduler rescheduler = new ResourceAwareScheduler();
            rescheduler.prepare(confWithDefaultStrategy, new StormMetricsRegistry());
            rescheduler.schedule(topologies, cluster);
            TestUtilsForResourceAwareScheduler.assertTopologiesFullyScheduled(cluster, topoDetails.getName());
            endTime = System.currentTimeMillis();
            LOG.info("Cluster={}, ({}) Scheduling Time: Removed topology {} and rescheduled in {} seconds",
                    testClusterName.getClusterName(), i, topoDetails.getName(), (endTime - startTime) / 1000.0);
        }
        classesToDebug.forEach(x -> Configurator.setLevel(x.getName(), Level.INFO));
        LOG.info("testLargeCluster: End Processing cluster {}", testClusterName.getClusterName());
        LOG.info("********************************************");
    }
}
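assertTopologiesFullyScheduled comes from TestUtilsForResourceAwareScheduler and its body is not reproduced on this page. An equivalent check can be phrased against the public Cluster API, as in the sketch below; this is a stand-in under that assumption, not the helper's actual implementation.

// Sketch of a "fully scheduled" check using only public scheduler API calls
// (getAssignmentById and needsSchedulingRas both appear in the snippets above).
for (TopologyDetails td : topologies.getTopologies()) {
    SchedulerAssignment assignment = cluster.getAssignmentById(td.getId());
    Assert.assertNotNull("Topology " + td.getName() + " was never assigned", assignment);
    Assert.assertFalse("Topology " + td.getName() + " still needs scheduling",
            cluster.needsSchedulingRas(td));
}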