Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
The class TestNodeSorterHostProximity, method testFillUpRackAndSpilloverToNextRack.
/**
* If the topology is too large for one rack, it should be partially scheduled onto the next rack (and the next rack only).
*/
@Test
public void testFillUpRackAndSpilloverToNextRack() {
INimbus iNimbus = new INimbusTest();
double compPcore = 100;
double compOnHeap = 775;
double compOffHeap = 25;
int topo1NumSpouts = 1;
int topo1NumBolts = 5;
int topo1SpoutParallelism = 100;
int topo1BoltParallelism = 200;
final int numRacks = 3;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 6;
final int numZonesPerHost = 1;
final double numaResourceMultiplier = 1.0;
int rackStartNum = 0;
int supStartNum = 0;
// not enough for topo1
long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism) * 4 / 5;
long compPerSuper = compPerRack / numSupersPerRack;
double cpuPerSuper = compPcore * compPerSuper;
double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
double topo1MaxHeapSize = memPerSuper;
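// Worked numbers: topo1 has 1 * 100 + 5 * 200 = 1100 executors; each rack is sized for 4/5 of that
// (880 executors' worth of CPU and memory, 88 per supervisor), so a single rack cannot hold all of topo1.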
final String topoName1 = "topology1";
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum, cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
Config config = new Config();
config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
// Schedule the topo1 topology and ensure it fits on 2 racks
Topologies topologies = new Topologies(td1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
scheduler.schedule(topologies, cluster);
Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("Racks for topology=" + td1.getId() + " is " + assignedRacks, 2, assignedRacks.size());
}
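All of the usages on this page share the same ResourceAwareScheduler life cycle: build a Topologies and a Cluster, prepare the scheduler with a StormMetricsRegistry, call schedule, and read the resulting assignments back from the Cluster. The following is a minimal sketch of that pattern (not taken from the Storm sources), assuming the test helpers used in these examples (INimbusTest, createGrasClusterConfig, genTopology, genSupervisors) are in scope.
// Sketch only: relies on the test helpers shown in the surrounding examples.
Config config = new Config();
config.putAll(createGrasClusterConfig(100, 775, 25, null, null));
TopologyDetails td = genTopology("topology1", config, 1, 5, 100, 200, 0, 0, "user");
Topologies topologies = new Topologies(td);
Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000);
Cluster cluster = new Cluster(new INimbusTest(), new ResourceMetrics(new StormMetricsRegistry()),
        supMap, new HashMap<>(), topologies, config);

ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
try {
    scheduler.schedule(topologies, cluster);
} finally {
    scheduler.cleanup();
}
// Executor-to-slot assignments are now available on the cluster.
SchedulerAssignment assignment = cluster.getAssignmentById(td.getId());
The tests on this page vary the cluster shape (racks, NUMA zones, generic resources such as gpu.count) and the strategy configured via Config.TOPOLOGY_SCHEDULER_STRATEGY, but the schedule-then-assert flow stays the same.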
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
The class TestNodeSorterHostProximity, method testPreferRackWithTopoExecutors.
/**
* A rack with low remaining resources should still be preferred for an executor when it already hosts
* other executors of the same topology.
* <li>Schedule topo1 so that it fits on one rack.</li>
* <li>Schedule topo2, which lands on a different rack.</li>
* <li>Unassign topo2 and free some of topo1's worker slots.</li>
* <li>Reschedule: the freed topo1 executors should land back on topo1's original rack before any other rack is tried.</li>
*/
@Test
public void testPreferRackWithTopoExecutors() {
INimbus iNimbus = new INimbusTest();
double compPcore = 100;
double compOnHeap = 775;
double compOffHeap = 25;
int topo1NumSpouts = 1;
int topo1NumBolts = 5;
int topo1SpoutParallelism = 100;
int topo1BoltParallelism = 200;
int topo2NumSpouts = 1;
int topo2NumBolts = 5;
int topo2SpoutParallelism = 10;
int topo2BoltParallelism = 20;
final int numRacks = 3;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 6;
final int numZonesPerHost = 1;
final double numaResourceMultiplier = 1.0;
int rackStartNum = 0;
int supStartNum = 0;
// enough for topo1 but not topo1+topo2
long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism
+ topo2NumSpouts * topo2SpoutParallelism);
long compPerSuper = compPerRack / numSupersPerRack;
double cpuPerSuper = compPcore * compPerSuper;
double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
double topo1MaxHeapSize = memPerSuper;
double topo2MaxHeapSize = memPerSuper;
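// Worked numbers: each rack is sized for 1110 executors' worth of CPU and memory (topo1's 1100 plus
// topo2's 10 spout executors), so topo1 alone fits on one rack, but topo1 + topo2
// (1100 + 110 = 1210 executors) does not.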
final String topoName1 = "topology1";
final String topoName2 = "topology2";
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum, cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
Config config = new Config();
config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
// Schedule the topo1 topology and ensure it fits on 1 rack
Topologies topologies = new Topologies(td1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
scheduler.schedule(topologies, cluster);
Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("Racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
TopologyBuilder builder = topologyBuilder(topo2NumSpouts, topo2NumBolts, topo2SpoutParallelism, topo2BoltParallelism);
TopologyDetails td2 = topoToTopologyDetails(topoName2, config, builder.createTopology(), 0, 0, "user", topo2MaxHeapSize);
// Now schedule topo2 alongside the already-scheduled topo1.
topologies = new Topologies(td1, td2);
cluster = new Cluster(cluster, topologies);
scheduler.schedule(topologies, cluster);
assignedRacks = cluster.getAssignedRacks(td1.getId(), td2.getId());
assertEquals("Racks for topologies=" + td1.getId() + "/" + td2.getId() + " is " + assignedRacks, 2, assignedRacks.size());
// topo2 gets scheduled onto its own rack because that rack is empty and available
assignedRacks = cluster.getAssignedRacks(td2.getId());
assertEquals("Racks for topologies=" + td2.getId() + " is " + assignedRacks, 1, assignedRacks.size());
// now unassign topo2 and expect only one rack to remain in use; then free some slots and reschedule some topo1 executors
cluster.unassign(td2.getId());
assignedRacks = cluster.getAssignedRacks(td2.getId());
assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td2.getId() + " is " + assignedRacks, 0, assignedRacks.size());
assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
assertFalse("Topology " + td1.getId() + " should be fully assigned before freeing slots", cluster.needsSchedulingRas(td1));
freeSomeWorkerSlots(cluster);
assertTrue("Topology " + td1.getId() + " should need scheduling after freeing slots", cluster.needsSchedulingRas(td1));
// then reschedule executors
scheduler.schedule(topologies, cluster);
// only one rack should be in use by topology1
assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("After reassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
}
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
The class TestConstraintSolverStrategy, method testIntegrationWithRAS.
@Test
public void testIntegrationWithRAS() {
if (!consolidatedConfigFlag) {
LOG.info("Skipping test since bolt-1 maxCoLocationCnt=10 requires consolidatedConfigFlag=true, current={}", consolidatedConfigFlag);
return;
}
Map<String, Object> config = Utils.readDefaultConfig();
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, ConstraintSolverStrategy.class.getName());
config.put(Config.TOPOLOGY_RAS_CONSTRAINT_MAX_STATE_SEARCH, MAX_TRAVERSAL_DEPTH);
config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 100_000);
config.put(Config.TOPOLOGY_PRIORITY, 1);
config.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 10);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 100);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 0.0);
List<List<String>> constraints = new LinkedList<>();
addConstraints("spout-0", "bolt-0", constraints);
addConstraints("bolt-1", "bolt-1", constraints);
addConstraints("bolt-1", "bolt-2", constraints);
Map<String, Integer> spreads = new HashMap<String, Integer>();
spreads.put("spout-0", 1);
spreads.put("bolt-1", 10);
setConstraintConfig(constraints, spreads, config);
TopologyDetails topo = genTopology("testTopo", config, 2, 3, 30, 300, 0, 0, "user");
Map<String, TopologyDetails> topoMap = new HashMap<>();
topoMap.put(topo.getId(), topo);
Topologies topologies = new Topologies(topoMap);
// Fails with 36 supervisors, works with 37
Map<String, SupervisorDetails> supMap = genSupervisors(37, 16, 400, 1024 * 4);
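// Raw capacity is not what makes 36 supervisors fail: the topology has 2 * 30 + 3 * 300 = 960 executors,
// each needing 10% CPU and 100 MB on-heap (per the config above), i.e. 9600% CPU and 96000 MB, which even
// 36 supervisors (36 * 400% CPU, 36 * 4096 MB) could supply; the threshold comes from satisfying the
// co-location and spread constraints rather than raw CPU/memory.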
Cluster cluster = makeCluster(topologies, supMap);
ResourceAwareScheduler rs = new ResourceAwareScheduler();
rs.prepare(config, new StormMetricsRegistry());
try {
rs.schedule(topologies, cluster);
assertStatusSuccess(cluster, topo.getId());
Assert.assertEquals("topo all executors scheduled?", 0, cluster.getUnassignedExecutors(topo).size());
} finally {
rs.cleanup();
}
// simulate worker loss
Map<ExecutorDetails, WorkerSlot> newExecToSlot = new HashMap<>();
Map<ExecutorDetails, WorkerSlot> execToSlot = cluster.getAssignmentById(topo.getId()).getExecutorToSlot();
Iterator<Map.Entry<ExecutorDetails, WorkerSlot>> it = execToSlot.entrySet().iterator();
// keep only half of the original executor-to-slot assignments, pairing each executor with its own slot
for (int i = 0; i < execToSlot.size() / 2; i++) {
Map.Entry<ExecutorDetails, WorkerSlot> entry = it.next();
newExecToSlot.put(entry.getKey(), entry.getValue());
}
Map<String, SchedulerAssignment> newAssignments = new HashMap<>();
newAssignments.put(topo.getId(), new SchedulerAssignmentImpl(topo.getId(), newExecToSlot, null, null));
cluster.setAssignments(newAssignments, false);
rs.prepare(config, new StormMetricsRegistry());
try {
rs.schedule(topologies, cluster);
assertStatusSuccess(cluster, topo.getId());
Assert.assertEquals("topo all executors scheduled?", 0, cluster.getUnassignedExecutors(topo).size());
} finally {
rs.cleanup();
}
}
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
The class TestGenericResourceAwareStrategy, method testGenericResourceAwareStrategySharedMemory.
/**
* Test that the scheduling logic of the GenericResourceAwareStrategy handles shared memory correctly.
*/
@Test
public void testGenericResourceAwareStrategySharedMemory() {
int spoutParallelism = 2;
int boltParallelism = 2;
int numBolts = 3;
double cpuPercent = 10;
double memoryOnHeap = 10;
double memoryOffHeap = 10;
double sharedOnHeap = 500;
double sharedOffHeapNode = 700;
double sharedOffHeapWorker = 500;
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new TestSpout(), spoutParallelism).addResource("gpu.count", 1.0);
builder.setBolt("bolt-1", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinWorker(sharedOffHeapWorker, "bolt-1 shared off heap worker")).shuffleGrouping("spout");
builder.setBolt("bolt-2", new TestBolt(), boltParallelism).addSharedMemory(new SharedOffHeapWithinNode(sharedOffHeapNode, "bolt-2 shared node")).shuffleGrouping("bolt-1");
builder.setBolt("bolt-3", new TestBolt(), boltParallelism).addSharedMemory(new SharedOnHeap(sharedOnHeap, "bolt-3 shared worker")).shuffleGrouping("bolt-2");
StormTopology stormToplogy = builder.createTopology();
INimbus iNimbus = new INimbusTest();
Config conf = createGrasClusterConfig(cpuPercent, memoryOnHeap, memoryOffHeap, null, Collections.emptyMap());
Map<String, Double> genericResourcesMap = new HashMap<>();
genericResourcesMap.put("gpu.count", 1.0);
Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 500, 2000, genericResourcesMap);
conf.put(Config.TOPOLOGY_PRIORITY, 0);
conf.put(Config.TOPOLOGY_NAME, "testTopology");
conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, genExecsAndComps(stormToplogy), currentTime, "user");
Topologies topologies = new Topologies(topo);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
scheduler = new ResourceAwareScheduler();
scheduler.prepare(conf, new StormMetricsRegistry());
scheduler.schedule(topologies, cluster);
for (Entry<String, SupervisorResources> entry : cluster.getSupervisorsResourcesMap().entrySet()) {
String supervisorId = entry.getKey();
SupervisorResources resources = entry.getValue();
assertTrue(supervisorId, resources.getTotalCpu() >= resources.getUsedCpu());
assertTrue(supervisorId, resources.getTotalMem() >= resources.getUsedMem());
}
// If we didn't take GPUs into account, everything would fit in a single slot.
// But because there is only 1 GPU per node, and each of the 2 spouts needs a GPU,
// the topology has to be scheduled on at least 2 nodes, and hence 2 slots.
// Because of this, all of the bolts will be scheduled on a single slot with one of the
// spouts, while the other spout gets its own slot. So everything that can be shared is
// shared.
int totalNumberOfTasks = (spoutParallelism + (boltParallelism * numBolts));
double totalExpectedCPU = totalNumberOfTasks * cpuPercent;
double totalExpectedOnHeap = (totalNumberOfTasks * memoryOnHeap) + sharedOnHeap;
double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker;
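// Worked numbers: 8 tasks total (2 spouts + 3 bolts x 2), so expected CPU = 8 * 10 = 80%,
// expected on-heap = 8 * 10 + 500 (shared) = 580 MB, expected worker off-heap = 8 * 10 + 500 (shared) = 580 MB.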
SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
Set<WorkerSlot> slots = assignment.getSlots();
Map<String, Double> nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapNodeMemory();
LOG.info("NODE TO SHARED OFF HEAP {}", nodeToTotalShared);
Map<WorkerSlot, WorkerResources> scheduledResources = assignment.getScheduledResources();
assertEquals(2, slots.size());
assertEquals(2, nodeToTotalShared.size());
assertEquals(2, scheduledResources.size());
double totalFoundCPU = 0.0;
double totalFoundOnHeap = 0.0;
double totalFoundWorkerOffHeap = 0.0;
for (WorkerSlot ws : slots) {
WorkerResources resources = scheduledResources.get(ws);
totalFoundCPU += resources.get_cpu();
totalFoundOnHeap += resources.get_mem_on_heap();
totalFoundWorkerOffHeap += resources.get_mem_off_heap();
}
assertEquals(totalExpectedCPU, totalFoundCPU, 0.01);
assertEquals(totalExpectedOnHeap, totalFoundOnHeap, 0.01);
assertEquals(totalExpectedWorkerOffHeap, totalFoundWorkerOffHeap, 0.01);
assertEquals(sharedOffHeapNode, nodeToTotalShared.values().stream().mapToDouble((d) -> d).sum(), 0.01);
assertEquals(sharedOnHeap, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_on_heap).sum(), 0.01);
assertEquals(sharedOffHeapWorker, scheduledResources.values().stream().mapToDouble(WorkerResources::get_shared_mem_off_heap).sum(), 0.01);
}
Use of org.apache.storm.scheduler.resource.ResourceAwareScheduler in project storm by apache.
The class TestGenericResourceAwareStrategy, method testGenericResourceAwareStrategyWithSettingAckerExecutors.
/**
* Test if the scheduling logic for the GenericResourceAwareStrategy is correct
* with setting {@link Config#TOPOLOGY_ACKER_EXECUTORS}.
*
* Test details refer to {@link TestDefaultResourceAwareStrategy#testDefaultResourceAwareStrategyWithSettingAckerExecutors(int)}
*/
@ParameterizedTest
@ValueSource(ints = { -1, 0, 2, 200 })
public void testGenericResourceAwareStrategyWithSettingAckerExecutors(int numOfAckersPerWorker) throws InvalidTopologyException {
int spoutParallelism = 1;
int boltParallelism = 2;
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("spout", new TestSpout(), spoutParallelism);
builder.setBolt("bolt-1", new TestBolt(), boltParallelism).shuffleGrouping("spout");
builder.setBolt("bolt-2", new TestBolt(), boltParallelism).shuffleGrouping("bolt-1").addResource("gpu.count", 1.0);
builder.setBolt("bolt-3", new TestBolt(), boltParallelism).shuffleGrouping("bolt-2").addResource("gpu.count", 2.0);
String topoName = "testTopology";
StormTopology stormToplogy = builder.createTopology();
INimbus iNimbus = new INimbusTest();
Config conf = createGrasClusterConfig(50, 500, 0, null, Collections.emptyMap());
Map<String, Double> genericResourcesMap = new HashMap<>();
genericResourcesMap.put("gpu.count", 2.0);
Map<String, SupervisorDetails> supMap = genSupervisors(4, 4, 200, 2000, genericResourcesMap);
conf.put(Config.TOPOLOGY_PRIORITY, 0);
conf.put(Config.TOPOLOGY_NAME, topoName);
conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 2000);
conf.put(Config.TOPOLOGY_SUBMITTER_USER, "user");
conf.put(Config.TOPOLOGY_ACKER_EXECUTORS, 4);
if (numOfAckersPerWorker == -1) {
// Leave topology.acker.executors.per.worker unset
} else {
conf.put(Config.TOPOLOGY_RAS_ACKER_EXECUTORS_PER_WORKER, numOfAckersPerWorker);
}
int estimatedNumWorker = ServerUtils.getEstimatedWorkerCountForRasTopo(conf, stormToplogy);
Nimbus.setUpAckerExecutorConfigs(topoName, conf, conf, estimatedNumWorker);
conf.put(Config.TOPOLOGY_ACKER_RESOURCES_ONHEAP_MEMORY_MB, 250);
conf.put(Config.TOPOLOGY_ACKER_CPU_PCORE_PERCENT, 50);
TopologyDetails topo = new TopologyDetails("testTopology-id", conf, stormToplogy, 0, genExecsAndComps(StormCommon.systemTopology(conf, stormToplogy)), currentTime, "user");
Topologies topologies = new Topologies(topo);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf);
scheduler = new ResourceAwareScheduler();
scheduler.prepare(conf, new StormMetricsRegistry());
scheduler.schedule(topologies, cluster);
// GPU demand alone forces at least 3 slots on 3 separate hosts: the topology needs 6 GPUs, 3500 MB of memory, and 350% CPU before ackers are added.
// The bolt-3 instances must be on separate nodes because they each need 2 GPUs.
// The bolt-2 instances must be on the same node as they each need 1 GPU
// (this assumes that we are packing the components to avoid fragmentation).
// The bolt-1 and spout instances fill in the rest.
// Ordered execs: [[6, 6], [2, 2], [4, 4], [5, 5], [1, 1], [3, 3], [0, 0]]
// Ackers: [[8, 8], [7, 7]] (+ [[9, 9], [10, 10]] when numOfAckersPerWorker=2)
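// Worked numbers: 7 topology executors (1 spout + 6 bolt instances) x (500 MB, 50% CPU) = 3500 MB and 350% CPU;
// GPUs = 2 bolt-2 x 1 + 2 bolt-3 x 2 = 6. Each acker adds another 250 MB and 50% CPU (per the acker configs above).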
HashSet<HashSet<ExecutorDetails>> expectedScheduling = new HashSet<>();
expectedScheduling.add(new HashSet<>(Arrays.asList(// bolt-3 - 500 MB, 50% CPU, 2 GPU
new ExecutorDetails(3, 3))));
// Total 500 MB, 50% CPU, 2 GPU -> this node has 1500 MB, 150% CPU, 0 GPU left
expectedScheduling.add(new HashSet<>(Arrays.asList(// acker - 250 MB, 50% CPU, 0 GPU
new ExecutorDetails(7, 7), // acker - 250 MB, 50% CPU, 0 GPU
new ExecutorDetails(8, 8), // bolt-2 - 500 MB, 50% CPU, 1 GPU
new ExecutorDetails(6, 6), // bolt-1 - 500 MB, 50% CPU, 0 GPU
new ExecutorDetails(2, 2))));
// Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left
expectedScheduling.add(new HashSet<>(Arrays.asList(// acker- 250 MB, 50% CPU, 0 GPU
new ExecutorDetails(9, 9), // acker- 250 MB, 50% CPU, 0 GPU
new ExecutorDetails(10, 10), // bolt-1 - 500 MB, 50% CPU, 0 GPU
new ExecutorDetails(1, 1), // bolt-3 - 500 MB, 50% CPU, 2 GPU
new ExecutorDetails(4, 4))));
// Total 1500 MB, 200% CPU, 2 GPU -> this node has 500 MB, 0% CPU, 0 GPU left
expectedScheduling.add(new HashSet<>(Arrays.asList(// Spout - 500 MB, 50% CPU, 0 GPU
new ExecutorDetails(0, 0), // bolt-2 - 500 MB, 50% CPU, 1 GPU
new ExecutorDetails(5, 5))));
// Total 1000 MB, 100% CPU, 2 GPU -> this node has 1000 MB, 100% CPU, 0 GPU left
HashSet<HashSet<ExecutorDetails>> foundScheduling = new HashSet<>();
SchedulerAssignment assignment = cluster.getAssignmentById("testTopology-id");
for (Collection<ExecutorDetails> execs : assignment.getSlotToExecutors().values()) {
foundScheduling.add(new HashSet<>(execs));
}
assertEquals(expectedScheduling, foundScheduling);
}