use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method addTopologyBlockToMap.
// Create multiple copies of a test topology
private void addTopologyBlockToMap(Map<String, TopologyDetails> topologyMap, String baseName, Config config, double spoutMemoryLoad, int[] blockIndices) {
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("testSpout", new TestSpout(), 1).setMemoryLoad(spoutMemoryLoad);
StormTopology stormTopology = builder.createTopology();
Map<ExecutorDetails, String> executorMap = genExecsAndComps(stormTopology);
for (int i = blockIndices[0]; i <= blockIndices[1]; ++i) {
TopologyDetails topo = new TopologyDetails(baseName + i, config, stormTopology, 0, executorMap, 0, "user");
topologyMap.put(topo.getId(), topo);
}
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method testResourceLimitation.
@Test
public void testResourceLimitation() {
INimbus iNimbus = new INimbusTest();
Map<String, SupervisorDetails> supMap = genSupervisors(2, 2, 400, 2000);
// a topology with multiple spouts
TopologyBuilder builder1 = new TopologyBuilder();
builder1.setSpout("wordSpout", new TestWordSpout(), 2).setCPULoad(250.0).setMemoryLoad(1000.0, 200.0);
builder1.setBolt("wordCountBolt", new TestWordCounter(), 1).shuffleGrouping("wordSpout").setCPULoad(100.0).setMemoryLoad(500.0, 100.0);
StormTopology stormTopology1 = builder1.createTopology();
Config config = new Config();
config.putAll(defaultTopologyConf);
Map<ExecutorDetails, String> executorMap1 = genExecsAndComps(stormTopology1);
TopologyDetails topology1 = new TopologyDetails("topology1", config, stormTopology1, 2, executorMap1, 0, "user");
ResourceAwareScheduler rs = new ResourceAwareScheduler();
scheduler = rs;
Topologies topologies = new Topologies(topology1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
rs.prepare(config, new StormMetricsRegistry());
rs.schedule(topologies, cluster);
SchedulerAssignment assignment1 = cluster.getAssignmentById(topology1.getId());
Set<WorkerSlot> assignedSlots1 = assignment1.getSlots();
Set<String> nodesIDs1 = new HashSet<>();
for (WorkerSlot slot : assignedSlots1) {
nodesIDs1.add(slot.getNodeId());
}
Collection<ExecutorDetails> executors1 = assignment1.getExecutors();
List<Double> assignedExecutorMemory = new ArrayList<>();
List<Double> assignedExecutorCpu = new ArrayList<>();
for (ExecutorDetails executor : executors1) {
assignedExecutorMemory.add(topology1.getTotalMemReqTask(executor));
assignedExecutorCpu.add(topology1.getTotalCpuReqTask(executor));
}
Collections.sort(assignedExecutorCpu);
Collections.sort(assignedExecutorMemory);
Map<ExecutorDetails, SupervisorDetails> executorToSupervisor = new HashMap<>();
Map<SupervisorDetails, List<ExecutorDetails>> supervisorToExecutors = new HashMap<>();
Map<Double, Double> cpuAvailableToUsed = new HashMap<>();
Map<Double, Double> memoryAvailableToUsed = new HashMap<>();
for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment1.getExecutorToSlot().entrySet()) {
executorToSupervisor.put(entry.getKey(), cluster.getSupervisorById(entry.getValue().getNodeId()));
}
for (Map.Entry<ExecutorDetails, SupervisorDetails> entry : executorToSupervisor.entrySet()) {
supervisorToExecutors.computeIfAbsent(entry.getValue(), k -> new ArrayList<>()).add(entry.getKey());
}
for (Map.Entry<SupervisorDetails, List<ExecutorDetails>> entry : supervisorToExecutors.entrySet()) {
Double supervisorTotalCpu = entry.getKey().getTotalCpu();
Double supervisorTotalMemory = entry.getKey().getTotalMemory();
Double supervisorUsedCpu = 0.0;
Double supervisorUsedMemory = 0.0;
for (ExecutorDetails executor : entry.getValue()) {
supervisorUsedMemory += topology1.getTotalCpuReqTask(executor);
supervisorTotalCpu += topology1.getTotalMemReqTask(executor);
}
cpuAvailableToUsed.put(supervisorTotalCpu, supervisorUsedCpu);
memoryAvailableToUsed.put(supervisorTotalMemory, supervisorUsedMemory);
}
// executor0 resides one one worker (on one), executor1 and executor2 on another worker (on the other node)
assertEquals(2, assignedSlots1.size());
assertEquals(2, nodesIDs1.size());
assertEquals(3, executors1.size());
assertEquals(100.0, assignedExecutorCpu.get(0), 0.001);
assertEquals(250.0, assignedExecutorCpu.get(1), 0.001);
assertEquals(250.0, assignedExecutorCpu.get(2), 0.001);
assertEquals(600.0, assignedExecutorMemory.get(0), 0.001);
assertEquals(1200.0, assignedExecutorMemory.get(1), 0.001);
assertEquals(1200.0, assignedExecutorMemory.get(2), 0.001);
for (Map.Entry<Double, Double> entry : memoryAvailableToUsed.entrySet()) {
assertTrue(entry.getKey() - entry.getValue() >= 0);
}
for (Map.Entry<Double, Double> entry : cpuAvailableToUsed.entrySet()) {
assertTrue(entry.getKey() - entry.getValue() >= 0);
}
assertFalse(cluster.needsSchedulingRas(topology1));
assertTrue(cluster.getStatusMap().get(topology1.getId()).startsWith("Running - Fully Scheduled by DefaultResourceAwareStrategy"));
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method testTopologyWithMultipleSpouts.
@Test
public void testTopologyWithMultipleSpouts() {
INimbus iNimbus = new INimbusTest();
Map<String, SupervisorDetails> supMap = genSupervisors(2, 4, 400, 2000);
// a topology with multiple spouts
TopologyBuilder builder1 = new TopologyBuilder();
builder1.setSpout("wordSpout1", new TestWordSpout(), 1);
builder1.setSpout("wordSpout2", new TestWordSpout(), 1);
builder1.setBolt("wordCountBolt1", new TestWordCounter(), 1).shuffleGrouping("wordSpout1").shuffleGrouping("wordSpout2");
builder1.setBolt("wordCountBolt2", new TestWordCounter(), 1).shuffleGrouping("wordCountBolt1");
builder1.setBolt("wordCountBolt3", new TestWordCounter(), 1).shuffleGrouping("wordCountBolt1");
builder1.setBolt("wordCountBolt4", new TestWordCounter(), 1).shuffleGrouping("wordCountBolt2");
builder1.setBolt("wordCountBolt5", new TestWordCounter(), 1).shuffleGrouping("wordSpout2");
StormTopology stormTopology1 = builder1.createTopology();
Config config = new Config();
config.putAll(defaultTopologyConf);
Map<ExecutorDetails, String> executorMap1 = genExecsAndComps(stormTopology1);
TopologyDetails topology1 = new TopologyDetails("topology1", config, stormTopology1, 0, executorMap1, 0, "user");
// a topology with two unconnected partitions
TopologyBuilder builder2 = new TopologyBuilder();
builder2.setSpout("wordSpoutX", new TestWordSpout(), 1);
builder2.setSpout("wordSpoutY", new TestWordSpout(), 1);
StormTopology stormTopology2 = builder2.createTopology();
Map<ExecutorDetails, String> executorMap2 = genExecsAndComps(stormTopology2);
TopologyDetails topology2 = new TopologyDetails("topology2", config, stormTopology2, 0, executorMap2, 0, "user");
scheduler = new ResourceAwareScheduler();
Topologies topologies = new Topologies(topology1, topology2);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
scheduler.prepare(config, new StormMetricsRegistry());
scheduler.schedule(topologies, cluster);
SchedulerAssignment assignment1 = cluster.getAssignmentById(topology1.getId());
Set<WorkerSlot> assignedSlots1 = assignment1.getSlots();
Set<String> nodesIDs1 = new HashSet<>();
for (WorkerSlot slot : assignedSlots1) {
nodesIDs1.add(slot.getNodeId());
}
Collection<ExecutorDetails> executors1 = assignment1.getExecutors();
assertEquals(1, assignedSlots1.size());
assertEquals(1, nodesIDs1.size());
assertEquals(7, executors1.size());
assertFalse(cluster.needsSchedulingRas(topology1));
assertTrue(cluster.getStatusMap().get(topology1.getId()).startsWith("Running - Fully Scheduled by DefaultResourceAwareStrategy"));
SchedulerAssignment assignment2 = cluster.getAssignmentById(topology2.getId());
Set<WorkerSlot> assignedSlots2 = assignment2.getSlots();
Set<String> nodesIDs2 = new HashSet<>();
for (WorkerSlot slot : assignedSlots2) {
nodesIDs2.add(slot.getNodeId());
}
Collection<ExecutorDetails> executors2 = assignment2.getExecutors();
assertEquals(1, assignedSlots2.size());
assertEquals(1, nodesIDs2.size());
assertEquals(2, executors2.size());
assertFalse(cluster.needsSchedulingRas(topology2));
assertTrue(cluster.getStatusMap().get(topology2.getId()).startsWith("Running - Fully Scheduled by DefaultResourceAwareStrategy"));
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestNodeSorterHostProximity method testAntiAffinityWithMultipleTopologies.
/**
* Schedule two topologies, once with special resources and another without.
* There are enough special resources to hold one topology with special resource ("my.gpu").
* If the sort order is incorrect, scheduling will not succeed.
*/
@Test
public void testAntiAffinityWithMultipleTopologies() {
INimbus iNimbus = new INimbusTest();
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacks(1, 40, 66, 0, 0, 4700, 226200, new HashMap<>());
HashMap<String, Double> extraResources = new HashMap<>();
extraResources.put("my.gpu", 1.0);
supMap.putAll(genSupervisorsWithRacks(1, 40, 66, 1, 0, 4700, 226200, extraResources));
Config config = new Config();
config.putAll(createGrasClusterConfig(88, 775, 25, null, null));
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails tdSimple = genTopology("topology-simple", config, 1, 5, 100, 300, 0, 0, "user", 8192);
// Schedule the simple topology first
Topologies topologies = new Topologies(tdSimple);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
{
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdSimple);
for (ExecutorDetails exec : tdSimple.getExecutors()) {
nodeSorter.prepare(exec);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = StreamSupport.stream(sortedRacks.spliterator(), false).map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
Assert.assertEquals(rackSummaries + "\nFirst rack sorted", "rack-000", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted", "rack-001", sortedRacks.get(1).id);
}
}
scheduler.schedule(topologies, cluster);
TopologyBuilder builder = topologyBuilder(1, 5, 100, 300);
builder.setBolt("gpu-bolt", new TestBolt(), 40).addResource("my.gpu", 1.0).shuffleGrouping("spout-0");
TopologyDetails tdGpu = topoToTopologyDetails("topology-gpu", config, builder.createTopology(), 0, 0, "user", 8192);
// Now schedule GPU but with the simple topology in place.
topologies = new Topologies(tdSimple, tdGpu);
cluster = new Cluster(cluster, topologies);
{
NodeSorterHostProximity nodeSorter = new NodeSorterHostProximity(cluster, tdGpu);
for (ExecutorDetails exec : tdGpu.getExecutors()) {
String comp = tdGpu.getComponentFromExecutor(exec);
nodeSorter.prepare(exec);
List<ObjectResourcesItem> sortedRacks = StreamSupport.stream(nodeSorter.getSortedRacks().spliterator(), false).collect(Collectors.toList());
String rackSummaries = sortedRacks.stream().map(x -> String.format("Rack %s -> scheduled-cnt %d, min-avail %f, avg-avail %f, cpu %f, mem %f", x.id, nodeSorter.getScheduledExecCntByRackId().getOrDefault(x.id, new AtomicInteger(-1)).get(), x.minResourcePercent, x.avgResourcePercent, x.availableResources.getTotalCpu(), x.availableResources.getTotalMemoryMb())).collect(Collectors.joining("\n\t"));
NormalizedResourceRequest topoResourceRequest = tdSimple.getApproximateTotalResources();
String topoRequest = String.format("Topo %s, approx-requested-resources %s", tdSimple.getId(), topoResourceRequest.toString());
Assert.assertEquals(rackSummaries + "\n# of racks sorted", 2, sortedRacks.size());
if (comp.equals("gpu-bolt")) {
Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-001", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-000", sortedRacks.get(1).id);
} else {
Assert.assertEquals(rackSummaries + "\nFirst rack sorted for " + comp, "rack-000", sortedRacks.get(0).id);
Assert.assertEquals(rackSummaries + "\nSecond rack sorted for " + comp, "rack-001", sortedRacks.get(1).id);
}
}
}
scheduler.schedule(topologies, cluster);
Map<String, SchedulerAssignment> assignments = new TreeMap<>(cluster.getAssignments());
assertEquals(2, assignments.size());
Map<String, Map<String, AtomicLong>> topoPerRackCount = new HashMap<>();
for (Map.Entry<String, SchedulerAssignment> entry : assignments.entrySet()) {
SchedulerAssignment sa = entry.getValue();
Map<String, AtomicLong> slotsPerRack = new TreeMap<>();
for (WorkerSlot slot : sa.getSlots()) {
String nodeId = slot.getNodeId();
String rack = supervisorIdToRackName(nodeId);
slotsPerRack.computeIfAbsent(rack, (r) -> new AtomicLong(0)).incrementAndGet();
}
LOG.info("{} => {}", entry.getKey(), slotsPerRack);
topoPerRackCount.put(entry.getKey(), slotsPerRack);
}
Map<String, AtomicLong> simpleCount = topoPerRackCount.get("topology-simple-0");
assertNotNull(simpleCount);
// Because the simple topology was scheduled first we want to be sure that it didn't put anything on
// the GPU nodes.
// Only 1 rack is in use
assertEquals(1, simpleCount.size());
// r001 is the second rack with GPUs
assertFalse(simpleCount.containsKey("r001"));
// r000 is the first rack with no GPUs
assertTrue(simpleCount.containsKey("r000"));
// We don't really care too much about the scheduling of topology-gpu-0, because it was scheduled.
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestNodeSorterHostProximity method testPreferRackWithTopoExecutors.
/**
* Rack with low resources should be used to schedule an executor if it has other executors for the same topology.
* <li>Schedule topo1 on one rack</li>
* <li>unassign some executors</li>
* <li>schedule another topology to partially fill up rack1</li>
* <li>Add another rack and schedule topology 1 remaining executors again</li>
* <li>scheduling should utilize all resources on rack1 before before trying next rack</li>
*/
@Test
public void testPreferRackWithTopoExecutors() {
INimbus iNimbus = new INimbusTest();
double compPcore = 100;
double compOnHeap = 775;
double compOffHeap = 25;
int topo1NumSpouts = 1;
int topo1NumBolts = 5;
int topo1SpoutParallelism = 100;
int topo1BoltParallelism = 200;
int topo2NumSpouts = 1;
int topo2NumBolts = 5;
int topo2SpoutParallelism = 10;
int topo2BoltParallelism = 20;
final int numRacks = 3;
final int numSupersPerRack = 10;
final int numPortsPerSuper = 6;
final int numZonesPerHost = 1;
final double numaResourceMultiplier = 1.0;
int rackStartNum = 0;
int supStartNum = 0;
long compPerRack = (topo1NumSpouts * topo1SpoutParallelism + topo1NumBolts * topo1BoltParallelism + // enough for topo1 but not topo1+topo2
topo2NumSpouts * topo2SpoutParallelism);
long compPerSuper = compPerRack / numSupersPerRack;
double cpuPerSuper = compPcore * compPerSuper;
double memPerSuper = (compOnHeap + compOffHeap) * compPerSuper;
double topo1MaxHeapSize = memPerSuper;
double topo2MaxHeapSize = memPerSuper;
final String topoName1 = "topology1";
final String topoName2 = "topology2";
Map<String, SupervisorDetails> supMap = genSupervisorsWithRacksAndNuma(numRacks, numSupersPerRack, numZonesPerHost, numPortsPerSuper, rackStartNum, supStartNum, cpuPerSuper, memPerSuper, Collections.emptyMap(), numaResourceMultiplier);
TestDNSToSwitchMapping testDNSToSwitchMapping = new TestDNSToSwitchMapping(supMap.values());
Config config = new Config();
config.putAll(createGrasClusterConfig(compPcore, compOnHeap, compOffHeap, null, null));
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, GenericResourceAwareStrategy.class.getName());
IScheduler scheduler = new ResourceAwareScheduler();
scheduler.prepare(config, new StormMetricsRegistry());
TopologyDetails td1 = genTopology(topoName1, config, topo1NumSpouts, topo1NumBolts, topo1SpoutParallelism, topo1BoltParallelism, 0, 0, "user", topo1MaxHeapSize);
// Schedule the topo1 topology and ensure it fits on 1 rack
Topologies topologies = new Topologies(td1);
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config);
cluster.setNetworkTopography(testDNSToSwitchMapping.getRackToHosts());
scheduler.schedule(topologies, cluster);
Set<String> assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("Racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
TopologyBuilder builder = topologyBuilder(topo2NumSpouts, topo2NumBolts, topo2SpoutParallelism, topo2BoltParallelism);
TopologyDetails td2 = topoToTopologyDetails(topoName2, config, builder.createTopology(), 0, 0, "user", topo2MaxHeapSize);
// Now schedule GPU but with the simple topology in place.
topologies = new Topologies(td1, td2);
cluster = new Cluster(cluster, topologies);
scheduler.schedule(topologies, cluster);
assignedRacks = cluster.getAssignedRacks(td1.getId(), td2.getId());
assertEquals("Racks for topologies=" + td1.getId() + "/" + td2.getId() + " is " + assignedRacks, 2, assignedRacks.size());
// topo2 gets scheduled on its own rack because it is empty and available
assignedRacks = cluster.getAssignedRacks(td2.getId());
assertEquals("Racks for topologies=" + td2.getId() + " is " + assignedRacks, 1, assignedRacks.size());
// now unassign topo2, expect only one rack to be in use; free some slots and reschedule topo1 some topo1 executors
cluster.unassign(td2.getId());
assignedRacks = cluster.getAssignedRacks(td2.getId());
assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td2.getId() + " is " + assignedRacks, 0, assignedRacks.size());
assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("After unassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
assertFalse("Topology " + td1.getId() + " should be fully assigned before freeing slots", cluster.needsSchedulingRas(td1));
freeSomeWorkerSlots(cluster);
assertTrue("Topology " + td1.getId() + " should need scheduling after freeing slots", cluster.needsSchedulingRas(td1));
// then reschedule executors
scheduler.schedule(topologies, cluster);
// only one rack should be in use by topology1
assignedRacks = cluster.getAssignedRacks(td1.getId());
assertEquals("After reassigning topology " + td2.getId() + ", racks for topology=" + td1.getId() + " is " + assignedRacks, 1, assignedRacks.size());
}
Aggregations