use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class StreamBuilder method build.
/**
* Builds a new {@link StormTopology} for the computation expressed
* via the stream api.
*
* @return the storm topology
*/
public StormTopology build() {
nodeGroupingInfo.clear();
windowInfo.clear();
curGroup.clear();
TopologicalOrderIterator<Node, Edge> iterator = new TopologicalOrderIterator<>(graph, queue());
TopologyBuilder topologyBuilder = new TopologyBuilder();
while (iterator.hasNext()) {
Node node = iterator.next();
if (node instanceof SpoutNode) {
addSpout(topologyBuilder, (SpoutNode) node);
} else if (node instanceof ProcessorNode) {
handleProcessorNode((ProcessorNode) node, topologyBuilder);
} else if (node instanceof PartitionNode) {
updateNodeGroupingInfo((PartitionNode) node);
processCurGroup(topologyBuilder);
} else if (node instanceof WindowNode) {
updateWindowInfo((WindowNode) node);
processCurGroup(topologyBuilder);
} else if (node instanceof SinkNode) {
processCurGroup(topologyBuilder);
addSink(topologyBuilder, (SinkNode) node);
}
}
processCurGroup(topologyBuilder);
mayBeAddTsField();
return topologyBuilder.createTopology();
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TransactionalTopologyBuilder method buildTopologyBuilder.
public TopologyBuilder buildTopologyBuilder() {
String coordinator = _spoutId + "/coordinator";
TopologyBuilder builder = new TopologyBuilder();
SpoutDeclarer declarer = builder.setSpout(coordinator, new TransactionalSpoutCoordinator(_spout));
for (Map<String, Object> conf : _spoutConfs) {
declarer.addConfigurations(conf);
}
declarer.addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);
BoltDeclarer emitterDeclarer = builder.setBolt(_spoutId, new CoordinatedBolt(new TransactionalSpoutBatchExecutor(_spout), null, null), _spoutParallelism).allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_BATCH_STREAM_ID).addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);
if (_spout instanceof ICommitterTransactionalSpout) {
emitterDeclarer.allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
}
for (String id : _bolts.keySet()) {
Component component = _bolts.get(id);
Map<String, SourceArgs> coordinatedArgs = new HashMap<String, SourceArgs>();
for (String c : componentBoltSubscriptions(component)) {
coordinatedArgs.put(c, SourceArgs.all());
}
IdStreamSpec idSpec = null;
if (component.committer) {
idSpec = IdStreamSpec.makeDetectSpec(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
}
BoltDeclarer input = builder.setBolt(id, new CoordinatedBolt(component.bolt, coordinatedArgs, idSpec), component.parallelism);
for (Map conf : component.componentConfs) {
input.addConfigurations(conf);
}
for (String c : componentBoltSubscriptions(component)) {
input.directGrouping(c, Constants.COORDINATED_STREAM_ID);
}
for (InputDeclaration d : component.declarations) {
d.declare(input);
}
if (component.committer) {
input.allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
}
}
return builder;
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method testTopologyWorkerMaxHeapSize.
@Test
public void testTopologyWorkerMaxHeapSize() {
// Test1: If RAS spreads executors across multiple workers based on the set limit for a worker used by the topology
INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
Map<String, Number> resourceMap = new HashMap<>();
resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 400.0);
resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 2000.0);
Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(2, 2, resourceMap);
TopologyBuilder builder1 = new TopologyBuilder();
builder1.setSpout("wordSpout1", new TestWordSpout(), 4);
StormTopology stormTopology1 = builder1.createTopology();
Config config1 = new Config();
config1.putAll(defaultTopologyConf);
config1.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 128.0);
Map<ExecutorDetails, String> executorMap1 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology1);
TopologyDetails topology1 = new TopologyDetails("topology1", config1, stormTopology1, 1, executorMap1, 0);
Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config1);
ResourceAwareScheduler rs = new ResourceAwareScheduler();
Map<String, TopologyDetails> topoMap = new HashMap<>();
topoMap.put(topology1.getId(), topology1);
Topologies topologies = new Topologies(topoMap);
rs.prepare(config1);
rs.schedule(topologies, cluster);
Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster.getStatusMap().get(topology1.getId()));
Assert.assertEquals(4, cluster.getAssignedNumWorkers(topology1));
// Test2: test when no more workers are available due to topology worker max heap size limit but there is memory is still available
// wordSpout2 is going to contain 5 executors that needs scheduling. Each of those executors has a memory requirement of 128.0 MB
// The cluster contains 4 free WorkerSlots. For this topolology each worker is limited to a max heap size of 128.0
// Thus, one executor not going to be able to get scheduled thus failing the scheduling of this topology and no executors of this topology will be scheduleded
TopologyBuilder builder2 = new TopologyBuilder();
builder2.setSpout("wordSpout2", new TestWordSpout(), 5);
StormTopology stormTopology2 = builder2.createTopology();
Config config2 = new Config();
config2.putAll(defaultTopologyConf);
config2.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 128.0);
Map<ExecutorDetails, String> executorMap2 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology2);
TopologyDetails topology2 = new TopologyDetails("topology2", config2, stormTopology2, 1, executorMap2, 0);
cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config2);
topoMap = new HashMap<>();
topoMap.put(topology2.getId(), topology2);
topologies = new Topologies(topoMap);
rs.prepare(config2);
rs.schedule(topologies, cluster);
Assert.assertEquals("Not enough resources to schedule - 0/5 executors scheduled", cluster.getStatusMap().get(topology2.getId()));
Assert.assertEquals(5, cluster.getUnassignedExecutors(topology2).size());
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method TestMultipleSpoutsAndCyclicTopologies.
/**
* Test multiple spouts and cyclic topologies
*/
@Test
public void TestMultipleSpoutsAndCyclicTopologies() {
TopologyBuilder builder = new TopologyBuilder();
SpoutDeclarer s1 = builder.setSpout("spout-1", new TestUtilsForResourceAwareScheduler.TestSpout(), 5);
SpoutDeclarer s2 = builder.setSpout("spout-2", new TestUtilsForResourceAwareScheduler.TestSpout(), 5);
BoltDeclarer b1 = builder.setBolt("bolt-1", new TestUtilsForResourceAwareScheduler.TestBolt(), 5).shuffleGrouping("spout-1").shuffleGrouping("bolt-3");
BoltDeclarer b2 = builder.setBolt("bolt-2", new TestUtilsForResourceAwareScheduler.TestBolt(), 5).shuffleGrouping("bolt-1");
BoltDeclarer b3 = builder.setBolt("bolt-3", new TestUtilsForResourceAwareScheduler.TestBolt(), 5).shuffleGrouping("bolt-2").shuffleGrouping("spout-2");
INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
Map<String, Number> resourceMap = new HashMap<String, Number>();
resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 100.0);
resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 1000.0);
Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(25, 1, resourceMap);
Config config = new Config();
config.putAll(Utils.readDefaultConfig());
config.put(Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY, org.apache.storm.scheduler.resource.strategies.eviction.DefaultEvictionStrategy.class.getName());
config.put(Config.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY, org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy.class.getName());
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy.class.getName());
config.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 100.0);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 500);
config.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 500);
config.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);
StormTopology stormTopology = builder.createTopology();
TopologyDetails topo = new TopologyDetails("topo-1", config, stormTopology, 0, genExecsAndComps(stormTopology), 0);
Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config);
config.put(Config.TOPOLOGY_SUBMITTER_USER, "jerry");
Map<String, TopologyDetails> topoMap = new HashMap<String, TopologyDetails>();
topoMap.put(topo.getId(), topo);
Topologies topologies = new Topologies(topoMap);
ResourceAwareScheduler rs = new ResourceAwareScheduler();
rs.prepare(config);
rs.schedule(topologies, cluster);
Assert.assertTrue("Topo scheduled?", cluster.getAssignmentById(topo.getId()) != null);
Assert.assertEquals("Topo all executors scheduled?", 25, cluster.getAssignmentById(topo.getId()).getExecutorToSlot().size());
}
use of org.apache.storm.topology.TopologyBuilder in project storm by apache.
the class TestResourceAwareScheduler method testResourceLimitation.
@Test
public void testResourceLimitation() {
INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
Map<String, Number> resourceMap = new HashMap<>();
resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 400.0);
resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 2000.0);
Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(2, 2, resourceMap);
// a topology with multiple spouts
TopologyBuilder builder1 = new TopologyBuilder();
builder1.setSpout("wordSpout", new TestWordSpout(), 2).setCPULoad(250.0).setMemoryLoad(1000.0, 200.0);
builder1.setBolt("wordCountBolt", new TestWordCounter(), 1).shuffleGrouping("wordSpout").setCPULoad(100.0).setMemoryLoad(500.0, 100.0);
StormTopology stormTopology1 = builder1.createTopology();
Config config = new Config();
config.putAll(defaultTopologyConf);
Map<ExecutorDetails, String> executorMap1 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology1);
TopologyDetails topology1 = new TopologyDetails("topology1", config, stormTopology1, 2, executorMap1, 0);
Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config);
ResourceAwareScheduler rs = new ResourceAwareScheduler();
Map<String, TopologyDetails> topoMap = new HashMap<>();
topoMap.put(topology1.getId(), topology1);
Topologies topologies = new Topologies(topoMap);
rs.prepare(config);
rs.schedule(topologies, cluster);
SchedulerAssignment assignment1 = cluster.getAssignmentById(topology1.getId());
Set<WorkerSlot> assignedSlots1 = assignment1.getSlots();
Set<String> nodesIDs1 = new HashSet<>();
for (WorkerSlot slot : assignedSlots1) {
nodesIDs1.add(slot.getNodeId());
}
Collection<ExecutorDetails> executors1 = assignment1.getExecutors();
List<Double> assignedExecutorMemory = new ArrayList<>();
List<Double> assignedExecutorCpu = new ArrayList<>();
for (ExecutorDetails executor : executors1) {
assignedExecutorMemory.add(topology1.getTotalMemReqTask(executor));
assignedExecutorCpu.add(topology1.getTotalCpuReqTask(executor));
}
Collections.sort(assignedExecutorCpu);
Collections.sort(assignedExecutorMemory);
Map<ExecutorDetails, SupervisorDetails> executorToSupervisor = new HashMap<>();
Map<SupervisorDetails, List<ExecutorDetails>> supervisorToExecutors = new HashMap<>();
Map<Double, Double> cpuAvailableToUsed = new HashMap();
Map<Double, Double> memoryAvailableToUsed = new HashMap();
for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment1.getExecutorToSlot().entrySet()) {
executorToSupervisor.put(entry.getKey(), cluster.getSupervisorById(entry.getValue().getNodeId()));
}
for (Map.Entry<ExecutorDetails, SupervisorDetails> entry : executorToSupervisor.entrySet()) {
List<ExecutorDetails> executorsOnSupervisor = supervisorToExecutors.get(entry.getValue());
if (executorsOnSupervisor == null) {
executorsOnSupervisor = new ArrayList<>();
supervisorToExecutors.put(entry.getValue(), executorsOnSupervisor);
}
executorsOnSupervisor.add(entry.getKey());
}
for (Map.Entry<SupervisorDetails, List<ExecutorDetails>> entry : supervisorToExecutors.entrySet()) {
Double supervisorTotalCpu = entry.getKey().getTotalCPU();
Double supervisorTotalMemory = entry.getKey().getTotalMemory();
Double supervisorUsedCpu = 0.0;
Double supervisorUsedMemory = 0.0;
for (ExecutorDetails executor : entry.getValue()) {
supervisorUsedMemory += topology1.getTotalCpuReqTask(executor);
supervisorTotalCpu += topology1.getTotalMemReqTask(executor);
}
cpuAvailableToUsed.put(supervisorTotalCpu, supervisorUsedCpu);
memoryAvailableToUsed.put(supervisorTotalMemory, supervisorUsedMemory);
}
// executor0 resides one one worker (on one), executor1 and executor2 on another worker (on the other node)
Assert.assertEquals(2, assignedSlots1.size());
Assert.assertEquals(2, nodesIDs1.size());
Assert.assertEquals(3, executors1.size());
Assert.assertEquals(100.0, assignedExecutorCpu.get(0), 0.001);
Assert.assertEquals(250.0, assignedExecutorCpu.get(1), 0.001);
Assert.assertEquals(250.0, assignedExecutorCpu.get(2), 0.001);
Assert.assertEquals(600.0, assignedExecutorMemory.get(0), 0.001);
Assert.assertEquals(1200.0, assignedExecutorMemory.get(1), 0.001);
Assert.assertEquals(1200.0, assignedExecutorMemory.get(2), 0.001);
for (Map.Entry<Double, Double> entry : memoryAvailableToUsed.entrySet()) {
Assert.assertTrue(entry.getKey() - entry.getValue() >= 0);
}
for (Map.Entry<Double, Double> entry : cpuAvailableToUsed.entrySet()) {
Assert.assertTrue(entry.getKey() - entry.getValue() >= 0);
}
Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster.getStatusMap().get(topology1.getId()));
}
Aggregations