Search in sources :

Example 26 with TopologyBuilder

use of org.apache.storm.topology.TopologyBuilder in project storm by apache.

the class StreamBuilder method build.

/**
 * Assembles the {@link StormTopology} that corresponds to the computation
 * described through the stream api.
 *
 * @return the storm topology
 */
public StormTopology build() {
    // Clear the grouping, window and current-group state before walking the graph.
    nodeGroupingInfo.clear();
    windowInfo.clear();
    curGroup.clear();

    TopologicalOrderIterator<Node, Edge> nodes = new TopologicalOrderIterator<>(graph, queue());
    TopologyBuilder result = new TopologyBuilder();

    // Dispatch on the concrete node type; each branch handles exactly one kind of node.
    while (nodes.hasNext()) {
        Node current = nodes.next();
        if (current instanceof SpoutNode) {
            addSpout(result, (SpoutNode) current);
            continue;
        }
        if (current instanceof ProcessorNode) {
            handleProcessorNode((ProcessorNode) current, result);
            continue;
        }
        if (current instanceof PartitionNode) {
            updateNodeGroupingInfo((PartitionNode) current);
            processCurGroup(result);
            continue;
        }
        if (current instanceof WindowNode) {
            updateWindowInfo((WindowNode) current);
            processCurGroup(result);
            continue;
        }
        if (current instanceof SinkNode) {
            // The pending group is processed before the sink itself is added.
            processCurGroup(result);
            addSink(result, (SinkNode) current);
        }
    }

    // Process whatever is still pending in the current group once the walk is over.
    processCurGroup(result);
    mayBeAddTsField();
    return result.createTopology();
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) TopologicalOrderIterator(org.jgrapht.traverse.TopologicalOrderIterator)

Example 27 with TopologyBuilder

use of org.apache.storm.topology.TopologyBuilder in project storm by apache.

the class TransactionalTopologyBuilder method buildTopologyBuilder.

/**
 * Creates a {@link TopologyBuilder} containing the coordinator spout, the
 * batch executor bolt registered under the spout id, and every registered
 * bolt wrapped in a {@link CoordinatedBolt}.
 *
 * @return the populated topology builder
 */
public TopologyBuilder buildTopologyBuilder() {
    String coordinatorId = _spoutId + "/coordinator";
    TopologyBuilder topology = new TopologyBuilder();

    // Coordinator spout: apply the user supplied configs, then the transactional id.
    SpoutDeclarer coordinatorDeclarer = topology.setSpout(coordinatorId, new TransactionalSpoutCoordinator(_spout));
    for (Map<String, Object> spoutConf : _spoutConfs) {
        coordinatorDeclarer.addConfigurations(spoutConf);
    }
    coordinatorDeclarer.addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);

    // Batch executor bolt, subscribed to the coordinator's batch stream.
    BoltDeclarer emitterDeclarer = topology
            .setBolt(_spoutId, new CoordinatedBolt(new TransactionalSpoutBatchExecutor(_spout), null, null), _spoutParallelism)
            .allGrouping(coordinatorId, TransactionalSpoutCoordinator.TRANSACTION_BATCH_STREAM_ID)
            .addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);
    if (_spout instanceof ICommitterTransactionalSpout) {
        emitterDeclarer.allGrouping(coordinatorId, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
    }

    for (Map.Entry<String, Component> registered : _bolts.entrySet()) {
        String boltId = registered.getKey();
        Component component = registered.getValue();

        Map<String, SourceArgs> sourceArgs = new HashMap<>();
        for (String upstream : componentBoltSubscriptions(component)) {
            sourceArgs.put(upstream, SourceArgs.all());
        }

        // Only committer components get an id stream spec; everyone else passes null.
        IdStreamSpec commitSpec = component.committer
                ? IdStreamSpec.makeDetectSpec(coordinatorId, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID)
                : null;

        BoltDeclarer boltDeclarer = topology.setBolt(boltId, new CoordinatedBolt(component.bolt, sourceArgs, commitSpec), component.parallelism);
        for (Map componentConf : component.componentConfs) {
            boltDeclarer.addConfigurations(componentConf);
        }
        for (String upstream : componentBoltSubscriptions(component)) {
            boltDeclarer.directGrouping(upstream, Constants.COORDINATED_STREAM_ID);
        }
        for (InputDeclaration declaration : component.declarations) {
            declaration.declare(boltDeclarer);
        }
        if (component.committer) {
            boltDeclarer.allGrouping(coordinatorId, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
        }
    }
    return topology;
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) HashMap(java.util.HashMap) IdStreamSpec(org.apache.storm.coordination.CoordinatedBolt.IdStreamSpec) SourceArgs(org.apache.storm.coordination.CoordinatedBolt.SourceArgs) BoltDeclarer(org.apache.storm.topology.BoltDeclarer) SpoutDeclarer(org.apache.storm.topology.SpoutDeclarer) HashMap(java.util.HashMap) Map(java.util.Map) CoordinatedBolt(org.apache.storm.coordination.CoordinatedBolt)

Example 28 with TopologyBuilder

use of org.apache.storm.topology.TopologyBuilder in project storm by apache.

the class TestResourceAwareScheduler method testTopologyWorkerMaxHeapSize.

/**
 * Checks how the resource aware scheduler reacts to the per-worker max heap
 * size configured on a topology, first with 4 executors and then with 5.
 */
@Test
public void testTopologyWorkerMaxHeapSize() {
    // Scenario 1: RAS spreads executors across multiple workers based on the limit set for a worker used by the topology.
    INimbus nimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
    Map<String, Number> supervisorResources = new HashMap<>();
    supervisorResources.put(Config.SUPERVISOR_CPU_CAPACITY, 400.0);
    supervisorResources.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 2000.0);
    Map<String, SupervisorDetails> supervisors = TestUtilsForResourceAwareScheduler.genSupervisors(2, 2, supervisorResources);

    TopologyBuilder fourExecutorBuilder = new TopologyBuilder();
    fourExecutorBuilder.setSpout("wordSpout1", new TestWordSpout(), 4);
    StormTopology fourExecutorTopology = fourExecutorBuilder.createTopology();

    Config fourExecutorConf = new Config();
    fourExecutorConf.putAll(defaultTopologyConf);
    fourExecutorConf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 128.0);

    Map<ExecutorDetails, String> fourExecutorComponents = TestUtilsForResourceAwareScheduler.genExecsAndComps(fourExecutorTopology);
    TopologyDetails schedulable = new TopologyDetails("topology1", fourExecutorConf, fourExecutorTopology, 1, fourExecutorComponents, 0);
    Cluster firstCluster = new Cluster(nimbus, supervisors, new HashMap<String, SchedulerAssignmentImpl>(), fourExecutorConf);
    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();

    Map<String, TopologyDetails> firstTopologyMap = new HashMap<>();
    firstTopologyMap.put(schedulable.getId(), schedulable);
    Topologies firstTopologies = new Topologies(firstTopologyMap);

    scheduler.prepare(fourExecutorConf);
    scheduler.schedule(firstTopologies, firstCluster);

    Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", firstCluster.getStatusMap().get(schedulable.getId()));
    Assert.assertEquals(4, firstCluster.getAssignedNumWorkers(schedulable));

    // Scenario 2: no more workers are available because of the topology worker max heap size limit,
    // even though memory is still available.
    // wordSpout2 contains 5 executors that need scheduling, each with a memory requirement of 128.0 MB.
    // The cluster contains 4 free WorkerSlots, and for this topology each worker is limited to a max heap size of 128.0.
    // One executor therefore cannot be scheduled, which fails the scheduling of the whole topology:
    // none of its executors end up scheduled.
    TopologyBuilder fiveExecutorBuilder = new TopologyBuilder();
    fiveExecutorBuilder.setSpout("wordSpout2", new TestWordSpout(), 5);
    StormTopology fiveExecutorTopology = fiveExecutorBuilder.createTopology();

    Config fiveExecutorConf = new Config();
    fiveExecutorConf.putAll(defaultTopologyConf);
    fiveExecutorConf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, 128.0);

    Map<ExecutorDetails, String> fiveExecutorComponents = TestUtilsForResourceAwareScheduler.genExecsAndComps(fiveExecutorTopology);
    TopologyDetails unschedulable = new TopologyDetails("topology2", fiveExecutorConf, fiveExecutorTopology, 1, fiveExecutorComponents, 0);
    Cluster secondCluster = new Cluster(nimbus, supervisors, new HashMap<String, SchedulerAssignmentImpl>(), fiveExecutorConf);

    Map<String, TopologyDetails> secondTopologyMap = new HashMap<>();
    secondTopologyMap.put(unschedulable.getId(), unschedulable);
    Topologies secondTopologies = new Topologies(secondTopologyMap);

    // The same scheduler instance is re-prepared with the second configuration.
    scheduler.prepare(fiveExecutorConf);
    scheduler.schedule(secondTopologies, secondCluster);

    Assert.assertEquals("Not enough resources to schedule - 0/5 executors scheduled", secondCluster.getStatusMap().get(unschedulable.getId()));
    Assert.assertEquals(5, secondCluster.getUnassignedExecutors(unschedulable).size());
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) StormTopology(org.apache.storm.generated.StormTopology) Cluster(org.apache.storm.scheduler.Cluster) INimbus(org.apache.storm.scheduler.INimbus) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) SchedulerAssignmentImpl(org.apache.storm.scheduler.SchedulerAssignmentImpl) TestWordSpout(org.apache.storm.testing.TestWordSpout) Topologies(org.apache.storm.scheduler.Topologies) SupervisorDetails(org.apache.storm.scheduler.SupervisorDetails) Test(org.junit.Test)

Example 29 with TopologyBuilder

use of org.apache.storm.topology.TopologyBuilder in project storm by apache.

the class TestResourceAwareScheduler method TestMultipleSpoutsAndCyclicTopologies.

/**
 * Schedules a topology with two spouts and three bolts wired in a cycle
 * (bolt-1 -> bolt-2 -> bolt-3 -> bolt-1) and expects all 25 executors to be assigned.
 */
@Test
public void TestMultipleSpoutsAndCyclicTopologies() {
    TopologyBuilder topologyBuilder = new TopologyBuilder();
    topologyBuilder.setSpout("spout-1", new TestUtilsForResourceAwareScheduler.TestSpout(), 5);
    topologyBuilder.setSpout("spout-2", new TestUtilsForResourceAwareScheduler.TestSpout(), 5);
    // bolt-1 consumes bolt-3, which in turn (via bolt-2) consumes bolt-1: this closes the cycle.
    topologyBuilder.setBolt("bolt-1", new TestUtilsForResourceAwareScheduler.TestBolt(), 5)
            .shuffleGrouping("spout-1")
            .shuffleGrouping("bolt-3");
    topologyBuilder.setBolt("bolt-2", new TestUtilsForResourceAwareScheduler.TestBolt(), 5)
            .shuffleGrouping("bolt-1");
    topologyBuilder.setBolt("bolt-3", new TestUtilsForResourceAwareScheduler.TestBolt(), 5)
            .shuffleGrouping("bolt-2")
            .shuffleGrouping("spout-2");

    INimbus nimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
    Map<String, Number> supervisorResources = new HashMap<String, Number>();
    supervisorResources.put(Config.SUPERVISOR_CPU_CAPACITY, 100.0);
    supervisorResources.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 1000.0);
    Map<String, SupervisorDetails> supervisors = TestUtilsForResourceAwareScheduler.genSupervisors(25, 1, supervisorResources);

    Config conf = new Config();
    conf.putAll(Utils.readDefaultConfig());
    conf.put(Config.RESOURCE_AWARE_SCHEDULER_EVICTION_STRATEGY,
            org.apache.storm.scheduler.resource.strategies.eviction.DefaultEvictionStrategy.class.getName());
    conf.put(Config.RESOURCE_AWARE_SCHEDULER_PRIORITY_STRATEGY,
            org.apache.storm.scheduler.resource.strategies.priority.DefaultSchedulingPriorityStrategy.class.getName());
    conf.put(Config.TOPOLOGY_SCHEDULER_STRATEGY,
            org.apache.storm.scheduler.resource.strategies.scheduling.DefaultResourceAwareStrategy.class.getName());
    conf.put(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT, 100.0);
    conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB, 500);
    conf.put(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB, 500);
    conf.put(Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB, Double.MAX_VALUE);

    StormTopology cyclicTopology = topologyBuilder.createTopology();
    TopologyDetails details = new TopologyDetails("topo-1", conf, cyclicTopology, 0, genExecsAndComps(cyclicTopology), 0);
    Cluster cluster = new Cluster(nimbus, supervisors, new HashMap<String, SchedulerAssignmentImpl>(), conf);
    // The submitter is set only after the topology details and cluster were created; that order is kept as is.
    conf.put(Config.TOPOLOGY_SUBMITTER_USER, "jerry");

    Map<String, TopologyDetails> topologiesById = new HashMap<String, TopologyDetails>();
    topologiesById.put(details.getId(), details);
    Topologies topologies = new Topologies(topologiesById);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf);
    scheduler.schedule(topologies, cluster);

    Assert.assertNotNull("Topo scheduled?", cluster.getAssignmentById(details.getId()));
    Assert.assertEquals("Topo all executors scheduled?", 25, cluster.getAssignmentById(details.getId()).getExecutorToSlot().size());
}
Also used : TopologyBuilder(org.apache.storm.topology.TopologyBuilder) HashMap(java.util.HashMap) Config(org.apache.storm.Config) StormTopology(org.apache.storm.generated.StormTopology) SchedulerAssignmentImpl(org.apache.storm.scheduler.SchedulerAssignmentImpl) Topologies(org.apache.storm.scheduler.Topologies) SupervisorDetails(org.apache.storm.scheduler.SupervisorDetails) Cluster(org.apache.storm.scheduler.Cluster) INimbus(org.apache.storm.scheduler.INimbus) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) BoltDeclarer(org.apache.storm.topology.BoltDeclarer) SpoutDeclarer(org.apache.storm.topology.SpoutDeclarer) Test(org.junit.Test)

Example 30 with TopologyBuilder

use of org.apache.storm.topology.TopologyBuilder in project storm by apache.

the class TestResourceAwareScheduler method testResourceLimitation.

/**
 * Schedules a topology with one spout (two executors) and one bolt (one
 * executor) on two supervisors and checks the resulting placement, the
 * per-executor resource requests, and that no supervisor is asked for more
 * CPU or memory than it has in total.
 */
@Test
public void testResourceLimitation() {
    INimbus iNimbus = new TestUtilsForResourceAwareScheduler.INimbusTest();
    Map<String, Number> resourceMap = new HashMap<>();
    resourceMap.put(Config.SUPERVISOR_CPU_CAPACITY, 400.0);
    resourceMap.put(Config.SUPERVISOR_MEMORY_CAPACITY_MB, 2000.0);
    Map<String, SupervisorDetails> supMap = TestUtilsForResourceAwareScheduler.genSupervisors(2, 2, resourceMap);
    // a topology with one spout (two executors) feeding one bolt (one executor)
    TopologyBuilder builder1 = new TopologyBuilder();
    builder1.setSpout("wordSpout", new TestWordSpout(), 2).setCPULoad(250.0).setMemoryLoad(1000.0, 200.0);
    builder1.setBolt("wordCountBolt", new TestWordCounter(), 1).shuffleGrouping("wordSpout").setCPULoad(100.0).setMemoryLoad(500.0, 100.0);
    StormTopology stormTopology1 = builder1.createTopology();
    Config config = new Config();
    config.putAll(defaultTopologyConf);
    Map<ExecutorDetails, String> executorMap1 = TestUtilsForResourceAwareScheduler.genExecsAndComps(stormTopology1);
    TopologyDetails topology1 = new TopologyDetails("topology1", config, stormTopology1, 2, executorMap1, 0);
    Cluster cluster = new Cluster(iNimbus, supMap, new HashMap<String, SchedulerAssignmentImpl>(), config);
    ResourceAwareScheduler rs = new ResourceAwareScheduler();
    Map<String, TopologyDetails> topoMap = new HashMap<>();
    topoMap.put(topology1.getId(), topology1);
    Topologies topologies = new Topologies(topoMap);
    rs.prepare(config);
    rs.schedule(topologies, cluster);
    SchedulerAssignment assignment1 = cluster.getAssignmentById(topology1.getId());
    Set<WorkerSlot> assignedSlots1 = assignment1.getSlots();
    Set<String> nodesIDs1 = new HashSet<>();
    for (WorkerSlot slot : assignedSlots1) {
        nodesIDs1.add(slot.getNodeId());
    }
    // Collect the requested CPU and memory of every assigned executor; sorting makes
    // the index-based assertions below independent of iteration order.
    Collection<ExecutorDetails> executors1 = assignment1.getExecutors();
    List<Double> assignedExecutorMemory = new ArrayList<>();
    List<Double> assignedExecutorCpu = new ArrayList<>();
    for (ExecutorDetails executor : executors1) {
        assignedExecutorMemory.add(topology1.getTotalMemReqTask(executor));
        assignedExecutorCpu.add(topology1.getTotalCpuReqTask(executor));
    }
    Collections.sort(assignedExecutorCpu);
    Collections.sort(assignedExecutorMemory);
    // Group the executors by the supervisor that hosts the slot they were assigned to.
    Map<SupervisorDetails, List<ExecutorDetails>> supervisorToExecutors = new HashMap<>();
    for (Map.Entry<ExecutorDetails, WorkerSlot> entry : assignment1.getExecutorToSlot().entrySet()) {
        SupervisorDetails supervisor = cluster.getSupervisorById(entry.getValue().getNodeId());
        List<ExecutorDetails> executorsOnSupervisor = supervisorToExecutors.get(supervisor);
        if (executorsOnSupervisor == null) {
            executorsOnSupervisor = new ArrayList<>();
            supervisorToExecutors.put(supervisor, executorsOnSupervisor);
        }
        executorsOnSupervisor.add(entry.getKey());
    }
    // executor0 resides on one worker (on one node), executor1 and executor2 on another worker (on the other node)
    Assert.assertEquals(2, assignedSlots1.size());
    Assert.assertEquals(2, nodesIDs1.size());
    Assert.assertEquals(3, executors1.size());
    Assert.assertEquals(100.0, assignedExecutorCpu.get(0), 0.001);
    Assert.assertEquals(250.0, assignedExecutorCpu.get(1), 0.001);
    Assert.assertEquals(250.0, assignedExecutorCpu.get(2), 0.001);
    Assert.assertEquals(600.0, assignedExecutorMemory.get(0), 0.001);
    Assert.assertEquals(1200.0, assignedExecutorMemory.get(1), 0.001);
    Assert.assertEquals(1200.0, assignedExecutorMemory.get(2), 0.001);
    // Check every supervisor individually. CPU requests are summed into the CPU total and
    // memory requests into the memory total, and the check is done per supervisor rather
    // than through a map keyed by capacity (supervisors with equal capacity would collide).
    for (Map.Entry<SupervisorDetails, List<ExecutorDetails>> entry : supervisorToExecutors.entrySet()) {
        double supervisorTotalCpu = entry.getKey().getTotalCPU();
        double supervisorTotalMemory = entry.getKey().getTotalMemory();
        double supervisorUsedCpu = 0.0;
        double supervisorUsedMemory = 0.0;
        for (ExecutorDetails executor : entry.getValue()) {
            supervisorUsedCpu += topology1.getTotalCpuReqTask(executor);
            supervisorUsedMemory += topology1.getTotalMemReqTask(executor);
        }
        Assert.assertTrue(supervisorTotalMemory - supervisorUsedMemory >= 0);
        Assert.assertTrue(supervisorTotalCpu - supervisorUsedCpu >= 0);
    }
    Assert.assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster.getStatusMap().get(topology1.getId()));
}
Also used : ExecutorDetails(org.apache.storm.scheduler.ExecutorDetails) HashMap(java.util.HashMap) TopologyBuilder(org.apache.storm.topology.TopologyBuilder) Config(org.apache.storm.Config) StormTopology(org.apache.storm.generated.StormTopology) ArrayList(java.util.ArrayList) SchedulerAssignmentImpl(org.apache.storm.scheduler.SchedulerAssignmentImpl) WorkerSlot(org.apache.storm.scheduler.WorkerSlot) Topologies(org.apache.storm.scheduler.Topologies) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) SupervisorDetails(org.apache.storm.scheduler.SupervisorDetails) HashSet(java.util.HashSet) TestWordCounter(org.apache.storm.testing.TestWordCounter) Cluster(org.apache.storm.scheduler.Cluster) INimbus(org.apache.storm.scheduler.INimbus) TopologyDetails(org.apache.storm.scheduler.TopologyDetails) SchedulerAssignment(org.apache.storm.scheduler.SchedulerAssignment) TestWordSpout(org.apache.storm.testing.TestWordSpout) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Aggregations

TopologyBuilder (org.apache.storm.topology.TopologyBuilder)118 Config (org.apache.storm.Config)62 Fields (org.apache.storm.tuple.Fields)36 LocalCluster (org.apache.storm.LocalCluster)28 HashMap (java.util.HashMap)25 LocalTopology (org.apache.storm.LocalCluster.LocalTopology)24 Test (org.junit.Test)17 TestWordSpout (org.apache.storm.testing.TestWordSpout)16 Map (java.util.Map)12 StormTopology (org.apache.storm.generated.StormTopology)12 BoltDeclarer (org.apache.storm.topology.BoltDeclarer)9 Values (org.apache.storm.tuple.Values)9 FlinkLocalCluster (org.apache.flink.storm.api.FlinkLocalCluster)8 Cluster (org.apache.storm.scheduler.Cluster)8 INimbus (org.apache.storm.scheduler.INimbus)8 SchedulerAssignmentImpl (org.apache.storm.scheduler.SchedulerAssignmentImpl)8 SupervisorDetails (org.apache.storm.scheduler.SupervisorDetails)8 Topologies (org.apache.storm.scheduler.Topologies)8 TopologyDetails (org.apache.storm.scheduler.TopologyDetails)8 BoltFileSink (org.apache.flink.storm.util.BoltFileSink)7