
Example 1 with BoltDeclarer

Use of backtype.storm.topology.BoltDeclarer in project storm by nathanmarz.

From the class TridentTopologyBuilder, method buildTopology:

public StormTopology buildTopology() {
    TopologyBuilder builder = new TopologyBuilder();
    Map<GlobalStreamId, String> batchIdsForSpouts = fleshOutStreamBatchIds(false);
    Map<GlobalStreamId, String> batchIdsForBolts = fleshOutStreamBatchIds(true);
    Map<String, List<String>> batchesToCommitIds = new HashMap<String, List<String>>();
    Map<String, List<ITridentSpout>> batchesToSpouts = new HashMap<String, List<ITridentSpout>>();
    for (String id : _spouts.keySet()) {
        TransactionalSpoutComponent c = _spouts.get(id);
        if (c.spout instanceof IRichSpout) {
            //TODO: wrap this to set the stream name
            builder.setSpout(id, (IRichSpout) c.spout, c.parallelism);
        } else {
            String batchGroup = c.batchGroupId;
            if (!batchesToCommitIds.containsKey(batchGroup)) {
                batchesToCommitIds.put(batchGroup, new ArrayList<String>());
            }
            batchesToCommitIds.get(batchGroup).add(c.commitStateId);
            if (!batchesToSpouts.containsKey(batchGroup)) {
                batchesToSpouts.put(batchGroup, new ArrayList<ITridentSpout>());
            }
            batchesToSpouts.get(batchGroup).add((ITridentSpout) c.spout);
            BoltDeclarer scd = builder.setBolt(spoutCoordinator(id), new TridentSpoutCoordinator(c.commitStateId, (ITridentSpout) c.spout)).globalGrouping(masterCoordinator(c.batchGroupId), MasterBatchCoordinator.BATCH_STREAM_ID).globalGrouping(masterCoordinator(c.batchGroupId), MasterBatchCoordinator.SUCCESS_STREAM_ID);
            for (Map m : c.componentConfs) {
                scd.addConfigurations(m);
            }
            Map<String, TridentBoltExecutor.CoordSpec> specs = new HashMap();
            specs.put(c.batchGroupId, new CoordSpec());
            BoltDeclarer bd = builder.setBolt(id, new TridentBoltExecutor(new TridentSpoutExecutor(c.commitStateId, c.streamName, ((ITridentSpout) c.spout)), batchIdsForSpouts, specs), c.parallelism);
            bd.allGrouping(spoutCoordinator(id), MasterBatchCoordinator.BATCH_STREAM_ID);
            bd.allGrouping(masterCoordinator(batchGroup), MasterBatchCoordinator.SUCCESS_STREAM_ID);
            if (c.spout instanceof ICommitterTridentSpout) {
                bd.allGrouping(masterCoordinator(batchGroup), MasterBatchCoordinator.COMMIT_STREAM_ID);
            }
            for (Map m : c.componentConfs) {
                bd.addConfigurations(m);
            }
        }
    }
    for (String id : _batchPerTupleSpouts.keySet()) {
        SpoutComponent c = _batchPerTupleSpouts.get(id);
        SpoutDeclarer d = builder.setSpout(id, new RichSpoutBatchTriggerer((IRichSpout) c.spout, c.streamName, c.batchGroupId), c.parallelism);
        for (Map conf : c.componentConfs) {
            d.addConfigurations(conf);
        }
    }
    for (String batch : batchesToCommitIds.keySet()) {
        List<String> commitIds = batchesToCommitIds.get(batch);
        builder.setSpout(masterCoordinator(batch), new MasterBatchCoordinator(commitIds, batchesToSpouts.get(batch)));
    }
    for (String id : _bolts.keySet()) {
        Component c = _bolts.get(id);
        Map<String, CoordSpec> specs = new HashMap();
        for (GlobalStreamId s : getBoltSubscriptionStreams(id)) {
            String batch = batchIdsForBolts.get(s);
            if (!specs.containsKey(batch))
                specs.put(batch, new CoordSpec());
            CoordSpec spec = specs.get(batch);
            CoordType ct;
            if (_batchPerTupleSpouts.containsKey(s.get_componentId())) {
                ct = CoordType.single();
            } else {
                ct = CoordType.all();
            }
            spec.coords.put(s.get_componentId(), ct);
        }
        for (String b : c.committerBatches) {
            specs.get(b).commitStream = new GlobalStreamId(masterCoordinator(b), MasterBatchCoordinator.COMMIT_STREAM_ID);
        }
        BoltDeclarer d = builder.setBolt(id, new TridentBoltExecutor(c.bolt, batchIdsForBolts, specs), c.parallelism);
        for (Map conf : c.componentConfs) {
            d.addConfigurations(conf);
        }
        for (InputDeclaration inputDecl : c.declarations) {
            inputDecl.declare(d);
        }
        Map<String, Set<String>> batchToComponents = getBoltBatchToComponentSubscriptions(id);
        for (String b : batchToComponents.keySet()) {
            for (String comp : batchToComponents.get(b)) {
                d.directGrouping(comp, TridentBoltExecutor.COORD_STREAM(b));
            }
        }
        for (String b : c.committerBatches) {
            d.allGrouping(masterCoordinator(b), MasterBatchCoordinator.COMMIT_STREAM_ID);
        }
    }
    return builder.createTopology();
}
Also used: HashSet(java.util.HashSet) Set(java.util.Set) TopologyBuilder(backtype.storm.topology.TopologyBuilder) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) TridentSpoutExecutor(storm.trident.spout.TridentSpoutExecutor) CoordSpec(storm.trident.topology.TridentBoltExecutor.CoordSpec) ICommitterTridentSpout(storm.trident.spout.ICommitterTridentSpout) TridentSpoutCoordinator(storm.trident.spout.TridentSpoutCoordinator) RichSpoutBatchTriggerer(storm.trident.spout.RichSpoutBatchTriggerer) IRichSpout(backtype.storm.topology.IRichSpout) BoltDeclarer(backtype.storm.topology.BoltDeclarer) GlobalStreamId(backtype.storm.generated.GlobalStreamId) Map(java.util.Map) SpoutDeclarer(backtype.storm.topology.SpoutDeclarer) ITridentSpout(storm.trident.spout.ITridentSpout) CoordType(storm.trident.topology.TridentBoltExecutor.CoordType)
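
The pattern worth noticing above: every setBolt call returns a BoltDeclarer, on which input groupings are chained fluently and per-component configuration maps are layered on afterwards. A minimal, self-contained sketch of that pattern outside Trident (a hedged example using the TestWordSpout and TestWordCounter helpers from backtype.storm.testing; the component ids and the tick frequency are made up for illustration):

import java.util.HashMap;
import java.util.Map;

import backtype.storm.Config;
import backtype.storm.generated.StormTopology;
import backtype.storm.testing.TestWordCounter;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.BoltDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class BoltDeclarerSketch {
    public static StormTopology build() {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("words", new TestWordSpout(), 2);
        // setBolt returns a BoltDeclarer; groupings are chained on it, just as
        // TridentTopologyBuilder chains globalGrouping/allGrouping on the
        // coordinator's control streams above.
        BoltDeclarer bd = builder.setBolt("counter", new TestWordCounter(), 4)
                .fieldsGrouping("words", new Fields("word"));
        // Per-component configuration is applied afterwards, mirroring the
        // "for (Map m : c.componentConfs) bd.addConfigurations(m);" loops.
        Map<String, Object> componentConf = new HashMap<String, Object>();
        componentConf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 60);
        bd.addConfigurations(componentConf);
        return builder.createTopology();
    }
}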

Example 2 with BoltDeclarer

Use of backtype.storm.topology.BoltDeclarer in project storm by nathanmarz.

From the class TridentTopology, method build:

public StormTopology build() {
    DefaultDirectedGraph<Node, IndexedEdge> graph = (DefaultDirectedGraph) _graph.clone();
    completeDRPC(graph, _colocate, _gen);
    List<SpoutNode> spoutNodes = new ArrayList<SpoutNode>();
    // can be regular nodes (static state) or processor nodes
    Set<Node> boltNodes = new HashSet<Node>();
    for (Node n : graph.vertexSet()) {
        if (n instanceof SpoutNode) {
            spoutNodes.add((SpoutNode) n);
        } else if (!(n instanceof PartitionNode)) {
            boltNodes.add(n);
        }
    }
    Set<Group> initialGroups = new HashSet<Group>();
    for (List<Node> colocate : _colocate.values()) {
        Group g = new Group(graph, colocate);
        boltNodes.removeAll(colocate);
        initialGroups.add(g);
    }
    for (Node n : boltNodes) {
        initialGroups.add(new Group(graph, n));
    }
    GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
    grouper.mergeFully();
    Collection<Group> mergedGroups = grouper.getAllGroups();
    // add identity partitions between groups
    for (IndexedEdge<Node> e : new HashSet<IndexedEdge>(graph.edgeSet())) {
        if (!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
            Group g1 = grouper.nodeGroup(e.source);
            Group g2 = grouper.nodeGroup(e.target);
            // g1 being null means the source is a spout node
            if (g1 == null && !(e.source instanceof SpoutNode))
                throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
            if (g1 == null || !g1.equals(g2)) {
                graph.removeEdge(e);
                PartitionNode pNode = makeIdentityPartition(e.source);
                graph.addVertex(pNode);
                graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
                graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
            }
        }
    }
    // if one group subscribes to the same stream with same partitioning multiple times,
    // merge those together (otherwise can end up with many output streams created for that partitioning
    // if need to split into multiple output streams because of same input having different
    // partitioning to the group)
    // this is because can't currently merge splitting logic into a spout
    // not the most kosher algorithm here, since the grouper indexes are being trounced via the adding of nodes to random groups, but it 
    // works out
    List<Node> forNewGroups = new ArrayList<Node>();
    for (Group g : mergedGroups) {
        for (PartitionNode n : extraPartitionInputs(g)) {
            Node idNode = makeIdentityNode(n.allOutputFields);
            Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
            Node parentNode = TridentUtils.getParent(graph, n);
            Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
            graph.removeVertex(n);
            graph.addVertex(idNode);
            graph.addVertex(newPartitionNode);
            addEdge(graph, parentNode, idNode, 0);
            addEdge(graph, idNode, newPartitionNode, 0);
            for (IndexedEdge e : outgoing) {
                addEdge(graph, newPartitionNode, e.target, e.index);
            }
            Group parentGroup = grouper.nodeGroup(parentNode);
            if (parentGroup == null) {
                forNewGroups.add(idNode);
            } else {
                parentGroup.nodes.add(idNode);
            }
        }
    }
    for (Node n : forNewGroups) {
        grouper.addGroup(new Group(graph, n));
    }
    // add in spouts as groups so we can get parallelisms
    for (Node n : spoutNodes) {
        grouper.addGroup(new Group(graph, n));
    }
    grouper.reindex();
    mergedGroups = grouper.getAllGroups();
    Map<Node, String> batchGroupMap = new HashMap();
    List<Set<Node>> connectedComponents = new ConnectivityInspector<Node, IndexedEdge>(graph).connectedSets();
    for (int i = 0; i < connectedComponents.size(); i++) {
        String groupId = "bg" + i;
        for (Node n : connectedComponents.get(i)) {
            batchGroupMap.put(n, groupId);
        }
    }
    //        System.out.println("GRAPH:");
    //        System.out.println(graph);
    Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
    TridentTopologyBuilder builder = new TridentTopologyBuilder();
    Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
    Map<Group, String> boltIds = genBoltIds(mergedGroups);
    for (SpoutNode sn : spoutNodes) {
        Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
        if (sn.type == SpoutNode.SpoutType.DRPC) {
            builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId, (IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn));
        } else {
            ITridentSpout s;
            if (sn.spout instanceof IBatchSpout) {
                s = new BatchSpoutExecutor((IBatchSpout) sn.spout);
            } else if (sn.spout instanceof ITridentSpout) {
                s = (ITridentSpout) sn.spout;
            } else {
                throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
            // TODO: handle regular rich spout without batches (need lots of updates to support this throughout)
            }
            builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism, batchGroupMap.get(sn));
        }
    }
    for (Group g : mergedGroups) {
        if (!isSpoutGroup(g)) {
            Integer p = parallelisms.get(g);
            Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
            BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p, committerBatches(g, batchGroupMap), streamToGroup);
            Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
            for (PartitionNode n : inputs) {
                Node parent = TridentUtils.getParent(graph, n);
                String componentId;
                if (parent instanceof SpoutNode) {
                    componentId = spoutIds.get(parent);
                } else {
                    componentId = boltIds.get(grouper.nodeGroup(parent));
                }
                d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping);
            }
        }
    }
    return builder.buildTopology();
}
Also used: Group(storm.trident.graph.Group) Set(java.util.Set) HashSet(java.util.HashSet) IBatchSpout(storm.trident.spout.IBatchSpout) DefaultDirectedGraph(org.jgrapht.graph.DefaultDirectedGraph) HashMap(java.util.HashMap) SpoutNode(storm.trident.planner.SpoutNode) ProcessorNode(storm.trident.planner.ProcessorNode) PartitionNode(storm.trident.planner.PartitionNode) Node(storm.trident.planner.Node) ArrayList(java.util.ArrayList) GraphGrouper(storm.trident.graph.GraphGrouper) IndexedEdge(storm.trident.util.IndexedEdge) BatchSpoutExecutor(storm.trident.spout.BatchSpoutExecutor) TridentTopologyBuilder(storm.trident.topology.TridentTopologyBuilder) BoltDeclarer(backtype.storm.topology.BoltDeclarer) GlobalStreamId(backtype.storm.generated.GlobalStreamId) SubtopologyBolt(storm.trident.planner.SubtopologyBolt) ITridentSpout(storm.trident.spout.ITridentSpout)
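
The BoltDeclarer call to note here is d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping), which attaches an input using a raw thrift Grouping value rather than one of the convenience methods. A minimal sketch of that call in isolation (hedged; it assumes the TestWordSpout and TestWordCounter helpers from backtype.storm.testing, and Grouping.shuffle(...) merely stands in for the planner's recorded n.thriftGrouping):

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.generated.NullStruct;
import backtype.storm.testing.TestWordCounter;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.BoltDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;

public class ThriftGroupingSketch {
    public static void main(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("words", new TestWordSpout(), 1);
        BoltDeclarer d = builder.setBolt("counter", new TestWordCounter(), 2);
        // Pair a GlobalStreamId (component + stream) with a thrift Grouping,
        // the same shape as the call inside TridentTopology.build().
        d.grouping(new GlobalStreamId("words", Utils.DEFAULT_STREAM_ID),
                Grouping.shuffle(new NullStruct()));
        builder.createTopology();
    }
}

The equivalent convenience call would be d.shuffleGrouping("words"); the planner uses the raw form because the grouping was already captured as a thrift value on the PartitionNode.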

Example 3 with BoltDeclarer

Use of backtype.storm.topology.BoltDeclarer in project jstorm by alibaba.

From the class TransactionalTopologyBuilder, method buildTopologyBuilder:

public TopologyBuilder buildTopologyBuilder() {
    // Transaction is not compatible with jstorm batch mode(task.batch.tuple)
    // so we close batch mode via system property
    System.setProperty(ConfigExtension.TASK_BATCH_TUPLE, "false");
    String coordinator = _spoutId + "/coordinator";
    TopologyBuilder builder = new TopologyBuilder();
    SpoutDeclarer declarer = builder.setSpout(coordinator, new TransactionalSpoutCoordinator(_spout));
    for (Map conf : _spoutConfs) {
        declarer.addConfigurations(conf);
    }
    declarer.addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);
    BoltDeclarer emitterDeclarer = builder.setBolt(_spoutId, new CoordinatedBolt(new TransactionalSpoutBatchExecutor(_spout), null, null), _spoutParallelism).allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_BATCH_STREAM_ID).addConfiguration(Config.TOPOLOGY_TRANSACTIONAL_ID, _id);
    if (_spout instanceof ICommitterTransactionalSpout) {
        emitterDeclarer.allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
    }
    for (String id : _bolts.keySet()) {
        Component component = _bolts.get(id);
        Map<String, SourceArgs> coordinatedArgs = new HashMap<String, SourceArgs>();
        for (String c : componentBoltSubscriptions(component)) {
            coordinatedArgs.put(c, SourceArgs.all());
        }
        IdStreamSpec idSpec = null;
        if (component.committer) {
            idSpec = IdStreamSpec.makeDetectSpec(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
        }
        BoltDeclarer input = builder.setBolt(id, new CoordinatedBolt(component.bolt, coordinatedArgs, idSpec), component.parallelism);
        for (Map conf : component.componentConfs) {
            input.addConfigurations(conf);
        }
        for (String c : componentBoltSubscriptions(component)) {
            input.directGrouping(c, Constants.COORDINATED_STREAM_ID);
        }
        for (InputDeclaration d : component.declarations) {
            d.declare(input);
        }
        if (component.committer) {
            input.allGrouping(coordinator, TransactionalSpoutCoordinator.TRANSACTION_COMMIT_STREAM_ID);
        }
    }
    return builder;
}
Also used: TopologyBuilder(backtype.storm.topology.TopologyBuilder) HashMap(java.util.HashMap) IdStreamSpec(backtype.storm.coordination.CoordinatedBolt.IdStreamSpec) SourceArgs(backtype.storm.coordination.CoordinatedBolt.SourceArgs) BoltDeclarer(backtype.storm.topology.BoltDeclarer) SpoutDeclarer(backtype.storm.topology.SpoutDeclarer) Map(java.util.Map) CoordinatedBolt(backtype.storm.coordination.CoordinatedBolt)
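
On the calling side, this builder is driven like Storm's stock transactional API: construct it with a transactional spout, register batch bolts against it, and let it generate the coordinator and CoordinatedBolt wiring shown above. A hedged sketch modeled on the storm-starter transactional examples (the data set, component ids, and the BatchCount bolt are made up for illustration; MemoryTransactionalSpout is the in-memory test spout shipped with storm-core):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import backtype.storm.coordination.BatchOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.MemoryTransactionalSpout;
import backtype.storm.topology.BoltDeclarer;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseTransactionalBolt;
import backtype.storm.transactional.TransactionAttempt;
import backtype.storm.transactional.TransactionalTopologyBuilder;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;

public class TransactionalSketch {

    // Hypothetical per-batch counting bolt, in the style of storm-starter.
    public static class BatchCount extends BaseTransactionalBolt {
        private BatchOutputCollector collector;
        private TransactionAttempt attempt;
        private int count;

        @Override
        public void prepare(Map conf, TopologyContext context,
                BatchOutputCollector collector, TransactionAttempt attempt) {
            this.collector = collector;
            this.attempt = attempt;
            this.count = 0;
        }

        @Override
        public void execute(Tuple tuple) {
            count++;
        }

        @Override
        public void finishBatch() {
            collector.emit(new Values(attempt, count));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("tx", "count"));
        }
    }

    public static void main(String[] args) {
        // Tiny in-memory partition for the test spout: one partition, two tuples.
        Map<Integer, List<List<Object>>> data = new HashMap<Integer, List<List<Object>>>();
        List<List<Object>> partition = new ArrayList<List<Object>>();
        partition.add(new Values("cat"));
        partition.add(new Values("dog"));
        data.put(0, partition);

        MemoryTransactionalSpout spout =
                new MemoryTransactionalSpout(data, new Fields("word"), 2);
        TransactionalTopologyBuilder builder =
                new TransactionalTopologyBuilder("count-tx", "spout", spout, 1);
        // setBolt returns a BoltDeclarer; the builder adds the coordinator
        // groupings itself when the topology is generated.
        BoltDeclarer d = builder.setBolt("partial-count", new BatchCount(), 2);
        d.shuffleGrouping("spout");
        // Produce the StormTopology; the jstorm variant above additionally
        // exposes buildTopologyBuilder() for the same wiring.
        builder.buildTopology();
    }
}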

Example 4 with BoltDeclarer

Use of backtype.storm.topology.BoltDeclarer in project jstorm by alibaba.

From the class BatchTopologyBuilder, method setSpout:

public BoltDeclarer setSpout(String id, IBatchSpout spout, int paralel) {
    BoltDeclarer boltDeclarer = this.setBolt(id, (IBatchSpout) spout, paralel);
    boltDeclarer.allGrouping(BatchDef.SPOUT_TRIGGER, BatchDef.COMPUTING_STREAM_ID);
    return boltDeclarer;
}
Also used: BoltDeclarer(backtype.storm.topology.BoltDeclarer)

Example 5 with BoltDeclarer

Use of backtype.storm.topology.BoltDeclarer in project jstorm by alibaba.

From the class PerformanceTestTopology, method SetRemoteTopology:

public static void SetRemoteTopology() throws AlreadyAliveException, InvalidTopologyException, TopologyAssignException {
    String streamName = (String) conf.get(Config.TOPOLOGY_NAME);
    if (streamName == null) {
        String[] className = Thread.currentThread().getStackTrace()[1].getClassName().split("\\.");
        streamName = className[className.length - 1];
    }
    TopologyBuilder builder = new TopologyBuilder();
    int spout_Parallelism_hint = JStormUtils.parseInt(conf.get(TOPOLOGY_SPOUT_PARALLELISM_HINT), 1);
    int bolt_Parallelism_hint = JStormUtils.parseInt(conf.get(TOPOLOGY_BOLT_PARALLELISM_HINT), 2);
    builder.setSpout("spout", new TestSpout(), spout_Parallelism_hint);
    BoltDeclarer boltDeclarer = builder.setBolt("bolt", new TestBolt(), bolt_Parallelism_hint);
    // localFirstGrouping is only for jstorm
    // boltDeclarer.localFirstGrouping(SequenceTopologyDef.SEQUENCE_SPOUT_NAME);
    boltDeclarer.shuffleGrouping("spout");
    // .addConfiguration(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 60);
    conf.put(Config.STORM_CLUSTER_MODE, "distributed");
    StormSubmitter.submitTopology(streamName, conf, builder.createTopology());
}
Also used: TopologyBuilder(backtype.storm.topology.TopologyBuilder) BoltDeclarer(backtype.storm.topology.BoltDeclarer)
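
For quick experiments the same wiring can be run in process with backtype.storm.LocalCluster instead of StormSubmitter. A hedged sketch of a hypothetical companion method (it assumes it lives in PerformanceTestTopology next to SetRemoteTopology, so conf, TestSpout, and TestBolt are in scope; the topology name and the one-minute run time are arbitrary):

public static void SetLocalTopology() throws Exception {
    TopologyBuilder builder = new TopologyBuilder();
    builder.setSpout("spout", new TestSpout(), 1);
    BoltDeclarer boltDeclarer = builder.setBolt("bolt", new TestBolt(), 2);
    boltDeclarer.shuffleGrouping("spout");
    // Run in-process instead of submitting to a remote cluster.
    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("performance-test-local", conf, builder.createTopology());
    // Let the topology run for a while before tearing the local cluster down.
    Thread.sleep(60 * 1000);
    cluster.killTopology("performance-test-local");
    cluster.shutdown();
}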

Aggregations

BoltDeclarer (backtype.storm.topology.BoltDeclarer): 16
HashMap (java.util.HashMap): 9
Map (java.util.Map): 8
SourceArgs (backtype.storm.coordination.CoordinatedBolt.SourceArgs): 6
TopologyBuilder (backtype.storm.topology.TopologyBuilder): 6
CoordinatedBolt (backtype.storm.coordination.CoordinatedBolt): 4
IdStreamSpec (backtype.storm.coordination.CoordinatedBolt.IdStreamSpec): 4
GlobalStreamId (backtype.storm.generated.GlobalStreamId): 3
IRichBolt (backtype.storm.topology.IRichBolt): 3
SpoutDeclarer (backtype.storm.topology.SpoutDeclarer): 3
Fields (backtype.storm.tuple.Fields): 3
ArrayList (java.util.ArrayList): 3
HashSet (java.util.HashSet): 3
Set (java.util.Set): 3
ITridentSpout (storm.trident.spout.ITridentSpout): 3
FinishedCallback (backtype.storm.coordination.CoordinatedBolt.FinishedCallback): 2
StreamInfo (backtype.storm.generated.StreamInfo): 2
OutputFieldsGetter (backtype.storm.topology.OutputFieldsGetter): 2
BatchTopologyBuilder (com.alibaba.jstorm.batch.BatchTopologyBuilder): 2
DefaultDirectedGraph (org.jgrapht.graph.DefaultDirectedGraph): 2