Search in sources :

Example 11 with Node

use of storm.trident.planner.Node in project storm by nathanmarz.

the class TridentTopology method checkValidJoins.

/**
 * Verifies that the given nodes do not contain both a {@code BATCH} spout and a {@code DRPC} spout.
 *
 * <p>Only {@link SpoutNode} instances are inspected; every other node is ignored.
 *
 * @param g the nodes to inspect
 * @throws RuntimeException if at least one BATCH spout node and at least one DRPC spout node are present
 */
private static void checkValidJoins(Collection<Node> g) {
    boolean sawBatch = false;
    boolean sawDrpc = false;
    for (Node candidate : g) {
        if (!(candidate instanceof SpoutNode)) {
            continue;
        }
        SpoutNode.SpoutType kind = ((SpoutNode) candidate).type;
        // Plain reference comparisons so that an unset (null) type simply matches neither flag.
        sawBatch = sawBatch || kind == SpoutNode.SpoutType.BATCH;
        sawDrpc = sawDrpc || kind == SpoutNode.SpoutType.DRPC;
    }
    if (!sawBatch || !sawDrpc) {
        return;
    }
    throw new RuntimeException("Cannot join DRPC stream with streams originating from other spouts");
}
Also used : SpoutNode(storm.trident.planner.SpoutNode) SpoutNode(storm.trident.planner.SpoutNode) ProcessorNode(storm.trident.planner.ProcessorNode) PartitionNode(storm.trident.planner.PartitionNode) Node(storm.trident.planner.Node)

Example 12 with Node

use of storm.trident.planner.Node in project storm by nathanmarz.

the class TridentTopology method newDRPCStream.

/**
 * Registers the given DRPC spout as a new spout node and returns its stream projected to {@code "args"}.
 *
 * @param spout the DRPC spout that feeds the new stream
 * @return the stream obtained from adding the spout node, projected down to the {@code "args"} field
 */
private Stream newDRPCStream(DRPCSpout spout) {
    // TODO: consider adding a shuffle grouping after the spout to avoid so much routing of the
    // args/return-info all over the place (at least until it's possible to just pack bolt logic
    // into the spout itself)
    Node drpcSpoutNode = new SpoutNode(
            getUniqueStreamId(),
            TridentUtils.getSingleOutputStreamFields(spout),
            null,
            spout,
            SpoutNode.SpoutType.DRPC);
    // later on, this will be joined back with return-info and all the results
    return addNode(drpcSpoutNode).project(new Fields("args"));
}
Also used : Fields(backtype.storm.tuple.Fields) SpoutNode(storm.trident.planner.SpoutNode) SpoutNode(storm.trident.planner.SpoutNode) ProcessorNode(storm.trident.planner.ProcessorNode) PartitionNode(storm.trident.planner.PartitionNode) Node(storm.trident.planner.Node) GroupedStream(storm.trident.fluent.GroupedStream) IAggregatableStream(storm.trident.fluent.IAggregatableStream)

Example 13 with Node

use of storm.trident.planner.Node in project storm by nathanmarz.

the class GraphGrouper method merge.

/**
 * Replaces two groups with a single group constructed from both of them.
 *
 * <p>Both inputs are removed from {@code currGroups}, the combined group is added in their place,
 * and every node of the combined group is mapped to it in {@code groupIndex}.
 *
 * @param g1 the first group to merge
 * @param g2 the second group to merge
 */
private void merge(Group g1, Group g2) {
    final Group combined = new Group(g1, g2);

    currGroups.remove(g1);
    currGroups.remove(g2);
    currGroups.add(combined);

    // Re-point every member node at the combined group.
    for (final Node member : combined.nodes) {
        groupIndex.put(member, combined);
    }
}
Also used : Node(storm.trident.planner.Node)

Example 14 with Node

use of storm.trident.planner.Node in project jstorm by alibaba.

the class Stream method stateQuery.

/**
 * Appends a state-query processor node to this stream.
 *
 * <p>The new {@link ProcessorNode} is given the concatenation of this stream's output fields and
 * {@code functionFields}, is added to the topology's colocation list for the state's id, and is
 * then attached to the topology with this stream as its source.
 *
 * @param state the state to query; its node's state id selects the colocation list
 * @param inputFields the fields handed to the query processor; validated via {@code projectionValidation}
 * @param function the query function wrapped by the {@link StateQueryProcessor}
 * @param functionFields the fields passed to the processor node alongside the concatenated fields
 * @return the stream returned by the topology for the newly added node
 */
public Stream stateQuery(TridentState state, Fields inputFields, QueryFunction function, Fields functionFields) {
    projectionValidation(inputFields);
    final String stateId = state._node.stateInfo.id;

    final String streamId = _topology.getUniqueStreamId();
    final Fields combinedFields = TridentUtils.fieldsConcat(getOutputFields(), functionFields);
    final StateQueryProcessor processor = new StateQueryProcessor(stateId, inputFields, function);
    final Node queryNode = new ProcessorNode(streamId, _name, combinedFields, functionFields, processor);

    _topology._colocate.get(stateId).add(queryNode);
    return _topology.addSourcedNode(this, queryNode);
}
Also used : ProcessorNode(storm.trident.planner.ProcessorNode) ProcessorNode(storm.trident.planner.ProcessorNode) PartitionNode(storm.trident.planner.PartitionNode) Node(storm.trident.planner.Node) StateQueryProcessor(storm.trident.planner.processor.StateQueryProcessor)

Example 15 with Node

use of storm.trident.planner.Node in project jstorm by alibaba.

the class TridentTopology method build.

/**
 * Compiles the Trident graph into a {@link StormTopology}.
 *
 * <p>Works on a clone of {@code _graph}. The phases, in order, are:
 * <ol>
 *   <li>complete the DRPC portion of the graph via {@code completeDRPC};</li>
 *   <li>split vertices into spout nodes and bolt nodes (partition nodes are in neither set);</li>
 *   <li>build initial groups (one per colocation list, one per remaining bolt node) and merge them;</li>
 *   <li>insert an identity partition on every non-partition edge whose endpoints are not in the same group;</li>
 *   <li>rewrite the extra partition inputs of each group through a fresh identity node;</li>
 *   <li>assign a batch group id to every connected component of the graph;</li>
 *   <li>register each spout and each non-spout group with a {@link TridentTopologyBuilder}.</li>
 * </ol>
 *
 * <p>Side effect: sets the system property {@code ConfigExtension.TASK_BATCH_TUPLE} to {@code "false"}.
 *
 * @return the topology produced by {@code TridentTopologyBuilder.buildTopology()}
 * @throws RuntimeException if an edge source has no group and is not a {@link SpoutNode}, or if a
 *         non-DRPC spout is neither an {@link IBatchSpout} nor an {@link ITridentSpout}
 */
public StormTopology build() {
    // Transaction is not compatible with jstorm batch mode(task.batch.tuple)
    // so we close batch mode via system property
    System.setProperty(ConfigExtension.TASK_BATCH_TUPLE, "false");
    // All planning below mutates this clone; _graph itself is not modified by this method's own statements.
    DefaultDirectedGraph<Node, IndexedEdge> graph = (DefaultDirectedGraph) _graph.clone();
    completeDRPC(graph, _colocate, _gen);
    List<SpoutNode> spoutNodes = new ArrayList<>();
    // can be regular nodes (static state) or processor nodes
    Set<Node> boltNodes = new LinkedHashSet<>();
    // Classify vertices: spouts, bolts; PartitionNodes are deliberately left out of both collections.
    for (Node n : graph.vertexSet()) {
        if (n instanceof SpoutNode) {
            spoutNodes.add((SpoutNode) n);
        } else if (!(n instanceof PartitionNode)) {
            boltNodes.add(n);
        }
    }
    Set<Group> initialGroups = new LinkedHashSet<>();
    // Each colocation list becomes one group; its members are dropped from boltNodes so they do
    // not also get a group of their own in the next loop.
    for (List<Node> colocate : _colocate.values()) {
        Group g = new Group(graph, colocate);
        boltNodes.removeAll(colocate);
        initialGroups.add(g);
    }
    // Every remaining bolt node starts out in its own group.
    for (Node n : boltNodes) {
        initialGroups.add(new Group(graph, n));
    }
    GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
    grouper.mergeFully();
    Collection<Group> mergedGroups = grouper.getAllGroups();
    // add identity partitions between groups
    // Iterates over a copy of the edge set because the loop body removes and adds edges on the graph.
    for (IndexedEdge<Node> e : new HashSet<>(graph.edgeSet())) {
        if (!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
            Group g1 = grouper.nodeGroup(e.source);
            Group g2 = grouper.nodeGroup(e.target);
            // g1 being null means the source is a spout node
            if (g1 == null && !(e.source instanceof SpoutNode))
                throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
            if (g1 == null || !g1.equals(g2)) {
                // Replace source -> target with source -> pNode -> target. The first hop uses
                // index 0; the second hop keeps the original edge's index.
                graph.removeEdge(e);
                PartitionNode pNode = makeIdentityPartition(e.source);
                graph.addVertex(pNode);
                graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
                graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
            }
        }
    }
    // if one group subscribes to the same stream with same partitioning multiple times,
    // merge those together (otherwise can end up with many output streams created for that partitioning
    // if need to split into multiple output streams because of same input having different
    // partitioning to the group)
    // this is because can't currently merge splitting logic into a spout
    // not the most kosher algorithm here, since the grouper indexes are being trounced via the adding of nodes to random groups, but it
    // works out
    // Identity nodes whose parent has no group yet; they get their own groups after the loop.
    List<Node> forNewGroups = new ArrayList<>();
    for (Group g : mergedGroups) {
        for (PartitionNode n : extraPartitionInputs(g)) {
            // Replace partition node n with: parent -> idNode -> newPartitionNode -> (n's former targets).
            // newPartitionNode takes its stream id and fields from idNode, and its name and grouping from n.
            Node idNode = makeIdentityNode(n.allOutputFields);
            Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
            Node parentNode = TridentUtils.getParent(graph, n);
            // NOTE(review): `outgoing` is fetched before removeVertex(n) and iterated after it. If
            // outgoingEdgesOf returns a live view rather than a snapshot, it may already be empty
            // by the time it is iterated - confirm against the JGraphT version in use.
            Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
            graph.removeVertex(n);
            graph.addVertex(idNode);
            graph.addVertex(newPartitionNode);
            addEdge(graph, parentNode, idNode, 0);
            addEdge(graph, idNode, newPartitionNode, 0);
            for (IndexedEdge e : outgoing) {
                addEdge(graph, newPartitionNode, e.target, e.index);
            }
            // The identity node joins its parent's group when there is one.
            Group parentGroup = grouper.nodeGroup(parentNode);
            if (parentGroup == null) {
                forNewGroups.add(idNode);
            } else {
                parentGroup.nodes.add(idNode);
            }
        }
    }
    for (Node n : forNewGroups) {
        grouper.addGroup(new Group(graph, n));
    }
    // add in spouts as groups so we can get parallelisms
    for (Node n : spoutNodes) {
        grouper.addGroup(new Group(graph, n));
    }
    // Groups were added and group node sets were modified directly above, so the grouper is
    // re-indexed before its groups are read again.
    grouper.reindex();
    mergedGroups = grouper.getAllGroups();
    // Every node in the i-th connected component of the graph is assigned batch group id "bg" + i.
    Map<Node, String> batchGroupMap = new HashMap<>();
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();
    for (int i = 0; i < connectedComponents.size(); i++) {
        String groupId = "bg" + i;
        for (Node n : connectedComponents.get(i)) {
            batchGroupMap.put(n, groupId);
        }
    }
    //        System.out.println("GRAPH:");
    //        System.out.println(graph);
    Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
    TridentTopologyBuilder builder = new TridentTopologyBuilder();
    Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
    Map<Group, String> boltIds = genBoltIds(mergedGroups);
    // Default config, passed to mergeDefaultResources together with each component's own resources.
    Map defaults = Utils.readDefaultConfig();
    // Register every spout with the builder, together with its parallelism, batch group and resource settings.
    for (SpoutNode sn : spoutNodes) {
        Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
        Map<String, Number> spoutRes = null;
        spoutRes = mergeDefaultResources(sn.getResources(), defaults);
        Number onHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
        Number offHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
        Number cpuLoad = spoutRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
        if (sn.type == SpoutNode.SpoutType.DRPC) {
            // DRPC spouts are cast to IRichSpout and registered as batch-per-tuple spouts.
            builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId, (IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn)).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
        } else {
            // Non-DRPC spouts must be an IBatchSpout (wrapped in a BatchSpoutExecutor) or an ITridentSpout.
            ITridentSpout s;
            if (sn.spout instanceof IBatchSpout) {
                s = new BatchSpoutExecutor((IBatchSpout) sn.spout);
            } else if (sn.spout instanceof ITridentSpout) {
                s = (ITridentSpout) sn.spout;
            } else {
                throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
            // TODO: handle regular rich spout without batches (need lots of updates to support this throughout)
            }
            builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism, batchGroupMap.get(sn)).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
        }
    }
    // Register every non-spout group as one SubtopologyBolt and wire up its input subscriptions.
    for (Group g : mergedGroups) {
        if (!isSpoutGroup(g)) {
            Integer p = parallelisms.get(g);
            Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
            Map<String, Number> groupRes = mergeDefaultResources(g.getResources(), defaults);
            Number onHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
            Number offHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
            Number cpuLoad = groupRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
            BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p, committerBatches(g, batchGroupMap), streamToGroup).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
            Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
            for (PartitionNode n : inputs) {
                // Subscribe to the partition's stream on the component that owns the partition's
                // parent: the spout's id if the parent is a spout, otherwise the id of the
                // parent's group's bolt.
                Node parent = TridentUtils.getParent(graph, n);
                String componentId = parent instanceof SpoutNode ? spoutIds.get(parent) : boltIds.get(grouper.nodeGroup(parent));
                d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping);
            }
        }
    }
    return builder.buildTopology();
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Group(storm.trident.graph.Group) Set(java.util.Set) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) IBatchSpout(storm.trident.spout.IBatchSpout) DefaultDirectedGraph(org.jgrapht.graph.DefaultDirectedGraph) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) SpoutNode(storm.trident.planner.SpoutNode) ProcessorNode(storm.trident.planner.ProcessorNode) PartitionNode(storm.trident.planner.PartitionNode) Node(storm.trident.planner.Node) ArrayList(java.util.ArrayList) GraphGrouper(storm.trident.graph.GraphGrouper) IndexedEdge(storm.trident.util.IndexedEdge) BatchSpoutExecutor(storm.trident.spout.BatchSpoutExecutor) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) TridentTopologyBuilder(storm.trident.topology.TridentTopologyBuilder) SpoutNode(storm.trident.planner.SpoutNode) PartitionNode(storm.trident.planner.PartitionNode) BoltDeclarer(backtype.storm.topology.BoltDeclarer) GlobalStreamId(backtype.storm.generated.GlobalStreamId) SubtopologyBolt(storm.trident.planner.SubtopologyBolt) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) TreeMap(java.util.TreeMap) ITridentSpout(storm.trident.spout.ITridentSpout)

Aggregations

Node (storm.trident.planner.Node): 25, PartitionNode (storm.trident.planner.PartitionNode): 22, ProcessorNode (storm.trident.planner.ProcessorNode): 22, SpoutNode (storm.trident.planner.SpoutNode): 20, HashMap (java.util.HashMap): 7, Fields (backtype.storm.tuple.Fields): 6, ArrayList (java.util.ArrayList): 6, HashSet (java.util.HashSet): 6, Set (java.util.Set): 6, GroupedStream (storm.trident.fluent.GroupedStream): 6, IAggregatableStream (storm.trident.fluent.IAggregatableStream): 6, Group (storm.trident.graph.Group): 6, LinkedHashMap (java.util.LinkedHashMap): 3, LinkedHashSet (java.util.LinkedHashSet): 3, TreeMap (java.util.TreeMap): 3, IndexedEdge (storm.trident.util.IndexedEdge): 3, GlobalStreamId (backtype.storm.generated.GlobalStreamId): 2, BoltDeclarer (backtype.storm.topology.BoltDeclarer): 2, Map (java.util.Map): 2, DefaultDirectedGraph (org.jgrapht.graph.DefaultDirectedGraph): 2