
Example 16 with GlobalStreamId

Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba.

The class MkGrouper, method parseGroupType.

private GrouperType parseGroupType(WorkerData workerData) {
    GrouperType grouperType = null;
    if (Grouping._Fields.FIELDS.equals(fields)) {
        if (Thrift.isGlobalGrouping(thrift_grouping)) {
            // global grouping, just send tuple to first task
            grouperType = GrouperType.global;
        } else {
            List<String> fields_group = Thrift.fieldGrouping(thrift_grouping);
            Fields fields = new Fields(fields_group);
            fields_grouper = new MkFieldsGrouper(out_fields, fields, out_tasks);
            // hashcode by fields
            grouperType = GrouperType.fields;
        }
    } else if (Grouping._Fields.ALL.equals(fields)) {
        // send to every task
        grouperType = GrouperType.all;
    } else if (Grouping._Fields.SHUFFLE.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    } else if (Grouping._Fields.NONE.equals(fields)) {
        // randomly send to one task
        this.random = new Random();
        grouperType = GrouperType.none;
    } else if (Grouping._Fields.CUSTOM_OBJECT.equals(fields)) {
        // user custom grouping by JavaObject
        JavaObject jobj = thrift_grouping.get_custom_object();
        CustomStreamGrouping g = Thrift.instantiateJavaObject(jobj);
        int myTaskId = topology_context.getThisTaskId();
        String componentId = topology_context.getComponentId(myTaskId);
        GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
        custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
        grouperType = GrouperType.custom_obj;
    } else if (Grouping._Fields.CUSTOM_SERIALIZED.equals(fields)) {
        // user custom grouping from a serialized object
        byte[] obj = thrift_grouping.get_custom_serialized();
        CustomStreamGrouping g = (CustomStreamGrouping) Utils.javaDeserialize(obj);
        int myTaskId = topology_context.getThisTaskId();
        String componentId = topology_context.getComponentId(myTaskId);
        GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
        custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
        grouperType = GrouperType.custom_serialized;
    } else if (Grouping._Fields.DIRECT.equals(fields)) {
        // directly send to a specific task
        grouperType = GrouperType.direct;
    } else if (Grouping._Fields.LOCAL_OR_SHUFFLE.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    } else if (Grouping._Fields.LOCAL_FIRST.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    }
    return grouperType;
}
Also used: CustomStreamGrouping (backtype.storm.grouping.CustomStreamGrouping), Fields (backtype.storm.tuple.Fields), Random (java.util.Random), JavaObject (backtype.storm.generated.JavaObject), GlobalStreamId (backtype.storm.generated.GlobalStreamId)
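
The MkCustomGrouper created above wraps a user-supplied CustomStreamGrouping and hands it the GlobalStreamId of the subscribed stream. Below is a minimal sketch of such a grouping, assuming the standard backtype.storm.grouping.CustomStreamGrouping interface that MkGrouper instantiates; RoundRobinGrouping is a hypothetical name used only for illustration.

import java.util.ArrayList;
import java.util.List;

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.task.WorkerTopologyContext;

// Hypothetical round-robin grouping, for illustration only.
public class RoundRobinGrouping implements CustomStreamGrouping {
    private List<Integer> targetTasks;
    private int index = 0;

    @Override
    public void prepare(WorkerTopologyContext context, GlobalStreamId stream, List<Integer> targetTasks) {
        // "stream" identifies the (componentId, streamId) pair this grouping subscribes to,
        // the same kind of GlobalStreamId that MkGrouper builds before creating MkCustomGrouper.
        this.targetTasks = targetTasks;
    }

    @Override
    public List<Integer> chooseTasks(int taskId, List<Object> values) {
        // Send each tuple to the next target task in turn.
        List<Integer> ret = new ArrayList<Integer>(1);
        ret.add(targetTasks.get(index));
        index = (index + 1) % targetTasks.size();
        return ret;
    }
}

Such a grouping would typically be attached with something like builder.setBolt("consumer", bolt).customGrouping("producer", new RoundRobinGrouping()), assuming the usual TopologyBuilder/InputDeclarer API.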

Example 17 with GlobalStreamId

Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba.

The class TridentTopology, method build.

public StormTopology build() {
    // Transactions are not compatible with jstorm batch mode (task.batch.tuple),
    // so we disable batch mode via a system property.
    System.setProperty(ConfigExtension.TASK_BATCH_TUPLE, "false");
    DefaultDirectedGraph<Node, IndexedEdge> graph = (DefaultDirectedGraph) _graph.clone();
    completeDRPC(graph, _colocate, _gen);
    List<SpoutNode> spoutNodes = new ArrayList<>();
    // can be regular nodes (static state) or processor nodes
    Set<Node> boltNodes = new LinkedHashSet<>();
    for (Node n : graph.vertexSet()) {
        if (n instanceof SpoutNode) {
            spoutNodes.add((SpoutNode) n);
        } else if (!(n instanceof PartitionNode)) {
            boltNodes.add(n);
        }
    }
    Set<Group> initialGroups = new LinkedHashSet<>();
    for (List<Node> colocate : _colocate.values()) {
        Group g = new Group(graph, colocate);
        boltNodes.removeAll(colocate);
        initialGroups.add(g);
    }
    for (Node n : boltNodes) {
        initialGroups.add(new Group(graph, n));
    }
    GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
    grouper.mergeFully();
    Collection<Group> mergedGroups = grouper.getAllGroups();
    // add identity partitions between groups
    for (IndexedEdge<Node> e : new HashSet<>(graph.edgeSet())) {
        if (!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
            Group g1 = grouper.nodeGroup(e.source);
            Group g2 = grouper.nodeGroup(e.target);
            // g1 being null means the source is a spout node
            if (g1 == null && !(e.source instanceof SpoutNode))
                throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
            if (g1 == null || !g1.equals(g2)) {
                graph.removeEdge(e);
                PartitionNode pNode = makeIdentityPartition(e.source);
                graph.addVertex(pNode);
                graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
                graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
            }
        }
    }
    // If one group subscribes to the same stream with the same partitioning multiple times,
    // merge those subscriptions together (otherwise many output streams can end up being created
    // for that partitioning, because the output must be split into multiple streams whenever the
    // same input reaches the group with different partitionings).
    // This is needed because splitting logic cannot currently be merged into a spout.
    // Not the most kosher algorithm here, since the grouper indexes are being trounced by adding
    // nodes to arbitrary groups, but it works out.
    List<Node> forNewGroups = new ArrayList<>();
    for (Group g : mergedGroups) {
        for (PartitionNode n : extraPartitionInputs(g)) {
            Node idNode = makeIdentityNode(n.allOutputFields);
            Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
            Node parentNode = TridentUtils.getParent(graph, n);
            Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
            graph.removeVertex(n);
            graph.addVertex(idNode);
            graph.addVertex(newPartitionNode);
            addEdge(graph, parentNode, idNode, 0);
            addEdge(graph, idNode, newPartitionNode, 0);
            for (IndexedEdge e : outgoing) {
                addEdge(graph, newPartitionNode, e.target, e.index);
            }
            Group parentGroup = grouper.nodeGroup(parentNode);
            if (parentGroup == null) {
                forNewGroups.add(idNode);
            } else {
                parentGroup.nodes.add(idNode);
            }
        }
    }
    for (Node n : forNewGroups) {
        grouper.addGroup(new Group(graph, n));
    }
    // add in spouts as groups so we can get parallelisms
    for (Node n : spoutNodes) {
        grouper.addGroup(new Group(graph, n));
    }
    grouper.reindex();
    mergedGroups = grouper.getAllGroups();
    Map<Node, String> batchGroupMap = new HashMap<>();
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();
    for (int i = 0; i < connectedComponents.size(); i++) {
        String groupId = "bg" + i;
        for (Node n : connectedComponents.get(i)) {
            batchGroupMap.put(n, groupId);
        }
    }
    //        System.out.println("GRAPH:");
    //        System.out.println(graph);
    Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
    TridentTopologyBuilder builder = new TridentTopologyBuilder();
    Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
    Map<Group, String> boltIds = genBoltIds(mergedGroups);
    Map defaults = Utils.readDefaultConfig();
    for (SpoutNode sn : spoutNodes) {
        Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
        Map<String, Number> spoutRes = mergeDefaultResources(sn.getResources(), defaults);
        Number onHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
        Number offHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
        Number cpuLoad = spoutRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
        if (sn.type == SpoutNode.SpoutType.DRPC) {
            builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId, (IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn)).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
        } else {
            ITridentSpout s;
            if (sn.spout instanceof IBatchSpout) {
                s = new BatchSpoutExecutor((IBatchSpout) sn.spout);
            } else if (sn.spout instanceof ITridentSpout) {
                s = (ITridentSpout) sn.spout;
            } else {
                throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
            // TODO: handle regular rich spout without batches (need lots of updates to support this throughout)
            }
            builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism, batchGroupMap.get(sn)).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
        }
    }
    for (Group g : mergedGroups) {
        if (!isSpoutGroup(g)) {
            Integer p = parallelisms.get(g);
            Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
            Map<String, Number> groupRes = mergeDefaultResources(g.getResources(), defaults);
            Number onHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
            Number offHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
            Number cpuLoad = groupRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
            BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p, committerBatches(g, batchGroupMap), streamToGroup).setMemoryLoad(onHeap, offHeap).setCPULoad(cpuLoad);
            Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
            for (PartitionNode n : inputs) {
                Node parent = TridentUtils.getParent(graph, n);
                String componentId = parent instanceof SpoutNode ? spoutIds.get(parent) : boltIds.get(grouper.nodeGroup(parent));
                d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping);
            }
        }
    }
    return builder.buildTopology();
}
Also used: LinkedHashSet (java.util.LinkedHashSet), Group (storm.trident.graph.Group), Set (java.util.Set), HashSet (java.util.HashSet), IBatchSpout (storm.trident.spout.IBatchSpout), DefaultDirectedGraph (org.jgrapht.graph.DefaultDirectedGraph), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), SpoutNode (storm.trident.planner.SpoutNode), ProcessorNode (storm.trident.planner.ProcessorNode), PartitionNode (storm.trident.planner.PartitionNode), Node (storm.trident.planner.Node), ArrayList (java.util.ArrayList), GraphGrouper (storm.trident.graph.GraphGrouper), IndexedEdge (storm.trident.util.IndexedEdge), BatchSpoutExecutor (storm.trident.spout.BatchSpoutExecutor), TridentTopologyBuilder (storm.trident.topology.TridentTopologyBuilder), BoltDeclarer (backtype.storm.topology.BoltDeclarer), GlobalStreamId (backtype.storm.generated.GlobalStreamId), SubtopologyBolt (storm.trident.planner.SubtopologyBolt), Map (java.util.Map), TreeMap (java.util.TreeMap), ITridentSpout (storm.trident.spout.ITridentSpout)
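
The final loop above subscribes each generated bolt to its inputs with d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping). The same low-level call can be used directly outside Trident; here is a minimal sketch assuming the usual TopologyBuilder/InputDeclarer API, with illustrative component ids.

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.generated.Grouping;
import backtype.storm.generated.NullStruct;
import backtype.storm.topology.BoltDeclarer;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;

public class ThriftGroupingWiring {
    // The spout and bolt implementations are supplied by the caller; only the wiring is shown.
    public static TopologyBuilder wire(IRichSpout producer, IRichBolt consumer) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("producer", producer, 1);
        BoltDeclarer d = builder.setBolt("consumer", consumer, 2);

        // Subscribe to a specific (component, stream) pair with a Thrift-level Grouping,
        // mirroring the d.grouping(new GlobalStreamId(...), n.thriftGrouping) call above.
        GlobalStreamId source = new GlobalStreamId("producer", Utils.DEFAULT_STREAM_ID);
        d.grouping(source, Grouping.shuffle(new NullStruct()));
        return builder;
    }
}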

Example 18 with GlobalStreamId

Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba.

The class TridentTopologyBuilder, method fleshOutStreamBatchIds.

Map<GlobalStreamId, String> fleshOutStreamBatchIds(boolean includeCommitStream) {
    Map<GlobalStreamId, String> ret = new HashMap<>(_batchIds);
    Set<String> allBatches = new HashSet(_batchIds.values());
    for (String b : allBatches) {
        ret.put(new GlobalStreamId(masterCoordinator(b), MasterBatchCoordinator.BATCH_STREAM_ID), b);
        if (includeCommitStream) {
            ret.put(new GlobalStreamId(masterCoordinator(b), MasterBatchCoordinator.COMMIT_STREAM_ID), b);
        }
    // Do NOT include the success stream as part of the batch: it should not trigger coordination
    // tuples or batch tracking; it is just a metadata tuple that assists in cleanup.
    }
    for (String id : _spouts.keySet()) {
        TransactionalSpoutComponent c = _spouts.get(id);
        if (c.batchGroupId != null) {
            ret.put(new GlobalStreamId(spoutCoordinator(id), MasterBatchCoordinator.BATCH_STREAM_ID), c.batchGroupId);
        }
    }
    // This takes care of setting up coord streams for spouts and bolts
    for (GlobalStreamId s : _batchIds.keySet()) {
        String b = _batchIds.get(s);
        ret.put(new GlobalStreamId(s.get_componentId(), TridentBoltExecutor.COORD_STREAM(b)), b);
    }
    return ret;
}
Also used: GlobalStreamId (backtype.storm.generated.GlobalStreamId)
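
fleshOutStreamBatchIds relies on GlobalStreamId being a Thrift-generated value type: two instances built from the same component and stream ids are equal and hash the same, so a freshly constructed id can look up entries in the returned map. A small sketch of that behavior; the component and stream names below are illustrative only.

import java.util.HashMap;
import java.util.Map;

import backtype.storm.generated.GlobalStreamId;

public class StreamBatchIdLookup {
    public static void main(String[] args) {
        Map<GlobalStreamId, String> batchIds = new HashMap<GlobalStreamId, String>();
        batchIds.put(new GlobalStreamId("$mastercoord-bg0", "$batch"), "bg0");

        // A new GlobalStreamId with the same ids equals the stored key,
        // so the lookup succeeds even though the instances differ.
        String batch = batchIds.get(new GlobalStreamId("$mastercoord-bg0", "$batch"));
        System.out.println(batch); // prints bg0
    }
}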

Example 19 with GlobalStreamId

Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba.

The class SingleJoinBolt, method prepare.

@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    _fieldLocations = new HashMap<String, GlobalStreamId>();
    _collector = collector;
    int timeout = ((Number) conf.get(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS)).intValue();
    _pending = new TimeCacheMap<List<Object>, Map<GlobalStreamId, Tuple>>(timeout, new ExpireCallback());
    _numSources = context.getThisSources().size();
    Set<String> idFields = null;
    for (GlobalStreamId source : context.getThisSources().keySet()) {
        Fields fields = context.getComponentOutputFields(source.get_componentId(), source.get_streamId());
        Set<String> setFields = new HashSet<String>(fields.toList());
        if (idFields == null)
            idFields = setFields;
        else
            idFields.retainAll(setFields);
        for (String outfield : _outFields) {
            for (String sourcefield : fields) {
                if (outfield.equals(sourcefield)) {
                    _fieldLocations.put(outfield, source);
                }
            }
        }
    }
    _idFields = new Fields(new ArrayList<String>(idFields));
    if (_fieldLocations.size() != _outFields.size()) {
        throw new RuntimeException("Cannot find all outfields among sources");
    }
}
Also used: ArrayList (java.util.ArrayList), Fields (backtype.storm.tuple.Fields), GlobalStreamId (backtype.storm.generated.GlobalStreamId), List (java.util.List), TimeCacheMap (backtype.storm.utils.TimeCacheMap), HashMap (java.util.HashMap), Map (java.util.Map), HashSet (java.util.HashSet)
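
prepare() derives the join key (_idFields) as the intersection of the output fields of all subscribed streams, so every input must carry the shared id field and be fields-grouped on it. Here is a wiring sketch in the spirit of the storm-starter SingleJoinExample; the spout ids, field names, and the SingleJoinBolt constructor taking the output Fields are assumptions based on that example.

import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class SingleJoinWiring {
    // genderSpout emits (id, gender); ageSpout emits (id, age). SingleJoinBolt is the class shown
    // above; its import depends on where it lives in the project.
    public static TopologyBuilder build(IRichSpout genderSpout, IRichSpout ageSpout) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("gender", genderSpout);
        builder.setSpout("age", ageSpout);
        // Both inputs are fields-grouped on "id", the one field the two streams share,
        // which is exactly the intersection prepare() stores in _idFields.
        builder.setBolt("join", new SingleJoinBolt(new Fields("gender", "age")))
               .fieldsGrouping("gender", new Fields("id"))
               .fieldsGrouping("age", new Fields("id"));
        return builder;
    }
}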

Example 20 with GlobalStreamId

Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba.

The class SingleJoinBolt, method execute.

@Override
public void execute(Tuple tuple) {
    List<Object> id = tuple.select(_idFields);
    GlobalStreamId streamId = new GlobalStreamId(tuple.getSourceComponent(), tuple.getSourceStreamId());
    if (!_pending.containsKey(id)) {
        _pending.put(id, new HashMap<GlobalStreamId, Tuple>());
    }
    Map<GlobalStreamId, Tuple> parts = _pending.get(id);
    if (parts.containsKey(streamId))
        throw new RuntimeException("Received same side of single join twice");
    parts.put(streamId, tuple);
    if (parts.size() == _numSources) {
        _pending.remove(id);
        List<Object> joinResult = new ArrayList<Object>();
        for (String outField : _outFields) {
            GlobalStreamId loc = _fieldLocations.get(outField);
            joinResult.add(parts.get(loc).getValueByField(outField));
        }
        _collector.emit(new ArrayList<Tuple>(parts.values()), joinResult);
        for (Tuple part : parts.values()) {
            _collector.ack(part);
        }
        SingleJoinTest.receiveCounter.incrementAndGet();
    }
}
Also used: GlobalStreamId (backtype.storm.generated.GlobalStreamId), ArrayList (java.util.ArrayList), Tuple (backtype.storm.tuple.Tuple)
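
Note that execute() only removes completed joins from _pending; partially joined tuples are left to age out of the TimeCacheMap, at which point the ExpireCallback registered in prepare() is invoked. In the storm-starter version of this bolt the callback fails the stranded tuples so they can be replayed; the snippet below is a sketch of that shape, assuming the same behavior here.

// Inner class of SingleJoinBolt; relies on the enclosing bolt's _collector field and the
// TimeCacheMap, GlobalStreamId, and Tuple imports shown in the examples above.
private class ExpireCallback implements TimeCacheMap.ExpiredCallback<List<Object>, Map<GlobalStreamId, Tuple>> {
    @Override
    public void expire(List<Object> id, Map<GlobalStreamId, Tuple> tuples) {
        // Fail every half-joined tuple so the spout can replay it.
        for (Tuple tuple : tuples.values()) {
            _collector.fail(tuple);
        }
    }
}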

Aggregations

GlobalStreamId (backtype.storm.generated.GlobalStreamId): 24 usages
HashMap (java.util.HashMap): 11 usages
ArrayList (java.util.ArrayList): 8 usages
Map (java.util.Map): 8 usages
HashSet (java.util.HashSet): 7 usages
Grouping (backtype.storm.generated.Grouping): 6 usages
Set (java.util.Set): 4 usages
CustomStreamGrouping (backtype.storm.grouping.CustomStreamGrouping): 3 usages
BoltDeclarer (backtype.storm.topology.BoltDeclarer): 3 usages
Fields (backtype.storm.tuple.Fields): 3 usages
List (java.util.List): 3 usages
ITridentSpout (storm.trident.spout.ITridentSpout): 3 usages
ComponentCommon (backtype.storm.generated.ComponentCommon): 2 usages
Tuple (backtype.storm.tuple.Tuple): 2 usages
DefaultDirectedGraph (org.jgrapht.graph.DefaultDirectedGraph): 2 usages
GraphGrouper (storm.trident.graph.GraphGrouper): 2 usages
Group (storm.trident.graph.Group): 2 usages
Node (storm.trident.planner.Node): 2 usages
PartitionNode (storm.trident.planner.PartitionNode): 2 usages
ProcessorNode (storm.trident.planner.ProcessorNode): 2 usages