Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba: the class MkGrouper, method parseGroupType.
private GrouperType parseGroupType(WorkerData workerData) {
    GrouperType grouperType = null;
    if (Grouping._Fields.FIELDS.equals(fields)) {
        if (Thrift.isGlobalGrouping(thrift_grouping)) {
            // global grouping: send every tuple to the first task
            grouperType = GrouperType.global;
        } else {
            List<String> fields_group = Thrift.fieldGrouping(thrift_grouping);
            Fields fields = new Fields(fields_group);
            fields_grouper = new MkFieldsGrouper(out_fields, fields, out_tasks);
            // hash by the grouping fields
            grouperType = GrouperType.fields;
        }
    } else if (Grouping._Fields.ALL.equals(fields)) {
        // send to every task
        grouperType = GrouperType.all;
    } else if (Grouping._Fields.SHUFFLE.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    } else if (Grouping._Fields.NONE.equals(fields)) {
        // send to one task chosen at random
        this.random = new Random();
        grouperType = GrouperType.none;
    } else if (Grouping._Fields.CUSTOM_OBJECT.equals(fields)) {
        // user-defined grouping supplied as a JavaObject
        JavaObject jobj = thrift_grouping.get_custom_object();
        CustomStreamGrouping g = Thrift.instantiateJavaObject(jobj);
        int myTaskId = topology_context.getThisTaskId();
        String componentId = topology_context.getComponentId(myTaskId);
        GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
        custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
        grouperType = GrouperType.custom_obj;
    } else if (Grouping._Fields.CUSTOM_SERIALIZED.equals(fields)) {
        // user-defined grouping supplied as a serialized object
        byte[] obj = thrift_grouping.get_custom_serialized();
        CustomStreamGrouping g = (CustomStreamGrouping) Utils.javaDeserialize(obj);
        int myTaskId = topology_context.getThisTaskId();
        String componentId = topology_context.getComponentId(myTaskId);
        GlobalStreamId stream = new GlobalStreamId(componentId, streamId);
        custom_grouper = new MkCustomGrouper(topology_context, g, stream, out_tasks, myTaskId);
        grouperType = GrouperType.custom_serialized;
    } else if (Grouping._Fields.DIRECT.equals(fields)) {
        // send directly to a specific task
        grouperType = GrouperType.direct;
    } else if (Grouping._Fields.LOCAL_OR_SHUFFLE.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    } else if (Grouping._Fields.LOCAL_FIRST.equals(fields)) {
        grouperType = GrouperType.shuffle;
        shuffer = new MkShuffer(topology_context.getThisComponentId(), targetComponent, workerData);
    }
    return grouperType;
}
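The two custom-grouping branches above instantiate or deserialize a user-supplied CustomStreamGrouping and wrap it in MkCustomGrouper together with the GlobalStreamId of the subscribed stream. As a minimal sketch of what such a branch could receive, here is a hash-based grouping against the backtype.storm CustomStreamGrouping interface; the class name ModHashGrouping is hypothetical and not part of jstorm.

import java.util.Arrays;
import java.util.List;

import backtype.storm.generated.GlobalStreamId;
import backtype.storm.grouping.CustomStreamGrouping;
import backtype.storm.task.WorkerTopologyContext;

// Hypothetical example class. CustomStreamGrouping already extends
// Serializable, so an instance can be shipped either as a JavaObject
// (CUSTOM_OBJECT) or as serialized bytes (CUSTOM_SERIALIZED).
public class ModHashGrouping implements CustomStreamGrouping {
    private List<Integer> targetTasks;

    @Override
    public void prepare(WorkerTopologyContext context, GlobalStreamId stream, List<Integer> targetTasks) {
        // The GlobalStreamId names the (componentId, streamId) pair this grouping subscribes to.
        this.targetTasks = targetTasks;
    }

    @Override
    public List<Integer> chooseTasks(int taskId, List<Object> values) {
        // Route each tuple to one target task by the hash of its first value.
        int idx = (values.get(0).hashCode() & Integer.MAX_VALUE) % targetTasks.size();
        return Arrays.asList(targetTasks.get(idx));
    }
}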
Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba: the class TridentTopology, method build.
public StormTopology build() {
    // Transactions are not compatible with jstorm batch mode (task.batch.tuple),
    // so disable batch mode via a system property
    System.setProperty(ConfigExtension.TASK_BATCH_TUPLE, "false");
    DefaultDirectedGraph<Node, IndexedEdge> graph = (DefaultDirectedGraph) _graph.clone();
    completeDRPC(graph, _colocate, _gen);
    List<SpoutNode> spoutNodes = new ArrayList<>();
    // can be regular nodes (static state) or processor nodes
    Set<Node> boltNodes = new LinkedHashSet<>();
    for (Node n : graph.vertexSet()) {
        if (n instanceof SpoutNode) {
            spoutNodes.add((SpoutNode) n);
        } else if (!(n instanceof PartitionNode)) {
            boltNodes.add(n);
        }
    }
    Set<Group> initialGroups = new LinkedHashSet<>();
    for (List<Node> colocate : _colocate.values()) {
        Group g = new Group(graph, colocate);
        boltNodes.removeAll(colocate);
        initialGroups.add(g);
    }
    for (Node n : boltNodes) {
        initialGroups.add(new Group(graph, n));
    }
    GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
    grouper.mergeFully();
    Collection<Group> mergedGroups = grouper.getAllGroups();
    // add identity partitions between groups
    for (IndexedEdge<Node> e : new HashSet<>(graph.edgeSet())) {
        if (!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
            Group g1 = grouper.nodeGroup(e.source);
            Group g2 = grouper.nodeGroup(e.target);
            // g1 being null means the source is a spout node
            if (g1 == null && !(e.source instanceof SpoutNode))
                throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
            if (g1 == null || !g1.equals(g2)) {
                graph.removeEdge(e);
                PartitionNode pNode = makeIdentityPartition(e.source);
                graph.addVertex(pNode);
                graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
                graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
            }
        }
    }
    // If one group subscribes to the same stream with the same partitioning multiple times,
    // merge those subscriptions together; otherwise the same input arriving with different
    // partitionings would be split into many output streams for that partitioning. This is
    // needed because splitting logic cannot currently be merged into a spout. The algorithm
    // is not the most kosher one: the grouper's indexes are trounced by adding nodes to
    // arbitrary groups, but it works out.
    List<Node> forNewGroups = new ArrayList<>();
    for (Group g : mergedGroups) {
        for (PartitionNode n : extraPartitionInputs(g)) {
            Node idNode = makeIdentityNode(n.allOutputFields);
            Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
            Node parentNode = TridentUtils.getParent(graph, n);
            Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
            graph.removeVertex(n);
            graph.addVertex(idNode);
            graph.addVertex(newPartitionNode);
            addEdge(graph, parentNode, idNode, 0);
            addEdge(graph, idNode, newPartitionNode, 0);
            for (IndexedEdge e : outgoing) {
                addEdge(graph, newPartitionNode, e.target, e.index);
            }
            Group parentGroup = grouper.nodeGroup(parentNode);
            if (parentGroup == null) {
                forNewGroups.add(idNode);
            } else {
                parentGroup.nodes.add(idNode);
            }
        }
    }
    for (Node n : forNewGroups) {
        grouper.addGroup(new Group(graph, n));
    }
    // add in spouts as groups so we can get parallelisms
    for (Node n : spoutNodes) {
        grouper.addGroup(new Group(graph, n));
    }
    grouper.reindex();
    mergedGroups = grouper.getAllGroups();
    Map<Node, String> batchGroupMap = new HashMap<>();
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();
    for (int i = 0; i < connectedComponents.size(); i++) {
        String groupId = "bg" + i;
        for (Node n : connectedComponents.get(i)) {
            batchGroupMap.put(n, groupId);
        }
    }
    // System.out.println("GRAPH:");
    // System.out.println(graph);
    Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
    TridentTopologyBuilder builder = new TridentTopologyBuilder();
    Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
    Map<Group, String> boltIds = genBoltIds(mergedGroups);
    Map defaults = Utils.readDefaultConfig();
    for (SpoutNode sn : spoutNodes) {
        Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
        Map<String, Number> spoutRes = mergeDefaultResources(sn.getResources(), defaults);
        Number onHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
        Number offHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
        Number cpuLoad = spoutRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
        if (sn.type == SpoutNode.SpoutType.DRPC) {
            builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId, (IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn))
                   .setMemoryLoad(onHeap, offHeap)
                   .setCPULoad(cpuLoad);
        } else {
            ITridentSpout s;
            if (sn.spout instanceof IBatchSpout) {
                s = new BatchSpoutExecutor((IBatchSpout) sn.spout);
            } else if (sn.spout instanceof ITridentSpout) {
                s = (ITridentSpout) sn.spout;
            } else {
                // TODO: handle regular rich spouts without batches (needs lots of updates throughout)
                throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
            }
            builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism, batchGroupMap.get(sn))
                   .setMemoryLoad(onHeap, offHeap)
                   .setCPULoad(cpuLoad);
        }
    }
    for (Group g : mergedGroups) {
        if (!isSpoutGroup(g)) {
            Integer p = parallelisms.get(g);
            Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
            Map<String, Number> groupRes = mergeDefaultResources(g.getResources(), defaults);
            Number onHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
            Number offHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
            Number cpuLoad = groupRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
            BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p, committerBatches(g, batchGroupMap), streamToGroup)
                                    .setMemoryLoad(onHeap, offHeap)
                                    .setCPULoad(cpuLoad);
            Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
            for (PartitionNode n : inputs) {
                Node parent = TridentUtils.getParent(graph, n);
                String componentId = parent instanceof SpoutNode ? spoutIds.get(parent) : boltIds.get(grouper.nodeGroup(parent));
                d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping);
            }
        }
    }
    return builder.buildTopology();
}
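build() is the compilation step behind every Trident pipeline: newStream() and the fluent operations only populate _graph, and build() turns that node graph into spouts and SubtopologyBolts wired together by GlobalStreamId. A minimal sketch of a caller, assuming the standard Trident testing classes shipped with storm/jstorm (FixedBatchSpout, MemoryMapState, Count); the class name WordCountBuildExample is hypothetical.

import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.testing.MemoryMapState;

public class WordCountBuildExample {
    public static StormTopology buildWordCount() {
        // A tiny batch spout cycling through a fixed word list.
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("word"), 3,
                new Values("apple"), new Values("banana"), new Values("apple"));
        spout.setCycle(true);

        TridentTopology topology = new TridentTopology();
        topology.newStream("words", spout)
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));
        // Compiles the accumulated node graph via the build() method shown above.
        return topology.build();
    }
}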
Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba: the class TridentTopologyBuilder, method fleshOutStreamBatchIds.
Map<GlobalStreamId, String> fleshOutStreamBatchIds(boolean includeCommitStream) {
    Map<GlobalStreamId, String> ret = new HashMap<>(_batchIds);
    Set<String> allBatches = new HashSet<>(_batchIds.values());
    for (String b : allBatches) {
        ret.put(new GlobalStreamId(masterCoordinator(b), MasterBatchCoordinator.BATCH_STREAM_ID), b);
        if (includeCommitStream) {
            ret.put(new GlobalStreamId(masterCoordinator(b), MasterBatchCoordinator.COMMIT_STREAM_ID), b);
        }
        // Do NOT include the success stream as part of the batch: it is just a metadata
        // tuple that assists in cleanup and must not trigger coordination tuples or
        // batch tracking.
    }
    for (String id : _spouts.keySet()) {
        TransactionalSpoutComponent c = _spouts.get(id);
        if (c.batchGroupId != null) {
            ret.put(new GlobalStreamId(spoutCoordinator(id), MasterBatchCoordinator.BATCH_STREAM_ID), c.batchGroupId);
        }
    }
    // this takes care of setting up coordination streams for spouts and bolts
    for (GlobalStreamId s : _batchIds.keySet()) {
        String b = _batchIds.get(s);
        ret.put(new GlobalStreamId(s.get_componentId(), TridentBoltExecutor.COORD_STREAM(b)), b);
    }
    return ret;
}
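The method keys every lookup on GlobalStreamId, which only works because the Thrift-generated class implements equals() and hashCode() by field value. A small sketch demonstrating that property; the coordinator component id string is illustrative and the class name is hypothetical.

import java.util.HashMap;
import java.util.Map;

import backtype.storm.generated.GlobalStreamId;
import storm.trident.topology.MasterBatchCoordinator;

public class BatchIdMapExample {
    public static void main(String[] args) {
        // Two separately constructed GlobalStreamIds naming the same
        // (component, stream) pair resolve to one HashMap entry.
        Map<GlobalStreamId, String> ids = new HashMap<>();
        ids.put(new GlobalStreamId("$mastercoord-bg0", MasterBatchCoordinator.BATCH_STREAM_ID), "bg0");
        GlobalStreamId probe = new GlobalStreamId("$mastercoord-bg0", MasterBatchCoordinator.BATCH_STREAM_ID);
        System.out.println(ids.get(probe)); // prints bg0
    }
}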
Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba: the class SingleJoinBolt, method prepare.
@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    _fieldLocations = new HashMap<String, GlobalStreamId>();
    _collector = collector;
    int timeout = ((Number) conf.get(Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS)).intValue();
    _pending = new TimeCacheMap<List<Object>, Map<GlobalStreamId, Tuple>>(timeout, new ExpireCallback());
    _numSources = context.getThisSources().size();
    // intersect the output fields of all sources to find the shared id fields
    Set<String> idFields = null;
    for (GlobalStreamId source : context.getThisSources().keySet()) {
        Fields fields = context.getComponentOutputFields(source.get_componentId(), source.get_streamId());
        Set<String> setFields = new HashSet<String>(fields.toList());
        if (idFields == null)
            idFields = setFields;
        else
            idFields.retainAll(setFields);
        // remember which source stream provides each requested output field
        for (String outfield : _outFields) {
            for (String sourcefield : fields) {
                if (outfield.equals(sourcefield)) {
                    _fieldLocations.put(outfield, source);
                }
            }
        }
    }
    _idFields = new Fields(new ArrayList<String>(idFields));
    if (_fieldLocations.size() != _outFields.size()) {
        throw new RuntimeException("Cannot find all outfields among sources");
    }
}
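prepare() registers an ExpireCallback with the TimeCacheMap, but the snippet does not show it. In the storm-starter version of this bolt, the callback fails every buffered half of a join that never completed within the message timeout; a sketch consistent with that behavior:

private class ExpireCallback implements TimeCacheMap.ExpiredCallback<List<Object>, Map<GlobalStreamId, Tuple>> {
    @Override
    public void expire(List<Object> id, Map<GlobalStreamId, Tuple> tuples) {
        // The join for this id timed out; fail the buffered tuples so they are replayed.
        for (Tuple tuple : tuples.values()) {
            _collector.fail(tuple);
        }
    }
}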
Use of backtype.storm.generated.GlobalStreamId in project jstorm by alibaba: the class SingleJoinBolt, method execute.
@Override
public void execute(Tuple tuple) {
    List<Object> id = tuple.select(_idFields);
    GlobalStreamId streamId = new GlobalStreamId(tuple.getSourceComponent(), tuple.getSourceStreamId());
    if (!_pending.containsKey(id)) {
        _pending.put(id, new HashMap<GlobalStreamId, Tuple>());
    }
    Map<GlobalStreamId, Tuple> parts = _pending.get(id);
    if (parts.containsKey(streamId))
        throw new RuntimeException("Received same side of single join twice");
    parts.put(streamId, tuple);
    // once every source has contributed a tuple for this id, emit the join result
    if (parts.size() == _numSources) {
        _pending.remove(id);
        List<Object> joinResult = new ArrayList<Object>();
        for (String outField : _outFields) {
            GlobalStreamId loc = _fieldLocations.get(outField);
            joinResult.add(parts.get(loc).getValueByField(outField));
        }
        _collector.emit(new ArrayList<Tuple>(parts.values()), joinResult);
        for (Tuple part : parts.values()) {
            _collector.ack(part);
        }
        SingleJoinTest.receiveCounter.incrementAndGet();
    }
}
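To see where the GlobalStreamId keys in _pending come from, here is a wiring sketch in the style of storm-starter's SingleJoinExample: two spouts sharing an "id" field feed the bolt via fieldsGrouping, so matching halves of each join land on the same task. genderSpout and ageSpout are assumed spouts declaring fields ("id", "gender") and ("id", "age").

import backtype.storm.topology.IRichSpout;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;

public class SingleJoinWiringExample {
    public static TopologyBuilder wireSingleJoin(IRichSpout genderSpout, IRichSpout ageSpout) {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("gender", genderSpout);
        builder.setSpout("age", ageSpout);
        // fieldsGrouping on "id" routes matching join halves to the same bolt task
        builder.setBolt("join", new SingleJoinBolt(new Fields("gender", "age")))
               .fieldsGrouping("gender", new Fields("id"))
               .fieldsGrouping("age", new Fields("id"));
        return builder;
    }
}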