Use of org.apache.storm.trident.planner.Node in project storm by apache.
Class GraphGrouper, method merge.
private void merge(Group g1, Group g2) {
    Group newGroup = new Group(g1, g2);
    currGroups.remove(g1);
    currGroups.remove(g2);
    currGroups.add(newGroup);
    for (Node n : newGroup.nodes) {
        groupIndex.put(n, newGroup);
    }
}
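As a standalone illustration (plain Java, not Storm code, with made-up node names), merging two groups amounts to replacing them with their union and re-pointing every member node at the new group in the index:

import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;

public class MergeSketch {
    public static void main(String[] args) {
        Set<Set<String>> currGroups = new LinkedHashSet<>();
        Map<String, Set<String>> groupIndex = new HashMap<>();

        Set<String> g1 = new LinkedHashSet<>(Arrays.asList("nodeA", "nodeB"));
        Set<String> g2 = new LinkedHashSet<>(Arrays.asList("nodeC"));
        currGroups.add(g1);
        currGroups.add(g2);
        g1.forEach(n -> groupIndex.put(n, g1));
        g2.forEach(n -> groupIndex.put(n, g2));

        // Union the node sets, drop the old groups, re-index the members.
        Set<String> merged = new LinkedHashSet<>(g1);
        merged.addAll(g2);
        currGroups.remove(g1);
        currGroups.remove(g2);
        currGroups.add(merged);
        merged.forEach(n -> groupIndex.put(n, merged));

        System.out.println(currGroups); // [[nodeA, nodeB, nodeC]]
    }
}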
Use of org.apache.storm.trident.planner.Node in project storm by apache.
Class Group, method getResources.
/**
* In case no resources are specified, returns empty map.
* In case differing types of resources are specified, throw.
* Otherwise, add all the resources for a group.
*/
public Map<String, Number> getResources(Map<String, Number> defaults) {
    if (defaults == null) {
        defaults = new HashMap<>();
    }
    Map<String, Number> resources = null;
    for (Node n : nodes) {
        if (resources == null) {
            // After this, resources should contain all the kinds of resources
            // we can count for the group. If we see a kind of resource in another
            // node not in resources.keySet(), we'll throw.
            resources = new HashMap<>(defaults);
            resources.putAll(n.getResources());
        } else {
            Map<String, Number> node_res = new HashMap<>(defaults);
            node_res.putAll(n.getResources());
            if (!node_res.keySet().equals(resources.keySet())) {
                StringBuilder ops = new StringBuilder();
                for (Node nod : nodes) {
                    Set<String> resource_keys = new HashSet<>(defaults.keySet());
                    resource_keys.addAll(nod.getResources().keySet());
                    ops.append("\t[ " + nod.shortString() + ", Resources Set: " + resource_keys + " ]\n");
                }
                if (node_res.keySet().containsAll(resources.keySet())) {
                    Set<String> diffset = new HashSet<>(node_res.keySet());
                    diffset.removeAll(resources.keySet());
                    throw new RuntimeException(
                        "Found an operation with resources set which are not set in other operations in the group:\n"
                        + "\t[ " + n.shortString() + " ]: " + diffset + "\n"
                        + "Either set these resources in all other operations in the group, add a default setting, or remove the setting from this operation.\n"
                        + "The group at fault:\n" + ops);
                } else if (resources.keySet().containsAll(node_res.keySet())) {
                    Set<String> diffset = new HashSet<>(resources.keySet());
                    diffset.removeAll(node_res.keySet());
                    throw new RuntimeException(
                        "Found an operation with resources unset which are set in other operations in the group:\n"
                        + "\t[ " + n.shortString() + " ]: " + diffset + "\n"
                        + "Either set these resources in all other operations in the group, add a default setting, or remove the setting from all other operations.\n"
                        + "The group at fault:\n" + ops);
                }
            }
            for (Map.Entry<String, Number> kv : node_res.entrySet()) {
                String key = kv.getKey();
                Number val = kv.getValue();
                Number newval = new Double(val.doubleValue() + resources.get(key).doubleValue());
                resources.put(key, newval);
            }
        }
    }
    return resources;
}
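To make the summing rule concrete, here is a standalone sketch (plain Java with a hypothetical resource key, not Storm code): defaults fill in any resource a node does not set, and the group total is the per-key sum across nodes.

import java.util.HashMap;
import java.util.Map;

public class ResourceSumSketch {
    public static void main(String[] args) {
        Map<String, Number> defaults = new HashMap<>();
        defaults.put("onheap.memory.mb", 128.0); // hypothetical key and default

        // Node A overrides the default; node B relies on it.
        Map<String, Number> nodeA = new HashMap<>(defaults);
        nodeA.put("onheap.memory.mb", 256.0);
        Map<String, Number> nodeB = new HashMap<>(defaults);

        // Group total: per-key sum, mirroring the loop at the end of getResources.
        Map<String, Number> group = new HashMap<>(nodeA);
        for (Map.Entry<String, Number> kv : nodeB.entrySet()) {
            group.merge(kv.getKey(), kv.getValue(),
                    (a, b) -> a.doubleValue() + b.doubleValue());
        }
        System.out.println(group); // {onheap.memory.mb=384.0}
    }
}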
Use of org.apache.storm.trident.planner.Node in project storm by apache.
Class TridentTopology, method checkValidJoins.
private static void checkValidJoins(Collection<Node> g) {
    boolean hasDRPCSpout = false;
    boolean hasBatchSpout = false;
    for (Node n : g) {
        if (n instanceof SpoutNode) {
            SpoutNode.SpoutType type = ((SpoutNode) n).type;
            if (type == SpoutNode.SpoutType.BATCH) {
                hasBatchSpout = true;
            } else if (type == SpoutNode.SpoutType.DRPC) {
                hasDRPCSpout = true;
            }
        }
    }
    if (hasBatchSpout && hasDRPCSpout) {
        throw new RuntimeException("Cannot join DRPC stream with streams originating from other spouts");
    }
}
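A rough sketch of the situation this check rejects (the spout contents, field names, and DRPC function name below are illustrative assumptions): joining a DRPC stream with a stream fed by a batch spout puts both spout types into one group, so build() throws.

import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class InvalidJoinSketch {
    public static StormTopology buildInvalidJoin() {
        TridentTopology topology = new TridentTopology();
        Stream words = topology.newStream("words", new FixedBatchSpout(
                new Fields("word", "count"), 3, new Values("apple", 1), new Values("pear", 2)));
        // DRPC streams carry the single field "args".
        Stream queries = topology.newDRPCStream("lookup");
        // Joining the DRPC stream with the batch-spout stream puts a DRPC spout
        // and a BATCH spout into the same group, which checkValidJoins rejects.
        topology.join(queries, new Fields("args"), words, new Fields("word"),
                new Fields("word", "count"));
        // Throws: "Cannot join DRPC stream with streams originating from other spouts"
        return topology.build();
    }
}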
Use of org.apache.storm.trident.planner.Node in project storm by apache.
Class TridentTopology, method build.
public StormTopology build() {
    DefaultDirectedGraph<Node, IndexedEdge> graph = (DefaultDirectedGraph) _graph.clone();
    completeDRPC(graph, _colocate, _gen);
    List<SpoutNode> spoutNodes = new ArrayList<>();
    // can be regular nodes (static state) or processor nodes
    Set<Node> boltNodes = new LinkedHashSet<>();
    for (Node n : graph.vertexSet()) {
        if (n instanceof SpoutNode) {
            spoutNodes.add((SpoutNode) n);
        } else if (!(n instanceof PartitionNode)) {
            boltNodes.add(n);
        }
    }
    Set<Group> initialGroups = new LinkedHashSet<>();
    for (List<Node> colocate : _colocate.values()) {
        Group g = new Group(graph, colocate);
        boltNodes.removeAll(colocate);
        initialGroups.add(g);
    }
    for (Node n : boltNodes) {
        initialGroups.add(new Group(graph, n));
    }
    GraphGrouper grouper = new GraphGrouper(graph, initialGroups);
    grouper.mergeFully();
    Collection<Group> mergedGroups = grouper.getAllGroups();
    // add identity partitions between groups
    for (IndexedEdge<Node> e : new HashSet<>(graph.edgeSet())) {
        if (!(e.source instanceof PartitionNode) && !(e.target instanceof PartitionNode)) {
            Group g1 = grouper.nodeGroup(e.source);
            Group g2 = grouper.nodeGroup(e.target);
            // g1 being null means the source is a spout node
            if (g1 == null && !(e.source instanceof SpoutNode))
                throw new RuntimeException("Planner exception: Null source group must indicate a spout node at this phase of planning");
            if (g1 == null || !g1.equals(g2)) {
                graph.removeEdge(e);
                PartitionNode pNode = makeIdentityPartition(e.source);
                graph.addVertex(pNode);
                graph.addEdge(e.source, pNode, new IndexedEdge(e.source, pNode, 0));
                graph.addEdge(pNode, e.target, new IndexedEdge(pNode, e.target, e.index));
            }
        }
    }
    // if one group subscribes to the same stream with same partitioning multiple times,
    // merge those together (otherwise can end up with many output streams created for that partitioning
    // if need to split into multiple output streams because of same input having different
    // partitioning to the group)
    // this is because can't currently merge splitting logic into a spout
    // not the most kosher algorithm here, since the grouper indexes are being trounced via the adding of nodes to random groups, but it
    // works out
    List<Node> forNewGroups = new ArrayList<>();
    for (Group g : mergedGroups) {
        for (PartitionNode n : extraPartitionInputs(g)) {
            Node idNode = makeIdentityNode(n.allOutputFields);
            Node newPartitionNode = new PartitionNode(idNode.streamId, n.name, idNode.allOutputFields, n.thriftGrouping);
            Node parentNode = TridentUtils.getParent(graph, n);
            Set<IndexedEdge> outgoing = graph.outgoingEdgesOf(n);
            graph.removeVertex(n);
            graph.addVertex(idNode);
            graph.addVertex(newPartitionNode);
            addEdge(graph, parentNode, idNode, 0);
            addEdge(graph, idNode, newPartitionNode, 0);
            for (IndexedEdge e : outgoing) {
                addEdge(graph, newPartitionNode, e.target, e.index);
            }
            Group parentGroup = grouper.nodeGroup(parentNode);
            if (parentGroup == null) {
                forNewGroups.add(idNode);
            } else {
                parentGroup.nodes.add(idNode);
            }
        }
    }
    for (Node n : forNewGroups) {
        grouper.addGroup(new Group(graph, n));
    }
    // add in spouts as groups so we can get parallelisms
    for (Node n : spoutNodes) {
        grouper.addGroup(new Group(graph, n));
    }
    grouper.reindex();
    mergedGroups = grouper.getAllGroups();
    Map<Node, String> batchGroupMap = new HashMap<>();
    List<Set<Node>> connectedComponents = new ConnectivityInspector<>(graph).connectedSets();
    for (int i = 0; i < connectedComponents.size(); i++) {
        String groupId = "bg" + i;
        for (Node n : connectedComponents.get(i)) {
            batchGroupMap.put(n, groupId);
        }
    }
    // System.out.println("GRAPH:");
    // System.out.println(graph);
    Map<Group, Integer> parallelisms = getGroupParallelisms(graph, grouper, mergedGroups);
    TridentTopologyBuilder builder = new TridentTopologyBuilder();
    Map<Node, String> spoutIds = genSpoutIds(spoutNodes);
    Map<Group, String> boltIds = genBoltIds(mergedGroups);
    for (SpoutNode sn : spoutNodes) {
        Integer parallelism = parallelisms.get(grouper.nodeGroup(sn));
        Map<String, Number> spoutRes = new HashMap<>(_resourceDefaults);
        spoutRes.putAll(sn.getResources());
        Number onHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
        Number offHeap = spoutRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
        Number cpuLoad = spoutRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
        SpoutDeclarer spoutDeclarer = null;
        if (sn.type == SpoutNode.SpoutType.DRPC) {
            spoutDeclarer = builder.setBatchPerTupleSpout(spoutIds.get(sn), sn.streamId,
                    (IRichSpout) sn.spout, parallelism, batchGroupMap.get(sn));
        } else {
            ITridentSpout s;
            if (sn.spout instanceof IBatchSpout) {
                s = new BatchSpoutExecutor((IBatchSpout) sn.spout);
            } else if (sn.spout instanceof ITridentSpout) {
                s = (ITridentSpout) sn.spout;
            } else {
                throw new RuntimeException("Regular rich spouts not supported yet... try wrapping in a RichSpoutBatchExecutor");
                // TODO: handle regular rich spout without batches (need lots of updates to support this throughout)
            }
            spoutDeclarer = builder.setSpout(spoutIds.get(sn), sn.streamId, sn.txId, s, parallelism,
                    batchGroupMap.get(sn));
        }
        if (onHeap != null) {
            if (offHeap != null) {
                spoutDeclarer.setMemoryLoad(onHeap, offHeap);
            } else {
                spoutDeclarer.setMemoryLoad(onHeap);
            }
        }
        if (cpuLoad != null) {
            spoutDeclarer.setCPULoad(cpuLoad);
        }
    }
    for (Group g : mergedGroups) {
        if (!isSpoutGroup(g)) {
            Integer p = parallelisms.get(g);
            Map<String, String> streamToGroup = getOutputStreamBatchGroups(g, batchGroupMap);
            Map<String, Number> groupRes = g.getResources(_resourceDefaults);
            Number onHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_ONHEAP_MEMORY_MB);
            Number offHeap = groupRes.get(Config.TOPOLOGY_COMPONENT_RESOURCES_OFFHEAP_MEMORY_MB);
            Number cpuLoad = groupRes.get(Config.TOPOLOGY_COMPONENT_CPU_PCORE_PERCENT);
            BoltDeclarer d = builder.setBolt(boltIds.get(g), new SubtopologyBolt(graph, g.nodes, batchGroupMap), p,
                    committerBatches(g, batchGroupMap), streamToGroup);
            if (onHeap != null) {
                if (offHeap != null) {
                    d.setMemoryLoad(onHeap, offHeap);
                } else {
                    d.setMemoryLoad(onHeap);
                }
            }
            if (cpuLoad != null) {
                d.setCPULoad(cpuLoad);
            }
            Collection<PartitionNode> inputs = uniquedSubscriptions(externalGroupInputs(g));
            for (PartitionNode n : inputs) {
                Node parent = TridentUtils.getParent(graph, n);
                String componentId = parent instanceof SpoutNode ? spoutIds.get(parent) : boltIds.get(grouper.nodeGroup(parent));
                d.grouping(new GlobalStreamId(componentId, n.streamId), n.thriftGrouping);
            }
        }
    }
    HashMap<String, Number> combinedMasterCoordResources = new HashMap<String, Number>(_resourceDefaults);
    combinedMasterCoordResources.putAll(_masterCoordResources);
    return builder.buildTopology(combinedMasterCoordResources);
}
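For context on where build() is typically invoked, here is a rough word-count style sketch patterned after storm-starter (the topology name, spout contents, and worker count are illustrative assumptions): the fluent Trident calls construct the node graph, and build() runs the grouping and planning above to produce the StormTopology that gets submitted.

import org.apache.storm.Config;
import org.apache.storm.StormSubmitter;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.testing.MemoryMapState;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class TridentBuildSketch {

    /** Splits a sentence into words; defined here so the sketch is self-contained. */
    public static class Split extends BaseFunction {
        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            for (String word : tuple.getString(0).split(" ")) {
                collector.emit(new Values(word));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
                new Values("the cow jumped over the moon"),
                new Values("an apple a day keeps the doctor away"));
        spout.setCycle(true);

        TridentTopology topology = new TridentTopology();
        topology.newStream("sentences", spout)
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                .groupBy(new Fields("word"))
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"));

        Config conf = new Config();
        conf.setNumWorkers(2);
        // build() is where the planner above turns the node graph into spouts and bolts.
        StormSubmitter.submitTopology("trident-wordcount", conf, topology.build());
    }
}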
Use of org.apache.storm.trident.planner.Node in project storm by apache.
Class TridentTopology, method multiReduce.
public Stream multiReduce(List<Fields> inputFields, List<Stream> streams, MultiReducer function, Fields outputFields) {
    List<String> names = new ArrayList<>();
    for (Stream s : streams) {
        if (s._name != null) {
            names.add(s._name);
        }
    }
    Node n = new ProcessorNode(getUniqueStreamId(), Utils.join(names, "-"), outputFields, outputFields,
            new MultiReducerProcessor(inputFields, function));
    return addSourcedNode(streams, n);
}
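multiReduce is the low-level primitive behind TridentTopology's stream-combining operations; application code usually reaches it through helpers such as merge and join rather than calling it directly. A minimal join sketch (the spouts and field names below are illustrative assumptions):

import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

public class CombineStreamsSketch {
    public static Stream joinTwoStreams(TridentTopology topology) {
        Stream left = topology.newStream("left", new FixedBatchSpout(
                new Fields("key", "a"), 2, new Values("k1", 1), new Values("k2", 2)));
        Stream right = topology.newStream("right", new FixedBatchSpout(
                new Fields("key", "b"), 2, new Values("k1", 10), new Values("k2", 20)));
        // Output fields: the join field first, then the remaining field of each input.
        return topology.join(left, new Fields("key"), right, new Fields("key"),
                new Fields("key", "a", "b"));
    }
}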