Search in sources :

Example 6 with Grouping

use of org.apache.storm.generated.Grouping in project storm by apache.

the class GeneralTopologyContext method getTargets.

/**
     * Gets information about who is consuming the outputs of the specified component,
     * and how.
     *
     * @return Map from stream id to component id to the Grouping used.
     */
public Map<String, Map<String, Grouping>> getTargets(String componentId) {
    Map<String, Map<String, Grouping>> ret = new HashMap<>();
    for (String otherComponentId : getComponentIds()) {
        Map<GlobalStreamId, Grouping> inputs = getComponentCommon(otherComponentId).get_inputs();
        for (Map.Entry<GlobalStreamId, Grouping> entry : inputs.entrySet()) {
            GlobalStreamId id = entry.getKey();
            if (id.get_componentId().equals(componentId)) {
                Map<String, Grouping> curr = ret.get(id.get_streamId());
                if (curr == null)
                    curr = new HashMap<>();
                curr.put(otherComponentId, entry.getValue());
                ret.put(id.get_streamId(), curr);
            }
        }
    }
    return ret;
}
Also used : HashMap(java.util.HashMap) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Grouping(org.apache.storm.generated.Grouping) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with Grouping

use of org.apache.storm.generated.Grouping in project storm by apache.

the class StreamBuilderTest method testBranch.

@Test
public void testBranch() throws Exception {
    Stream<Tuple> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID));
    Stream<Tuple>[] streams = stream.branch(x -> true);
    StormTopology topology = streamBuilder.build();
    assertEquals(1, topology.get_spouts_size());
    assertEquals(1, topology.get_bolts_size());
    Map<GlobalStreamId, Grouping> expected = new HashMap<>();
    String spoutId = topology.get_spouts().keySet().iterator().next();
    expected.put(new GlobalStreamId(spoutId, "default"), Grouping.shuffle(new NullStruct()));
    assertEquals(expected, topology.get_bolts().values().iterator().next().get_common().get_inputs());
    assertEquals(1, streams.length);
    assertEquals(1, streams[0].node.getOutputStreams().size());
    String parentStream = streams[0].node.getOutputStreams().iterator().next() + "-branch";
    assertEquals(1, streams[0].node.getParents(parentStream).size());
    Node processorNdoe = streams[0].node.getParents(parentStream).iterator().next();
    assertTrue(processorNdoe instanceof ProcessorNode);
    assertTrue(((ProcessorNode) processorNdoe).getProcessor() instanceof BranchProcessor);
    assertTrue(processorNdoe.getParents("default").iterator().next() instanceof SpoutNode);
}
Also used : HashMap(java.util.HashMap) StormTopology(org.apache.storm.generated.StormTopology) Grouping(org.apache.storm.generated.Grouping) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) BranchProcessor(org.apache.storm.streams.processors.BranchProcessor) NullStruct(org.apache.storm.generated.NullStruct) Tuple(org.apache.storm.tuple.Tuple) Test(org.junit.Test)

Example 8 with Grouping

use of org.apache.storm.generated.Grouping in project storm by apache.

the class StreamBuilderTest method testMultiPartitionByKeyWithRepartition.

@Test
public void testMultiPartitionByKeyWithRepartition() {
    TopologyContext mockContext = Mockito.mock(TopologyContext.class);
    OutputCollector mockCollector = Mockito.mock(OutputCollector.class);
    Map<GlobalStreamId, Grouping> expected = new HashMap<>();
    expected.put(new GlobalStreamId("bolt2", "s3"), Grouping.fields(Collections.singletonList("key")));
    expected.put(new GlobalStreamId("bolt2", "s3__punctuation"), Grouping.all(new NullStruct()));
    Stream<Integer> stream = streamBuilder.newStream(newSpout(Utils.DEFAULT_STREAM_ID), new ValueMapper<>(0));
    stream.mapToPair(x -> Pair.of(x, x)).window(TumblingWindows.of(BaseWindowedBolt.Count.of(10))).reduceByKey((x, y) -> x + y).repartition(10).reduceByKey((x, y) -> 0).print();
    StormTopology topology = streamBuilder.build();
    assertEquals(3, topology.get_bolts_size());
    assertEquals(expected, topology.get_bolts().get("bolt3").get_common().get_inputs());
}
Also used : OutputFieldsDeclarer(org.apache.storm.topology.OutputFieldsDeclarer) BaseRichSpout(org.apache.storm.topology.base.BaseRichSpout) IRichSpout(org.apache.storm.topology.IRichSpout) BaseWindowedBolt(org.apache.storm.topology.base.BaseWindowedBolt) TopologyContext(org.apache.storm.task.TopologyContext) HashMap(java.util.HashMap) Count(org.apache.storm.streams.operations.aggregators.Count) Bolt(org.apache.storm.generated.Bolt) Tuple(org.apache.storm.tuple.Tuple) OutputCollector(org.apache.storm.task.OutputCollector) StormTopology(org.apache.storm.generated.StormTopology) BranchProcessor(org.apache.storm.streams.processors.BranchProcessor) Map(java.util.Map) ValueMapper(org.apache.storm.streams.operations.mappers.ValueMapper) Before(org.junit.Before) BaseRichBolt(org.apache.storm.topology.base.BaseRichBolt) PairValueMapper(org.apache.storm.streams.operations.mappers.PairValueMapper) Grouping(org.apache.storm.generated.Grouping) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) Fields(org.apache.storm.tuple.Fields) Utils(org.apache.storm.utils.Utils) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Mockito(org.mockito.Mockito) TumblingWindows(org.apache.storm.streams.windowing.TumblingWindows) SpoutSpec(org.apache.storm.generated.SpoutSpec) IRichBolt(org.apache.storm.topology.IRichBolt) NullStruct(org.apache.storm.generated.NullStruct) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) OutputCollector(org.apache.storm.task.OutputCollector) SpoutOutputCollector(org.apache.storm.spout.SpoutOutputCollector) HashMap(java.util.HashMap) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTopology(org.apache.storm.generated.StormTopology) Grouping(org.apache.storm.generated.Grouping) TopologyContext(org.apache.storm.task.TopologyContext) NullStruct(org.apache.storm.generated.NullStruct) Test(org.junit.Test)

Example 9 with Grouping

use of org.apache.storm.generated.Grouping in project storm by apache.

the class WindowedBoltExecutorTest method getTopologyContext.

private TopologyContext getTopologyContext() {
    TopologyContext context = Mockito.mock(TopologyContext.class);
    Map<GlobalStreamId, Grouping> sources = Collections.singletonMap(new GlobalStreamId("s1", "default"), null);
    Mockito.when(context.getThisSources()).thenReturn(sources);
    return context;
}
Also used : GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Grouping(org.apache.storm.generated.Grouping) TopologyContext(org.apache.storm.task.TopologyContext) GeneralTopologyContext(org.apache.storm.task.GeneralTopologyContext)

Example 10 with Grouping

use of org.apache.storm.generated.Grouping in project flink by apache.

the class FlinkTopology method translateTopology.

/**
	 * Creates a Flink program that uses the specified spouts and bolts.
	 */
private void translateTopology() {
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();
    // Storm defaults to parallelism 1
    env.setParallelism(1);
    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();
        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);
        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;
        if (sourceStreams.size() == 1) {
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);
            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
            @SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
            SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        availableInputs.put(spoutId, outputStreams);
        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            common.set_parallelism_hint(1);
        }
    }
    /**
		 * 1. Connect all spout streams with bolts streams
		 * 2. Then proceed with the bolts stream already connected
		 *
		 *  Because we do not know the order in which an iterator steps over a set, we might process a consumer before
		 * its producer
		 * ->thus, we might need to repeat multiple times
		 */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n  ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;
        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            final IRichBolt userBolt = copyObject(bolt.getValue());
            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }
            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }
            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                makeProgress = true;
                boltsIterator.remove();
            }
            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }
            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                common.set_parallelism_hint(1);
            }
        }
    }
}
Also used : SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) HashMap(java.util.HashMap) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Entry(java.util.Map.Entry) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Grouping(org.apache.storm.generated.Grouping) IRichSpout(org.apache.storm.topology.IRichSpout) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)

Aggregations

Grouping (org.apache.storm.generated.Grouping)18 GlobalStreamId (org.apache.storm.generated.GlobalStreamId)16 HashMap (java.util.HashMap)14 Map (java.util.Map)9 Bolt (org.apache.storm.generated.Bolt)6 StormTopology (org.apache.storm.generated.StormTopology)5 Test (org.junit.Test)5 ComponentCommon (org.apache.storm.generated.ComponentCommon)4 NullStruct (org.apache.storm.generated.NullStruct)4 SpoutSpec (org.apache.storm.generated.SpoutSpec)4 StreamInfo (org.apache.storm.generated.StreamInfo)4 Tuple (org.apache.storm.tuple.Tuple)4 ArrayList (java.util.ArrayList)3 TreeMap (java.util.TreeMap)3 IRichBolt (org.apache.storm.topology.IRichBolt)3 Fields (org.apache.storm.tuple.Fields)3 EventLoggerBolt (org.apache.storm.metric.EventLoggerBolt)2 MetricsConsumerBolt (org.apache.storm.metric.MetricsConsumerBolt)2 SystemBolt (org.apache.storm.metric.SystemBolt)2 BranchProcessor (org.apache.storm.streams.processors.BranchProcessor)2