Example 11 with GlobalStreamId

use of org.apache.storm.generated.GlobalStreamId in project storm by apache.

the class StatefulWindowedBoltExecutorTest method getMockTuples.

private List<Tuple> getMockTuples(int count) {
    List<Tuple> mockTuples = new ArrayList<>();
    for (long i = 0; i < count; i++) {
        Tuple mockTuple = Mockito.mock(Tuple.class);
        Mockito.when(mockTuple.getLongByField("msgid")).thenReturn(i);
        Mockito.when(mockTuple.getSourceTask()).thenReturn(1);
        Mockito.when(mockTuple.getSourceGlobalStreamId()).thenReturn(new GlobalStreamId("a", "s"));
        mockTuples.add(mockTuple);
    }
    return mockTuples;
}
Also used : GlobalStreamId(org.apache.storm.generated.GlobalStreamId) ArrayList(java.util.ArrayList) Tuple(org.apache.storm.tuple.Tuple)
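
For context, a hedged sketch of how these stubs behave, assuming JUnit 4 and Mockito on the classpath. The test class and assertions below are illustrative, not part of the Storm test suite; the helper is repeated from the example above so the sketch is self-contained.

import java.util.ArrayList;
import java.util.List;
import org.apache.storm.generated.GlobalStreamId;
import org.apache.storm.tuple.Tuple;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;

public class MockTupleSketchTest {

    @Test
    public void stubbedAccessorsReturnTheConfiguredValues() {
        List<Tuple> tuples = getMockTuples(3);
        for (int i = 0; i < tuples.size(); i++) {
            Tuple t = tuples.get(i);
            // each stubbed accessor answers with exactly the configured value
            Assert.assertEquals(Long.valueOf(i), t.getLongByField("msgid"));
            Assert.assertEquals(1, t.getSourceTask());
            GlobalStreamId gsid = t.getSourceGlobalStreamId();
            Assert.assertEquals("a", gsid.get_componentId());
            Assert.assertEquals("s", gsid.get_streamId());
        }
    }

    // same helper as in the example above
    private List<Tuple> getMockTuples(int count) {
        List<Tuple> mockTuples = new ArrayList<>();
        for (long i = 0; i < count; i++) {
            Tuple mockTuple = Mockito.mock(Tuple.class);
            Mockito.when(mockTuple.getLongByField("msgid")).thenReturn(i);
            Mockito.when(mockTuple.getSourceTask()).thenReturn(1);
            Mockito.when(mockTuple.getSourceGlobalStreamId()).thenReturn(new GlobalStreamId("a", "s"));
            mockTuples.add(mockTuple);
        }
        return mockTuples;
    }
}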

Example 12 with GlobalStreamId

use of org.apache.storm.generated.GlobalStreamId in project storm by apache.

the class WindowedBoltExecutorTest method getTopologyContext.

private TopologyContext getTopologyContext() {
    TopologyContext context = Mockito.mock(TopologyContext.class);
    Map<GlobalStreamId, Grouping> sources = Collections.singletonMap(new GlobalStreamId("s1", "default"), null);
    Mockito.when(context.getThisSources()).thenReturn(sources);
    return context;
}
Also used : GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Grouping(org.apache.storm.generated.Grouping) TopologyContext(org.apache.storm.task.TopologyContext) GeneralTopologyContext(org.apache.storm.task.GeneralTopologyContext)
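
GlobalStreamId appears as a map key in most of these examples; the Thrift-generated class overrides equals() and hashCode() with value semantics, which is what makes such lookups work. A small illustrative sketch (the class name and printed values are hypothetical, not from either project):

import java.util.HashMap;
import java.util.Map;
import org.apache.storm.generated.GlobalStreamId;

public class GlobalStreamIdKeySketch {
    public static void main(String[] args) {
        GlobalStreamId a = new GlobalStreamId("s1", "default");
        GlobalStreamId b = new GlobalStreamId("s1", "default");
        // Thrift-generated classes compare by field values, not identity
        System.out.println(a.equals(b));                  // true
        System.out.println(a.hashCode() == b.hashCode()); // true

        Map<GlobalStreamId, String> sources = new HashMap<>();
        sources.put(a, "shuffle");
        // a fresh but equal key finds the entry
        System.out.println(sources.get(b));               // shuffle
    }
}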

Example 13 with GlobalStreamId

use of org.apache.storm.generated.GlobalStreamId in project flink by apache.

the class FlinkTopology method translateTopology.

/**
 * Creates a Flink program that uses the specified spouts and bolts.
 */
private void translateTopology() {
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();
    // Storm defaults to parallelism 1
    env.setParallelism(1);
    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();
        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);
        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;
        if (sourceStreams.size() == 1) {
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);
            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
            @SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
            SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        availableInputs.put(spoutId, outputStreams);
        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            common.set_parallelism_hint(1);
        }
    }
    /*
     * 1. Connect all spout streams to bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process a
     * consumer before its producer; thus, we might need to repeat multiple times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n  ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;
        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            final IRichBolt userBolt = copyObject(bolt.getValue());
            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }
            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }
            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                makeProgress = true;
                boltsIterator.remove();
            }
            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }
            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                common.set_parallelism_hint(1);
            }
        }
    }
}
Also used : SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) HashMap(java.util.HashMap) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Entry(java.util.Map.Entry) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Grouping(org.apache.storm.generated.Grouping) IRichSpout(org.apache.storm.topology.IRichSpout) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)
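
The retry logic in translateTopology() is a plain fixed-point iteration over a worklist: keep sweeping the remaining bolts, wire any whose inputs are all available, and fail once a full sweep makes no progress. A stripped-down sketch of just that pattern, with hypothetical node names standing in for spouts and bolts (plain Java, not Flink API):

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

public class FixedPointWiringSketch {

    static void wire(Map<String, Set<String>> pendingInputs, Set<String> available) {
        boolean makeProgress = true;
        while (!pendingInputs.isEmpty()) {
            if (!makeProgress) {
                // a full sweep connected nothing: some input can never be satisfied
                throw new RuntimeException("Unresolvable inputs: " + pendingInputs);
            }
            makeProgress = false;
            Iterator<Map.Entry<String, Set<String>>> it = pendingInputs.entrySet().iterator();
            while (it.hasNext()) {
                Map.Entry<String, Set<String>> node = it.next();
                if (available.containsAll(node.getValue())) {
                    available.add(node.getKey()); // wired: its output is now available
                    it.remove();
                    makeProgress = true;
                }
            }
        }
    }

    public static void main(String[] args) {
        Map<String, Set<String>> pending = new HashMap<>();
        pending.put("boltA", new HashSet<>(Collections.singleton("spout")));
        pending.put("boltB", new HashSet<>(Collections.singleton("boltA")));
        wire(pending, new HashSet<>(Collections.singleton("spout")));
        System.out.println("all bolts connected");
    }
}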

Example 14 with GlobalStreamId

use of org.apache.storm.generated.GlobalStreamId in project flink by apache.

the class FlinkTopology method createOutput.

@SuppressWarnings({ "unchecked", "rawtypes" })
private SingleOutputStreamOperator<?> createOutput(String boltId, IRichBolt bolt, Map<GlobalStreamId, DataStream<Tuple>> inputStreams) {
    assert (boltId != null);
    assert (bolt != null);
    assert (inputStreams != null);
    Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator = inputStreams.entrySet().iterator();
    Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next();
    GlobalStreamId streamId1 = input1.getKey();
    String inputStreamId1 = streamId1.get_streamId();
    String inputComponentId1 = streamId1.get_componentId();
    Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1);
    DataStream<Tuple> singleInputStream = input1.getValue();
    DataStream<StormTuple<Tuple>> mergedInputStream = null;
    while (iterator.hasNext()) {
        Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next();
        GlobalStreamId streamId2 = input2.getKey();
        DataStream<Tuple> inputStream2 = input2.getValue();
        if (mergedInputStream == null) {
            mergedInputStream = singleInputStream.connect(inputStream2).flatMap(new TwoFlinkStreamsMerger(streamId1, inputSchema1, streamId2, this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId()))).returns(StormTuple.class);
        } else {
            mergedInputStream = mergedInputStream.connect(inputStream2).flatMap(new StormFlinkStreamMerger(streamId2, this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId()))).returns(StormTuple.class);
        }
    }
    final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId);
    final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId);
    final SingleOutputStreamOperator<?> outputStream;
    if (boltOutputs.size() < 2) {
        // single output stream or sink
        String outputStreamId;
        if (boltOutputs.size() == 1) {
            outputStreamId = (String) boltOutputs.keySet().toArray()[0];
        } else {
            outputStreamId = null;
        }
        final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId);
        final SingleOutputStreamOperator<Tuple> outStream;
        // only one input
        if (inputStreams.entrySet().size() == 1) {
            BoltWrapper<Tuple, Tuple> boltWrapper = new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = singleInputStream.transform(boltId, outType, boltWrapper);
        } else {
            MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper = new MergedInputsBoltWrapper<Tuple, Tuple>(bolt, boltId, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = mergedInputStream.transform(boltId, outType, boltWrapper);
        }
        if (outType != null) {
            // only for non-sink nodes
            final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
            op.put(outputStreamId, outStream);
            availableInputs.put(boltId, op);
        }
        outputStream = outStream;
    } else {
        final TypeInformation<SplitStreamType<Tuple>> outType = (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class);
        final SingleOutputStreamOperator<SplitStreamType<Tuple>> multiStream;
        // only one input
        if (inputStreams.entrySet().size() == 1) {
            final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        } else {
            final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs = new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(bolt, boltId, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        }
        final SplitStream<SplitStreamType<Tuple>> splitStream = multiStream.split(new StormStreamSelector<Tuple>());
        final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
        for (String outputStreamId : boltOutputs.keySet()) {
            // select this output's sub-stream and restore its declared output type
            SingleOutputStreamOperator<Tuple> outStream = splitStream.select(outputStreamId).map(new SplitStreamMapper<Tuple>());
            outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId));
            op.put(outputStreamId, outStream);
        }
        availableInputs.put(boltId, op);
        outputStream = multiStream;
    }
    return outputStream;
}
Also used : DataStream(org.apache.flink.streaming.api.datastream.DataStream) HashMap(java.util.HashMap) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Entry(java.util.Map.Entry) MergedInputsBoltWrapper(org.apache.flink.storm.wrappers.MergedInputsBoltWrapper) BoltWrapper(org.apache.flink.storm.wrappers.BoltWrapper) MergedInputsBoltWrapper(org.apache.flink.storm.wrappers.MergedInputsBoltWrapper) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) SplitStreamMapper(org.apache.flink.storm.util.SplitStreamMapper) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)
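
When a bolt has several inputs, createOutput() folds them into one stream pairwise: the first input serves as the seed, and each further input is connected onto the running result (TwoFlinkStreamsMerger for the first pair, StormFlinkStreamMerger afterwards). A generic sketch of that left-fold shape, with a hypothetical merge operator standing in for connect(...).flatMap(...):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.function.BinaryOperator;

public class LeftFoldMergeSketch {

    // fold a list of inputs into one value by merging pairwise, left to right
    static <T> T mergeAll(List<T> inputs, BinaryOperator<T> merge) {
        Iterator<T> it = inputs.iterator();
        T merged = it.next(); // the first input needs no merging
        while (it.hasNext()) {
            merged = merge.apply(merged, it.next());
        }
        return merged;
    }

    public static void main(String[] args) {
        String merged = mergeAll(Arrays.asList("in1", "in2", "in3"),
                (a, b) -> "(" + a + " + " + b + ")");
        System.out.println(merged); // ((in1 + in2) + in3)
    }
}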

Example 15 with GlobalStreamId

use of org.apache.storm.generated.GlobalStreamId in project flink by apache.

the class BoltWrapper method open.

@Override
public void open() throws Exception {
    super.open();
    this.flinkCollector = new TimestampedCollector<>(this.output);
    GlobalJobParameters config = getExecutionConfig().getGlobalJobParameters();
    StormConfig stormConfig = new StormConfig();
    if (config != null) {
        if (config instanceof StormConfig) {
            stormConfig = (StormConfig) config;
        } else {
            stormConfig.putAll(config.toMap());
        }
    }
    this.topologyContext = WrapperSetupHelper.createTopologyContext(getRuntimeContext(), this.bolt, this.name, this.stormTopology, stormConfig);
    final OutputCollector stormCollector = new OutputCollector(new BoltCollector<OUT>(this.numberOfAttributes, this.topologyContext.getThisTaskId(), this.flinkCollector));
    if (this.stormTopology != null) {
        Map<GlobalStreamId, Grouping> inputs = this.topologyContext.getThisSources();
        for (GlobalStreamId inputStream : inputs.keySet()) {
            for (Integer tid : this.topologyContext.getComponentTasks(inputStream.get_componentId())) {
                this.inputComponentIds.put(tid, inputStream.get_componentId());
                this.inputStreamIds.put(tid, inputStream.get_streamId());
                this.inputSchemas.put(tid, this.topologyContext.getComponentOutputFields(inputStream));
            }
        }
    }
    this.bolt.prepare(stormConfig, this.topologyContext, stormCollector);
}
Also used : StormConfig(org.apache.flink.storm.util.StormConfig) OutputCollector(org.apache.storm.task.OutputCollector) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) Grouping(org.apache.storm.generated.Grouping) GlobalJobParameters(org.apache.flink.api.common.ExecutionConfig.GlobalJobParameters)
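
open() inverts the sources map into per-task lookup tables so that an incoming tuple can be resolved from getSourceTask() alone. A standalone sketch of that inversion, with hypothetical producers and made-up task ids (the real code takes both from the TopologyContext):

import java.util.HashMap;
import java.util.Map;
import org.apache.storm.generated.GlobalStreamId;

public class TaskLookupSketch {
    public static void main(String[] args) {
        // hypothetical upstream layout: which tasks run each producing stream
        Map<GlobalStreamId, int[]> producerTasks = new HashMap<>();
        producerTasks.put(new GlobalStreamId("spout", "default"), new int[] { 1, 2 });
        producerTasks.put(new GlobalStreamId("boltA", "side"), new int[] { 3 });

        // invert into task-id keyed tables, as open() does
        Map<Integer, String> inputComponentIds = new HashMap<>();
        Map<Integer, String> inputStreamIds = new HashMap<>();
        for (Map.Entry<GlobalStreamId, int[]> e : producerTasks.entrySet()) {
            for (int tid : e.getValue()) {
                inputComponentIds.put(tid, e.getKey().get_componentId());
                inputStreamIds.put(tid, e.getKey().get_streamId());
            }
        }

        // a tuple whose getSourceTask() is 2 resolves to spout/default
        System.out.println(inputComponentIds.get(2) + "/" + inputStreamIds.get(2));
    }
}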

Aggregations

GlobalStreamId (org.apache.storm.generated.GlobalStreamId) 37
HashMap (java.util.HashMap) 21
Grouping (org.apache.storm.generated.Grouping) 16
Map (java.util.Map) 13
Test (org.junit.Test) 10
ArrayList (java.util.ArrayList) 8
Bolt (org.apache.storm.generated.Bolt) 7
Tuple (org.apache.storm.tuple.Tuple) 7
StormTopology (org.apache.storm.generated.StormTopology) 6
HashSet (java.util.HashSet) 5
ComponentCommon (org.apache.storm.generated.ComponentCommon) 4
NullStruct (org.apache.storm.generated.NullStruct) 4
SpoutSpec (org.apache.storm.generated.SpoutSpec) 4
StreamInfo (org.apache.storm.generated.StreamInfo) 4
TopologyContext (org.apache.storm.task.TopologyContext) 4
IRichSpout (org.apache.storm.topology.IRichSpout) 4
Fields (org.apache.storm.tuple.Fields) 4
TreeMap (java.util.TreeMap) 3
OutputCollector (org.apache.storm.task.OutputCollector) 3
IRichBolt (org.apache.storm.topology.IRichBolt) 3