Search in sources :

Example 1 with SpoutWrapper

use of org.apache.flink.storm.wrappers.SpoutWrapper in project flink by apache.

the class FlinkTopology method translateTopology.

/**
 * Creates a Flink program that uses the specified spouts and bolts.
 *
 * <p>The translation runs in two phases. First, every spout is wrapped in a {@link SpoutWrapper}
 * and registered as a source on {@code env}; its output streams are recorded in
 * {@code availableInputs}. Second, the remaining bolts are visited repeatedly; a bolt is
 * connected (and removed from {@code bolts}) as soon as all of its input streams are present in
 * {@code availableInputs}.
 *
 * <p>Side effects visible in this method: the bookkeeping maps are cleared and refilled,
 * {@code env}'s parallelism is set to 1, entries are removed from {@code bolts}, and a
 * parallelism hint of 1 is written into the topology's {@code ComponentCommon} for every
 * component that did not declare one.
 *
 * @throws RuntimeException if a complete pass over the remaining bolts connects none of them;
 *         the message lists each such bolt together with its recorded input streams
 */
private void translateTopology() {
    // Start from empty bookkeeping state.
    unprocessdInputsPerBolt.clear();
    outputStreams.clear();
    declarers.clear();
    availableInputs.clear();
    // Storm defaults to parallelism 1
    env.setParallelism(1);
    // Phase 1: register every spout as a Flink source.
    for (final Entry<String, IRichSpout> spout : spouts.entrySet()) {
        final String spoutId = spout.getKey();
        final IRichSpout userSpout = spout.getValue();
        // Let the spout declare its output streams into a fresh declarer and remember both.
        final FlinkOutputFieldsDeclarer declarer = new FlinkOutputFieldsDeclarer();
        userSpout.declareOutputFields(declarer);
        final HashMap<String, Fields> sourceStreams = declarer.outputStreams;
        this.outputStreams.put(spoutId, sourceStreams);
        declarers.put(spoutId, declarer);
        // NOTE: this local shadows the field this.outputStreams used two statements above.
        final HashMap<String, DataStream<Tuple>> outputStreams = new HashMap<String, DataStream<Tuple>>();
        final DataStreamSource<?> source;
        if (sourceStreams.size() == 1) {
            // Exactly one declared stream: the source is typed with that stream's output type.
            final SpoutWrapper<Tuple> spoutWrapperSingleOutput = new SpoutWrapper<Tuple>(userSpout, spoutId, null, null);
            spoutWrapperSingleOutput.setStormTopology(stormTopology);
            // The key set has exactly one element here; take it as the stream id.
            final String outputStreamId = (String) sourceStreams.keySet().toArray()[0];
            DataStreamSource<Tuple> src = env.addSource(spoutWrapperSingleOutput, spoutId, declarer.getOutputType(outputStreamId));
            outputStreams.put(outputStreamId, src);
            source = src;
        } else {
            // Any other number of declared streams (size != 1): the source emits SplitStreamType
            // records, which are split with a StormStreamSelector and then mapped per stream id.
            final SpoutWrapper<SplitStreamType<Tuple>> spoutWrapperMultipleOutputs = new SpoutWrapper<SplitStreamType<Tuple>>(userSpout, spoutId, null, null);
            spoutWrapperMultipleOutputs.setStormTopology(stormTopology);
            @SuppressWarnings({ "unchecked", "rawtypes" }) DataStreamSource<SplitStreamType<Tuple>> multiSource = env.addSource(spoutWrapperMultipleOutputs, spoutId, (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class));
            SplitStream<SplitStreamType<Tuple>> splitSource = multiSource.split(new StormStreamSelector<Tuple>());
            for (String streamId : sourceStreams.keySet()) {
                // SplitStreamMapper presumably unwraps SplitStreamType back to Tuple - confirm in its source.
                SingleOutputStreamOperator<Tuple> outStream = splitSource.select(streamId).map(new SplitStreamMapper<Tuple>());
                // The output type is set explicitly from the declarer for this stream id.
                outStream.getTransformation().setOutputType(declarer.getOutputType(streamId));
                outputStreams.put(streamId, outStream);
            }
            source = multiSource;
        }
        // Publish this spout's streams so that bolts can consume them in phase 2.
        availableInputs.put(spoutId, outputStreams);
        final ComponentCommon common = stormTopology.get_spouts().get(spoutId).get_common();
        if (common.is_set_parallelism_hint()) {
            int dop = common.get_parallelism_hint();
            source.setParallelism(dop);
        } else {
            // No hint declared: record 1 in the topology itself (this mutates stormTopology).
            common.set_parallelism_hint(1);
        }
    }
    /*
     * Phase 2:
     * 1. Connect all spout streams with bolt streams.
     * 2. Then proceed with the bolt streams that are already connected.
     *
     * Because we do not know the order in which an iterator steps over a set, we might process
     * a consumer before its producer, so the pass over the bolts may need to be repeated
     * several times.
     */
    boolean makeProgress = true;
    while (bolts.size() > 0) {
        if (!makeProgress) {
            // The previous pass connected nothing, so another pass cannot either: report
            // every remaining bolt with the inputs recorded for it and abort.
            StringBuilder strBld = new StringBuilder();
            strBld.append("Unable to build Topology. Could not connect the following bolts:");
            for (String boltId : bolts.keySet()) {
                strBld.append("\n  ");
                strBld.append(boltId);
                strBld.append(": missing input streams [");
                // Every remaining bolt was visited in the previous pass, so an entry exists here.
                for (Entry<GlobalStreamId, Grouping> streams : unprocessdInputsPerBolt.get(boltId)) {
                    strBld.append("'");
                    strBld.append(streams.getKey().get_streamId());
                    strBld.append("' from '");
                    strBld.append(streams.getKey().get_componentId());
                    strBld.append("'; ");
                }
                strBld.append("]");
            }
            throw new RuntimeException(strBld.toString());
        }
        makeProgress = false;
        // An explicit iterator is used so that connected bolts can be removed while iterating.
        final Iterator<Entry<String, IRichBolt>> boltsIterator = bolts.entrySet().iterator();
        while (boltsIterator.hasNext()) {
            final Entry<String, IRichBolt> bolt = boltsIterator.next();
            final String boltId = bolt.getKey();
            // NOTE(review): copyObject runs on every visit, before it is known whether the bolt
            // can be connected in this pass; its semantics are defined outside this method.
            final IRichBolt userBolt = copyObject(bolt.getValue());
            final ComponentCommon common = stormTopology.get_bolts().get(boltId).get_common();
            // Lazily record the bolt's declared inputs the first time the bolt is visited.
            Set<Entry<GlobalStreamId, Grouping>> unprocessedBoltInputs = unprocessdInputsPerBolt.get(boltId);
            if (unprocessedBoltInputs == null) {
                unprocessedBoltInputs = new HashSet<>();
                unprocessedBoltInputs.addAll(common.get_inputs().entrySet());
                unprocessdInputsPerBolt.put(boltId, unprocessedBoltInputs);
            }
            // check if all inputs are available
            final int numberOfInputs = unprocessedBoltInputs.size();
            int inputsAvailable = 0;
            for (Entry<GlobalStreamId, Grouping> entry : unprocessedBoltInputs) {
                final String producerId = entry.getKey().get_componentId();
                final String streamId = entry.getKey().get_streamId();
                // An input counts as available once its producer has published that stream id.
                final HashMap<String, DataStream<Tuple>> streams = availableInputs.get(producerId);
                if (streams != null && streams.get(streamId) != null) {
                    inputsAvailable++;
                }
            }
            if (inputsAvailable != numberOfInputs) {
                // traverse other bolts first until inputs are available
                continue;
            } else {
                // All inputs exist: this bolt is connected now and leaves the pending set.
                makeProgress = true;
                boltsIterator.remove();
            }
            // Build one input stream per declared input via processInput (defined elsewhere).
            final Map<GlobalStreamId, DataStream<Tuple>> inputStreams = new HashMap<>(numberOfInputs);
            for (Entry<GlobalStreamId, Grouping> input : unprocessedBoltInputs) {
                final GlobalStreamId streamId = input.getKey();
                final Grouping grouping = input.getValue();
                final String producerId = streamId.get_componentId();
                final Map<String, DataStream<Tuple>> producer = availableInputs.get(producerId);
                inputStreams.put(streamId, processInput(boltId, userBolt, streamId, grouping, producer));
            }
            // createOutput (defined elsewhere) yields the bolt's operator; presumably it also
            // publishes the bolt's output streams to availableInputs - confirm in its source.
            final SingleOutputStreamOperator<?> outputStream = createOutput(boltId, userBolt, inputStreams);
            if (common.is_set_parallelism_hint()) {
                int dop = common.get_parallelism_hint();
                outputStream.setParallelism(dop);
            } else {
                // No hint declared: record 1 in the topology itself (this mutates stormTopology).
                common.set_parallelism_hint(1);
            }
        }
    }
}
Also used : SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) HashMap(java.util.HashMap) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Entry(java.util.Map.Entry) ComponentCommon(org.apache.storm.generated.ComponentCommon) IRichBolt(org.apache.storm.topology.IRichBolt) Grouping(org.apache.storm.generated.Grouping) IRichSpout(org.apache.storm.topology.IRichSpout) Fields(org.apache.storm.tuple.Fields) GlobalStreamId(org.apache.storm.generated.GlobalStreamId) StormTuple(org.apache.flink.storm.wrappers.StormTuple) Tuple(org.apache.flink.api.java.tuple.Tuple) SplitStreamType(org.apache.flink.storm.util.SplitStreamType)

Example 2 with SpoutWrapper

use of org.apache.flink.storm.wrappers.SpoutWrapper in project flink by apache.

the class SpoutSourceWordCount method getTextDataStream.

/**
 * Builds the word-count input stream from a Storm spout wrapped in a {@link SpoutWrapper}.
 *
 * <p>When {@code fileOutput} is set, the last {@code ':'}-separated segment of {@code textPath}
 * is handed to a {@code WordCountFileSpout}; otherwise a {@code WordCountInMemorySpout} is used.
 * In both cases the wrapper is created for the default stream id with {@code -1} as its third
 * argument (NOTE(review): see the SpoutWrapper documentation for the meaning of a negative
 * value) and the resulting source runs with parallelism 1.
 *
 * @param env the execution environment the source is added to
 * @return the source stream of {@code String} records
 */
private static DataStream<String> getTextDataStream(final StreamExecutionEnvironment env) {
    final SpoutWrapper<String> wordSource;
    if (fileOutput) {
        // read the text file from given input path; only the segment after the last ':' is used
        final String[] pathParts = textPath.split(":");
        final String localFile = pathParts[pathParts.length - 1];
        wordSource = new SpoutWrapper<String>(new WordCountFileSpout(localFile), new String[] { Utils.DEFAULT_STREAM_ID }, -1);
    } else {
        wordSource = new SpoutWrapper<String>(new WordCountInMemorySpout(), new String[] { Utils.DEFAULT_STREAM_ID }, -1);
    }
    return env.addSource(wordSource, TypeExtractor.getForClass(String.class)).setParallelism(1);
}
Also used : WordCountFileSpout(org.apache.flink.storm.wordcount.operators.WordCountFileSpout) SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) WordCountInMemorySpout(org.apache.flink.storm.wordcount.operators.WordCountInMemorySpout)

Example 3 with SpoutWrapper

use of org.apache.flink.storm.wrappers.SpoutWrapper in project flink by apache.

the class ExclamationWithSpout method getTextDataStream.

/**
 * Builds the input stream for the exclamation example from a finite Storm spout wrapped in a
 * {@link SpoutWrapper}.
 *
 * <p>When {@code fileOutput} is set, the last {@code ':'}-separated segment of {@code textPath}
 * is stored under {@code FiniteFileSpout.INPUT_FILE_PATH} in a {@code StormConfig}, which is
 * installed as the global job parameters of {@code env} before a {@code FiniteFileSpout} is
 * wrapped. Otherwise a {@code FiniteInMemorySpout} over {@code WordCountData.WORDS} is wrapped.
 * The resulting source runs with parallelism 1.
 *
 * @param env the execution environment the source is added to (its config is modified in the
 *        file-based case)
 * @return the source stream of {@code String} records
 */
private static DataStream<String> getTextDataStream(final StreamExecutionEnvironment env) {
    final SpoutWrapper<String> textSource;
    if (!fileOutput) {
        textSource = new SpoutWrapper<String>(new FiniteInMemorySpout(WordCountData.WORDS), new String[] { Utils.DEFAULT_STREAM_ID });
    } else {
        // only the segment after the last ':' of the configured path is used
        final String[] pathParts = textPath.split(":");
        final String inputFile = pathParts[pathParts.length - 1];
        // set Storm configuration
        StormConfig stormConf = new StormConfig();
        stormConf.put(FiniteFileSpout.INPUT_FILE_PATH, inputFile);
        env.getConfig().setGlobalJobParameters(stormConf);
        textSource = new SpoutWrapper<String>(new FiniteFileSpout(), new String[] { Utils.DEFAULT_STREAM_ID });
    }
    return env.addSource(textSource, TypeExtractor.getForClass(String.class)).setParallelism(1);
}
Also used : FiniteInMemorySpout(org.apache.flink.storm.util.FiniteInMemorySpout) StormConfig(org.apache.flink.storm.util.StormConfig) SpoutWrapper(org.apache.flink.storm.wrappers.SpoutWrapper) FiniteFileSpout(org.apache.flink.storm.util.FiniteFileSpout)

Aggregations

SpoutWrapper (org.apache.flink.storm.wrappers.SpoutWrapper)3 HashMap (java.util.HashMap)1 Entry (java.util.Map.Entry)1 Tuple (org.apache.flink.api.java.tuple.Tuple)1 FiniteFileSpout (org.apache.flink.storm.util.FiniteFileSpout)1 FiniteInMemorySpout (org.apache.flink.storm.util.FiniteInMemorySpout)1 SplitStreamType (org.apache.flink.storm.util.SplitStreamType)1 StormConfig (org.apache.flink.storm.util.StormConfig)1 WordCountFileSpout (org.apache.flink.storm.wordcount.operators.WordCountFileSpout)1 WordCountInMemorySpout (org.apache.flink.storm.wordcount.operators.WordCountInMemorySpout)1 StormTuple (org.apache.flink.storm.wrappers.StormTuple)1 DataStream (org.apache.flink.streaming.api.datastream.DataStream)1 ComponentCommon (org.apache.storm.generated.ComponentCommon)1 GlobalStreamId (org.apache.storm.generated.GlobalStreamId)1 Grouping (org.apache.storm.generated.Grouping)1 IRichBolt (org.apache.storm.topology.IRichBolt)1 IRichSpout (org.apache.storm.topology.IRichSpout)1 Fields (org.apache.storm.tuple.Fields)1