
Example 1 with SplitStreamMapper

Use of org.apache.flink.storm.util.SplitStreamMapper in project flink by apache.

From class SpoutSplitExample, method main.

// *************************************************************************
// PROGRAM
// *************************************************************************
public static void main(final String[] args) throws Exception {
    boolean useFile = SpoutSplitExample.parseParameters(args);
    // set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // the spout declares two raw output streams (even and odd numbers)
    String[] rawOutputs = new String[] { RandomSpout.EVEN_STREAM, RandomSpout.ODD_STREAM };
    final DataStream<SplitStreamType<Integer>> numbers = env.addSource(
            new SpoutWrapper<SplitStreamType<Integer>>(new RandomSpout(true, seed), rawOutputs, 1000),
            TypeExtractor.getForObject(new SplitStreamType<Integer>()));
    // route records by the Storm stream id embedded in SplitStreamType
    SplitStream<SplitStreamType<Integer>> splitStream = numbers.split(new StormStreamSelector<Integer>());
    DataStream<SplitStreamType<Integer>> evenStream = splitStream.select(RandomSpout.EVEN_STREAM);
    DataStream<SplitStreamType<Integer>> oddStream = splitStream.select(RandomSpout.ODD_STREAM);
    // unwrap the SplitStreamType envelope before further processing
    DataStream<Tuple2<String, Integer>> evenResult = evenStream.map(new SplitStreamMapper<Integer>())
            .returns(Integer.class).map(new Enrich(true));
    DataStream<Tuple2<String, Integer>> oddResult = oddStream.map(new SplitStreamMapper<Integer>())
            .transform("oddBolt", TypeExtractor.getForObject(new Tuple2<String, Integer>("", 0)),
                    new BoltWrapper<Integer, Tuple2<String, Integer>>(new VerifyAndEnrichBolt(false)));
    if (useFile) {
        evenResult.writeAsText(outputPath + "/even");
        oddResult.writeAsText(outputPath + "/odd");
    } else {
        evenResult.print();
        oddResult.print();
    }
    // execute program
    env.execute("Spout split stream example");
}
Also used: RandomSpout (org.apache.flink.storm.split.operators.RandomSpout), Tuple2 (org.apache.flink.api.java.tuple.Tuple2), SplitStreamMapper (org.apache.flink.storm.util.SplitStreamMapper), StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment), VerifyAndEnrichBolt (org.apache.flink.storm.split.operators.VerifyAndEnrichBolt), SplitStreamType (org.apache.flink.storm.util.SplitStreamType)
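
For context, SplitStreamMapper serves one purpose in this example: it strips the SplitStreamType envelope so that downstream operators see plain Integer values again. The snippet below is a minimal sketch of such an unwrapping mapper, not the flink-storm implementation itself; it assumes SplitStreamType<T> exposes its payload as a public value field, and the class name UnwrapSplitStream is hypothetical.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.storm.util.SplitStreamType;

// Sketch of an unwrapping mapper in the spirit of SplitStreamMapper.
// Assumption: SplitStreamType<T> carries its payload in a public field named `value`.
public class UnwrapSplitStream<T> implements MapFunction<SplitStreamType<T>, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T map(final SplitStreamType<T> wrapped) throws Exception {
        // drop the stream-id envelope and forward the raw payload
        return wrapped.value;
    }
}

Because such a mapper is generic, Flink cannot infer its output type from the function alone, which is why Example 1 chains .returns(Integer.class) directly after the map call.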

Example 2 with SplitStreamMapper

Use of org.apache.flink.storm.util.SplitStreamMapper in project flink by apache.

From class FlinkTopology, method createOutput.

@SuppressWarnings({ "unchecked", "rawtypes" })
private SingleOutputStreamOperator<?> createOutput(String boltId, IRichBolt bolt, Map<GlobalStreamId, DataStream<Tuple>> inputStreams) {
    assert (boltId != null);
    assert (bolt != null);
    assert (inputStreams != null);
    Iterator<Entry<GlobalStreamId, DataStream<Tuple>>> iterator = inputStreams.entrySet().iterator();
    Entry<GlobalStreamId, DataStream<Tuple>> input1 = iterator.next();
    GlobalStreamId streamId1 = input1.getKey();
    String inputStreamId1 = streamId1.get_streamId();
    String inputComponentId1 = streamId1.get_componentId();
    Fields inputSchema1 = this.outputStreams.get(inputComponentId1).get(inputStreamId1);
    DataStream<Tuple> singleInputStream = input1.getValue();
    DataStream<StormTuple<Tuple>> mergedInputStream = null;
    // merge any additional inputs into a single stream of StormTuple records
    while (iterator.hasNext()) {
        Entry<GlobalStreamId, DataStream<Tuple>> input2 = iterator.next();
        GlobalStreamId streamId2 = input2.getKey();
        DataStream<Tuple> inputStream2 = input2.getValue();
        if (mergedInputStream == null) {
            mergedInputStream = singleInputStream.connect(inputStream2)
                    .flatMap(new TwoFlinkStreamsMerger(streamId1, inputSchema1, streamId2,
                            this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId())))
                    .returns(StormTuple.class);
        } else {
            mergedInputStream = mergedInputStream.connect(inputStream2)
                    .flatMap(new StormFlinkStreamMerger(streamId2,
                            this.outputStreams.get(streamId2.get_componentId()).get(streamId2.get_streamId())))
                    .returns(StormTuple.class);
        }
    }
    final HashMap<String, Fields> boltOutputs = this.outputStreams.get(boltId);
    final FlinkOutputFieldsDeclarer declarer = this.declarers.get(boltId);
    final SingleOutputStreamOperator<?> outputStream;
    if (boltOutputs.size() < 2) {
        // single output stream or sink
        String outputStreamId;
        if (boltOutputs.size() == 1) {
            outputStreamId = (String) boltOutputs.keySet().toArray()[0];
        } else {
            outputStreamId = null;
        }
        final TypeInformation<Tuple> outType = declarer.getOutputType(outputStreamId);
        final SingleOutputStreamOperator<Tuple> outStream;
        // only one input
        if (inputStreams.entrySet().size() == 1) {
            BoltWrapper<Tuple, Tuple> boltWrapper = new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = singleInputStream.transform(boltId, outType, boltWrapper);
        } else {
            MergedInputsBoltWrapper<Tuple, Tuple> boltWrapper = new MergedInputsBoltWrapper<Tuple, Tuple>(bolt, boltId, null);
            boltWrapper.setStormTopology(stormTopology);
            outStream = mergedInputStream.transform(boltId, outType, boltWrapper);
        }
        if (outType != null) {
            // only for non-sink nodes
            final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
            op.put(outputStreamId, outStream);
            availableInputs.put(boltId, op);
        }
        outputStream = outStream;
    } else {
        // multiple declared output streams: emit SplitStreamType records and split them afterwards
        final TypeInformation<SplitStreamType<Tuple>> outType =
                (TypeInformation) TypeExtractor.getForClass(SplitStreamType.class);
        final SingleOutputStreamOperator<SplitStreamType<Tuple>> multiStream;
        // only one input
        if (inputStreams.entrySet().size() == 1) {
            final BoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
                    new BoltWrapper<>(bolt, boltId, inputStreamId1, inputComponentId1, inputSchema1, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = singleInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        } else {
            final MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>> boltWrapperMultipleOutputs =
                    new MergedInputsBoltWrapper<Tuple, SplitStreamType<Tuple>>(bolt, boltId, null);
            boltWrapperMultipleOutputs.setStormTopology(stormTopology);
            multiStream = mergedInputStream.transform(boltId, outType, boltWrapperMultipleOutputs);
        }
        final SplitStream<SplitStreamType<Tuple>> splitStream = multiStream.split(new StormStreamSelector<Tuple>());
        final HashMap<String, DataStream<Tuple>> op = new HashMap<>();
        for (String outputStreamId : boltOutputs.keySet()) {
            // select each declared stream, unwrap the SplitStreamType envelope,
            // and set the declared output type explicitly (it cannot be inferred from the generic mapper)
            SingleOutputStreamOperator<Tuple> outStream =
                    splitStream.select(outputStreamId).map(new SplitStreamMapper<Tuple>());
            outStream.getTransformation().setOutputType(declarer.getOutputType(outputStreamId));
            op.put(outputStreamId, outStream);
        }
        availableInputs.put(boltId, op);
        outputStream = multiStream;
    }
    return outputStream;
}
Also used: DataStream (org.apache.flink.streaming.api.datastream.DataStream), HashMap (java.util.HashMap), TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation), Entry (java.util.Map.Entry), BoltWrapper (org.apache.flink.storm.wrappers.BoltWrapper), MergedInputsBoltWrapper (org.apache.flink.storm.wrappers.MergedInputsBoltWrapper), StormTuple (org.apache.flink.storm.wrappers.StormTuple), Fields (org.apache.storm.tuple.Fields), GlobalStreamId (org.apache.storm.generated.GlobalStreamId), SplitStreamMapper (org.apache.flink.storm.util.SplitStreamMapper), Tuple (org.apache.flink.api.java.tuple.Tuple), SplitStreamType (org.apache.flink.storm.util.SplitStreamType)
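
The multi-output branch of createOutput repeats one pattern per declared stream: split by Storm stream id, select the sub-stream, unwrap it with SplitStreamMapper, and fix the output type. The helper below is a hypothetical, self-contained sketch of that routing step, not part of FlinkTopology; SplitStreamRouting, selectAndUnwrap, and the payloadType parameter are invented for illustration, it assumes StormStreamSelector lives in org.apache.flink.storm.util, and it targets the older DataStream.split/select API used in these examples.

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.storm.util.SplitStreamMapper;
import org.apache.flink.storm.util.SplitStreamType;
import org.apache.flink.storm.util.StormStreamSelector;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SplitStream;

public final class SplitStreamRouting {

    private SplitStreamRouting() {
    }

    // Routes each declared Storm output stream to its own unwrapped DataStream.
    // `payloadType` stands in for the per-stream type information that
    // FlinkTopology obtains from its FlinkOutputFieldsDeclarer.
    public static <T> Map<String, DataStream<T>> selectAndUnwrap(
            final DataStream<SplitStreamType<T>> multiStream,
            final Set<String> declaredStreamIds,
            final TypeInformation<T> payloadType) {

        final SplitStream<SplitStreamType<T>> split =
                multiStream.split(new StormStreamSelector<T>());

        final Map<String, DataStream<T>> byStreamId = new HashMap<>();
        for (final String streamId : declaredStreamIds) {
            // select the sub-stream, strip the SplitStreamType envelope,
            // and declare the payload type explicitly (it cannot be inferred from the generic mapper)
            byStreamId.put(streamId,
                    split.select(streamId)
                            .map(new SplitStreamMapper<T>())
                            .returns(payloadType));
        }
        return byStreamId;
    }
}

In FlinkTopology itself the per-stream type comes from declarer.getOutputType(outputStreamId) and is applied via setOutputType on the transformation, which has the same effect as the explicit returns call in this sketch.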

Aggregations

SplitStreamMapper (org.apache.flink.storm.util.SplitStreamMapper): 2 uses
SplitStreamType (org.apache.flink.storm.util.SplitStreamType): 2 uses
HashMap (java.util.HashMap): 1 use
Entry (java.util.Map.Entry): 1 use
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation): 1 use
Tuple (org.apache.flink.api.java.tuple.Tuple): 1 use
Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1 use
RandomSpout (org.apache.flink.storm.split.operators.RandomSpout): 1 use
VerifyAndEnrichBolt (org.apache.flink.storm.split.operators.VerifyAndEnrichBolt): 1 use
BoltWrapper (org.apache.flink.storm.wrappers.BoltWrapper): 1 use
MergedInputsBoltWrapper (org.apache.flink.storm.wrappers.MergedInputsBoltWrapper): 1 use
StormTuple (org.apache.flink.storm.wrappers.StormTuple): 1 use
DataStream (org.apache.flink.streaming.api.datastream.DataStream): 1 use
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 1 use
GlobalStreamId (org.apache.storm.generated.GlobalStreamId): 1 use
Fields (org.apache.storm.tuple.Fields): 1 use