Example 36 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

Source: class BroadcastStateITCase, method testKeyedWithBroadcastTranslation.

@Test
public void testKeyedWithBroadcastTranslation() throws Exception {
    final MapStateDescriptor<Long, String> utterDescriptor = new MapStateDescriptor<>("broadcast-state", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    final Map<Long, String> expected = new HashMap<>();
    expected.put(0L, "test:0");
    expected.put(1L, "test:1");
    expected.put(2L, "test:2");
    expected.put(3L, "test:3");
    expected.put(4L, "test:4");
    expected.put(5L, "test:5");
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Long> srcOne = env.generateSequence(0L, 5L).assignTimestampsAndWatermarks(new CustomWmEmitter<Long>() {

        private static final long serialVersionUID = -8500904795760316195L;

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return element;
        }
    }).keyBy((KeySelector<Long, Long>) value -> value);
    final DataStream<String> srcTwo = env.fromCollection(expected.values()).assignTimestampsAndWatermarks(new CustomWmEmitter<String>() {

        private static final long serialVersionUID = -2148318224248467213L;

        @Override
        public long extractTimestamp(String element, long previousElementTimestamp) {
            return Long.parseLong(element.split(":")[1]);
        }
    });
    final BroadcastStream<String> broadcast = srcTwo.broadcast(utterDescriptor);
    // the timestamp should be high enough to trigger the timer after all the elements arrive.
    final DataStream<String> output = srcOne.connect(broadcast).process(new TestKeyedBroadcastProcessFunction(100000L, expected));
    output.addSink(new TestSink(expected.size())).setParallelism(1);
    env.execute();
}
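TestKeyedBroadcastProcessFunction and TestSink are defined elsewhere in BroadcastStateITCase and are not part of this excerpt. The following is only a minimal sketch of the KeyedBroadcastProcessFunction API the test builds on, assuming the same "broadcast-state" descriptor and "test:<n>" value format as above; it is an illustration, not the actual test function.

public class SketchKeyedBroadcastFunction extends KeyedBroadcastProcessFunction<Long, Long, String, String> {

    // same descriptor as the one used to create the BroadcastStream above
    private final MapStateDescriptor<Long, String> descriptor = new MapStateDescriptor<>("broadcast-state", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    @Override
    public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
        // broadcast side: every parallel instance sees the element and may update broadcast state
        ctx.getBroadcastState(descriptor).put(Long.parseLong(value.split(":")[1]), value);
    }

    @Override
    public void processElement(Long value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
        // keyed side: broadcast state is read-only here; defer output to an event-time timer
        ctx.timerService().registerEventTimeTimer(100000L);
    }

    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        // once the timer fires, emit whatever was broadcast for the current key
        String entry = ctx.getBroadcastState(descriptor).get(ctx.getCurrentKey());
        if (entry != null) {
            out.collect(entry);
        }
    }
}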

Example 37 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

Source: class SortingBoundedInputITCase, method testThreeInputOperator.

@Test
public void testThreeInputOperator() {
    long numberOfRecords = 500_000;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    KeyedStream<Tuple2<Integer, byte[]>, Object> elements1 = env.fromParallelCollection(new InputGenerator(numberOfRecords), new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)).keyBy(el -> el.f0);
    KeyedStream<Tuple2<Integer, byte[]>, Object> elements2 = env.fromParallelCollection(new InputGenerator(numberOfRecords), new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)).keyBy(el -> el.f0);
    KeyedStream<Tuple2<Integer, byte[]>, Object> elements3 = env.fromParallelCollection(new InputGenerator(numberOfRecords), new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)).keyBy(el -> el.f0);
    KeyedMultipleInputTransformation<Long> assertingTransformation = new KeyedMultipleInputTransformation<>("Asserting operator", new AssertingThreeInputOperatorFactory(), BasicTypeInfo.LONG_TYPE_INFO, -1, BasicTypeInfo.INT_TYPE_INFO);
    assertingTransformation.addInput(elements1.getTransformation(), elements1.getKeySelector());
    assertingTransformation.addInput(elements2.getTransformation(), elements2.getKeySelector());
    assertingTransformation.addInput(elements3.getTransformation(), elements3.getKeySelector());
    env.addOperator(assertingTransformation);
    DataStream<Long> counts = new DataStream<>(env, assertingTransformation);
    long sum = CollectionUtil.iteratorToList(DataStreamUtils.collect(counts)).stream().mapToLong(l -> l).sum();
    assertThat(sum, equalTo(numberOfRecords * 3));
}
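The test requests batch execution by writing ExecutionOptions.RUNTIME_MODE into a Configuration and handing it to env.configure(...); the KeyedMultipleInputTransformation then wires the three keyed inputs into a single multi-input operator, with the -1 parallelism meaning "use the default" (ExecutionConfig.PARALLELISM_DEFAULT). A shorter route to the same batch setting, sketched below, is the setRuntimeMode(...) shortcut on StreamExecutionEnvironment (assuming a Flink version that ships RuntimeExecutionMode):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// equivalent to config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH) followed by env.configure(config, classLoader)
env.setRuntimeMode(RuntimeExecutionMode.BATCH);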

Example 38 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

Source: class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.

@Test
public void testBatchExecutionWithTimersOneInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(
            Tuple2.of(1, 3),
            Tuple2.of(1, 1),
            Tuple2.of(2, 1),
            Tuple2.of(1, 4),
            // late element
            Tuple2.of(2, 3),
            // late element
            Tuple2.of(1, 2),
            Tuple2.of(1, 13),
            Tuple2.of(1, 11),
            Tuple2.of(2, 14),
            // late element
            Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy);
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements.map(element -> element.f0).keyBy(element -> element).process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        private MapState<Long, Integer> countState;

        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                ctx.output(lateElements, value);
            } else {
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go into an infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 4), Tuple3.of(20L, 1, 3), Tuple3.of(10L, 2, 2), Tuple3.of(20L, 2, 1))));
}
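The asserted sums follow directly from the rounding in processElement: each timestamp is bucketed into the next multiple of 10, so key 1's timestamps 3, 1, 4, 2 fall into bucket 10 and 13, 11, 11 into bucket 20, while key 2's timestamps 1, 3 fall into bucket 10 and 14 into bucket 20. Nothing reaches the late-element side output because in BATCH execution mode the keyed input is sorted and a watermark is only emitted at the end of the input, which is exactly what the assertTrue on the empty late-record list checks. A quick stand-alone replay of that arithmetic (plain Java, not part of the test; uses java.util.Map and java.util.TreeMap):

static Map<Long, Integer> bucketCounts(long... timestamps) {
    Map<Long, Integer> buckets = new TreeMap<>();
    for (long t : timestamps) {
        // same rounding as processElement: 3 -> 10, 13 -> 20
        long bucket = ((t + 10) / 10) * 10;
        buckets.merge(bucket, 1, Integer::sum);
    }
    return buckets;
}
// bucketCounts(3, 1, 4, 2, 13, 11, 11) -> {10=4, 20=3}, i.e. (10,1,4) and (20,1,3)
// bucketCounts(1, 3, 14)               -> {10=2, 20=1}, i.e. (10,2,2) and (20,2,1)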

Example 39 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

Source: class SortingBoundedInputITCase, method testBatchExecutionWithTimersTwoInput.

@Test
public void testBatchExecutionWithTimersTwoInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
    SingleOutputStreamOperator<Integer> elements1 = env.fromElements(
            Tuple2.of(1, 3),
            Tuple2.of(1, 1),
            Tuple2.of(2, 1),
            Tuple2.of(1, 4),
            // late element
            Tuple2.of(2, 3),
            // late element
            Tuple2.of(1, 2),
            Tuple2.of(1, 13),
            Tuple2.of(1, 11),
            Tuple2.of(2, 14),
            // late element
            Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
    SingleOutputStreamOperator<Integer> elements2 = env.fromElements(
            Tuple2.of(1, 3),
            Tuple2.of(1, 1),
            Tuple2.of(2, 1),
            Tuple2.of(1, 4),
            // late element
            Tuple2.of(2, 3),
            // late element
            Tuple2.of(1, 2),
            Tuple2.of(1, 13),
            Tuple2.of(1, 11),
            Tuple2.of(2, 14),
            // late element
            Tuple2.of(1, 11))
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements1.connect(elements2).keyBy(element -> element, element -> element).process(new KeyedCoProcessFunction<Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        private MapState<Long, Integer> countState;

        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement1(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            processElement(value, ctx);
        }

        @Override
        public void processElement2(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            processElement(value, ctx);
        }

        private void processElement(Integer value, Context ctx) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                ctx.output(lateElements, value);
            } else {
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go into an infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 8), Tuple3.of(20L, 1, 6), Tuple3.of(10L, 2, 4), Tuple3.of(20L, 2, 2))));
}

Example 40 with DataStream

Use of org.apache.flink.streaming.api.datastream.DataStream in project flink by apache.

Source: class SourceNAryInputChainingITCase, method createProgramWithMultipleUnionInputs.

/**
 * Creates a DataStream program as shown below.
 *
 * <pre>
 *                                   +--------------+
 *             (src 1) --> (map) --> |              |
 *                                   |              |
 *           (src 2) --+             |              |
 *                     +-- UNION --> |              |
 *           (src 3) --+             |    N-Ary     |
 *                                   |   Operator   |
 *   (src 4) -> (map) --+            |              |
 *                      +-- UNION -> |              |
 *   (src 5) -> (map) --+            |              |
 *                                   |              |
 *                       (src 6) --> |              |
 *                                   +--------------+
 * </pre>
 */
private DataStream<Long> createProgramWithMultipleUnionInputs() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().enableObjectReuse();
    final DataStream<Long> source1 = env.fromSource(new NumberSequenceSource(1L, 10L), WatermarkStrategy.noWatermarks(), "source-1");
    final DataStream<Long> source2 = env.fromSource(new NumberSequenceSource(11L, 20L), WatermarkStrategy.noWatermarks(), "source-2");
    final DataStream<Long> source3 = env.fromSource(new NumberSequenceSource(21L, 30L), WatermarkStrategy.noWatermarks(), "source-3");
    final DataStream<Long> source4 = env.fromSource(new NumberSequenceSource(31L, 40L), WatermarkStrategy.noWatermarks(), "source-4");
    final DataStream<Long> source5 = env.fromSource(new NumberSequenceSource(41L, 50L), WatermarkStrategy.noWatermarks(), "source-5");
    final DataStream<Long> source6 = env.fromSource(new NumberSequenceSource(51L, 60L), WatermarkStrategy.noWatermarks(), "source-6");
    return nAryInputStreamOperation(source1.map((v) -> v), source2.union(source3), source4.map((v) -> v).union(source5.map((v) -> v)), source6);
}
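The nAryInputStreamOperation helper is not part of this excerpt. Below is a hedged sketch of how such a helper can be wired with the low-level MultipleInputTransformation API; the StreamOperatorFactory parameter stands in for the test's own factory (an implementation of MultipleInputStreamOperator), so treat the parameter list, names, and chaining strategy as assumptions rather than the test's exact code.

private DataStream<Long> nAryInputStreamOperation(StreamExecutionEnvironment env, StreamOperatorFactory<Long> factory, DataStream<?>... inputs) {
    MultipleInputTransformation<Long> transform = new MultipleInputTransformation<>("N-Ary Operator", factory, Types.LONG, env.getParallelism());
    for (DataStream<?> input : inputs) {
        // unlike KeyedMultipleInputTransformation, no key selector is needed for the inputs here
        transform.addInput(input.getTransformation());
    }
    // lets the operator chain directly onto its sources
    transform.setChainingStrategy(ChainingStrategy.HEAD_WITH_SOURCES);
    env.addOperator(transform);
    return new DataStream<>(env, transform);
}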

Aggregations

DataStream (org.apache.flink.streaming.api.datastream.DataStream) 87
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 78
Test (org.junit.Test) 70
List (java.util.List) 62
Collector (org.apache.flink.util.Collector) 60
Tuple2 (org.apache.flink.api.java.tuple.Tuple2) 50
SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) 48
Arrays (java.util.Arrays) 46
ArrayList (java.util.ArrayList) 40
TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation) 40
Assert.assertEquals (org.junit.Assert.assertEquals) 38
WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy) 36
Configuration (org.apache.flink.configuration.Configuration) 36
Assert.assertTrue (org.junit.Assert.assertTrue) 33
BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo) 32
StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator) 32
Types (org.apache.flink.api.common.typeinfo.Types) 31
Assert (org.junit.Assert) 31
ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction) 29
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph) 29