
Example 6 with SingleOutputStreamOperator

Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.

The class SortingBoundedInputITCase, method testOneInputOperator.

@Test
public void testOneInputOperator() {
    long numberOfRecords = 1_000_000;
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    DataStreamSource<Tuple2<Integer, byte[]>> elements = env.fromParallelCollection(new InputGenerator(numberOfRecords), new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    SingleOutputStreamOperator<Long> counts = elements.keyBy(element -> element.f0).transform("Asserting operator", BasicTypeInfo.LONG_TYPE_INFO, new AssertingOperator());
    long sum = CollectionUtil.iteratorToList(DataStreamUtils.collect(counts)).stream().mapToLong(l -> l).sum();
    assertThat(sum, equalTo(numberOfRecords));
}
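
The AssertingOperator used above is a private operator defined elsewhere in SortingBoundedInputITCase and is not shown by this snippet. A minimal sketch, assuming it only verifies that all records for a key arrive grouped together (which is what BATCH mode's input sorting guarantees) and emits its record count when the bounded input ends; field names and the failure message are assumptions:

// Sketch only: fails if a key reappears after another key was started,
// and reports the number of processed records once the input is exhausted.
private static class AssertingOperator extends AbstractStreamOperator<Long>
        implements OneInputStreamOperator<Tuple2<Integer, byte[]>, Long>, BoundedOneInput {

    private final Set<Integer> seenKeys = new HashSet<>();
    private long seenRecords = 0;
    private Integer currentKey = null;

    @Override
    public void processElement(StreamRecord<Tuple2<Integer, byte[]>> element) throws Exception {
        seenRecords++;
        Integer incomingKey = element.getValue().f0;
        if (!Objects.equals(incomingKey, currentKey)) {
            // a key may start a new group only if it has never been seen before
            if (!seenKeys.add(incomingKey)) {
                Assert.fail("Received an out-of-order key: " + incomingKey);
            }
            currentKey = incomingKey;
        }
    }

    @Override
    public void endInput() {
        // the test sums these per-subtask counts and compares the total to numberOfRecords
        output.collect(new StreamRecord<>(seenRecords));
    }
}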

Example 7 with SingleOutputStreamOperator

Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.

The class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.

@Test
public void testBatchExecutionWithTimersOneInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements = env.fromElements(
            Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4),
            Tuple2.of(2, 3), // late element
            Tuple2.of(1, 2), // late element
            Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14),
            Tuple2.of(1, 11)) // late element
        .assignTimestampsAndWatermarks(watermarkStrategy);
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements.map(element -> element.f0).keyBy(element -> element).process(new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        private MapState<Long, Integer> countState;

        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                ctx.output(lateElements, value);
            } else {
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go into an infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 4), Tuple3.of(20L, 1, 3), Tuple3.of(10L, 2, 2), Tuple3.of(20L, 2, 1))));
}
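
The GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP constant is declared elsewhere in the test class. A plausible sketch, assuming it emits a watermark right after the elements with timestamps 4 and 14, which is what would make the elements marked "// late element" late under streaming semantics; in BATCH mode the sorted input sees only a single final watermark, so the late-elements side output stays empty:

// Sketch only: event-driven watermarks after timestamps 4 and 14; no periodic emission.
private static final WatermarkGenerator<Tuple2<Integer, Integer>> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP =
        new WatermarkGenerator<Tuple2<Integer, Integer>>() {

            @Override
            public void onEvent(Tuple2<Integer, Integer> event, long eventTimestamp, WatermarkOutput output) {
                if (eventTimestamp == 4 || eventTimestamp == 14) {
                    output.emitWatermark(new Watermark(eventTimestamp));
                }
            }

            @Override
            public void onPeriodicEmit(WatermarkOutput output) {
                // watermarks are driven purely by events in this sketch
            }
        };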

Example 8 with SingleOutputStreamOperator

Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.

The class SortingBoundedInputITCase, method testBatchExecutionWithTimersTwoInput.

@Test
public void testBatchExecutionWithTimersTwoInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);
    Configuration config = new Configuration();
    config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(config, this.getClass().getClassLoader());
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy = WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP).withTimestampAssigner((r, previousTimestamp) -> r.f1);
    SingleOutputStreamOperator<Integer> elements1 = env.fromElements(
            Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4),
            Tuple2.of(2, 3), // late element
            Tuple2.of(1, 2), // late element
            Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14),
            Tuple2.of(1, 11)) // late element
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
    SingleOutputStreamOperator<Integer> elements2 = env.fromElements(
            Tuple2.of(1, 3), Tuple2.of(1, 1), Tuple2.of(2, 1), Tuple2.of(1, 4),
            Tuple2.of(2, 3), // late element
            Tuple2.of(1, 2), // late element
            Tuple2.of(1, 13), Tuple2.of(1, 11), Tuple2.of(2, 14),
            Tuple2.of(1, 11)) // late element
        .assignTimestampsAndWatermarks(watermarkStrategy)
        .map(element -> element.f0);
    OutputTag<Integer> lateElements = new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);
    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums = elements1.connect(elements2).keyBy(element -> element, element -> element).process(new KeyedCoProcessFunction<Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {

        private MapState<Long, Integer> countState;

        private ValueState<Long> previousTimestampState;

        @Override
        public void open(Configuration parameters) {
            countState = getRuntimeContext().getMapState(new MapStateDescriptor<>("sum", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO));
            previousTimestampState = getRuntimeContext().getState(new ValueStateDescriptor<>("previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO));
        }

        @Override
        public void processElement1(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            processElement(value, ctx);
        }

        @Override
        public void processElement2(Integer value, Context ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            processElement(value, ctx);
        }

        private void processElement(Integer value, Context ctx) throws Exception {
            Long elementTimestamp = ctx.timestamp();
            long nextTen = ((elementTimestamp + 10) / 10) * 10;
            ctx.timerService().registerEventTimeTimer(nextTen);
            if (elementTimestamp < ctx.timerService().currentWatermark()) {
                ctx.output(lateElements, value);
            } else {
                Long previousTimestamp = Optional.ofNullable(previousTimestampState.value()).orElse(0L);
                assertThat(elementTimestamp, greaterThanOrEqualTo(previousTimestamp));
                previousTimestampState.update(elementTimestamp);
                Integer currentCount = Optional.ofNullable(countState.get(nextTen)).orElse(0);
                countState.put(nextTen, currentCount + 1);
            }
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Long, Integer, Integer>> out) throws Exception {
            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), countState.get(timestamp)));
            countState.remove(timestamp);
            // this would go into an infinite loop if we did not quiesce the
            // timer service.
            ctx.timerService().registerEventTimeTimer(timestamp + 1);
        }
    });
    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected = CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));
    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(Arrays.asList(Tuple3.of(10L, 1, 8), Tuple3.of(20L, 1, 6), Tuple3.of(10L, 2, 4), Tuple3.of(20L, 2, 2))));
}
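
The expected sums here are exactly double those of the one-input test, because both connected inputs carry the same ten elements and both processElement1 and processElement2 delegate to the same counting logic. A small, hypothetical stand-alone sketch (not part of the test) that recomputes the expected (window end, key) counts with the same "nextTen" bucketing:

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// Hypothetical illustration: derive the expected counts asserted at the end of the test.
public class ExpectedSumsSketch {
    public static void main(String[] args) {
        List<int[]> elements = Arrays.asList(
                new int[] {1, 3}, new int[] {1, 1}, new int[] {2, 1}, new int[] {1, 4},
                new int[] {2, 3}, new int[] {1, 2}, new int[] {1, 13}, new int[] {1, 11},
                new int[] {2, 14}, new int[] {1, 11});
        Map<String, Integer> counts = new TreeMap<>();
        for (int[] e : elements) {
            long windowEnd = ((e[1] + 10L) / 10) * 10; // same rounding as in the operator above
            // every element is fed into both inputs, hence the increment of 2
            counts.merge("key=" + e[0] + " windowEnd=" + windowEnd, 2, Integer::sum);
        }
        // prints the four expected results: (10, 1, 8), (20, 1, 6), (10, 2, 4), (20, 2, 2)
        counts.forEach((k, v) -> System.out.println(k + " -> " + v));
    }
}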

Example 9 with SingleOutputStreamOperator

Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.

The class StreamOperatorChainingTest, method testMultiChainingWithSplit.

/**
 * Verify that multi-chaining works with object reuse enabled.
 */
private void testMultiChainingWithSplit(StreamExecutionEnvironment env) throws Exception {
    // set parallelism to 2 to avoid chaining with the source when only one processor
    // is available.
    env.setParallelism(2);
    // the actual elements will not be used
    DataStream<Integer> input = env.fromElements(1, 2, 3);
    sink1Results = new ArrayList<>();
    sink2Results = new ArrayList<>();
    sink3Results = new ArrayList<>();
    input = input.map(value -> value);
    OutputTag<Integer> oneOutput = new OutputTag<Integer>("one") {
    };
    OutputTag<Integer> otherOutput = new OutputTag<Integer>("other") {
    };
    SingleOutputStreamOperator<Object> split = input.process(new ProcessFunction<Integer, Object>() {

        private static final long serialVersionUID = 1L;

        @Override
        public void processElement(Integer value, Context ctx, Collector<Object> out) throws Exception {
            if (value.equals(1)) {
                ctx.output(oneOutput, value);
            } else {
                ctx.output(otherOutput, value);
            }
        }
    });
    split.getSideOutput(oneOutput).map(value -> "First 1: " + value).addSink(new SinkFunction<String>() {

        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink1Results.add(value);
        }
    });
    split.getSideOutput(oneOutput).map(value -> "First 2: " + value).addSink(new SinkFunction<String>() {

        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink2Results.add(value);
        }
    });
    split.getSideOutput(otherOutput).map(value -> "Second: " + value).addSink(new SinkFunction<String>() {

        @Override
        public void invoke(String value, Context ctx) throws Exception {
            sink3Results.add(value);
        }
    });
    // we build our own StreamTask and OperatorChain
    JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2);
    JobVertex chainedVertex = jobGraph.getVerticesSortedTopologicallyFromSources().get(1);
    Configuration configuration = chainedVertex.getConfiguration();
    StreamConfig streamConfig = new StreamConfig(configuration);
    StreamMap<Integer, Integer> headOperator = streamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader());
    try (MockEnvironment environment = createMockEnvironment(chainedVertex.getName())) {
        StreamTask<Integer, StreamMap<Integer, Integer>> mockTask = createMockTask(streamConfig, environment);
        OperatorChain<Integer, StreamMap<Integer, Integer>> operatorChain = createOperatorChain(streamConfig, environment, mockTask);
        headOperator.setup(mockTask, streamConfig, operatorChain.getMainOperatorOutput());
        operatorChain.initializeStateAndOpenOperators(null);
        headOperator.processElement(new StreamRecord<>(1));
        headOperator.processElement(new StreamRecord<>(2));
        headOperator.processElement(new StreamRecord<>(3));
        assertThat(sink1Results, contains("First 1: 1"));
        assertThat(sink2Results, contains("First 2: 1"));
        assertThat(sink3Results, contains("Second: 2", "Second: 3"));
    }
}
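
createMockEnvironment, createMockTask and createOperatorChain are private helpers of StreamOperatorChainingTest that this snippet does not include. As a rough sketch under assumptions, createMockEnvironment presumably wraps MockEnvironmentBuilder along these lines; the buffer size and input split provider are guesses, not necessarily the test's actual settings:

// Sketch only (assumed helper): build a MockEnvironment named after the chained vertex.
private MockEnvironment createMockEnvironment(String taskName) {
    return new MockEnvironmentBuilder()
            .setTaskName(taskName)
            .setInputSplitProvider(new MockInputSplitProvider())
            .setBufferSize(1024)
            .build();
}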

Example 10 with SingleOutputStreamOperator

Use of org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator in project flink by apache.

The class DataStreamJavaITCase, method testTableStreamConversionBatch.

@Test
public void testTableStreamConversionBatch() throws Exception {
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStreamSource<Row> streamSource = env.fromElements(Row.of("Alice"), Row.of("alice"), Row.of("lily"), Row.of("Bob"), Row.of("lily"), Row.of("lily"));
    StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env);
    Table sourceTable = tableEnvironment.fromDataStream(streamSource).as("word");
    tableEnvironment.createTemporaryView("tmp_table", sourceTable);
    Table resultTable = tableEnvironment.sqlQuery("select UPPER(word) as word from tmp_table");
    SingleOutputStreamOperator<Tuple2<String, Integer>> resultStream = tableEnvironment.toDataStream(resultTable).map(row -> (String) row.getField("word")).returns(TypeInformation.of(String.class)).map(s -> new Tuple2<>(s, 1)).returns(TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {
    })).keyBy(tuple -> tuple.f0).sum(1);
    testResult(resultStream, new Tuple2<>("ALICE", 2), new Tuple2<>("BOB", 1), new Tuple2<>("LILY", 3));
}
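
testResult is a helper defined elsewhere in DataStreamJavaITCase. A plausible sketch, assuming it collects the stream with executeAndCollect and compares the values ignoring order (the keyed, parallel output has no deterministic order across keys); the signature is a guess:

// Sketch only (assumed helper); assumes static imports of
// org.hamcrest.MatcherAssert.assertThat and org.hamcrest.Matchers.containsInAnyOrder.
@SafeVarargs
private static <T> void testResult(DataStream<T> stream, T... expected) throws Exception {
    try (CloseableIterator<T> iterator = stream.executeAndCollect()) {
        List<T> actual = CollectionUtil.iteratorToList(iterator);
        assertThat(actual, containsInAnyOrder(expected));
    }
}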

Aggregations

SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator): 15
DataStream (org.apache.flink.streaming.api.datastream.DataStream): 14
StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment): 14
Collector (org.apache.flink.util.Collector): 14
Test (org.junit.Test): 14
WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy): 11
Assert.assertTrue (org.junit.Assert.assertTrue): 11
Time (org.apache.flink.streaming.api.windowing.time.Time): 8
List (java.util.List): 7
KeySelector (org.apache.flink.api.java.functions.KeySelector): 7
TumblingEventTimeWindows (org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows): 7
Kryo (com.esotericsoftware.kryo.Kryo): 6
JavaSerializer (com.esotericsoftware.kryo.serializers.JavaSerializer): 6
File (java.io.File): 6
Arrays (java.util.Arrays): 6
ReduceFunction (org.apache.flink.api.common.functions.ReduceFunction): 6
ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor): 6
StateDescriptor (org.apache.flink.api.common.state.StateDescriptor): 6
BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo): 6
TypeSerializer (org.apache.flink.api.common.typeutils.TypeSerializer): 6