Search in sources :

Example 6 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class DataStreamBatchExecutionITCase method batchNonKeyedKeyedTwoInputOperator.

/**
 * Verifies that all regular input is processed before keyed input.
 *
 * <p>Here, the first input is not keyed while the second input is keyed.
 */
@Test
public void batchNonKeyedKeyedTwoInputOperator() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> keyedInput = env.fromElements(Tuple2.of("regular2", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 2), Tuple2.of("regular2", 1)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular4", 4), Tuple2.of("regular3", 3), Tuple2.of("regular3", 2), Tuple2.of("regular4", 1)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<String> result = regularInput.connect(keyedInput.keyBy(in -> in.f0)).transform("operator", BasicTypeInfo.STRING_TYPE_INFO, new TwoInputIdentityOperator());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList("(regular4,4)", "(regular3,3)", "(regular3,2)", "(regular4,1)", "(regular1,2)", "(regular1,3)", "(regular2,1)", "(regular2,4)")));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 7 with CloseableIterator

use of org.apache.flink.util.CloseableIterator in project flink by apache.

the class DataStreamBatchExecutionITCase method batchKeyedBroadcastExecution.

/**
 * Verifies that all broadcast input is processed before keyed input.
 */
@Test
public void batchKeyedBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular2", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4), Tuple2.of("regular1", 3), Tuple2.of("regular2", 5), Tuple2.of("regular1", 5), Tuple2.of("regular2", 3), Tuple2.of("regular1", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);
    DataStream<String> result = regularInput.keyBy((input) -> input.f0).connect(broadcastStream).process(new TestKeyedBroadcastFunction());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList("(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,5): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

CloseableIterator (org.apache.flink.util.CloseableIterator)7 List (java.util.List)5 Arrays (java.util.Arrays)4 RuntimeExecutionMode (org.apache.flink.api.common.RuntimeExecutionMode)4 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)4 RichMapFunction (org.apache.flink.api.common.functions.RichMapFunction)4 RestartStrategies (org.apache.flink.api.common.restartstrategy.RestartStrategies)4 BroadcastState (org.apache.flink.api.common.state.BroadcastState)4 ListState (org.apache.flink.api.common.state.ListState)4 ListStateDescriptor (org.apache.flink.api.common.state.ListStateDescriptor)4 MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)4 ReadOnlyBroadcastState (org.apache.flink.api.common.state.ReadOnlyBroadcastState)4 ValueStateDescriptor (org.apache.flink.api.common.state.ValueStateDescriptor)4 Time (org.apache.flink.api.common.time.Time)4 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)4 StringSerializer (org.apache.flink.api.common.typeutils.base.StringSerializer)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 IOException (java.io.IOException)3 MiniClusterResourceConfiguration (org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration)3 BroadcastStream (org.apache.flink.streaming.api.datastream.BroadcastStream)3