Search in sources :

Example 1 with BroadcastStream

use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.

the class StreamGraphGeneratorTest method testUnalignedCheckpointDisabledOnPointwise.

@Test
public void testUnalignedCheckpointDisabledOnPointwise() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(42);
    DataStream<Long> source1 = env.fromSequence(1L, 10L);
    DataStream<Long> map1 = source1.forward().map(l -> l);
    DataStream<Long> source2 = env.fromSequence(2L, 11L);
    DataStream<Long> map2 = source2.shuffle().map(l -> l);
    final MapStateDescriptor<Long, Long> descriptor = new MapStateDescriptor<>("broadcast", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
    final BroadcastStream<Long> broadcast = map1.broadcast(descriptor);
    final SingleOutputStreamOperator<Long> joined = map2.connect(broadcast).process(new BroadcastProcessFunction<Long, Long, Long>() {

        @Override
        public void processElement(Long value, ReadOnlyContext ctx, Collector<Long> out) {
        }

        @Override
        public void processBroadcastElement(Long value, Context ctx, Collector<Long> out) {
        }
    });
    DataStream<Long> map3 = joined.shuffle().map(l -> l);
    DataStream<Long> map4 = map3.rescale().map(l -> l).setParallelism(1337);
    StreamGraph streamGraph = env.getStreamGraph();
    assertEquals(7, streamGraph.getStreamNodes().size());
    // forward
    assertThat(edge(streamGraph, source1, map1), supportsUnalignedCheckpoints(false));
    // shuffle
    assertThat(edge(streamGraph, source2, map2), supportsUnalignedCheckpoints(true));
    // broadcast, but other channel is forwarded
    assertThat(edge(streamGraph, map1, joined), supportsUnalignedCheckpoints(false));
    // forward
    assertThat(edge(streamGraph, map2, joined), supportsUnalignedCheckpoints(false));
    // shuffle
    assertThat(edge(streamGraph, joined, map3), supportsUnalignedCheckpoints(true));
    // rescale
    assertThat(edge(streamGraph, map3, map4), supportsUnalignedCheckpoints(false));
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) SlotSharingGroup(org.apache.flink.api.common.operators.SlotSharingGroup) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) ManagedMemoryUseCase(org.apache.flink.core.memory.ManagedMemoryUseCase) Map(java.util.Map) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assertions(org.assertj.core.api.Assertions) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) PartitionTransformation(org.apache.flink.streaming.api.transformations.PartitionTransformation) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) TypeSafeMatcher(org.hamcrest.TypeSafeMatcher) ResourceProfile(org.apache.flink.runtime.clusterframework.types.ResourceProfile) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) List(java.util.List) NoOpIntMap(org.apache.flink.streaming.util.NoOpIntMap) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) CheckpointConfig(org.apache.flink.streaming.api.environment.CheckpointConfig) Matchers.is(org.hamcrest.Matchers.is) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) MultipleInputTransformation(org.apache.flink.streaming.api.transformations.MultipleInputTransformation) IterativeStream(org.apache.flink.streaming.api.datastream.IterativeStream) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) StreamOperatorFactory(org.apache.flink.streaming.api.operators.StreamOperatorFactory) Watermark(org.apache.flink.streaming.api.watermark.Watermark) SavepointConfigOptions(org.apache.flink.runtime.jobgraph.SavepointConfigOptions) HashMap(java.util.HashMap) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Collector(org.apache.flink.util.Collector) Matchers.iterableWithSize(org.hamcrest.Matchers.iterableWithSize) Output(org.apache.flink.streaming.api.operators.Output) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TestExpandingSink(org.apache.flink.streaming.util.TestExpandingSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) Description(org.hamcrest.Description) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) Configuration(org.apache.flink.configuration.Configuration) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) FeatureMatcher(org.hamcrest.FeatureMatcher) StreamExchangeMode(org.apache.flink.streaming.api.transformations.StreamExchangeMode) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) Matcher(org.hamcrest.Matcher) Transformation(org.apache.flink.api.dag.Transformation) LatencyMarker(org.apache.flink.streaming.runtime.streamrecord.LatencyMarker) SavepointRestoreSettings(org.apache.flink.runtime.jobgraph.SavepointRestoreSettings) OutputTypeConfigurable(org.apache.flink.streaming.api.operators.OutputTypeConfigurable) StreamSource(org.apache.flink.streaming.api.operators.StreamSource) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 2 with BroadcastStream

use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.

the class BroadcastStateITCase method testKeyedWithBroadcastTranslation.

@Test
public void testKeyedWithBroadcastTranslation() throws Exception {
    final MapStateDescriptor<Long, String> utterDescriptor = new MapStateDescriptor<>("broadcast-state", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    final Map<Long, String> expected = new HashMap<>();
    expected.put(0L, "test:0");
    expected.put(1L, "test:1");
    expected.put(2L, "test:2");
    expected.put(3L, "test:3");
    expected.put(4L, "test:4");
    expected.put(5L, "test:5");
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Long> srcOne = env.generateSequence(0L, 5L).assignTimestampsAndWatermarks(new CustomWmEmitter<Long>() {

        private static final long serialVersionUID = -8500904795760316195L;

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return element;
        }
    }).keyBy((KeySelector<Long, Long>) value -> value);
    final DataStream<String> srcTwo = env.fromCollection(expected.values()).assignTimestampsAndWatermarks(new CustomWmEmitter<String>() {

        private static final long serialVersionUID = -2148318224248467213L;

        @Override
        public long extractTimestamp(String element, long previousElementTimestamp) {
            return Long.parseLong(element.split(":")[1]);
        }
    });
    final BroadcastStream<String> broadcast = srcTwo.broadcast(utterDescriptor);
    // the timestamp should be high enough to trigger the timer after all the elements arrive.
    final DataStream<String> output = srcOne.connect(broadcast).process(new TestKeyedBroadcastProcessFunction(100000L, expected));
    output.addSink(new TestSink(expected.size())).setParallelism(1);
    env.execute();
}
Also used : KeySelector(org.apache.flink.api.java.functions.KeySelector) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) Configuration(org.apache.flink.configuration.Configuration) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) HashMap(java.util.HashMap) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) RichSinkFunction(org.apache.flink.streaming.api.functions.sink.RichSinkFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) DataStream(org.apache.flink.streaming.api.datastream.DataStream) Rule(org.junit.Rule) Collector(org.apache.flink.util.Collector) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) Map(java.util.Map) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) ExpectedException(org.junit.rules.ExpectedException) Nullable(javax.annotation.Nullable) Assert.assertEquals(org.junit.Assert.assertEquals) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) AbstractTestBase(org.apache.flink.test.util.AbstractTestBase) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) HashMap(java.util.HashMap) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 3 with BroadcastStream

use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.

the class DataStreamTest method testFailedTranslationOnKeyed.

/**
 * Tests that with a {@link KeyedStream} we have to provide a {@link
 * KeyedBroadcastProcessFunction}.
 */
@Test
public void testFailedTranslationOnKeyed() {
    final MapStateDescriptor<Long, String> descriptor = new MapStateDescriptor<>("broadcast", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Long> srcOne = env.generateSequence(0L, 5L).assignTimestampsAndWatermarks(new CustomWmEmitter<Long>() {

        @Override
        public long extractTimestamp(Long element, long previousElementTimestamp) {
            return element;
        }
    }).keyBy((KeySelector<Long, Long>) value -> value);
    final DataStream<String> srcTwo = env.fromElements("Test:0", "Test:1", "Test:2", "Test:3", "Test:4", "Test:5").assignTimestampsAndWatermarks(new CustomWmEmitter<String>() {

        @Override
        public long extractTimestamp(String element, long previousElementTimestamp) {
            return Long.parseLong(element.split(":")[1]);
        }
    });
    BroadcastStream<String> broadcast = srcTwo.broadcast(descriptor);
    BroadcastConnectedStream<Long, String> bcStream = srcOne.connect(broadcast);
    expectedException.expect(IllegalArgumentException.class);
    bcStream.process(new BroadcastProcessFunction<Long, String, String>() {

        @Override
        public void processBroadcastElement(String value, Context ctx, Collector<String> out) throws Exception {
        // do nothing
        }

        @Override
        public void processElement(Long value, ReadOnlyContext ctx, Collector<String> out) throws Exception {
        // do nothing
        }
    });
}
Also used : Tuple1(org.apache.flink.api.java.tuple.Tuple1) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) PurgingTrigger(org.apache.flink.streaming.api.windowing.triggers.PurgingTrigger) BroadcastPartitioner(org.apache.flink.streaming.runtime.partitioner.BroadcastPartitioner) TupleTypeInfo(org.apache.flink.api.java.typeutils.TupleTypeInfo) CoFlatMapFunction(org.apache.flink.streaming.api.functions.co.CoFlatMapFunction) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MapFunction(org.apache.flink.api.common.functions.MapFunction) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) AggregateFunction(org.apache.flink.api.common.functions.AggregateFunction) ShufflePartitioner(org.apache.flink.streaming.runtime.partitioner.ShufflePartitioner) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ResourceSpec(org.apache.flink.api.common.operators.ResourceSpec) CustomPartitionerWrapper(org.apache.flink.streaming.runtime.partitioner.CustomPartitionerWrapper) Duration(java.time.Duration) StreamGraph(org.apache.flink.streaming.api.graph.StreamGraph) TestLogger(org.apache.flink.util.TestLogger) Function(org.apache.flink.api.common.functions.Function) Assert.fail(org.junit.Assert.fail) AssignerWithPunctuatedWatermarks(org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Method(java.lang.reflect.Method) GlobalWindow(org.apache.flink.streaming.api.windowing.windows.GlobalWindow) StringStartsWith(org.hamcrest.core.StringStartsWith) CoMapFunction(org.apache.flink.streaming.api.functions.co.CoMapFunction) KeySelector(org.apache.flink.api.java.functions.KeySelector) ForwardPartitioner(org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner) SinkFunction(org.apache.flink.streaming.api.functions.sink.SinkFunction) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) ConnectedStreams(org.apache.flink.streaming.api.datastream.ConnectedStreams) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) GlobalPartitioner(org.apache.flink.streaming.runtime.partitioner.GlobalPartitioner) FilterFunction(org.apache.flink.api.common.functions.FilterFunction) List(java.util.List) TypeExtractor(org.apache.flink.api.java.typeutils.TypeExtractor) BroadcastConnectedStream(org.apache.flink.streaming.api.datastream.BroadcastConnectedStream) TumblingEventTimeWindows(org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows) Assert.assertFalse(org.junit.Assert.assertFalse) AllWindowFunction(org.apache.flink.streaming.api.functions.windowing.AllWindowFunction) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) CountTrigger(org.apache.flink.streaming.api.windowing.triggers.CountTrigger) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) AbstractUdfStreamOperator(org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) Watermark(org.apache.flink.streaming.api.watermark.Watermark) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedProcessFunction(org.apache.flink.streaming.api.functions.KeyedProcessFunction) Partitioner(org.apache.flink.api.common.functions.Partitioner) KeyedProcessOperator(org.apache.flink.streaming.api.operators.KeyedProcessOperator) StreamPartitioner(org.apache.flink.streaming.runtime.partitioner.StreamPartitioner) Collector(org.apache.flink.util.Collector) ProcessOperator(org.apache.flink.streaming.api.operators.ProcessOperator) ProcessFunction(org.apache.flink.streaming.api.functions.ProcessFunction) ReduceFunction(org.apache.flink.api.common.functions.ReduceFunction) ExpectedException(org.junit.rules.ExpectedException) Nullable(javax.annotation.Nullable) Types(org.apache.flink.api.common.typeinfo.Types) DataStreamSink(org.apache.flink.streaming.api.datastream.DataStreamSink) RebalancePartitioner(org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner) StreamEdge(org.apache.flink.streaming.api.graph.StreamEdge) Time(org.apache.flink.streaming.api.windowing.time.Time) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) DiscardingSink(org.apache.flink.streaming.api.functions.sink.DiscardingSink) Assert.assertNotNull(org.junit.Assert.assertNotNull) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) LegacyKeyedProcessOperator(org.apache.flink.streaming.api.operators.LegacyKeyedProcessOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) KeyGroupStreamPartitioner(org.apache.flink.streaming.runtime.partitioner.KeyGroupStreamPartitioner) Rule(org.junit.Rule) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) EnumTypeInfo(org.apache.flink.api.java.typeutils.EnumTypeInfo) Assert(org.junit.Assert) GlobalWindows(org.apache.flink.streaming.api.windowing.assigners.GlobalWindows) Assert.assertEquals(org.junit.Assert.assertEquals) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) InvalidProgramException(org.apache.flink.api.common.InvalidProgramException) ExpectedException(org.junit.rules.ExpectedException) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 4 with BroadcastStream

use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.

the class DataStreamBatchExecutionITCase method batchBroadcastExecution.

/**
 * Verifies that all broadcast input is processed before regular input.
 */
@Test
public void batchBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4), Tuple2.of("regular1", 3), Tuple2.of("regular1", 5), Tuple2.of("regular1", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);
    DataStream<String> result = regularInput.connect(broadcastStream).process(new TestBroadcastFunction());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        // regular, that is non-keyed input is not sorted by timestamp. For keyed inputs
        // this is a by-product of the grouping/sorting we use to get the keyed groups.
        assertThat(results, equalTo(Arrays.asList("(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Example 5 with BroadcastStream

use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.

the class DataStreamBatchExecutionITCase method batchKeyedBroadcastExecution.

/**
 * Verifies that all broadcast input is processed before keyed input.
 */
@Test
public void batchKeyedBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);
    DataStream<Tuple2<String, Integer>> bcInput = env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    DataStream<Tuple2<String, Integer>> regularInput = env.fromElements(Tuple2.of("regular1", 1), Tuple2.of("regular1", 2), Tuple2.of("regular2", 2), Tuple2.of("regular1", 3), Tuple2.of("regular1", 4), Tuple2.of("regular1", 3), Tuple2.of("regular2", 5), Tuple2.of("regular1", 5), Tuple2.of("regular2", 3), Tuple2.of("regular1", 3)).assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps().withTimestampAssigner((in, ts) -> in.f1));
    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);
    DataStream<String> result = regularInput.keyBy((input) -> input.f0).connect(broadcastStream).process(new TestKeyedBroadcastFunction());
    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        assertThat(results, equalTo(Arrays.asList("(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,2): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,3): [bc2=bc2, bc1=bc1, bc3=bc3]", "(regular2,5): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
Also used : BroadcastState(org.apache.flink.api.common.state.BroadcastState) Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BroadcastStream(org.apache.flink.streaming.api.datastream.BroadcastStream) MultipleConnectedStreams(org.apache.flink.streaming.api.datastream.MultipleConnectedStreams) CollectionUtil.iteratorToList(org.apache.flink.util.CollectionUtil.iteratorToList) CoreMatchers.equalTo(org.hamcrest.CoreMatchers.equalTo) AbstractStreamOperatorV2(org.apache.flink.streaming.api.operators.AbstractStreamOperatorV2) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) RestartStrategies(org.apache.flink.api.common.restartstrategy.RestartStrategies) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) KeyedBroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction) MiniClusterResourceConfiguration(org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration) AbstractInput(org.apache.flink.streaming.api.operators.AbstractInput) BasicTypeInfo(org.apache.flink.api.common.typeinfo.BasicTypeInfo) Assert.assertThat(org.junit.Assert.assertThat) ListState(org.apache.flink.api.common.state.ListState) ReadOnlyBroadcastState(org.apache.flink.api.common.state.ReadOnlyBroadcastState) AbstractStreamOperatorFactory(org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) ClassRule(org.junit.ClassRule) MiniClusterWithClientResource(org.apache.flink.test.util.MiniClusterWithClientResource) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) KeyedMultipleInputTransformation(org.apache.flink.streaming.api.transformations.KeyedMultipleInputTransformation) WatermarkStrategy(org.apache.flink.api.common.eventtime.WatermarkStrategy) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) StreamOperatorParameters(org.apache.flink.streaming.api.operators.StreamOperatorParameters) Test(org.junit.Test) CollectionUtil(org.apache.flink.util.CollectionUtil) AbstractStreamOperator(org.apache.flink.streaming.api.operators.AbstractStreamOperator) DataStream(org.apache.flink.streaming.api.datastream.DataStream) StreamOperator(org.apache.flink.streaming.api.operators.StreamOperator) MultipleInputStreamOperator(org.apache.flink.streaming.api.operators.MultipleInputStreamOperator) CloseableIterator(org.apache.flink.util.CloseableIterator) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) BroadcastProcessFunction(org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction) RuntimeExecutionMode(org.apache.flink.api.common.RuntimeExecutionMode) Time(org.apache.flink.api.common.time.Time) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Input(org.apache.flink.streaming.api.operators.Input) Tuple2(org.apache.flink.api.java.tuple.Tuple2) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) Test(org.junit.Test)

Aggregations

MapStateDescriptor (org.apache.flink.api.common.state.MapStateDescriptor)5 BasicTypeInfo (org.apache.flink.api.common.typeinfo.BasicTypeInfo)5 BroadcastStream (org.apache.flink.streaming.api.datastream.BroadcastStream)5 DataStream (org.apache.flink.streaming.api.datastream.DataStream)5 StreamExecutionEnvironment (org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)5 BroadcastProcessFunction (org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction)5 KeyedBroadcastProcessFunction (org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction)5 Collector (org.apache.flink.util.Collector)5 Test (org.junit.Test)5 List (java.util.List)4 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)4 SingleOutputStreamOperator (org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator)4 Arrays (java.util.Arrays)3 WatermarkStrategy (org.apache.flink.api.common.eventtime.WatermarkStrategy)3 DataStreamSource (org.apache.flink.streaming.api.datastream.DataStreamSource)3 KeyedStream (org.apache.flink.streaming.api.datastream.KeyedStream)3 StreamOperator (org.apache.flink.streaming.api.operators.StreamOperator)3 CoreMatchers.equalTo (org.hamcrest.CoreMatchers.equalTo)3 Assert.assertThat (org.junit.Assert.assertThat)3 HashMap (java.util.HashMap)2