Use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.
From the class StreamGraphGeneratorTest, method testUnalignedCheckpointDisabledOnPointwise.
@Test
public void testUnalignedCheckpointDisabledOnPointwise() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(42);

    DataStream<Long> source1 = env.fromSequence(1L, 10L);
    DataStream<Long> map1 = source1.forward().map(l -> l);
    DataStream<Long> source2 = env.fromSequence(2L, 11L);
    DataStream<Long> map2 = source2.shuffle().map(l -> l);

    final MapStateDescriptor<Long, Long> descriptor =
            new MapStateDescriptor<>(
                    "broadcast", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO);
    final BroadcastStream<Long> broadcast = map1.broadcast(descriptor);
    final SingleOutputStreamOperator<Long> joined =
            map2.connect(broadcast)
                    .process(
                            new BroadcastProcessFunction<Long, Long, Long>() {
                                @Override
                                public void processElement(
                                        Long value, ReadOnlyContext ctx, Collector<Long> out) {}

                                @Override
                                public void processBroadcastElement(
                                        Long value, Context ctx, Collector<Long> out) {}
                            });

    DataStream<Long> map3 = joined.shuffle().map(l -> l);
    DataStream<Long> map4 = map3.rescale().map(l -> l).setParallelism(1337);

    StreamGraph streamGraph = env.getStreamGraph();
    assertEquals(7, streamGraph.getStreamNodes().size());

    // forward
    assertThat(edge(streamGraph, source1, map1), supportsUnalignedCheckpoints(false));
    // shuffle
    assertThat(edge(streamGraph, source2, map2), supportsUnalignedCheckpoints(true));
    // broadcast, but other channel is forwarded
    assertThat(edge(streamGraph, map1, joined), supportsUnalignedCheckpoints(false));
    // forward
    assertThat(edge(streamGraph, map2, joined), supportsUnalignedCheckpoints(false));
    // shuffle
    assertThat(edge(streamGraph, joined, map3), supportsUnalignedCheckpoints(true));
    // rescale
    assertThat(edge(streamGraph, map3, map4), supportsUnalignedCheckpoints(false));
}
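The edge helper and the supportsUnalignedCheckpoints Hamcrest matcher are test-local utilities that the snippet does not show. A minimal sketch of what they could look like, assuming StreamGraph#getStreamEdges(int, int), DataStream#getId(), and StreamEdge#supportsUnalignedCheckpoints(); names and structure here are a reconstruction, not the exact Flink test code:

// Hypothetical reconstruction of the test-local helpers used above.
private static StreamEdge edge(StreamGraph streamGraph, DataStream<?> source, DataStream<?> target) {
    // Exactly one edge is expected between each pair of operators in this test.
    List<StreamEdge> edges = streamGraph.getStreamEdges(source.getId(), target.getId());
    assertEquals(1, edges.size());
    return edges.get(0);
}

private static Matcher<StreamEdge> supportsUnalignedCheckpoints(boolean expected) {
    return new FeatureMatcher<StreamEdge, Boolean>(
            equalTo(expected), "supports unaligned checkpoints", "supportsUnalignedCheckpoints") {
        @Override
        protected Boolean featureValueOf(StreamEdge edge) {
            return edge.supportsUnalignedCheckpoints();
        }
    };
}

The assertions then read naturally: pointwise exchanges (forward, rescale) and broadcast edges report false, while shuffle edges report true.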
Use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.
From the class BroadcastStateITCase, method testKeyedWithBroadcastTranslation.
@Test
public void testKeyedWithBroadcastTranslation() throws Exception {
    final MapStateDescriptor<Long, String> utterDescriptor =
            new MapStateDescriptor<>(
                    "broadcast-state", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    final Map<Long, String> expected = new HashMap<>();
    expected.put(0L, "test:0");
    expected.put(1L, "test:1");
    expected.put(2L, "test:2");
    expected.put(3L, "test:3");
    expected.put(4L, "test:4");
    expected.put(5L, "test:5");

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Long> srcOne =
            env.generateSequence(0L, 5L)
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<Long>() {
                                private static final long serialVersionUID = -8500904795760316195L;

                                @Override
                                public long extractTimestamp(Long element, long previousElementTimestamp) {
                                    return element;
                                }
                            })
                    .keyBy((KeySelector<Long, Long>) value -> value);

    final DataStream<String> srcTwo =
            env.fromCollection(expected.values())
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<String>() {
                                private static final long serialVersionUID = -2148318224248467213L;

                                @Override
                                public long extractTimestamp(String element, long previousElementTimestamp) {
                                    return Long.parseLong(element.split(":")[1]);
                                }
                            });

    final BroadcastStream<String> broadcast = srcTwo.broadcast(utterDescriptor);

    // the timestamp should be high enough to trigger the timer after all the elements arrive.
    final DataStream<String> output =
            srcOne.connect(broadcast).process(new TestKeyedBroadcastProcessFunction(100000L, expected));

    output.addSink(new TestSink(expected.size())).setParallelism(1);
    env.execute();
}
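Both sources rely on a test-local CustomWmEmitter base class that the snippet omits. Since only extractTimestamp is overridden at each call site, a plausible sketch is an abstract assigner built on the legacy AssignerWithPunctuatedWatermarks interface (an assumption; the real test class may differ in detail):

// Hypothetical sketch: emit a punctuated watermark equal to each element's
// extracted timestamp, so event time advances with every element.
private abstract static class CustomWmEmitter<T> implements AssignerWithPunctuatedWatermarks<T> {

    @Nullable
    @Override
    public Watermark checkAndGetNextWatermark(T lastElement, long extractedTimestamp) {
        return new Watermark(extractedTimestamp);
    }
}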
Use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.
From the class DataStreamTest, method testFailedTranslationOnKeyed.
/**
* Tests that with a {@link KeyedStream} we have to provide a {@link
* KeyedBroadcastProcessFunction}.
*/
@Test
public void testFailedTranslationOnKeyed() {
    final MapStateDescriptor<Long, String> descriptor =
            new MapStateDescriptor<>(
                    "broadcast", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Long> srcOne =
            env.generateSequence(0L, 5L)
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<Long>() {
                                @Override
                                public long extractTimestamp(Long element, long previousElementTimestamp) {
                                    return element;
                                }
                            })
                    .keyBy((KeySelector<Long, Long>) value -> value);

    final DataStream<String> srcTwo =
            env.fromElements("Test:0", "Test:1", "Test:2", "Test:3", "Test:4", "Test:5")
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<String>() {
                                @Override
                                public long extractTimestamp(String element, long previousElementTimestamp) {
                                    return Long.parseLong(element.split(":")[1]);
                                }
                            });

    BroadcastStream<String> broadcast = srcTwo.broadcast(descriptor);
    BroadcastConnectedStream<Long, String> bcStream = srcOne.connect(broadcast);

    expectedException.expect(IllegalArgumentException.class);
    bcStream.process(
            new BroadcastProcessFunction<Long, String, String>() {
                @Override
                public void processBroadcastElement(String value, Context ctx, Collector<String> out)
                        throws Exception {
                    // do nothing
                }

                @Override
                public void processElement(Long value, ReadOnlyContext ctx, Collector<String> out)
                        throws Exception {
                    // do nothing
                }
            });
}
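The process call fails because the non-broadcast side is keyed, so Flink requires a KeyedBroadcastProcessFunction, which carries the key type as an extra first type parameter. A minimal sketch of the variant that would translate successfully:

// With a keyed non-broadcast side, process() must be given a
// KeyedBroadcastProcessFunction<key type, input type, broadcast type, output type>.
bcStream.process(
        new KeyedBroadcastProcessFunction<Long, Long, String, String>() {
            @Override
            public void processElement(Long value, ReadOnlyContext ctx, Collector<String> out) {
                // keyed elements; broadcast state is read-only here
            }

            @Override
            public void processBroadcastElement(String value, Context ctx, Collector<String> out) {
                // broadcast elements; broadcast state is writable here
            }
        });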
Use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.
From the class DataStreamBatchExecutionITCase, method batchBroadcastExecution.
/**
* Verifies that all broadcast input is processed before regular input.
*/
@Test
public void batchBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);

    DataStream<Tuple2<String, Integer>> bcInput =
            env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    DataStream<Tuple2<String, Integer>> regularInput =
            env.fromElements(
                            Tuple2.of("regular1", 1),
                            Tuple2.of("regular1", 2),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 4),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 5),
                            Tuple2.of("regular1", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);

    DataStream<String> result =
            regularInput.connect(broadcastStream).process(new TestBroadcastFunction());

    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        // Regular, i.e. non-keyed, input is not sorted by timestamp. For keyed input,
        // sorting by timestamp is a by-product of the grouping/sorting used to form
        // the keyed groups.
        assertThat(
                results,
                equalTo(
                        Arrays.asList(
                                "(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
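STATE_DESCRIPTOR and TestBroadcastFunction are defined elsewhere in the test class. Judging from the asserted output, a plausible sketch is a function that stores each broadcast element in map state and, for every regular element, emits the element together with all accumulated state entries (the descriptor name and exact formatting here are assumptions):

// Hypothetical reconstruction of the test-local state descriptor and function.
static final MapStateDescriptor<String, String> STATE_DESCRIPTOR =
        new MapStateDescriptor<>(
                "broadcast-state", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO);

static class TestBroadcastFunction
        extends BroadcastProcessFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    @Override
    public void processElement(
            Tuple2<String, Integer> value, ReadOnlyContext ctx, Collector<String> out)
            throws Exception {
        // In BATCH mode the broadcast side has been fully consumed before this is
        // called, so every regular element sees the complete broadcast state.
        List<String> entries = new ArrayList<>();
        for (Map.Entry<String, String> entry :
                ctx.getBroadcastState(STATE_DESCRIPTOR).immutableEntries()) {
            entries.add(entry.getKey() + "=" + entry.getValue());
        }
        out.collect(value + ": [" + String.join(", ", entries) + "]");
    }

    @Override
    public void processBroadcastElement(
            Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {
        ctx.getBroadcastState(STATE_DESCRIPTOR).put(value.f0, value.f0);
    }
}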
Use of org.apache.flink.streaming.api.datastream.BroadcastStream in project flink by apache.
From the class DataStreamBatchExecutionITCase, method batchKeyedBroadcastExecution.
/**
* Verifies that all broadcast input is processed before keyed input.
*/
@Test
public void batchKeyedBroadcastExecution() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.setRuntimeMode(RuntimeExecutionMode.BATCH);

    DataStream<Tuple2<String, Integer>> bcInput =
            env.fromElements(Tuple2.of("bc1", 1), Tuple2.of("bc2", 2), Tuple2.of("bc3", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    DataStream<Tuple2<String, Integer>> regularInput =
            env.fromElements(
                            Tuple2.of("regular1", 1),
                            Tuple2.of("regular1", 2),
                            Tuple2.of("regular2", 2),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular1", 4),
                            Tuple2.of("regular1", 3),
                            Tuple2.of("regular2", 5),
                            Tuple2.of("regular1", 5),
                            Tuple2.of("regular2", 3),
                            Tuple2.of("regular1", 3))
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>forMonotonousTimestamps()
                                    .withTimestampAssigner((in, ts) -> in.f1));

    BroadcastStream<Tuple2<String, Integer>> broadcastStream = bcInput.broadcast(STATE_DESCRIPTOR);

    DataStream<String> result =
            regularInput
                    .keyBy((input) -> input.f0)
                    .connect(broadcastStream)
                    .process(new TestKeyedBroadcastFunction());

    try (CloseableIterator<String> resultIterator = result.executeAndCollect()) {
        List<String> results = CollectionUtil.iteratorToList(resultIterator);
        // Keyed input is grouped by key, so results arrive per key and in timestamp
        // order within each key.
        assertThat(
                results,
                equalTo(
                        Arrays.asList(
                                "(regular1,1): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,2): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,4): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular1,5): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular2,2): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular2,3): [bc2=bc2, bc1=bc1, bc3=bc3]",
                                "(regular2,5): [bc2=bc2, bc1=bc1, bc3=bc3]")));
    }
}
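TestKeyedBroadcastFunction is again test-local and not shown. A plausible skeleton mirrors the TestBroadcastFunction sketch above, differing only in the base class and the extra key type parameter (String, the Tuple2's f0 field used in keyBy):

// Hypothetical reconstruction; the bodies mirror the TestBroadcastFunction sketch.
static class TestKeyedBroadcastFunction
        extends KeyedBroadcastProcessFunction<
                String, Tuple2<String, Integer>, Tuple2<String, Integer>, String> {

    @Override
    public void processElement(
            Tuple2<String, Integer> value, ReadOnlyContext ctx, Collector<String> out)
            throws Exception {
        // Dump the complete broadcast state next to each keyed element, as above.
        List<String> entries = new ArrayList<>();
        for (Map.Entry<String, String> entry :
                ctx.getBroadcastState(STATE_DESCRIPTOR).immutableEntries()) {
            entries.add(entry.getKey() + "=" + entry.getValue());
        }
        out.collect(value + ": [" + String.join(", ", entries) + "]");
    }

    @Override
    public void processBroadcastElement(
            Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {
        ctx.getBroadcastState(STATE_DESCRIPTOR).put(value.f0, value.f0);
    }
}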