Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.
Class BroadcastStateITCase, method testKeyedWithBroadcastTranslation.
/**
 * Connects a keyed stream with a broadcast stream, processes the pair with a
 * {@code TestKeyedBroadcastProcessFunction} and sends the output to a {@code TestSink}.
 *
 * <p>The keyed side is built from {@code generateSequence(0L, 5L)}; every element serves as its
 * own key and its own timestamp. The broadcast side is built from the strings
 * {@code "test:0"} .. {@code "test:5"}; the number after the colon is used as the timestamp.
 *
 * @throws Exception if executing the job fails
 */
@Test
public void testKeyedWithBroadcastTranslation() throws Exception {
    final MapStateDescriptor<Long, String> broadcastDescriptor =
            new MapStateDescriptor<>(
                    "broadcast-state",
                    BasicTypeInfo.LONG_TYPE_INFO,
                    BasicTypeInfo.STRING_TYPE_INFO);

    // key k (0..5) is associated with the broadcast element "test:k"
    final Map<Long, String> expected = new HashMap<>();
    for (long key = 0L; key <= 5L; key++) {
        expected.put(key, "test:" + key);
    }

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // keyed side: the element itself is used as the timestamp ...
    final DataStream<Long> timestampedNumbers =
            env.generateSequence(0L, 5L)
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<Long>() {
                                private static final long serialVersionUID = -8500904795760316195L;

                                @Override
                                public long extractTimestamp(
                                        Long element, long previousElementTimestamp) {
                                    return element;
                                }
                            });
    // ... and as the key
    final DataStream<Long> srcOne =
            timestampedNumbers.keyBy((KeySelector<Long, Long>) value -> value);

    // broadcast side: the timestamp is the numeric suffix of "test:<n>"
    final DataStream<String> srcTwo =
            env.fromCollection(expected.values())
                    .assignTimestampsAndWatermarks(
                            new CustomWmEmitter<String>() {
                                private static final long serialVersionUID = -2148318224248467213L;

                                @Override
                                public long extractTimestamp(
                                        String element, long previousElementTimestamp) {
                                    final String[] parts = element.split(":");
                                    return Long.parseLong(parts[1]);
                                }
                            });

    final BroadcastStream<String> broadcast = srcTwo.broadcast(broadcastDescriptor);

    // the timestamp should be high enough to trigger the timer after all the elements arrive.
    final TestKeyedBroadcastProcessFunction processFunction =
            new TestKeyedBroadcastProcessFunction(100000L, expected);
    final DataStream<String> output = srcOne.connect(broadcast).process(processFunction);

    output.addSink(new TestSink(expected.size())).setParallelism(1);
    env.execute();
}
Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.
Class SortingBoundedInputITCase, method testThreeInputOperator.
/**
 * Wires three keyed inputs into a {@code KeyedMultipleInputTransformation} under BATCH runtime
 * mode and checks that the values it emits add up to three times {@code numberOfRecords}.
 *
 * <p>Each input is produced by its own {@code InputGenerator} constructed with
 * {@code numberOfRecords} and is keyed by tuple field {@code f0}.
 */
@Test
public void testThreeInputOperator() {
    long numberOfRecords = 500_000;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // Every input gets its own generator and type info; each source is keyed right after it is
    // created, in the order first, second, third.
    DataStream<Tuple2<Integer, byte[]>> firstSource =
            env.fromParallelCollection(
                    new InputGenerator(numberOfRecords),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> firstKeyed = firstSource.keyBy(el -> el.f0);

    DataStream<Tuple2<Integer, byte[]>> secondSource =
            env.fromParallelCollection(
                    new InputGenerator(numberOfRecords),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> secondKeyed = secondSource.keyBy(el -> el.f0);

    DataStream<Tuple2<Integer, byte[]>> thirdSource =
            env.fromParallelCollection(
                    new InputGenerator(numberOfRecords),
                    new TupleTypeInfo<>(
                            BasicTypeInfo.INT_TYPE_INFO,
                            PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
    KeyedStream<Tuple2<Integer, byte[]>, Object> thirdKeyed = thirdSource.keyBy(el -> el.f0);

    // NOTE(review): the -1 argument is presumably "parallelism not set" — confirm against the
    // KeyedMultipleInputTransformation constructor.
    KeyedMultipleInputTransformation<Long> assertingTransformation =
            new KeyedMultipleInputTransformation<>(
                    "Asserting operator",
                    new AssertingThreeInputOperatorFactory(),
                    BasicTypeInfo.LONG_TYPE_INFO,
                    -1,
                    BasicTypeInfo.INT_TYPE_INFO);
    assertingTransformation.addInput(firstKeyed.getTransformation(), firstKeyed.getKeySelector());
    assertingTransformation.addInput(
            secondKeyed.getTransformation(), secondKeyed.getKeySelector());
    assertingTransformation.addInput(thirdKeyed.getTransformation(), thirdKeyed.getKeySelector());
    env.addOperator(assertingTransformation);

    DataStream<Long> counts = new DataStream<>(env, assertingTransformation);

    // Collect everything the operator emitted and add it up.
    List<Long> collectedCounts = CollectionUtil.iteratorToList(DataStreamUtils.collect(counts));
    long sum = 0L;
    for (Long count : collectedCounts) {
        sum += count;
    }

    assertThat(sum, equalTo(numberOfRecords * 3));
}
Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.
Class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.
/**
 * Runs a {@code KeyedProcessFunction} that uses event-time timers under BATCH runtime mode
 * with parallelism 1.
 *
 * <p>The function counts elements per key in buckets that end at the next multiple of ten
 * above the element timestamp and emits {@code (timer timestamp, key, count)} when the timer
 * for a bucket fires. Elements whose timestamp is below the current watermark go to the
 * {@code late_elements} side output; the test asserts that this side output stays empty.
 */
@Test
public void testBatchExecutionWithTimersOneInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);

    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // field f1 of every tuple is used as its event timestamp
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy =
            WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
                    .withTimestampAssigner((r, previousTimestamp) -> r.f1);

    SingleOutputStreamOperator<Tuple2<Integer, Integer>> elements =
            env.fromElements(
                            Tuple2.of(1, 3),
                            Tuple2.of(1, 1),
                            Tuple2.of(2, 1),
                            Tuple2.of(1, 4),
                            // late element
                            Tuple2.of(2, 3),
                            // late element
                            Tuple2.of(1, 2),
                            Tuple2.of(1, 13),
                            Tuple2.of(1, 11),
                            Tuple2.of(2, 14),
                            // late element
                            Tuple2.of(1, 11))
                    .assignTimestampsAndWatermarks(watermarkStrategy);

    OutputTag<Integer> lateElements =
            new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);

    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums =
            elements.map(tuple -> tuple.f0)
                    .keyBy(key -> key)
                    .process(
                            new KeyedProcessFunction<
                                    Integer, Integer, Tuple3<Long, Integer, Integer>>() {

                                private MapState<Long, Integer> countState;
                                private ValueState<Long> previousTimestampState;

                                @Override
                                public void open(Configuration parameters) {
                                    countState =
                                            getRuntimeContext()
                                                    .getMapState(
                                                            new MapStateDescriptor<>(
                                                                    "sum",
                                                                    BasicTypeInfo.LONG_TYPE_INFO,
                                                                    BasicTypeInfo.INT_TYPE_INFO));
                                    previousTimestampState =
                                            getRuntimeContext()
                                                    .getState(
                                                            new ValueStateDescriptor<>(
                                                                    "previousTimestamp",
                                                                    BasicTypeInfo.LONG_TYPE_INFO));
                                }

                                @Override
                                public void processElement(
                                        Integer value,
                                        Context ctx,
                                        Collector<Tuple3<Long, Integer, Integer>> out)
                                        throws Exception {
                                    Long elementTimestamp = ctx.timestamp();
                                    // end of the ten-wide bucket the element belongs to
                                    long nextTen = ((elementTimestamp + 10) / 10) * 10;
                                    ctx.timerService().registerEventTimeTimer(nextTen);

                                    // elements behind the watermark only go to the side output
                                    if (elementTimestamp < ctx.timerService().currentWatermark()) {
                                        ctx.output(lateElements, value);
                                        return;
                                    }

                                    // timestamps of a key must not decrease between elements
                                    Long previousTimestamp = previousTimestampState.value();
                                    if (previousTimestamp == null) {
                                        previousTimestamp = 0L;
                                    }
                                    assertThat(
                                            elementTimestamp,
                                            greaterThanOrEqualTo(previousTimestamp));
                                    previousTimestampState.update(elementTimestamp);

                                    Integer currentCount = countState.get(nextTen);
                                    if (currentCount == null) {
                                        currentCount = 0;
                                    }
                                    countState.put(nextTen, currentCount + 1);
                                }

                                @Override
                                public void onTimer(
                                        long timestamp,
                                        OnTimerContext ctx,
                                        Collector<Tuple3<Long, Integer, Integer>> out)
                                        throws Exception {
                                    out.collect(
                                            Tuple3.of(
                                                    timestamp,
                                                    ctx.getCurrentKey(),
                                                    countState.get(timestamp)));
                                    countState.remove(timestamp);

                                    // this would go in infinite loop if we did not quiesce the
                                    // timer service.
                                    ctx.timerService().registerEventTimeTimer(timestamp + 1);
                                }
                            });

    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));

    assertTrue(lateRecordsCollected.isEmpty());

    // (timer timestamp, key, number of elements counted for that timer)
    List<Tuple3<Long, Integer, Integer>> expectedSums =
            Arrays.asList(
                    Tuple3.of(10L, 1, 4),
                    Tuple3.of(20L, 1, 3),
                    Tuple3.of(10L, 2, 2),
                    Tuple3.of(20L, 2, 1));
    assertThat(sumsCollected, equalTo(expectedSums));
}
Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.
Class SortingBoundedInputITCase, method testBatchExecutionWithTimersTwoInput.
/**
 * Two-input counterpart of the timer test: two streams built from the same element list are
 * connected, keyed by their value and handled by a {@code KeyedCoProcessFunction} under BATCH
 * runtime mode with parallelism 1.
 *
 * <p>Both inputs go through the same counting logic, which buckets elements per key by the
 * next multiple of ten above their timestamp and emits {@code (timer timestamp, key, count)}
 * when the bucket's timer fires. Elements below the current watermark go to the
 * {@code late_elements} side output, which the test asserts to be empty.
 */
@Test
public void testBatchExecutionWithTimersTwoInput() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // set parallelism to 1 to have consistent order of results
    env.setParallelism(1);

    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // field f1 of every tuple is used as its event timestamp
    WatermarkStrategy<Tuple2<Integer, Integer>> watermarkStrategy =
            WatermarkStrategy.forGenerator(ctx -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
                    .withTimestampAssigner((r, previousTimestamp) -> r.f1);

    // first input: timestamps are assigned from f1, then only f0 is kept
    SingleOutputStreamOperator<Integer> elements1 =
            env.fromElements(
                            Tuple2.of(1, 3),
                            Tuple2.of(1, 1),
                            Tuple2.of(2, 1),
                            Tuple2.of(1, 4),
                            // late element
                            Tuple2.of(2, 3),
                            // late element
                            Tuple2.of(1, 2),
                            Tuple2.of(1, 13),
                            Tuple2.of(1, 11),
                            Tuple2.of(2, 14),
                            // late element
                            Tuple2.of(1, 11))
                    .assignTimestampsAndWatermarks(watermarkStrategy)
                    .map(tuple -> tuple.f0);

    // second input: same elements as the first one
    SingleOutputStreamOperator<Integer> elements2 =
            env.fromElements(
                            Tuple2.of(1, 3),
                            Tuple2.of(1, 1),
                            Tuple2.of(2, 1),
                            Tuple2.of(1, 4),
                            // late element
                            Tuple2.of(2, 3),
                            // late element
                            Tuple2.of(1, 2),
                            Tuple2.of(1, 13),
                            Tuple2.of(1, 11),
                            Tuple2.of(2, 14),
                            // late element
                            Tuple2.of(1, 11))
                    .assignTimestampsAndWatermarks(watermarkStrategy)
                    .map(tuple -> tuple.f0);

    OutputTag<Integer> lateElements =
            new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);

    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums =
            elements1
                    .connect(elements2)
                    .keyBy(left -> left, right -> right)
                    .process(
                            new KeyedCoProcessFunction<
                                    Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {

                                private MapState<Long, Integer> countState;
                                private ValueState<Long> previousTimestampState;

                                @Override
                                public void open(Configuration parameters) {
                                    countState =
                                            getRuntimeContext()
                                                    .getMapState(
                                                            new MapStateDescriptor<>(
                                                                    "sum",
                                                                    BasicTypeInfo.LONG_TYPE_INFO,
                                                                    BasicTypeInfo.INT_TYPE_INFO));
                                    previousTimestampState =
                                            getRuntimeContext()
                                                    .getState(
                                                            new ValueStateDescriptor<>(
                                                                    "previousTimestamp",
                                                                    BasicTypeInfo.LONG_TYPE_INFO));
                                }

                                @Override
                                public void processElement1(
                                        Integer value,
                                        Context ctx,
                                        Collector<Tuple3<Long, Integer, Integer>> out)
                                        throws Exception {
                                    countOrDivert(value, ctx);
                                }

                                @Override
                                public void processElement2(
                                        Integer value,
                                        Context ctx,
                                        Collector<Tuple3<Long, Integer, Integer>> out)
                                        throws Exception {
                                    countOrDivert(value, ctx);
                                }

                                // Shared handling for both inputs: register the bucket timer,
                                // then either divert the element to the late side output or
                                // count it in its bucket.
                                private void countOrDivert(Integer value, Context ctx)
                                        throws Exception {
                                    Long elementTimestamp = ctx.timestamp();
                                    // end of the ten-wide bucket the element belongs to
                                    long nextTen = ((elementTimestamp + 10) / 10) * 10;
                                    ctx.timerService().registerEventTimeTimer(nextTen);

                                    // elements behind the watermark only go to the side output
                                    if (elementTimestamp < ctx.timerService().currentWatermark()) {
                                        ctx.output(lateElements, value);
                                        return;
                                    }

                                    // timestamps of a key must not decrease between elements
                                    Long previousTimestamp = previousTimestampState.value();
                                    if (previousTimestamp == null) {
                                        previousTimestamp = 0L;
                                    }
                                    assertThat(
                                            elementTimestamp,
                                            greaterThanOrEqualTo(previousTimestamp));
                                    previousTimestampState.update(elementTimestamp);

                                    Integer currentCount = countState.get(nextTen);
                                    if (currentCount == null) {
                                        currentCount = 0;
                                    }
                                    countState.put(nextTen, currentCount + 1);
                                }

                                @Override
                                public void onTimer(
                                        long timestamp,
                                        OnTimerContext ctx,
                                        Collector<Tuple3<Long, Integer, Integer>> out)
                                        throws Exception {
                                    out.collect(
                                            Tuple3.of(
                                                    timestamp,
                                                    ctx.getCurrentKey(),
                                                    countState.get(timestamp)));
                                    countState.remove(timestamp);

                                    // this would go in infinite loop if we did not quiesce the
                                    // timer service.
                                    ctx.timerService().registerEventTimeTimer(timestamp + 1);
                                }
                            });

    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));

    assertTrue(lateRecordsCollected.isEmpty());

    // (timer timestamp, key, number of elements counted for that timer)
    List<Tuple3<Long, Integer, Integer>> expectedSums =
            Arrays.asList(
                    Tuple3.of(10L, 1, 8),
                    Tuple3.of(20L, 1, 6),
                    Tuple3.of(10L, 2, 4),
                    Tuple3.of(20L, 2, 2));
    assertThat(sumsCollected, equalTo(expectedSums));
}
Use of org.apache.flink.streaming.api.datastream.DataStream in the Apache Flink project.
Class SourceNAryInputChainingITCase, method createProgramWithMultipleUnionInputs.
/**
 * Builds a DataStream program with six number-sequence sources feeding one N-ary operator
 * through four inputs, two of which are unions.
 *
 * <pre>
 *                                   +--------------+
 *             (src 1) --> (map) --> |              |
 *                                   |              |
 *           (src 2) --+             |              |
 *                     +-- UNION --> |              |
 *           (src 3) --+             |    N-Ary     |
 *                                   |   Operator   |
 * (src 4) -> (map) --+              |              |
 *                    +-- UNION ->   |              |
 * (src 5) -> (map) --+              |              |
 *                                   |              |
 *                      (src 6) -->  |              |
 *                                   +--------------+
 * </pre>
 *
 * @return the stream returned by {@code nAryInputStreamOperation} for the four inputs
 */
private DataStream<Long> createProgramWithMultipleUnionInputs() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.getConfig().enableObjectReuse();

    // six sources covering the consecutive ranges 1-10, 11-20, ..., 51-60
    final DataStream<Long> source1 =
            env.fromSource(
                    new NumberSequenceSource(1L, 10L),
                    WatermarkStrategy.noWatermarks(),
                    "source-1");
    final DataStream<Long> source2 =
            env.fromSource(
                    new NumberSequenceSource(11L, 20L),
                    WatermarkStrategy.noWatermarks(),
                    "source-2");
    final DataStream<Long> source3 =
            env.fromSource(
                    new NumberSequenceSource(21L, 30L),
                    WatermarkStrategy.noWatermarks(),
                    "source-3");
    final DataStream<Long> source4 =
            env.fromSource(
                    new NumberSequenceSource(31L, 40L),
                    WatermarkStrategy.noWatermarks(),
                    "source-4");
    final DataStream<Long> source5 =
            env.fromSource(
                    new NumberSequenceSource(41L, 50L),
                    WatermarkStrategy.noWatermarks(),
                    "source-5");
    final DataStream<Long> source6 =
            env.fromSource(
                    new NumberSequenceSource(51L, 60L),
                    WatermarkStrategy.noWatermarks(),
                    "source-6");

    // the four operator inputs, built in the order they appear in the diagram
    final DataStream<Long> mappedFirst = source1.map((v) -> v);
    final DataStream<Long> secondUnionThird = source2.union(source3);
    final DataStream<Long> mappedFourthUnionFifth =
            source4.map((v) -> v).union(source5.map((v) -> v));

    return nAryInputStreamOperation(
            mappedFirst, secondUnionThird, mappedFourthUnionFifth, source6);
}
Aggregations