Example use of org.apache.flink.api.common.eventtime.WatermarkStrategy in the Apache Flink project.
Class SortingBoundedInputITCase, method testBatchExecutionWithTimersOneInput.
/**
 * Runs a keyed process function that registers event-time timers over a bounded input in
 * {@code BATCH} runtime mode, then checks the per-key counts emitted by the timers and that
 * nothing was routed to the late-element side output.
 */
@Test
public void testBatchExecutionWithTimersOneInput() {
    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // a single parallel instance keeps the order of the collected results stable
    env.setParallelism(1);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // the second tuple field (f1) serves as the event timestamp
    WatermarkStrategy<Tuple2<Integer, Integer>> strategy =
            WatermarkStrategy.forGenerator(
                            generatorContext -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
                    .withTimestampAssigner((record, ignoredTimestamp) -> record.f1);

    // Tuples are (key, timestamp). Elements tagged "late element" carry that annotation from
    // the original test; presumably they would trail the generated watermark in a streaming
    // run. The assertions at the end expect no late records in this BATCH run.
    DataStream<Tuple2<Integer, Integer>> rawInput =
            env.fromElements(
                    Tuple2.of(1, 3),
                    Tuple2.of(1, 1),
                    Tuple2.of(2, 1),
                    Tuple2.of(1, 4),
                    Tuple2.of(2, 3), // late element
                    Tuple2.of(1, 2), // late element
                    Tuple2.of(1, 13),
                    Tuple2.of(1, 11),
                    Tuple2.of(2, 14),
                    Tuple2.of(1, 11)); // late element
    SingleOutputStreamOperator<Tuple2<Integer, Integer>> timestamped =
            rawInput.assignTimestampsAndWatermarks(strategy);

    OutputTag<Integer> lateElements =
            new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);

    KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>> countPerTimer =
            new KeyedProcessFunction<Integer, Integer, Tuple3<Long, Integer, Integer>>() {

                /** Number of on-time elements seen, keyed by the timer timestamp they map to. */
                private MapState<Long, Integer> countsByTimer;

                /** Timestamp of the most recent on-time element of the current key. */
                private ValueState<Long> lastTimestamp;

                @Override
                public void open(Configuration parameters) {
                    MapStateDescriptor<Long, Integer> countsDescriptor =
                            new MapStateDescriptor<>(
                                    "sum",
                                    BasicTypeInfo.LONG_TYPE_INFO,
                                    BasicTypeInfo.INT_TYPE_INFO);
                    ValueStateDescriptor<Long> lastTimestampDescriptor =
                            new ValueStateDescriptor<>(
                                    "previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO);
                    countsByTimer = getRuntimeContext().getMapState(countsDescriptor);
                    lastTimestamp = getRuntimeContext().getState(lastTimestampDescriptor);
                }

                @Override
                public void processElement(
                        Integer value,
                        Context ctx,
                        Collector<Tuple3<Long, Integer, Integer>> out)
                        throws Exception {
                    Long eventTime = ctx.timestamp();
                    // next multiple of ten strictly greater than the element timestamp
                    long timerTime = ((eventTime + 10) / 10) * 10;
                    ctx.timerService().registerEventTimeTimer(timerTime);

                    if (eventTime < ctx.timerService().currentWatermark()) {
                        ctx.output(lateElements, value);
                        return;
                    }

                    // within one key, timestamps must never go backwards
                    Long stored = lastTimestamp.value();
                    Long previous = stored != null ? stored : 0L;
                    assertThat(eventTime, greaterThanOrEqualTo(previous));
                    lastTimestamp.update(eventTime);

                    Integer seenSoFar = countsByTimer.get(timerTime);
                    countsByTimer.put(timerTime, seenSoFar == null ? 1 : seenSoFar + 1);
                }

                @Override
                public void onTimer(
                        long timestamp,
                        OnTimerContext ctx,
                        Collector<Tuple3<Long, Integer, Integer>> out)
                        throws Exception {
                    Integer firedCount = countsByTimer.get(timestamp);
                    out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), firedCount));
                    countsByTimer.remove(timestamp);

                    // Registering a follow-up timer from inside onTimer would loop forever
                    // if the timer service were not quiesced.
                    ctx.timerService().registerEventTimeTimer(timestamp + 1);
                }
            };

    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums =
            timestamped.map(tuple -> tuple.f0).keyBy(key -> key).process(countPerTimer);

    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));

    // entries are (timer timestamp, key, count)
    List<Tuple3<Long, Integer, Integer>> expectedSums =
            Arrays.asList(
                    Tuple3.of(10L, 1, 4),
                    Tuple3.of(20L, 1, 3),
                    Tuple3.of(10L, 2, 2),
                    Tuple3.of(20L, 2, 1));

    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(expectedSums));
}
Example use of org.apache.flink.api.common.eventtime.WatermarkStrategy in the Apache Flink project.
Class SortingBoundedInputITCase, method testBatchExecutionWithTimersTwoInput.
/**
 * Connects two identical bounded inputs, runs a keyed co-process function with event-time
 * timers over them in {@code BATCH} runtime mode, then checks the per-key counts emitted by
 * the timers and that nothing was routed to the late-element side output.
 */
@Test
public void testBatchExecutionWithTimersTwoInput() {
    Configuration batchConfig = new Configuration();
    batchConfig.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // a single parallel instance keeps the order of the collected results stable
    env.setParallelism(1);
    env.configure(batchConfig, this.getClass().getClassLoader());

    // the second tuple field (f1) serves as the event timestamp
    WatermarkStrategy<Tuple2<Integer, Integer>> strategy =
            WatermarkStrategy.forGenerator(
                            generatorContext -> GENERATE_WATERMARK_AFTER_4_14_TIMESTAMP)
                    .withTimestampAssigner((record, ignoredTimestamp) -> record.f1);

    // Tuples are (key, timestamp); only the key is forwarded downstream. Elements tagged
    // "late element" carry that annotation from the original test; presumably they would
    // trail the generated watermark in a streaming run. The assertions at the end expect no
    // late records in this BATCH run.
    SingleOutputStreamOperator<Integer> firstInput =
            env.fromElements(
                            Tuple2.of(1, 3),
                            Tuple2.of(1, 1),
                            Tuple2.of(2, 1),
                            Tuple2.of(1, 4),
                            Tuple2.of(2, 3), // late element
                            Tuple2.of(1, 2), // late element
                            Tuple2.of(1, 13),
                            Tuple2.of(1, 11),
                            Tuple2.of(2, 14),
                            Tuple2.of(1, 11)) // late element
                    .assignTimestampsAndWatermarks(strategy)
                    .map(tuple -> tuple.f0);

    // same data as the first input
    SingleOutputStreamOperator<Integer> secondInput =
            env.fromElements(
                            Tuple2.of(1, 3),
                            Tuple2.of(1, 1),
                            Tuple2.of(2, 1),
                            Tuple2.of(1, 4),
                            Tuple2.of(2, 3), // late element
                            Tuple2.of(1, 2), // late element
                            Tuple2.of(1, 13),
                            Tuple2.of(1, 11),
                            Tuple2.of(2, 14),
                            Tuple2.of(1, 11)) // late element
                    .assignTimestampsAndWatermarks(strategy)
                    .map(tuple -> tuple.f0);

    OutputTag<Integer> lateElements =
            new OutputTag<>("late_elements", BasicTypeInfo.INT_TYPE_INFO);

    KeyedCoProcessFunction<Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>
            countPerTimer =
                    new KeyedCoProcessFunction<
                            Integer, Integer, Integer, Tuple3<Long, Integer, Integer>>() {

                        /** On-time element counts, keyed by the timer timestamp they map to. */
                        private MapState<Long, Integer> countsByTimer;

                        /** Timestamp of the most recent on-time element of the current key. */
                        private ValueState<Long> lastTimestamp;

                        @Override
                        public void open(Configuration parameters) {
                            MapStateDescriptor<Long, Integer> countsDescriptor =
                                    new MapStateDescriptor<>(
                                            "sum",
                                            BasicTypeInfo.LONG_TYPE_INFO,
                                            BasicTypeInfo.INT_TYPE_INFO);
                            ValueStateDescriptor<Long> lastTimestampDescriptor =
                                    new ValueStateDescriptor<>(
                                            "previousTimestamp", BasicTypeInfo.LONG_TYPE_INFO);
                            countsByTimer = getRuntimeContext().getMapState(countsDescriptor);
                            lastTimestamp =
                                    getRuntimeContext().getState(lastTimestampDescriptor);
                        }

                        @Override
                        public void processElement1(
                                Integer value,
                                Context ctx,
                                Collector<Tuple3<Long, Integer, Integer>> out)
                                throws Exception {
                            countOrReportLate(value, ctx);
                        }

                        @Override
                        public void processElement2(
                                Integer value,
                                Context ctx,
                                Collector<Tuple3<Long, Integer, Integer>> out)
                                throws Exception {
                            countOrReportLate(value, ctx);
                        }

                        /** Shared handling for both inputs: register a timer, then count or side-output. */
                        private void countOrReportLate(Integer value, Context ctx)
                                throws Exception {
                            Long eventTime = ctx.timestamp();
                            // next multiple of ten strictly greater than the element timestamp
                            long timerTime = ((eventTime + 10) / 10) * 10;
                            ctx.timerService().registerEventTimeTimer(timerTime);

                            if (eventTime < ctx.timerService().currentWatermark()) {
                                ctx.output(lateElements, value);
                                return;
                            }

                            // within one key, timestamps must never go backwards
                            Long stored = lastTimestamp.value();
                            Long previous = stored != null ? stored : 0L;
                            assertThat(eventTime, greaterThanOrEqualTo(previous));
                            lastTimestamp.update(eventTime);

                            Integer seenSoFar = countsByTimer.get(timerTime);
                            countsByTimer.put(
                                    timerTime, seenSoFar == null ? 1 : seenSoFar + 1);
                        }

                        @Override
                        public void onTimer(
                                long timestamp,
                                OnTimerContext ctx,
                                Collector<Tuple3<Long, Integer, Integer>> out)
                                throws Exception {
                            Integer firedCount = countsByTimer.get(timestamp);
                            out.collect(Tuple3.of(timestamp, ctx.getCurrentKey(), firedCount));
                            countsByTimer.remove(timestamp);

                            // Registering a follow-up timer from inside onTimer would loop
                            // forever if the timer service were not quiesced.
                            ctx.timerService().registerEventTimeTimer(timestamp + 1);
                        }
                    };

    SingleOutputStreamOperator<Tuple3<Long, Integer, Integer>> sums =
            firstInput
                    .connect(secondInput)
                    .keyBy(key -> key, key -> key)
                    .process(countPerTimer);

    DataStream<Integer> lateStream = sums.getSideOutput(lateElements);
    List<Integer> lateRecordsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(lateStream));
    List<Tuple3<Long, Integer, Integer>> sumsCollected =
            CollectionUtil.iteratorToList(DataStreamUtils.collect(sums));

    // entries are (timer timestamp, key, count)
    List<Tuple3<Long, Integer, Integer>> expectedSums =
            Arrays.asList(
                    Tuple3.of(10L, 1, 8),
                    Tuple3.of(20L, 1, 6),
                    Tuple3.of(10L, 2, 4),
                    Tuple3.of(20L, 2, 2));

    assertTrue(lateRecordsCollected.isEmpty());
    assertThat(sumsCollected, equalTo(expectedSums));
}
Example use of org.apache.flink.api.common.eventtime.WatermarkStrategy in the Apache Flink project.
Class SavepointWindowReaderITCase, method testApplyEvictorWindowStateReader.
/**
 * Runs a job that keeps {@code numbers} in an evictor-backed tumbling event-time window, takes
 * a savepoint of it, reads the window state back through {@code SavepointReader}, and checks
 * that the values read are exactly {@code numbers} in any order.
 *
 * @throws Exception if taking the savepoint, reading it, or collecting the results fails
 */
@Test
public void testApplyEvictorWindowStateReader() throws Exception {
    // One size shared by the job that writes the window state and the reader that loads it,
    // so the two window definitions cannot drift apart (the reader previously used 1 ms
    // against a 10 ms writer).
    final Time windowSize = Time.milliseconds(10);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(getStateBackend());
    env.setParallelism(4);

    env.addSource(createSource(numbers))
            .rebalance()
            // every element gets timestamp 0 and no watermarks are generated
            .assignTimestampsAndWatermarks(
                    WatermarkStrategy.<Integer>noWatermarks()
                            .withTimestampAssigner((event, timestamp) -> 0))
            .keyBy(id -> id)
            .window(TumblingEventTimeWindows.of(windowSize))
            .evictor(new NoOpEvictor<>())
            .apply(new NoOpWindowFunction())
            .uid(uid)
            .addSink(new DiscardingSink<>());

    String savepointPath = takeSavepoint(env);

    SavepointReader savepoint = SavepointReader.read(env, savepointPath, getStateBackend());

    List<Integer> results =
            JobResultRetriever.collect(
                    savepoint
                            .window(TumblingEventTimeWindows.of(windowSize))
                            .evictor()
                            .process(
                                    uid,
                                    new BasicReaderFunction(),
                                    Types.INT,
                                    Types.INT,
                                    Types.INT));

    Assert.assertThat(
            "Unexpected results from keyed state",
            results,
            Matchers.containsInAnyOrder(numbers));
}
Example use of org.apache.flink.api.common.eventtime.WatermarkStrategy in the Apache Flink project.
Class SavepointWriterWindowITCase, method testSlideWindow.
/**
 * Bootstraps sliding event-time window state from {@code WORDS} into a new savepoint, then
 * submits a job from that savepoint and checks the distinct window results against
 * {@code STANDARD_MATCHER}.
 *
 * @throws Exception if writing the savepoint, running the jobs, or collecting results fails
 */
@Test
public void testSlideWindow() throws Exception {
    // the same window definition is used when writing the state and when restoring from it
    final Time windowSize = Time.milliseconds(5);
    final Time windowSlide = Time.milliseconds(1);
    final String savepointPath = getTempDirPath(new AbstractID().toHexString());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

    // --- write phase: one (word, 1) record per word, all stamped with timestamp 2 ---
    DataStream<Tuple2<String, Integer>> seedRecords =
            env.fromCollection(WORDS)
                    .map(word -> Tuple2.of(word, 1), TUPLE_TYPE_INFO)
                    .assignTimestampsAndWatermarks(
                            WatermarkStrategy.<Tuple2<String, Integer>>noWatermarks()
                                    .withTimestampAssigner(
                                            (element, ignoredTimestamp) -> 2L));

    WindowedStateTransformation<Tuple2<String, Integer>, String, TimeWindow> transformation =
            OperatorTransformation.bootstrapWith(seedRecords)
                    .keyBy(pair -> pair.f0, Types.STRING)
                    .window(SlidingEventTimeWindows.of(windowSize, windowSlide));

    SavepointWriter.newSavepoint(stateBackend, 128)
            .withOperator(UID, windowBootstrap.bootstrap(transformation))
            .write(savepointPath);
    env.execute("write state");

    // --- restore phase: a job with the same operator uid and window definition ---
    WindowedStream<Tuple2<String, Integer>, String, TimeWindow> stream =
            env.addSource(new MaxWatermarkSource<Tuple2<String, Integer>>())
                    .returns(TUPLE_TYPE_INFO)
                    .keyBy(pair -> pair.f0)
                    .window(SlidingEventTimeWindows.of(windowSize, windowSlide));

    DataStream<Tuple2<String, Integer>> windowed = windowStream.window(stream).uid(UID);
    CompletableFuture<Collection<Tuple2<String, Integer>>> pendingResults =
            collector.collect(windowed);

    submitJob(savepointPath, env);

    // duplicates are dropped before matching
    Collection<Tuple2<String, Integer>> emitted = pendingResults.get();
    Collection<Tuple2<String, Integer>> results =
            emitted.stream().distinct().collect(Collectors.toList());

    Assert.assertThat("Incorrect results from bootstrapped windows", results, STANDARD_MATCHER);
}
Example use of org.apache.flink.api.common.eventtime.WatermarkStrategy in the Apache Flink project.
Class DataSetSavepointWindowReaderITCase, method testAggregateEvictorWindowStateReader.
/**
 * Runs a job that aggregates {@code numbers} in an evictor-backed tumbling event-time window,
 * takes a savepoint of it, reads the window state back through the batch {@code Savepoint}
 * API, and checks that the values read are exactly {@code numbers} in any order.
 *
 * @throws Exception if taking the savepoint, loading it, or collecting the results fails
 */
@Test
public void testAggregateEvictorWindowStateReader() throws Exception {
    // window size used both by the writing job and by the savepoint reader
    final Time windowSize = Time.milliseconds(10);

    // every element gets timestamp 0 and no watermarks are generated
    WatermarkStrategy<Integer> constantTimestamp =
            WatermarkStrategy.<Integer>noWatermarks()
                    .withTimestampAssigner((element, ignoredTimestamp) -> 0);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(getStateBackend());
    env.setParallelism(4);

    env.addSource(createSource(numbers))
            .rebalance()
            .assignTimestampsAndWatermarks(constantTimestamp)
            .keyBy(key -> key)
            .window(TumblingEventTimeWindows.of(windowSize))
            .evictor(new NoOpEvictor<>())
            .aggregate(new AggregateSum())
            .uid(uid)
            .addSink(new DiscardingSink<>());

    String savepointPath = takeSavepoint(env);

    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
    ExistingSavepoint savepoint = Savepoint.load(batchEnv, savepointPath, getStateBackend());

    List<Integer> results =
            savepoint
                    .window(TumblingEventTimeWindows.of(windowSize))
                    .evictor()
                    .aggregate(uid, new AggregateSum(), Types.INT, Types.INT, Types.INT)
                    .collect();

    Assert.assertThat(
            "Unexpected results from keyed state",
            results,
            Matchers.containsInAnyOrder(numbers));
}
Aggregations