use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class DoFnOperatorTest method testTimersRestore.
@Test
public void testTimersRestore() throws Exception {
final Instant timerTimestamp = new Instant(1000);
final String outputMessage = "Timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
private static final String EVENT_TIMER_ID = "eventTimer";
@TimerId(EVENT_TIMER_ID)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(EVENT_TIMER_ID) Timer timer) {
timer.set(timerTimestamp);
}
@OnTimer(EVENT_TIMER_ID)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(outputMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
TupleTag<String> outputTag = new TupleTag<>("main-output");
final CoderTypeSerializer<WindowedValue<String>> outputSerializer = new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
CoderTypeInformation<ByteBuffer> keyCoderInfo = new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, keyCoder, outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.open();
testHarness.processWatermark(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register a timer
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// snapshot and restore
final OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
testHarness.close();
testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, VarIntCoder.of(), outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.initializeState(snapshot);
testHarness.open();
// this must fire the timer
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(outputMessage, timerTimestamp, window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class WindowingStrategyTranslation method toProto.
/**
* Converts a {@link WindowingStrategy} into a {@link RunnerApi.WindowingStrategy}, registering
* any components in the provided {@link SdkComponents}.
*/
public static RunnerApi.WindowingStrategy toProto(WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
FunctionSpec windowFnSpec = toProto(windowFn, components);
String environmentId = Strings.isNullOrEmpty(windowingStrategy.getEnvironmentId()) ? components.getEnvironmentIdFor(ResourceHints.create()) : windowingStrategy.getEnvironmentId();
RunnerApi.WindowingStrategy.Builder windowingStrategyProto = RunnerApi.WindowingStrategy.newBuilder().setOutputTime(toProto(windowingStrategy.getTimestampCombiner())).setAccumulationMode(toProto(windowingStrategy.getMode())).setClosingBehavior(toProto(windowingStrategy.getClosingBehavior())).setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis()).setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger())).setWindowFn(windowFnSpec).setAssignsToOneWindow(windowFn.assignsToOneWindow()).setMergeStatus(windowFn.isNonMerging() ? MergeStatus.Enum.NON_MERGING : (windowingStrategy.isAlreadyMerged() ? MergeStatus.Enum.ALREADY_MERGED : MergeStatus.Enum.NEEDS_MERGE)).setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior())).setWindowCoderId(components.registerCoder(windowFn.windowCoder())).setEnvironmentId(environmentId);
return windowingStrategyProto.build();
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class LateDataUtils method dropExpiredWindows.
/**
* Returns an {@code Iterable<WindowedValue<InputT>>} that only contains non-late input elements.
*/
public static <K, V> Iterable<WindowedValue<V>> dropExpiredWindows(final K key, Iterable<WindowedValue<V>> elements, final TimerInternals timerInternals, final WindowingStrategy<?, ?> windowingStrategy, final CounterCell droppedDueToLateness) {
return FluentIterable.from(elements).transformAndConcat(// Explode windows to filter out expired ones
input -> {
if (input == null) {
return null;
}
return input.explodeWindows();
}).filter(input -> {
if (input == null) {
// drop null elements.
return false;
}
BoundedWindow window = Iterables.getOnlyElement(input.getWindows());
boolean expired = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).isBefore(timerInternals.currentInputWatermarkTime());
if (expired) {
// The element is too late for this window.
droppedDueToLateness.inc();
WindowTracing.debug("GroupAlsoByWindow: Dropping element at {} for key: {}; " + "window: {} since it is too far behind inputWatermark: {}", input.getTimestamp(), key, window, timerInternals.currentInputWatermarkTime());
}
// Keep the element if the window is not expired.
return !expired;
});
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class DoFnOperatorTest method testLateDroppingForStatefulFn.
@Test
public void testLateDroppingForStatefulFn() throws Exception {
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
@StateId("state")
private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void processElement(ProcessContext context) {
context.output(context.element().toString());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
Coder<WindowedValue<Integer>> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
Coder<WindowedValue<String>> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
TupleTag<String> outputTag = new TupleTag<>("main-output");
DoFnOperator<Integer, String> doFnOperator = new DoFnOperator<>(fn, "stepName", inputCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), windowingStrategy, new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), keyCoder, /* key coder */
keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
testHarness.processWatermark(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
// this should not be late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
testHarness.processWatermark(9);
// this should still not be considered late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
testHarness.processWatermark(10);
// this should now be considered late
testHarness.processElement(new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
testHarness.close();
}
use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.
the class DoFnOperatorTest method testWatermarkContract.
/**
* This test specifically verifies that we correctly map Flink watermarks to Beam watermarks. In
* Beam, a watermark {@code T} guarantees there will not be elements with a timestamp {@code < T}
* in the future. In Flink, a watermark {@code T} guarantees there will not be elements with a
* timestamp {@code <= T} in the future. We have to make sure to take this into account when
* firing timers.
*
* <p>This does not test the timer API in general or processing-time timers because there are
* generic tests for this in {@code ParDoTest}.
*/
@Test
public void testWatermarkContract() throws Exception {
final Instant timerTimestamp = new Instant(1000);
final Instant timerOutputTimestamp = timerTimestamp.minus(Duration.millis(1));
final String eventTimeMessage = "Event timer fired: ";
final String processingTimeMessage = "Processing timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
final String eventTimerId = "eventTimer";
final String eventTimerId2 = "eventTimer2";
final String processingTimerId = "processingTimer";
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
@TimerId(eventTimerId)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(eventTimerId2)
private final TimerSpec eventTimer2 = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(processingTimerId)
private final TimerSpec processingTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(eventTimerId) Timer eventTimer, @TimerId(eventTimerId2) Timer eventTimerWithOutputTimestamp, @TimerId(processingTimerId) Timer processingTimer) {
eventTimer.set(timerTimestamp);
eventTimerWithOutputTimestamp.withOutputTimestamp(timerOutputTimestamp).set(timerTimestamp);
processingTimer.offset(Duration.millis(timerTimestamp.getMillis())).setRelative();
}
@OnTimer(eventTimerId)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(eventTimeMessage + eventTimerId, context.timestamp());
}
@OnTimer(eventTimerId2)
public void onEventTime2(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.fireTimestamp());
context.output(eventTimeMessage + eventTimerId2);
}
@OnTimer(processingTimerId)
public void onProcessingTime(OnTimerContext context) {
assertEquals(// See SimpleDoFnRunner#onTimer
"Timer timestamp must match current input watermark", timerTimestamp.plus(Duration.millis(1)), context.timestamp());
context.outputWithTimestamp(processingTimeMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
TupleTag<String> outputTag = new TupleTag<>("main-output");
DoFnOperator<Integer, String> doFnOperator = new DoFnOperator<>(fn, "stepName", inputCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), windowingStrategy, new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), keyCoder, /* key coder */
keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup(new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())));
testHarness.open();
testHarness.processWatermark(0);
testHarness.setProcessingTime(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register the two timers above
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// this does not yet fire the timers (in vanilla Flink it would)
testHarness.processWatermark(timerTimestamp.getMillis());
testHarness.setProcessingTime(timerTimestamp.getMillis());
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
assertThat(doFnOperator.keyedStateInternals.minWatermarkHoldMs(), is(timerOutputTimestamp.getMillis()));
// this must fire the event timers
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), containsInAnyOrder(WindowedValue.of(eventTimeMessage + eventTimerId, timerTimestamp, window1, PaneInfo.NO_FIRING), WindowedValue.of(eventTimeMessage + eventTimerId2, timerTimestamp.minus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
// this must fire the processing timer
testHarness.setProcessingTime(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(// See SimpleDoFnRunner#onTimer
processingTimeMessage, timerTimestamp.plus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
Aggregations