use of org.joda.time.Instant in project beam by apache.
the class InMemoryStateInternalsTest method testWatermarkLatestState.
/**
 * Exercises the watermark hold stored under {@code WATERMARK_LATEST_ADDR}: after each add the
 * value read back is expected to be the greatest instant added so far, and clearing empties it.
 */
@Test
public void testWatermarkLatestState() throws Exception {
  WatermarkHoldState hold = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);

  // Same namespace and address give back an equal (cached) state; another namespace does not.
  assertEquals(hold, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
  assertFalse(hold.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));

  // Nothing has been added yet, so there is nothing to read.
  assertThat(hold.read(), Matchers.nullValue());

  Instant low = new Instant(1000);
  Instant middle = new Instant(2000);
  Instant high = new Instant(3000);

  hold.add(middle);
  assertThat(hold.read(), equalTo(middle));

  // A later instant replaces the current reading.
  hold.add(high);
  assertThat(hold.read(), equalTo(high));

  // An earlier instant leaves the reading untouched.
  hold.add(low);
  assertThat(hold.read(), equalTo(high));

  hold.clear();
  assertThat(hold.read(), equalTo(null));

  // After clearing, the lookup still returns the very same state object.
  assertThat(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), Matchers.sameInstance(hold));
}
use of org.joda.time.Instant in project beam by apache.
the class InMemoryStateInternalsTest method testWatermarkEarliestState.
/**
 * Exercises the watermark hold stored under {@code WATERMARK_EARLIEST_ADDR}: after each add the
 * value read back is expected to be the smallest instant added so far, and clearing empties it.
 */
@Test
public void testWatermarkEarliestState() throws Exception {
  WatermarkHoldState hold = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);

  // Same namespace and address give back an equal (cached) state; another namespace does not.
  assertEquals(hold, underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR));
  assertFalse(hold.equals(underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR)));

  // Nothing has been added yet, so there is nothing to read.
  assertThat(hold.read(), Matchers.nullValue());

  Instant low = new Instant(1000);
  Instant middle = new Instant(2000);
  Instant high = new Instant(3000);

  hold.add(middle);
  assertThat(hold.read(), equalTo(middle));

  // A later instant leaves the reading untouched.
  hold.add(high);
  assertThat(hold.read(), equalTo(middle));

  // An earlier instant replaces the current reading.
  hold.add(low);
  assertThat(hold.read(), equalTo(low));

  hold.clear();
  assertThat(hold.read(), equalTo(null));

  // After clearing, the lookup still returns the very same state object.
  assertThat(underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR), Matchers.sameInstance(hold));
}
use of org.joda.time.Instant in project beam by apache.
the class StatefulDoFnRunner method isLate.
/**
 * Reports whether the garbage collection time of {@code window}, as computed by
 * {@code LateDataUtils.garbageCollectionTime} for this runner's windowing strategy, lies strictly
 * before the input watermark currently reported by the cleanup timer.
 *
 * @param window the window to examine
 * @return {@code true} if the garbage collection time is before the current input watermark
 */
private boolean isLate(BoundedWindow window) {
  return LateDataUtils.garbageCollectionTime(window, windowingStrategy)
      .isBefore(cleanupTimer.currentInputWatermarkTime());
}
use of org.joda.time.Instant in project beam by apache.
the class WatermarkHold method addGarbageCollectionHold.
/**
 * Tries to place a 'garbage collection hold' and reports where it ended up.
 *
 * <p>The hold is only useful when it differs from the one {@link #addEndOfWindowHold} would
 * place, which requires {@link WindowingStrategy#getAllowedLateness} to be longer than zero.
 *
 * <p>There are two reasons to place such a hold:
 * <ol>
 * <li>An element arrived behind the output watermark, too late for either the usual element
 * hold or an end of window hold. The garbage collection hold guarantees that the pane's
 * eventual output is not dropped for excessive lateness by a downstream computation.
 * <li>The {@link WindowingStrategy#getClosingBehavior()} is
 * {@link ClosingBehavior#FIRE_ALWAYS}, so every window that saw at least one element gets a
 * final pane. The hold guarantees that even an empty final pane can carry a timestamp that
 * downstream computations do not consider beyond allowed lateness.
 * </ol>
 *
 * <p>{@code paneIsEmpty} tells the two cases apart: an empty pane combined with
 * {@link ClosingBehavior#FIRE_IF_NON_EMPTY} needs no hold at all.
 *
 * @param context the reduce-fn context supplying the window, key and state
 * @param paneIsEmpty whether the current pane holds no elements
 * @return the {@link Instant} of the hold that was added (the end of window time plus allowed
 *     lateness, truncated to just below {@link BoundedWindow#TIMESTAMP_MAX_VALUE} when it
 *     would otherwise reach it), or {@literal null} if no hold was added
 */
@Nullable
private Instant addGarbageCollectionHold(ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, boolean paneIsEmpty) {
  final Instant outputWatermark = timerInternals.currentOutputWatermarkTime();
  final Instant inputWatermark = timerInternals.currentInputWatermarkTime();
  final Instant gcTime = LateDataUtils.garbageCollectionTime(context.window(), windowingStrategy);

  // With no allowed lateness the hold would coincide with the end-of-window hold.
  final boolean latenessAllowed =
      windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO);
  if (!latenessAllowed) {
    WindowTracing.trace(
        "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is unnecessary "
            + "since no allowed lateness for key:{}; window:{}; inputWatermark:{}; "
            + "outputWatermark:{}",
        gcTime, context.key(), context.window(), inputWatermark, outputWatermark);
    return null;
  }

  // An empty pane that is never fired has no output needing protection.
  final boolean emptyPaneWillNotFire =
      paneIsEmpty
          && context.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_IF_NON_EMPTY;
  if (emptyPaneWillNotFire) {
    WindowTracing.trace(
        "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is unnecessary "
            + "since empty pane and FIRE_IF_NON_EMPTY for key:{}; window:{}; inputWatermark:{}; "
            + "outputWatermark:{}",
        gcTime, context.key(), context.window(), inputWatermark, outputWatermark);
    return null;
  }

  // A hold at or past the largest representable timestamp is pulled back to one millisecond
  // below it (the greatest timestamp that is not positive infinity), so that every window is
  // eventually garbage collected.
  final Instant hold =
      gcTime.isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE)
          ? gcTime
          : BoundedWindow.TIMESTAMP_MAX_VALUE.minus(Duration.millis(1L));

  checkState(!hold.isBefore(inputWatermark), "Garbage collection hold %s cannot be before input watermark %s", hold, inputWatermark);
  checkState(!hold.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE), "Garbage collection hold %s is beyond end-of-time", hold);

  // Stored under EXTRA_HOLD_TAG rather than elementHoldTag, for the same reasons as the
  // end-of-window hold placed by addEndOfWindowHold.
  context.state().access(EXTRA_HOLD_TAG).add(hold);
  WindowTracing.trace(
      "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is on time for "
          + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
      hold, context.key(), context.window(), inputWatermark, outputWatermark);
  return hold;
}
use of org.joda.time.Instant in project beam by apache.
the class WatermarkHold method addEndOfWindowHold.
/**
 * Tries to place an 'end-of-window hold' and reports where it ended up.
 *
 * <p>The hold is only placed when a timer is certain to be set (by {@link ReduceFnRunner}) to
 * clear it again, which means the input watermark must not already be past the end of window
 * time.
 *
 * <p>There are two reasons to place such a hold:
 * <ol>
 * <li>An element arrived behind the output watermark (too late for the usual element hold) but
 * might still make it into an {@link Timing#ON_TIME} pane. The hold keeps that pane from being
 * considered late by a downstream computation.
 * <li>Every window that saw at least one element is guaranteed an {@link Timing#ON_TIME} pane,
 * even an empty one. So whenever a fired trigger processes a pane's elements, both an end of
 * window timer and an end of window hold must be set; the hold again keeps the
 * {@link Timing#ON_TIME} pane from being considered late downstream.
 * </ol>
 *
 * @param context the reduce-fn context supplying the window, key and state
 * @param paneIsEmpty whether the current pane holds no elements (not consulted by the code
 *     below; see the comment on the state write)
 * @return the {@link Instant} of the hold that was added (the end of window time), or
 *     {@literal null} if no end of window hold is possible and the caller should fall back to
 *     a garbage collection hold
 */
@Nullable
private Instant addEndOfWindowHold(ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, boolean paneIsEmpty) {
  final Instant outputWatermark = timerInternals.currentOutputWatermarkTime();
  final Instant inputWatermark = timerInternals.currentInputWatermarkTime();
  final Instant hold = context.window().maxTimestamp();

  // Once the input watermark has passed the end of the window, no end-of-window timer can be
  // relied on to clear the hold, so none is placed.
  final boolean pastEndOfWindow = hold.isBefore(inputWatermark);
  if (pastEndOfWindow) {
    WindowTracing.trace(
        "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is too late for "
            + "end-of-window timer for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
        hold, context.key(), context.window(), inputWatermark, outputWatermark);
    return null;
  }

  // A missing (null) output watermark places no constraint on the hold.
  final boolean behindOutputWatermark = outputWatermark != null && hold.isBefore(outputWatermark);
  checkState(!behindOutputWatermark, "End-of-window hold %s cannot be before output watermark %s", hold, outputWatermark);
  checkState(!hold.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE), "End-of-window hold %s is beyond end-of-time", hold);

  // For an empty pane the hold exists only for the empty ON_TIME pane, so it should stay away
  // from the combining function behind elementHoldTag. For a non-empty pane elementHoldTag
  // could make sense, but onMerge has to add an end of window or garbage collection hold
  // without knowing whether an element hold is already in place (checking is too expensive).
  // Thus it would end up adding an element hold at the end of the window, which could upset
  // the elementHoldTag combining function. Hence EXTRA_HOLD_TAG is used in both cases.
  context.state().access(EXTRA_HOLD_TAG).add(hold);
  WindowTracing.trace(
      "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is on time for "
          + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
      hold, context.key(), context.window(), inputWatermark, outputWatermark);
  return hold;
}
Aggregations