Search in sources :

Example 51 with Instant

use of org.joda.time.Instant in project beam by apache.

the class InMemoryStateInternalsTest method testWatermarkLatestState.

/**
 * Exercises the watermark hold state registered under {@code WATERMARK_LATEST_ADDR}: lookups
 * per namespace, the value reported by {@code read()} after successive {@code add} calls, and
 * the effect of {@code clear()}.
 */
@Test
public void testWatermarkLatestState() throws Exception {
    WatermarkHoldState hold = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);

    // A second lookup in the same namespace must be equal to the first one, whereas a lookup
    // in a different namespace must not be.
    assertEquals(hold, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
    assertFalse(hold.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));

    Instant early = new Instant(1000);
    Instant middle = new Instant(2000);
    Instant late = new Instant(3000);

    // Nothing has been added yet, so there is nothing to read.
    assertThat(hold.read(), Matchers.nullValue());

    // After each add, read() is expected to report the greatest instant added so far.
    hold.add(middle);
    assertThat(hold.read(), equalTo(middle));
    hold.add(late);
    assertThat(hold.read(), equalTo(late));
    hold.add(early);
    assertThat(hold.read(), equalTo(late));

    // Clearing drops the stored value but the very same state object is still handed out.
    hold.clear();
    assertThat(hold.read(), equalTo(null));
    assertThat(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), Matchers.sameInstance(hold));
}
Also used : Instant(org.joda.time.Instant) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Test(org.junit.Test)

Example 52 with Instant

use of org.joda.time.Instant in project beam by apache.

the class InMemoryStateInternalsTest method testWatermarkEarliestState.

/**
 * Exercises the watermark hold state registered under {@code WATERMARK_EARLIEST_ADDR}: lookups
 * per namespace, the value reported by {@code read()} after successive {@code add} calls, and
 * the effect of {@code clear()}.
 */
@Test
public void testWatermarkEarliestState() throws Exception {
    WatermarkHoldState hold = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);

    // A second lookup in the same namespace must be equal to the first one, whereas a lookup
    // in a different namespace must not be.
    assertEquals(hold, underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR));
    assertFalse(hold.equals(underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR)));

    Instant early = new Instant(1000);
    Instant middle = new Instant(2000);
    Instant late = new Instant(3000);

    // Nothing has been added yet, so there is nothing to read.
    assertThat(hold.read(), Matchers.nullValue());

    // After each add, read() is expected to report the smallest instant added so far.
    hold.add(middle);
    assertThat(hold.read(), equalTo(middle));
    hold.add(late);
    assertThat(hold.read(), equalTo(middle));
    hold.add(early);
    assertThat(hold.read(), equalTo(early));

    // Clearing drops the stored value but the very same state object is still handed out.
    hold.clear();
    assertThat(hold.read(), equalTo(null));
    assertThat(underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR), Matchers.sameInstance(hold));
}
Also used : Instant(org.joda.time.Instant) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Test(org.junit.Test)

Example 53 with Instant

use of org.joda.time.Instant in project beam by apache.

the class StatefulDoFnRunner method isLate.

/**
 * Tells whether the given window is late.
 *
 * @param window the window to check
 * @return {@code true} if the garbage collection time computed for {@code window} by
 *     {@link LateDataUtils#garbageCollectionTime} is strictly before the input watermark
 *     currently reported by {@code cleanupTimer}
 */
private boolean isLate(BoundedWindow window) {
    return LateDataUtils.garbageCollectionTime(window, windowingStrategy)
            .isBefore(cleanupTimer.currentInputWatermarkTime());
}
Also used : Instant(org.joda.time.Instant)

Example 54 with Instant

use of org.joda.time.Instant in project beam by apache.

the class WatermarkHold method addGarbageCollectionHold.

/**
   * Try to place a 'garbage collection hold' when one is required. The result is the
   * {@link Instant} of the hold that was added (ie the end of window time plus allowed
   * lateness), or {@literal null} when no hold was added.
   *
   * <p>The hold is only placed when it differs from the one {@link #addEndOfWindowHold} would
   * place, which means {@link WindowingStrategy#getAllowedLateness} has to be non-zero.
   *
   * <p>There are two situations in which a garbage collection hold is added:
   * <ol>
   * <li>An element arrived behind the output watermark and was too late for either the usual
   * element hold or an end of window hold. The garbage collection hold guarantees that, once
   * the pane is finally triggered, its output is not dropped for excessive lateness by any
   * downstream computation.
   * <li>The {@link WindowingStrategy#getClosingBehavior()} is
   * {@link ClosingBehavior#FIRE_ALWAYS}, so a final pane is guaranteed for every window that
   * saw at least one element. Here too the garbage collection hold guarantees that an empty
   * final pane can receive a timestamp that no downstream computation will consider beyond
   * allowed lateness.
   * </ol>
   *
   * <p>{@code paneIsEmpty} is what tells cases 1 and 2 apart.
   */
@Nullable
private Instant addGarbageCollectionHold(ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, boolean paneIsEmpty) {
    Instant outputWatermark = timerInternals.currentOutputWatermarkTime();
    Instant inputWatermark = timerInternals.currentInputWatermarkTime();
    Instant untruncatedHold = LateDataUtils.garbageCollectionTime(context.window(), windowingStrategy);

    // Without any allowed lateness this hold would add nothing, so skip it.
    boolean latenessAllowed = windowingStrategy.getAllowedLateness().isLongerThan(Duration.ZERO);
    if (!latenessAllowed) {
        WindowTracing.trace(
                "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is unnecessary " + "since no allowed lateness for key:{}; window:{}; inputWatermark:{}; " + "outputWatermark:{}",
                untruncatedHold, context.key(), context.window(), inputWatermark, outputWatermark);
        return null;
    }

    // An empty pane that only fires when non-empty does not need a hold either.
    boolean emptyPaneWillNotFire = paneIsEmpty
            && context.windowingStrategy().getClosingBehavior() == ClosingBehavior.FIRE_IF_NON_EMPTY;
    if (emptyPaneWillNotFire) {
        WindowTracing.trace(
                "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is unnecessary " + "since empty pane and FIRE_IF_NON_EMPTY for key:{}; window:{}; inputWatermark:{}; " + "outputWatermark:{}",
                untruncatedHold, context.key(), context.window(), inputWatermark, outputWatermark);
        return null;
    }

    // A hold at or past the largest representable timestamp is pulled back to one millisecond
    // before that maximum (i.e. the largest timestamp that is not positive infinity), so that
    // every window is eventually garbage collected.
    Instant holdTime = untruncatedHold.isBefore(BoundedWindow.TIMESTAMP_MAX_VALUE)
            ? untruncatedHold
            : BoundedWindow.TIMESTAMP_MAX_VALUE.minus(Duration.millis(1L));

    boolean notBehindInput = !holdTime.isBefore(inputWatermark);
    checkState(notBehindInput, "Garbage collection hold %s cannot be before input watermark %s", holdTime, inputWatermark);
    boolean withinEndOfTime = !holdTime.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE);
    checkState(withinEndOfTime, "Garbage collection hold %s is beyond end-of-time", holdTime);

    // The choice of EXTRA_HOLD_TAG over elementHoldTag follows the reasoning given in
    // addEndOfWindowHold.
    context.state().access(EXTRA_HOLD_TAG).add(holdTime);
    WindowTracing.trace(
            "WatermarkHold.addGarbageCollectionHold: garbage collection hold at {} is on time for " + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
            holdTime, context.key(), context.window(), inputWatermark, outputWatermark);
    return holdTime;
}
Also used : Instant(org.joda.time.Instant) Nullable(javax.annotation.Nullable)

Example 55 with Instant

use of org.joda.time.Instant in project beam by apache.

the class WatermarkHold method addEndOfWindowHold.

/**
   * Try to place an 'end-of-window hold'. The result is the {@link Instant} of the hold that
   * was added (ie the end of window time), or {@literal null} when no end of window hold is
   * possible and the caller should fall back to a garbage collection hold.
   *
   * <p>The hold is only placed when a timer is certain to be set (by {@link ReduceFnRunner})
   * to clear it, which means the input watermark must not be ahead of the end of window time.
   *
   * <p>There are two situations in which an end-of-window hold is added:
   * <ol>
   * <li>An element arrived behind the output watermark (too late for the usual element hold),
   * yet it may still be possible to include it in an {@link Timing#ON_TIME} pane. The end of
   * window hold makes sure that pane is not considered late by any downstream computation.
   * <li>An {@link Timing#ON_TIME} pane is guaranteed for every window that saw at least one
   * element, even when that pane is empty. So whenever the elements of a pane are processed
   * because a trigger fired, both an end of window timer and an end of window hold must be
   * set. Here too the hold makes sure the {@link Timing#ON_TIME} pane is not considered late
   * by any downstream computation.
   * </ol>
   */
@Nullable
private Instant addEndOfWindowHold(ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, boolean paneIsEmpty) {
    Instant outputWatermark = timerInternals.currentOutputWatermarkTime();
    Instant inputWatermark = timerInternals.currentInputWatermarkTime();
    Instant holdTime = context.window().maxTimestamp();

    // The input watermark has already passed the end of the window: no hold can be placed.
    boolean windowAlreadyPassed = holdTime.isBefore(inputWatermark);
    if (windowAlreadyPassed) {
        WindowTracing.trace(
                "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is too late for " + "end-of-window timer for key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
                holdTime, context.key(), context.window(), inputWatermark, outputWatermark);
        return null;
    }

    // The output watermark check is skipped when no output watermark is available.
    boolean notBehindOutput = outputWatermark == null || !holdTime.isBefore(outputWatermark);
    checkState(notBehindOutput, "End-of-window hold %s cannot be before output watermark %s", holdTime, outputWatermark);
    boolean withinEndOfTime = !holdTime.isAfter(BoundedWindow.TIMESTAMP_MAX_VALUE);
    checkState(withinEndOfTime, "End-of-window hold %s is beyond end-of-time", holdTime);

    // With paneIsEmpty the hold exists only for empty ON_TIME panes, so it should stay away
    // from the combining function in elementHoldTag.
    // With !paneIsEmpty using elementHoldTag here could make sense. However, onMerge has to
    // add an end of window or garbage collection hold without knowing whether an element hold
    // is already present (checking would be too expensive). It would therefore end up adding
    // an element hold at the end of the window, which could upset the elementHoldTag
    // combining function. Hence EXTRA_HOLD_TAG is used in both cases.
    context.state().access(EXTRA_HOLD_TAG).add(holdTime);
    WindowTracing.trace(
            "WatermarkHold.addEndOfWindowHold: end-of-window hold at {} is on time for " + "key:{}; window:{}; inputWatermark:{}; outputWatermark:{}",
            holdTime, context.key(), context.window(), inputWatermark, outputWatermark);
    return holdTime;
}
Also used : Instant(org.joda.time.Instant) Nullable(javax.annotation.Nullable)

Aggregations

Instant (org.joda.time.Instant)411 Test (org.junit.Test)326 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)135 KV (org.apache.beam.sdk.values.KV)64 Category (org.junit.experimental.categories.Category)60 WindowedValue (org.apache.beam.sdk.util.WindowedValue)47 Duration (org.joda.time.Duration)44 ReadableInstant (org.joda.time.ReadableInstant)36 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)30 WatermarkHoldState (org.apache.beam.sdk.state.WatermarkHoldState)24 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)22 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)22 HashMap (java.util.HashMap)19 TransformWatermarks (org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)18 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)17 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)17 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)17 TupleTag (org.apache.beam.sdk.values.TupleTag)14 ArrayList (java.util.ArrayList)13 Map (java.util.Map)12