Search in sources :

Example 1 with WatermarkHoldState

use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.

the class StateMerging method mergeWatermarks.

/**
 * Merge all watermark holds in {@code sources} (which must include {@code result} if non-empty)
 * into {@code result}, where the final merge result window is {@code resultWindow}.
 *
 * <p>Every state in {@code sources} is cleared as part of the merge; the merged hold, if there is
 * one, is then added to {@code result}.
 *
 * @param sources the hold states to merge; nothing happens when this is empty
 * @param result the hold state that receives the merged hold
 * @param resultWindow the window passed to the {@code TimestampCombiner} of {@code result} when
 *     computing the merged hold
 */
public static <W extends BoundedWindow> void mergeWatermarks(Collection<WatermarkHoldState> sources, WatermarkHoldState result, W resultWindow) {
    if (sources.isEmpty()) {
        // Nothing to merge.
        return;
    }
    if (sources.size() == 1 && sources.contains(result) && result.getTimestampCombiner().dependsOnlyOnEarliestTimestamp()) {
        // The only source is the result itself and the combiner only looks at the earliest
        // timestamp, so the existing hold is left untouched.
        return;
    }
    if (result.getTimestampCombiner().dependsOnlyOnWindow()) {
        // The hold is a function of the window alone, so existing holds never need to be read.
        for (WatermarkHoldState source : sources) {
            source.clear();
        }
        // Update directly from window-derived hold.
        Instant hold = result.getTimestampCombiner().assign(resultWindow, BoundedWindow.TIMESTAMP_MIN_VALUE);
        checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
        result.add(hold);
    } else {
        // Prefetch: announce every upcoming read before performing the first one.
        for (WatermarkHoldState source : sources) {
            source.readLater();
        }
        // Read, skipping sources that hold nothing.
        List<Instant> outputTimesToMerge = new ArrayList<>(sources.size());
        for (WatermarkHoldState source : sources) {
            Instant sourceOutputTime = source.read();
            if (sourceOutputTime != null) {
                outputTimesToMerge.add(sourceOutputTime);
            }
        }
        // Clear sources only after all of them have been read, since result may be one of them.
        for (WatermarkHoldState source : sources) {
            source.clear();
        }
        if (!outputTimesToMerge.isEmpty()) {
            // Merge and update.
            result.add(result.getTimestampCombiner().merge(resultWindow, outputTimesToMerge));
        }
    }
}
Also used : Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) ReadableState(org.apache.beam.sdk.state.ReadableState) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState)

Example 2 with WatermarkHoldState

use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.

the class StateMerging method prefetchWatermarks.

/**
 * Issue read hints for the watermark hold stored under {@code address} in each of the merging
 * windows exposed by {@code context}.
 *
 * <p>No hint is issued when there are no merging windows, when the single merging state is the
 * result state itself and the combiner depends only on the earliest timestamp, or when the
 * combiner depends only on the window.
 */
public static <K, W extends BoundedWindow> void prefetchWatermarks(MergingStateAccessor<K, W> context, StateTag<WatermarkHoldState> address) {
    final Map<W, WatermarkHoldState> holdsByWindow = context.accessInEachMergingWindow(address);
    final WatermarkHoldState target = context.access(address);
    if (holdsByWindow.isEmpty()) {
        // No merging windows, hence nothing to fetch.
        return;
    }
    // True when the only merging state is the target itself and its combiner only needs the
    // earliest timestamp; in that case nothing would change.
    final boolean targetIsOnlySource = holdsByWindow.size() == 1 && holdsByWindow.values().contains(target) && target.getTimestampCombiner().dependsOnlyOnEarliestTimestamp();
    if (targetIsOnlySource || target.getTimestampCombiner().dependsOnlyOnWindow()) {
        // Either nothing changes, or the hold is derived from the window and existing holds
        // are never read.
        return;
    }
    for (WatermarkHoldState hold : holdsByWindow.values()) {
        prefetchRead(hold);
    }
}
Also used : WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState)

Example 3 with WatermarkHoldState

use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.

the class WatermarkHold method extractAndRelease.

/**
 * Return a deferred read of the earliest hold for {@code context}. Performing the read clears
 * both the element hold and the extra hold, and, unless {@code isFinished} is set, re-adds an
 * end-of-window or garbage collection hold.
 *
 * <p>The old hold reported by the read is the smaller of the element hold and the extra hold.
 * When neither is set, or when that value lies after the window's maximum timestamp, the
 * window's maximum timestamp is reported instead.
 */
public ReadableState<OldAndNewHolds> extractAndRelease(final ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, final boolean isFinished) {
    WindowTracing.debug("WatermarkHold.extractAndRelease: for key:{}; window:{}; inputWatermark:{}; " + "outputWatermark:{}", context.key(), context.window(), timerInternals.currentInputWatermarkTime(), timerInternals.currentOutputWatermarkTime());
    final WatermarkHoldState elementState = context.state().access(elementHoldTag);
    final WatermarkHoldState extraState = context.state().access(EXTRA_HOLD_TAG);
    return new ReadableState<OldAndNewHolds>() {

        @Override
        public ReadableState<OldAndNewHolds> readLater() {
            // Forward the hint to both underlying hold states.
            elementState.readLater();
            extraState.readLater();
            return this;
        }

        @Override
        public OldAndNewHolds read() {
            Instant fromElements = elementState.read();
            Instant fromExtra = extraState.read();
            // Take the earlier of the two holds; a missing hold never wins over a present one.
            Instant oldHold;
            if (fromElements != null && fromExtra != null) {
                oldHold = fromElements.isBefore(fromExtra) ? fromElements : fromExtra;
            } else {
                oldHold = (fromElements != null) ? fromElements : fromExtra;
            }
            if (oldHold == null || oldHold.isAfter(context.window().maxTimestamp())) {
                // Either there was no hold at all, or the hold lies beyond the window's
                // maximum timestamp: report the end of the window instead.
                WindowTracing.debug("WatermarkHold.extractAndRelease.read: clipping from {} to end of window " + "for key:{}; window:{}", oldHold, context.key(), context.window());
                oldHold = context.window().maxTimestamp();
            }
            WindowTracing.debug("WatermarkHold.extractAndRelease.read: clearing for key:{}; window:{}", context.key(), context.window());
            // Drop both holds so the output watermark is no longer held back by them.
            elementState.clear();
            extraState.clear();
            // A replacement hold is only installed when the window is not finished.
            @Nullable Instant newHold = isFinished ? null : addEndOfWindowOrGarbageCollectionHolds(context, true);
            return new OldAndNewHolds(oldHold, newHold);
        }
    };
}
Also used : Instant(org.joda.time.Instant) ReadableState(org.apache.beam.sdk.state.ReadableState) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Nullable(javax.annotation.Nullable)

Example 4 with WatermarkHoldState

use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.

the class InMemoryStateInternalsTest method testMergeEarliestWatermarkIntoSource.

@Test
public void testMergeEarliestWatermarkIntoSource() throws Exception {
    // Two holds under the same address but in different namespaces; the first one doubles as
    // the merge target.
    WatermarkHoldState targetHold = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
    WatermarkHoldState otherHold = underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR);
    targetHold.add(new Instant(3000));
    targetHold.add(new Instant(4000));
    otherHold.add(new Instant(5000));
    otherHold.add(new Instant(2000));
    // The merge empties every source and leaves the combined hold in the target.
    StateMerging.mergeWatermarks(Arrays.asList(targetHold, otherHold), targetHold, WINDOW_1);
    // 2000 is the smallest timestamp added to either state.
    assertThat(targetHold.read(), equalTo(new Instant(2000)));
    assertThat(otherHold.read(), equalTo(null));
}
Also used : Instant(org.joda.time.Instant) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Test(org.junit.Test)

Example 5 with WatermarkHoldState

use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.

the class InMemoryStateInternalsTest method testWatermarkLatestState.

@Test
public void testWatermarkLatestState() throws Exception {
    WatermarkHoldState latestHold = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);
    // The same namespace and address yield an equal state; a different namespace does not.
    assertEquals(latestHold, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
    assertFalse(latestHold.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));
    // A fresh hold reads as null.
    assertThat(latestHold.read(), Matchers.nullValue());
    // The first timestamp added becomes the hold.
    latestHold.add(new Instant(2000));
    assertThat(latestHold.read(), equalTo(new Instant(2000)));
    // A later timestamp replaces it.
    latestHold.add(new Instant(3000));
    assertThat(latestHold.read(), equalTo(new Instant(3000)));
    // An earlier timestamp leaves the hold unchanged.
    latestHold.add(new Instant(1000));
    assertThat(latestHold.read(), equalTo(new Instant(3000)));
    // Clearing resets the hold to null.
    latestHold.clear();
    assertThat(latestHold.read(), equalTo(null));
    // After clearing, the lookup still returns the very same instance.
    assertThat(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), Matchers.sameInstance(latestHold));
}
Also used : Instant(org.joda.time.Instant) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) Test(org.junit.Test)

Aggregations

WatermarkHoldState (org.apache.beam.sdk.state.WatermarkHoldState)25 Instant (org.joda.time.Instant)24 Test (org.junit.Test)22 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)16 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 ReadableState (org.apache.beam.sdk.state.ReadableState)2 ArrayList (java.util.ArrayList)1 Nullable (javax.annotation.Nullable)1 StateNamespace (org.apache.beam.runners.core.StateNamespace)1 TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner)1