use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.
the class StateMerging method mergeWatermarks.
/**
 * Merge all watermark state in {@code sources} (which must include {@code result} if non-empty)
 * into {@code result}, where the final merge result window is {@code resultWindow}.
 *
 * <p>Nothing is changed when {@code sources} is empty, or when the only source is {@code result}
 * itself and its timestamp combiner depends only on the earliest timestamp. When the combiner
 * depends only on the window, the existing holds are discarded without being read and a hold
 * derived from {@code resultWindow} is added. Otherwise every source is read, all sources are
 * cleared, and the combination of the non-null holds (if any) is added to {@code result}.
 *
 * @param sources the hold states to merge; all of them are cleared unless the call returns early
 * @param result the hold state that receives the merged hold
 * @param resultWindow the window for which the merged hold is computed
 */
public static <W extends BoundedWindow> void mergeWatermarks(Collection<WatermarkHoldState> sources, WatermarkHoldState result, W resultWindow) {
  if (sources.isEmpty()) {
    // Nothing to merge.
    return;
  }
  if (sources.size() == 1 && sources.contains(result) && result.getTimestampCombiner().dependsOnlyOnEarliestTimestamp()) {
    // The single source already is the result, so there is nothing to merge.
    return;
  }
  if (result.getTimestampCombiner().dependsOnlyOnWindow()) {
    // Clear sources. Their values are irrelevant, so they are never read.
    for (WatermarkHoldState source : sources) {
      source.clear();
    }
    // Update directly from window-derived hold.
    Instant hold = result.getTimestampCombiner().assign(resultWindow, BoundedWindow.TIMESTAMP_MIN_VALUE);
    checkState(hold.isAfter(BoundedWindow.TIMESTAMP_MIN_VALUE));
    result.add(hold);
  } else {
    // Prefetch: hint every source before the first blocking read so that an implementation
    // may fetch them together instead of one at a time.
    for (WatermarkHoldState source : sources) {
      source.readLater();
    }
    // Read.
    List<Instant> outputTimesToMerge = new ArrayList<>(sources.size());
    for (WatermarkHoldState source : sources) {
      Instant sourceOutputTime = source.read();
      if (sourceOutputTime != null) {
        outputTimesToMerge.add(sourceOutputTime);
      }
    }
    // Clear sources. This must follow all reads and precede the add below, because
    // result may itself be one of the sources.
    for (WatermarkHoldState source : sources) {
      source.clear();
    }
    if (!outputTimesToMerge.isEmpty()) {
      // Merge and update.
      result.add(result.getTimestampCombiner().merge(resultWindow, outputTimesToMerge));
    }
  }
}
use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.
the class StateMerging method prefetchWatermarks.
/**
 * Issue prefetch reads for the watermark hold state stored under {@code address} in each of the
 * merging windows of {@code context}.
 *
 * <p>No prefetch is issued when there are no merging windows, when the only hold is the result
 * hold itself and its timestamp combiner depends only on the earliest timestamp, or when the
 * combiner depends only on the window.
 */
public static <K, W extends BoundedWindow> void prefetchWatermarks(MergingStateAccessor<K, W> context, StateTag<WatermarkHoldState> address) {
  Map<W, WatermarkHoldState> holdsByWindow = context.accessInEachMergingWindow(address);
  WatermarkHoldState mergedHold = context.access(address);

  if (holdsByWindow.isEmpty()) {
    // No merging windows, hence nothing to prefetch.
    return;
  }

  boolean resultIsOnlySource = holdsByWindow.size() == 1 && holdsByWindow.values().contains(mergedHold);
  if ((resultIsOnlySource && mergedHold.getTimestampCombiner().dependsOnlyOnEarliestTimestamp())
      || mergedHold.getTimestampCombiner().dependsOnlyOnWindow()) {
    // Either the merge would change nothing, or the existing holds are never read.
    return;
  }

  // The existing holds will be read, so request all of them now.
  for (WatermarkHoldState hold : holdsByWindow.values()) {
    prefetchRead(hold);
  }
}
use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.
the class WatermarkHold method extractAndRelease.
/**
 * Return (a future for) the earliest hold for {@code context}. Reading the returned state clears
 * both the element hold and the extra hold and, unless {@code isFinished} is set, adds back an
 * end-of-window or garbage collection hold.
 *
 * <p>The returned timestamp is the output timestamp according to the {@link TimestampCombiner}
 * from the windowing strategy of this {@link WatermarkHold}, combined across all the non-late
 * elements in the current pane. If there is no such value, or the hold lies after the end of the
 * window, the timestamp is the end of the window.
 */
public ReadableState<OldAndNewHolds> extractAndRelease(final ReduceFn<?, ?, ?, W>.Context<?, ?, ?, W> context, final boolean isFinished) {
  WindowTracing.debug(
      "WatermarkHold.extractAndRelease: for key:{}; window:{}; inputWatermark:{}; "
          + "outputWatermark:{}",
      context.key(),
      context.window(),
      timerInternals.currentInputWatermarkTime(),
      timerInternals.currentOutputWatermarkTime());
  final WatermarkHoldState elementHolds = context.state().access(elementHoldTag);
  final WatermarkHoldState extraHolds = context.state().access(EXTRA_HOLD_TAG);
  return new ReadableState<OldAndNewHolds>() {
    @Override
    public OldAndNewHolds read() {
      // Fetch the element hold first, then the extra hold.
      Instant fromElements = elementHolds.read();
      Instant fromExtra = extraHolds.read();

      // Pick the earlier of the two; a null hold never wins over a non-null one.
      Instant earliest =
          (fromElements != null && (fromExtra == null || fromElements.isBefore(fromExtra)))
              ? fromElements
              : fromExtra;

      boolean clipToEndOfWindow =
          earliest == null || earliest.isAfter(context.window().maxTimestamp());
      if (clipToEndOfWindow) {
        // Either there was no hold at all (eg because all elements came in behind the output
        // watermark) or the hold was for garbage collection: report the end of window.
        WindowTracing.debug(
            "WatermarkHold.extractAndRelease.read: clipping from {} to end of window "
                + "for key:{}; window:{}",
            earliest,
            context.key(),
            context.window());
        earliest = context.window().maxTimestamp();
      }

      WindowTracing.debug(
          "WatermarkHold.extractAndRelease.read: clearing for key:{}; window:{}",
          context.key(),
          context.window());
      // Dropping both holds lets the output watermark advance.
      elementHolds.clear();
      extraHolds.clear();

      // A replacement end-of-window or garbage collection hold is only needed while
      // further elements may still be processed.
      @Nullable
      Instant replacement =
          isFinished ? null : addEndOfWindowOrGarbageCollectionHolds(context, true);
      return new OldAndNewHolds(earliest, replacement);
    }

    @Override
    public ReadableState<OldAndNewHolds> readLater() {
      elementHolds.readLater();
      extraHolds.readLater();
      return this;
    }
  };
}
use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.
the class InMemoryStateInternalsTest method testMergeEarliestWatermarkIntoSource.
@Test
public void testMergeEarliestWatermarkIntoSource() throws Exception {
  WatermarkHoldState target = underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
  WatermarkHoldState other = underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR);

  // Two holds in each state; the overall earliest (2000) lives in the non-target state.
  target.add(new Instant(3000));
  target.add(new Instant(4000));
  other.add(new Instant(5000));
  other.add(new Instant(2000));

  // The target is itself one of the sources being merged.
  StateMerging.mergeWatermarks(Arrays.asList(target, other), target, WINDOW_1);

  // After merging, the target holds the merged value and the other source is empty.
  assertThat(target.read(), equalTo(new Instant(2000)));
  assertThat(other.read(), equalTo(null));
}
use of org.apache.beam.sdk.state.WatermarkHoldState in project beam by apache.
the class InMemoryStateInternalsTest method testWatermarkLatestState.
@Test
public void testWatermarkLatestState() throws Exception {
  Instant early = new Instant(1000);
  Instant middle = new Instant(2000);
  Instant late = new Instant(3000);

  WatermarkHoldState hold = underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);

  // Looking up the same namespace gives an equal state; another namespace does not.
  assertEquals(hold, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
  assertFalse(hold.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));

  // A fresh hold reads as null.
  assertThat(hold.read(), Matchers.nullValue());

  // Each add leaves the latest timestamp seen so far as the hold.
  hold.add(middle);
  assertThat(hold.read(), equalTo(middle));
  hold.add(late);
  assertThat(hold.read(), equalTo(late));
  hold.add(early);
  assertThat(hold.read(), equalTo(late));

  // Clearing empties the hold, and the state object is still the very same instance.
  hold.clear();
  assertThat(hold.read(), equalTo(null));
  assertThat(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), Matchers.sameInstance(hold));
}
Aggregations