Example use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method updateWatermarkWithLateData.
/**
 * Verifies that watermarks remain monotonic when {@code updateWatermarks} is called with late
 * data.
 */
@Test
public void updateWatermarkWithLateData() {
  Instant sourceWatermark = new Instant(1_000_000L);

  // Source output: one element at the source watermark and one well before it.
  CommittedBundle<Integer> onTimeInts =
      timestampedBundle(
          createdInts,
          TimestampedValue.of(1, sourceWatermark),
          TimestampedValue.of(2, new Instant(1234L)));
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(onTimeInts)),
      sourceWatermark);

  CommittedBundle<KV<String, Integer>> onTimeKeyed =
      timestampedBundle(
          keyed,
          TimestampedValue.of(KV.of("MyKey", 1), sourceWatermark),
          TimestampedValue.of(KV.of("MyKey", 2), new Instant(1234L)));

  // Complete the on-time data; the keyed transform's watermarks are expected to catch up with
  // the source watermark.
  manager.updateWatermarks(
      onTimeInts,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          onTimeInts.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(onTimeKeyed)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  TransformWatermarks keyedAfterOnTime = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(keyedAfterOnTime.getInputWatermark(), equalTo(sourceWatermark));
  assertThat(keyedAfterOnTime.getOutputWatermark(), equalTo(sourceWatermark));

  CommittedBundle<Integer> lateInts =
      timestampedBundle(createdInts, TimestampedValue.of(3, new Instant(-1000L)));

  // Late data shows up in a downstream PCollection whose watermark has already moved past it.
  // The watermark must not advance beyond its current value until that late data is consumed.
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          onTimeInts.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(lateInts)),
      new Instant(2_000_000L));
  manager.refreshAll();

  TransformWatermarks sourceAfterLate = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(sourceAfterLate.getOutputWatermark(), equalTo(new Instant(2_000_000L)));

  // The keyed input watermark should stay at its earlier value: the pending late data keeps it
  // from advancing, and monotonicity keeps it from moving backwards.
  TransformWatermarks keyedWhileLateBuffered = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(keyedWhileLateBuffered.getInputWatermark(), not(earlierThan(sourceWatermark)));
  assertThat(keyedWhileLateBuffered.getOutputWatermark(), not(earlierThan(sourceWatermark)));

  // Finally consume the late element in the keyed transform.
  CommittedBundle<KV<String, Integer>> lateKeyed =
      timestampedBundle(keyed, TimestampedValue.of(KV.of("MyKey", 3), new Instant(-1000L)));
  manager.updateWatermarks(
      lateInts,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          lateInts.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(lateKeyed)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
}
Example use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method updateWatermarkWithUnprocessedElements.
/**
 * Checks the keyed transform's input watermark when only part of an input bundle was processed:
 * the result reports the second and third elements as unprocessed.
 */
@Test
public void updateWatermarkWithUnprocessedElements() {
  WindowedValue<Integer> consumed = WindowedValue.valueInGlobalWindow(1);
  WindowedValue<Integer> leftoverAtMinusThousand =
      WindowedValue.timestampedValueInGlobalWindow(2, new Instant(-1000L));
  WindowedValue<Integer> leftoverAt1234 =
      WindowedValue.timestampedValueInGlobalWindow(3, new Instant(1234L));

  CommittedBundle<Integer> sourceOutput =
      bundleFactory
          .createBundle(createdInts)
          .add(consumed)
          .add(leftoverAtMinusThousand)
          .add(leftoverAt1234)
          .commit(clock.now());
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(sourceOutput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  CommittedBundle<KV<String, Integer>> keyedOutput =
      timestampedBundle(
          keyed, TimestampedValue.of(KV.of("MyKey", 1), BoundedWindow.TIMESTAMP_MIN_VALUE));
  manager.updateWatermarks(
      sourceOutput,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          sourceOutput.withElements(ImmutableList.of(leftoverAtMinusThousand, leftoverAt1234)),
          Collections.<CommittedBundle<?>>singleton(keyedOutput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  TransformWatermarks keyedWm = manager.getWatermarks(graph.getProducer(keyed));
  // The two unprocessed elements are re-added to pending, so the input watermark may not pass
  // the earlier of them.
  assertThat(keyedWm.getInputWatermark(), not(laterThan(new Instant(-1000L))));
}
Example use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method getWatermarkForUntouchedTransform.
/**
 * Shows that requesting the watermarks of an {@link AppliedPTransform} which has not processed
 * any elements yields {@link BoundedWindow#TIMESTAMP_MIN_VALUE} for both the input and the
 * output watermark.
 */
@Test
public void getWatermarkForUntouchedTransform() {
  TransformWatermarks untouched = manager.getWatermarks(graph.getProducer(createdInts));

  assertThat(
      untouched.getInputWatermark(),
      equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
  assertThat(
      untouched.getOutputWatermark(),
      equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
}
Example use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method updateWatermarkWithDifferentWindowedValueInstances.
/**
 * Registers a source output bundle holding the value {@code 1} in the global window, then reports
 * a separately built bundle with an equal element as fully consumed by the keyed transform, and
 * expects the keyed input watermark to reach {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 *
 * <p>NOTE(review): presumably this guards against pending elements being matched by instance
 * identity rather than by value -- confirm against the WatermarkManager implementation.
 */
@Test
public void updateWatermarkWithDifferentWindowedValueInstances() {
  // Bundle instance #1: what the source transform is recorded as having produced.
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(
              bundleFactory
                  .createBundle(createdInts)
                  .add(WindowedValue.valueInGlobalWindow(1))
                  .commit(Instant.now()))),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Bundle instance #2: built independently, with its own WindowedValue instance.
  CommittedBundle<Integer> createdBundle =
      bundleFactory
          .createBundle(createdInts)
          .add(WindowedValue.valueInGlobalWindow(1))
          .commit(Instant.now());

  // Report instance #2 as completed with no unprocessed elements and no outputs; the last
  // argument is deliberately left as null, exactly as in the original call.
  manager.updateWatermarks(
      createdBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>emptyList()),
      null);
  manager.refreshAll();

  TransformWatermarks onTimeWatermarks = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(onTimeWatermarks.getInputWatermark(), equalTo(BoundedWindow.TIMESTAMP_MAX_VALUE));
}
Example use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method getWatermarksAfterHoldAndEmptyOutput.
/**
 * Shows that when an upstream transform's watermarks are updated without producing any output
 * while the downstream transform has a watermark hold, the downstream output watermark stays at
 * that hold.
 */
@Test
public void getWatermarksAfterHoldAndEmptyOutput() {
  // The source emits two elements and reports a watermark of 12_000.
  CommittedBundle<Integer> initialCreated = multiWindowedBundle(createdInts, 1, 2);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(initialCreated)),
      new Instant(12_000L));

  // The filter consumes everything, emits an empty bundle, and passes 10_000 as its watermark
  // argument.
  CommittedBundle<Integer> initialFiltered = multiWindowedBundle(filtered);
  manager.updateWatermarks(
      initialCreated,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          initialCreated.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(initialFiltered)),
      new Instant(10_000L));
  manager.refreshAll();

  TransformWatermarks filterWmAfterFirstPass = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(filterWmAfterFirstPass.getInputWatermark(), not(earlierThan(new Instant(12_000L))));
  assertThat(filterWmAfterFirstPass.getOutputWatermark(), not(laterThan(new Instant(10_000L))));

  // The source now produces an empty bundle and moves its watermark to the maximum.
  CommittedBundle<Integer> emptyCreated = multiWindowedBundle(createdInts);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(emptyCreated)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  TransformWatermarks sourceWmAtEnd = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(
      sourceWmAtEnd.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

  // The filter's input follows the source, but its output must still not exceed 10_000.
  TransformWatermarks filterWmAtEnd = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(
      filterWmAtEnd.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  assertThat(filterWmAtEnd.getOutputWatermark(), not(laterThan(new Instant(10_000L))));
}
Aggregations