use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.
the class WatermarkManagerTest method updateWatermarkWithWatermarkHolds.
/**
 * Shows that a watermark hold supplied together with a result caps the output watermark of the
 * {@link AppliedPTransform} it was reported for, while that transform's input watermark is still
 * free to advance.
 */
@Test
public void updateWatermarkWithWatermarkHolds() {
  // The producer of createdInts emits three elements and reports a hold of Long.MAX_VALUE.
  CommittedBundle<Integer> intsBundle =
      timestampedBundle(
          createdInts,
          TimestampedValue.of(1, new Instant(1_000_000L)),
          TimestampedValue.of(2, new Instant(1234L)),
          TimestampedValue.of(3, new Instant(-1000L)));
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(intsBundle)),
      new Instant(Long.MAX_VALUE));

  // The producer of keyed consumes the whole bundle (the unprocessed remainder is empty) but
  // reports a hold at 500.
  CommittedBundle<KV<String, Integer>> keyedOutput =
      timestampedBundle(
          keyed,
          TimestampedValue.of(KV.of("MyKey", 1), new Instant(1_000_000L)),
          TimestampedValue.of(KV.of("MyKey", 2), new Instant(1234L)),
          TimestampedValue.of(KV.of("MyKey", 3), new Instant(-1000L)));
  manager.updateWatermarks(
      intsBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          intsBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(keyedOutput)),
      new Instant(500L));
  manager.refreshAll();

  TransformWatermarks watermarks = manager.getWatermarks(graph.getProducer(keyed));
  // The input watermark is expected to have reached the end of time ...
  assertThat(
      watermarks.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  // ... while the output watermark must not have passed the hold.
  assertThat(watermarks.getOutputWatermark(), not(laterThan(new Instant(500L))));
}
use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.
the class WatermarkManagerTest method updateWatermarkWithHoldsShouldBeMonotonic.
/**
 * Demonstrates that updated output watermarks are monotonic in the presence of watermark holds
 * that become earlier than a previous watermark hold.
 *
 * <p>The test first establishes an output watermark under a hold at 500, then reports a second
 * hold that is strictly earlier than the observed output watermark, and finally checks that the
 * output watermark neither advances nor moves backwards.
 */
@Test
public void updateWatermarkWithHoldsShouldBeMonotonic() {
  // The producer of createdInts emits three elements and reports a hold of Long.MAX_VALUE.
  CommittedBundle<Integer> createdBundle =
      timestampedBundle(
          createdInts,
          TimestampedValue.of(1, new Instant(1_000_000L)),
          TimestampedValue.of(2, new Instant(1234L)),
          TimestampedValue.of(3, new Instant(-1000L)));
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(createdBundle)),
      new Instant(Long.MAX_VALUE));

  // The producer of keyed consumes the whole bundle and reports the initial hold at 500.
  CommittedBundle<KV<String, Integer>> keyBundle =
      timestampedBundle(
          keyed,
          TimestampedValue.of(KV.of("MyKey", 1), new Instant(1_000_000L)),
          TimestampedValue.of(KV.of("MyKey", 2), new Instant(1234L)),
          TimestampedValue.of(KV.of("MyKey", 3), new Instant(-1000L)));
  manager.updateWatermarks(
      createdBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(keyBundle)),
      new Instant(500L));
  manager.refreshAll();

  TransformWatermarks keyedWatermarks = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(
      keyedWatermarks.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  assertThat(keyedWatermarks.getOutputWatermark(), not(laterThan(new Instant(500L))));
  Instant oldOutputWatermark = keyedWatermarks.getOutputWatermark();

  // Report a new hold that is strictly earlier than the watermark we just observed. Deriving it
  // from oldOutputWatermark (rather than hard-coding a value) guarantees it really is "prior to
  // the old watermark" whatever that watermark turned out to be. Without this update the final
  // assertion would trivially compare the same, unchanged watermark with itself.
  Instant earlierHold = oldOutputWatermark.minus(250L);
  manager.updateWatermarks(
      createdBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>emptyList()),
      earlierHold);
  manager.refreshAll();

  TransformWatermarks updatedWatermarks = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(
      updatedWatermarks.getInputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  // We added a hold prior to the old watermark; we shouldn't progress (due to the earlier hold)
  // but the watermark is monotonic and should not backslide to the new, earlier hold
  assertThat(updatedWatermarks.getOutputWatermark(), equalTo(oldOutputWatermark));
}
use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.
the class WatermarkManagerTest method getSynchronizedProcessingTimeOutputHeldToPendingTimers.
/**
 * Demonstrates that the Synchronized Processing Time output watermark cannot progress past
 * pending timers in the same set. This propagates to all downstream SynchronizedProcessingTimes.
 *
 * <p>Also demonstrates that the result is monotonic.
 *
 * <p>Scenario: while producing {@code filtered}, two processing-time timers are set (at 250 and
 * at 4096). The test clock is moved forward by 250, the earlier timer is extracted and later
 * reported as completed, and finally the clock is set to {@code Long.MAX_VALUE}; at that point
 * the synchronized processing time of both {@code filtered} and {@code filteredTimesTwo} is
 * expected to equal the remaining timer's timestamp, 4096.
 *
 * <p>NOTE(review): the {@code @Test} annotation below is commented out, so this method is
 * presumably not executed by the test runner. The reason is not visible in this part of the
 * file; confirm before re-enabling.
 */
// @Test
public void getSynchronizedProcessingTimeOutputHeldToPendingTimers() {
// Produce the root bundle; 1248 is passed as the hold argument for this update.
CommittedBundle<Integer> createdBundle = multiWindowedBundle(createdInts, 1, 2, 4, 8);
manager.updateWatermarks(null, TimerUpdate.empty(), result(graph.getProducer(createdInts), null, Collections.<CommittedBundle<?>>singleton(createdBundle)), new Instant(1248L));
manager.refreshAll();
// These watermark handles are fetched once and re-read after every later update, so the
// assertions below rely on them reflecting the manager's current state.
TransformWatermarks filteredWms = manager.getWatermarks(graph.getProducer(filtered));
TransformWatermarks filteredDoubledWms = manager.getWatermarks(graph.getProducer(filteredTimesTwo));
// Baselines for the monotonicity checks further down.
Instant initialFilteredWm = filteredWms.getSynchronizedProcessingOutputTime();
Instant initialFilteredDoubledWm = filteredDoubledWms.getSynchronizedProcessingOutputTime();
StructuralKey<String> key = StructuralKey.of("key", StringUtf8Coder.of());
CommittedBundle<Integer> filteredBundle = multiWindowedBundle(filtered, 2, 8);
// Two processing-time timers in the global namespace: one at 250, one at 4096.
TimerData pastTimer = TimerData.of(StateNamespaces.global(), new Instant(250L), TimeDomain.PROCESSING_TIME);
TimerData futureTimer = TimerData.of(StateNamespaces.global(), new Instant(4096L), TimeDomain.PROCESSING_TIME);
TimerUpdate timers = TimerUpdate.builder(key).setTimer(pastTimer).setTimer(futureTimer).build();
// The producer of filtered consumes createdBundle entirely (empty remainder), emits
// filteredBundle and registers both timers.
manager.updateWatermarks(createdBundle, timers, result(graph.getProducer(filtered), createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(filteredBundle)), BoundedWindow.TIMESTAMP_MAX_VALUE);
manager.refreshAll();
// Move the test clock forward by 250 from its current reading.
Instant startTime = clock.now();
clock.set(startTime.plus(250L));
// We're held based on the past timer
assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
// And we're monotonic
assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(earlierThan(initialFilteredWm)));
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(earlierThan(initialFilteredDoubledWm)));
// Only the timer at 250 is expected to be extracted at this point.
Collection<FiredTimers> firedTimers = manager.extractFiredTimers();
assertThat(Iterables.getOnlyElement(firedTimers).getTimers(), contains(pastTimer));
// Our timer has fired, but has not been completed, so it holds our synchronized processing WM
assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
// Empty keyed bundles standing in for the timer delivery (input to the producer of filtered)
// and for its output; the output is committed with filtered's current synchronized
// processing time.
CommittedBundle<Integer> filteredTimerBundle = bundleFactory.createKeyedBundle(key, filtered).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
CommittedBundle<Integer> filteredTimerResult = bundleFactory.createKeyedBundle(key, filteredTimesTwo).commit(filteredWms.getSynchronizedProcessingOutputTime());
// Complete the processing time timer
manager.updateWatermarks(filteredTimerBundle, TimerUpdate.builder(key).withCompletedTimers(Collections.<TimerData>singleton(pastTimer)).build(), result(graph.getProducer(filtered), filteredTimerBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(filteredTimerResult)), BoundedWindow.TIMESTAMP_MAX_VALUE);
manager.refreshAll();
clock.set(startTime.plus(500L));
// filtered's synchronized processing time must not run ahead of the clock.
assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(clock.now())));
// filteredTimesTwo must be no earlier than the synchronized processing output watermark of the
// still-unprocessed filteredTimerResult bundle (presumably the value it was committed with).
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(earlierThan(filteredTimerResult.getSynchronizedProcessingOutputWatermark())));
// The producer of filteredTimesTwo now consumes filteredTimerResult and emits nothing.
manager.updateWatermarks(filteredTimerResult, TimerUpdate.empty(), result(graph.getProducer(filteredTimesTwo), filteredTimerResult.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>emptyList()), BoundedWindow.TIMESTAMP_MAX_VALUE);
manager.refreshAll();
// With that bundle processed, filteredTimesTwo is expected to catch up to the clock exactly.
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), equalTo(clock.now()));
// Push the clock as far forward as possible: both transforms should now report exactly the
// timestamp of the timer that is still pending (4096).
clock.set(new Instant(Long.MAX_VALUE));
assertThat(filteredWms.getSynchronizedProcessingOutputTime(), equalTo(new Instant(4096)));
assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), equalTo(new Instant(4096)));
}
use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.
the class WatermarkManagerTest method getWatermarkMultiIdenticalInput.
/**
 * Demonstrates that the watermark of a transform that takes multiple inputs is held to the
 * minimum watermark across all of its inputs. Here the same {@link PCollection} is supplied to a
 * {@code Flatten} twice, and the input watermark is checked after each of two identical
 * completions of the same bundle.
 */
@Test
public void getWatermarkMultiIdenticalInput() {
  // Build a dedicated pipeline in which a Flatten consumes the same collection twice.
  PCollection<Integer> ints = p.apply(Create.of(1, 2, 3));
  PCollection<Integer> flattened =
      PCollectionList.of(ints).and(ints).apply(Flatten.<Integer>pCollections());
  DirectGraphVisitor visitor = new DirectGraphVisitor();
  p.traverseTopologically(visitor);
  DirectGraph localGraph = visitor.getGraph();
  AppliedPTransform<?, ?, ?> flatten = localGraph.getProducer(flattened);

  // This test uses its own manager over the graph built above.
  WatermarkManager localManager = WatermarkManager.create(clock, localGraph);
  CommittedBundle<Void> rootBundle =
      bundleFactory
          .<Void>createRootBundle()
          .add(WindowedValue.<Void>valueInGlobalWindow(null))
          .commit(clock.now());
  CommittedBundle<Integer> intsBundle =
      bundleFactory
          .createBundle(ints)
          .add(WindowedValue.timestampedValueInGlobalWindow(1, new Instant(33536)))
          .commit(clock.now());
  Map<AppliedPTransform<?, ?, ?>, Collection<CommittedBundle<?>>> rootInputs =
      ImmutableMap.<AppliedPTransform<?, ?, ?>, Collection<CommittedBundle<?>>>builder()
          .put(localGraph.getProducer(ints), Collections.<CommittedBundle<?>>singleton(rootBundle))
          .build();
  localManager.initialize(rootInputs);

  // The Create step consumes the root bundle and emits a single element stamped 33536.
  localManager.updateWatermarks(
      rootBundle,
      TimerUpdate.empty(),
      CommittedResult.create(
          StepTransformResult.withoutHold(localGraph.getProducer(ints)).build(),
          rootBundle.withElements(Collections.<WindowedValue<Void>>emptyList()),
          Collections.singleton(intsBundle),
          EnumSet.allOf(OutputType.class)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  localManager.refreshAll();
  TransformWatermarks flattenWatermarks = localManager.getWatermarks(flatten);
  assertThat(flattenWatermarks.getInputWatermark(), equalTo(new Instant(33536)));

  // First completion of the bundle by the Flatten: the input watermark stays at 33536.
  localManager.updateWatermarks(
      intsBundle,
      TimerUpdate.empty(),
      CommittedResult.create(
          StepTransformResult.withoutHold(flatten).build(),
          intsBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>emptyList(),
          EnumSet.allOf(OutputType.class)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  localManager.refreshAll();
  assertThat(flattenWatermarks.getInputWatermark(), equalTo(new Instant(33536)));

  // Second, identical completion: only now does the input watermark reach the end of time.
  localManager.updateWatermarks(
      intsBundle,
      TimerUpdate.empty(),
      CommittedResult.create(
          StepTransformResult.withoutHold(flatten).build(),
          intsBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>emptyList(),
          EnumSet.allOf(OutputType.class)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  localManager.refreshAll();
  assertThat(flattenWatermarks.getInputWatermark(), equalTo(BoundedWindow.TIMESTAMP_MAX_VALUE));
}
use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.
the class WatermarkManagerTest method getWatermarkForMultiConsumedCollection.
/**
 * Demonstrates that pending elements are tracked independently for each
 * {@link AppliedPTransform} that consumes the same input {@link PCollection}: one consumer
 * finishing a bundle does not advance the watermarks of another consumer of that bundle.
 */
@Test
public void getWatermarkForMultiConsumedCollection() {
  // Produce the shared input bundle from the root transform.
  CommittedBundle<Integer> sharedInput =
      timestampedBundle(
          createdInts,
          TimestampedValue.of(1, new Instant(1_000_000L)),
          TimestampedValue.of(2, new Instant(1234L)),
          TimestampedValue.of(3, new Instant(-1000L)));
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(sharedInput)),
      new Instant(Long.MAX_VALUE));
  manager.refreshAll();
  TransformWatermarks rootWatermarks = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(
      rootWatermarks.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

  // First consumer: the producer of keyed processes the whole bundle.
  CommittedBundle<KV<String, Integer>> keyedOutput =
      timestampedBundle(
          keyed,
          TimestampedValue.of(KV.of("MyKey", 1), new Instant(1_000_000L)),
          TimestampedValue.of(KV.of("MyKey", 2), new Instant(1234L)),
          TimestampedValue.of(KV.of("MyKey", 3), new Instant(-1000L)));
  manager.updateWatermarks(
      sharedInput,
      TimerUpdate.empty(),
      result(
          graph.getProducer(keyed),
          sharedInput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(keyedOutput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  TransformWatermarks keyedWms = manager.getWatermarks(graph.getProducer(keyed));
  assertThat(
      keyedWms.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  assertThat(
      keyedWms.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

  // Second consumer has not processed the bundle yet, so both of its watermarks are still no
  // later than the earliest element timestamp (-1000).
  TransformWatermarks filteredBefore = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(filteredBefore.getInputWatermark(), not(laterThan(new Instant(-1000L))));
  assertThat(filteredBefore.getOutputWatermark(), not(laterThan(new Instant(-1000L))));

  // Once the producer of filtered also completes the bundle, its watermarks advance as well.
  CommittedBundle<Integer> filteredOutput =
      timestampedBundle(filtered, TimestampedValue.of(2, new Instant(1234L)));
  manager.updateWatermarks(
      sharedInput,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          sharedInput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
          Collections.<CommittedBundle<?>>singleton(filteredOutput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  TransformWatermarks filteredAfter = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(
      filteredAfter.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  assertThat(
      filteredAfter.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Aggregations