Use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method synchronizedProcessingInputTimeIsHeldToPendingBundleTimes.
/**
 * Checks the synchronized processing input time seen by the producer of {@code filteredTimesTwo}
 * while a bundle committed at {@code upstreamHold} is still pending for it: the value equals
 * {@code clock.now()} at first, and equals {@code upstreamHold} once the clock has been set to
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 */
@Test
public void synchronizedProcessingInputTimeIsHeldToPendingBundleTimes() {
  // Passed to withElements(...) to report that an input bundle has no unprocessed elements left.
  Iterable<WindowedValue<Integer>> nothingLeft = Collections.emptyList();

  // Step 1: the producer of createdInts emits a single bundle.
  CommittedBundle<Integer> sourceOutput = multiWindowedBundle(createdInts, 1, 2, 3);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(sourceOutput)),
      new Instant(29_919_235L));

  // Step 2: the producer of filtered consumes that bundle and emits an (empty) keyed bundle
  // committed at upstreamHold. No refresh happens between step 1 and step 2.
  Instant upstreamHold = new Instant(2048L);
  CommittedBundle<Integer> pendingDownstreamInput =
      bundleFactory
          .createKeyedBundle(StructuralKey.of("key", StringUtf8Coder.of()), filtered)
          .commit(upstreamHold);
  manager.updateWatermarks(
      sourceOutput,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          sourceOutput.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>singleton(pendingDownstreamInput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  TransformWatermarks downstreamWms = manager.getWatermarks(graph.getProducer(filteredTimesTwo));

  // Before the clock moves, the reported time matches the clock.
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), equalTo(clock.now()));

  // After the clock jumps to the end of time, the reported time is the pending bundle's hold.
  clock.set(BoundedWindow.TIMESTAMP_MAX_VALUE);
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), equalTo(upstreamHold));
}
Use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method updateOutputWatermarkShouldBeMonotonic.
/**
 * Verifies that the output watermark of the producer of {@code createdInts} (an {@link
 * AppliedPTransform} that is updated here with a {@code null} input bundle) does not move
 * backwards: after an update at instant {@code 0}, a later update at instant {@code -250} must
 * leave the output watermark no earlier than {@code 0}.
 */
@Test
public void updateOutputWatermarkShouldBeMonotonic() {
  Instant initialWatermark = new Instant(0L);
  Instant staleWatermark = new Instant(-250L);

  // First update: reported with instant 0.
  CommittedBundle<?> initialBundle =
      bundleFactory.createBundle(createdInts).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(initialBundle)),
      initialWatermark);
  manager.refreshAll();

  TransformWatermarks afterInitialUpdate = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(afterInitialUpdate.getOutputWatermark(), equalTo(initialWatermark));

  // Second update: reported with an instant that lies before the first one.
  CommittedBundle<?> followUpBundle =
      bundleFactory.createBundle(createdInts).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(followUpBundle)),
      staleWatermark);
  manager.refreshAll();

  // The earlier instant must not drag the output watermark below its previous value.
  TransformWatermarks afterStaleUpdate = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(afterStaleUpdate.getOutputWatermark(), not(earlierThan(initialWatermark)));
}
Use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method synchronizedProcessingInputTimeIsHeldToUpstreamProcessingTimeTimers.
/**
 * Checks that a processing-time timer set at {@code upstreamHold} by the producer of {@code
 * filtered} bounds the synchronized processing input time reported for the producer of {@code
 * filteredTimesTwo}: the bound applies after the clock passes the timer and after the timers have
 * been extracted, and is lifted only once the timer is reported as completed.
 */
@Test
public void synchronizedProcessingInputTimeIsHeldToUpstreamProcessingTimeTimers() {
  // Passed to withElements(...) to report that an input bundle has no unprocessed elements left.
  Iterable<WindowedValue<Integer>> nothingLeft = Collections.emptyList();

  // The producer of createdInts emits its first bundle.
  CommittedBundle<Integer> sourceOutput = multiWindowedBundle(createdInts, 1, 2, 3);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(sourceOutput)),
      new Instant(40_900L));
  manager.refreshAll();

  // The producer of filtered consumes it, emits a bundle, and sets a processing-time timer.
  CommittedBundle<Integer> filterOutput = multiWindowedBundle(filtered, 2, 4);
  Instant upstreamHold = new Instant(2048L);
  TimerData upstreamProcessingTimer =
      TimerData.of(StateNamespaces.global(), upstreamHold, TimeDomain.PROCESSING_TIME);
  TimerUpdate timerSet =
      TimerUpdate.builder(StructuralKey.of("key", StringUtf8Coder.of()))
          .setTimer(upstreamProcessingTimer)
          .build();
  manager.updateWatermarks(
      sourceOutput,
      timerSet,
      result(
          graph.getProducer(filtered),
          sourceOutput.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>singleton(filterOutput)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  TransformWatermarks downstreamWms = manager.getWatermarks(graph.getProducer(filteredTimesTwo));

  // Before the clock moves, the reported time matches the clock.
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), equalTo(clock.now()));

  // With the clock at the end of time, the reported time is the timer's timestamp.
  clock.set(BoundedWindow.TIMESTAMP_MAX_VALUE);
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), equalTo(upstreamHold));

  manager.extractFiredTimers();
  // A processing-time timer that has fired but has not yet been reported as completed still
  // holds the synchronized processing time.
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), equalTo(upstreamHold));

  // Report the timer as completed (alongside another input bundle with no produced output).
  CommittedBundle<Integer> otherCreated = multiWindowedBundle(createdInts, 4, 8, 12);
  TimerUpdate timerCompleted =
      TimerUpdate.builder(StructuralKey.of("key", StringUtf8Coder.of()))
          .withCompletedTimers(Collections.singleton(upstreamProcessingTimer))
          .build();
  manager.updateWatermarks(
      otherCreated,
      timerCompleted,
      result(
          graph.getProducer(filtered),
          otherCreated.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  // With the timer completed, the reported time is no longer behind the clock.
  assertThat(downstreamWms.getSynchronizedProcessingInputTime(), not(earlierThan(clock.now())));
}
Use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method getWatermarkForMultiInputTransform.
/**
 * Shows that the watermarks reported for the producer of {@code flattened}, which is fed from
 * both {@code createdInts} and {@code intsToFlatten}, do not advance past the slowest of its
 * inputs, and only reach the end of time once every input has been fully consumed.
 */
@Test
public void getWatermarkForMultiInputTransform() {
  // Passed to withElements(...) to report that an input bundle has no unprocessed elements left.
  Iterable<WindowedValue<Integer>> nothingLeft = Collections.emptyList();

  // ---- Phase 1: only the producer of intsToFlatten has emitted output. ----
  CommittedBundle<Integer> secondPcollectionBundle = multiWindowedBundle(intsToFlatten, -1);
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(intsToFlatten),
          null,
          Collections.<CommittedBundle<?>>singleton(secondPcollectionBundle)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();

  // Nothing has been reported for the first source, so its watermark must not have moved.
  TransformWatermarks untouchedFirstSource = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(
      untouchedFirstSource.getOutputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));

  // The second source has output all of its elements, so it is done and its watermark sits at
  // the end of time.
  TransformWatermarks finishedSecondSource =
      manager.getWatermarks(graph.getProducer(intsToFlatten));
  assertThat(
      finishedSecondSource.getOutputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

  // The flatten has not consumed anything yet, so it is still at the beginning of time.
  TransformWatermarks flattenBeforeConsuming = manager.getWatermarks(graph.getProducer(flattened));
  assertThat(
      flattenBeforeConsuming.getInputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
  assertThat(
      flattenBeforeConsuming.getOutputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));

  // ---- Phase 2: the flatten consumes the second PCollection's bundle. ----
  CommittedBundle<Integer> flattenedBundleSecondCreate = multiWindowedBundle(flattened, -1);
  // The bundle from the second PCollection is fully processed, but nothing from the first
  // PCollection has been consumed, so the flatten's watermark must stay put.
  manager.updateWatermarks(
      secondPcollectionBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(flattened),
          secondPcollectionBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>singleton(flattenedBundleSecondCreate)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  TransformWatermarks flattenAfterSecondInput =
      manager.getWatermarks(graph.getProducer(flattened));
  // NOTE(review): the identical result is reported a second time before refreshing; kept as in
  // the original test -- confirm whether the repetition is intentional.
  manager.updateWatermarks(
      secondPcollectionBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(flattened),
          secondPcollectionBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>singleton(flattenedBundleSecondCreate)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  assertThat(
      flattenAfterSecondInput.getInputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
  assertThat(
      flattenAfterSecondInput.getOutputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));

  // ---- Phase 3: the first source emits a single timestamped element and finishes. ----
  Instant firstCollectionTimestamp = new Instant(10000);
  CommittedBundle<Integer> firstPcollectionBundle =
      timestampedBundle(createdInts, TimestampedValue.<Integer>of(5, firstCollectionTimestamp));
  // The source is done but its element is still buffered downstream; the source's own output
  // watermark is expected at the end of the global window.
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          Collections.<CommittedBundle<?>>singleton(firstPcollectionBundle)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  TransformWatermarks finishedFirstSource = manager.getWatermarks(graph.getProducer(createdInts));
  assertThat(
      finishedFirstSource.getOutputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

  // The flatten still has not consumed the first source's element, so it may not pass that
  // element's timestamp.
  TransformWatermarks flattenAfterSourcesProduced =
      manager.getWatermarks(graph.getProducer(flattened));
  assertThat(
      flattenAfterSourcesProduced.getInputWatermark(), not(laterThan(firstCollectionTimestamp)));
  assertThat(
      flattenAfterSourcesProduced.getOutputWatermark(), not(laterThan(firstCollectionTimestamp)));

  // Inputs are buffered, and the upstream PCollections are complete (their watermark is at the
  // end of the global window), so the flatten's watermark equals the minimum timestamp among
  // the buffered elements.
  TransformWatermarks flattenWithBufferedInput =
      manager.getWatermarks(graph.getProducer(flattened));
  assertThat(flattenWithBufferedInput.getInputWatermark(), equalTo(firstCollectionTimestamp));
  assertThat(flattenWithBufferedInput.getOutputWatermark(), equalTo(firstCollectionTimestamp));

  // ---- Phase 4: the flatten consumes the first PCollection's bundle as well. ----
  CommittedBundle<?> completedFlattenBundle =
      bundleFactory.createBundle(flattened).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.updateWatermarks(
      firstPcollectionBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(flattened),
          firstPcollectionBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>singleton(completedFlattenBundle)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  TransformWatermarks flattenAfterAllInput = manager.getWatermarks(graph.getProducer(flattened));
  assertThat(
      flattenAfterAllInput.getInputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  // NOTE(review): this only bounds the output watermark from above (not later than the end of
  // time); kept as in the original test -- confirm whether a lower bound was intended.
  assertThat(
      flattenAfterAllInput.getOutputWatermark(),
      not(laterThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in the Apache Beam project.
From the class WatermarkManagerTest, method updateWatermarkWithKeyedWatermarkHolds.
/**
 * Shows that the output watermark of an {@link AppliedPTransform} (here the producer of {@code
 * filtered}) is held back by the watermark holds reported for keyed bundles, and advances step
 * by step as later updates for the "Odd" and "Even" keys report later holds.
 */
@Test
public void updateWatermarkWithKeyedWatermarkHolds() {
  // Passed to withElements(...) to report that an input bundle has no unprocessed elements left.
  Iterable<WindowedValue<Integer>> nothingLeft = Collections.emptyList();

  Instant oddKeyHold = new Instant(-1000L);
  Instant evenKeyHold = new Instant(1234L);
  Instant laterEvenKeyHold = new Instant(5678L);

  // The source emits one bundle per key.
  CommittedBundle<Integer> firstKeyBundle =
      bundleFactory
          .createKeyedBundle(StructuralKey.of("Odd", StringUtf8Coder.of()), createdInts)
          .add(WindowedValue.timestampedValueInGlobalWindow(1, new Instant(1_000_000L)))
          .add(WindowedValue.timestampedValueInGlobalWindow(3, oddKeyHold))
          .commit(clock.now());
  CommittedBundle<Integer> secondKeyBundle =
      bundleFactory
          .createKeyedBundle(StructuralKey.of("Even", StringUtf8Coder.of()), createdInts)
          .add(WindowedValue.timestampedValueInGlobalWindow(2, evenKeyHold))
          .commit(clock.now());
  manager.updateWatermarks(
      null,
      TimerUpdate.empty(),
      result(
          graph.getProducer(createdInts),
          null,
          ImmutableList.of(firstKeyBundle, secondKeyBundle)),
      BoundedWindow.TIMESTAMP_MAX_VALUE);

  // The filter consumes both bundles without producing output, reporting one hold per key.
  manager.updateWatermarks(
      firstKeyBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          firstKeyBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      oddKeyHold);
  manager.updateWatermarks(
      secondKeyBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          secondKeyBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      evenKeyHold);
  manager.refreshAll();

  // All input is consumed, but the output watermark cannot pass the earliest hold (-1000).
  TransformWatermarks filteredWatermarks = manager.getWatermarks(graph.getProducer(filtered));
  assertThat(
      filteredWatermarks.getInputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
  assertThat(filteredWatermarks.getOutputWatermark(), not(laterThan(oddKeyHold)));

  // An empty bundle for the "Odd" key reported with an end-of-time hold: the "Even" key's hold
  // (1234) becomes the limiting one.
  CommittedBundle<Integer> fauxFirstKeyTimerBundle =
      bundleFactory
          .createKeyedBundle(StructuralKey.of("Odd", StringUtf8Coder.of()), createdInts)
          .commit(clock.now());
  manager.updateWatermarks(
      fauxFirstKeyTimerBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          fauxFirstKeyTimerBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  assertThat(filteredWatermarks.getOutputWatermark(), equalTo(evenKeyHold));

  // An empty bundle for the "Even" key reported with a later hold (5678): the output watermark
  // follows it.
  CommittedBundle<Integer> fauxSecondKeyTimerBundle =
      bundleFactory
          .createKeyedBundle(StructuralKey.of("Even", StringUtf8Coder.of()), createdInts)
          .commit(clock.now());
  manager.updateWatermarks(
      fauxSecondKeyTimerBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          fauxSecondKeyTimerBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      laterEvenKeyHold);
  manager.refreshAll();
  assertThat(filteredWatermarks.getOutputWatermark(), equalTo(laterEvenKeyHold));

  // The same "Even" bundle reported once more with an end-of-time hold: the output watermark
  // reaches the end of time.
  manager.updateWatermarks(
      fauxSecondKeyTimerBundle,
      TimerUpdate.empty(),
      result(
          graph.getProducer(filtered),
          fauxSecondKeyTimerBundle.withElements(nothingLeft),
          Collections.<CommittedBundle<?>>emptyList()),
      BoundedWindow.TIMESTAMP_MAX_VALUE);
  manager.refreshAll();
  assertThat(
      filteredWatermarks.getOutputWatermark(),
      not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Aggregations