Search in sources :

Example 16 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method synchronizedProcessingInputTimeIsHeldToPendingBundleTimes.

/**
 * Checks the synchronized processing input time reported for the producer of
 * {@code filteredTimesTwo} while a keyed bundle of {@code filtered}, committed at instant 2048,
 * has been emitted but is never completed by this test: the value equals {@code clock.now()} at
 * first, and equals the bundle's commit instant once the clock has been set to
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 */
@Test
public void synchronizedProcessingInputTimeIsHeldToPendingBundleTimes() {
    Instant pendingCommitTime = new Instant(2048L);

    // Root step: the producer of createdInts reports a single output bundle.
    CommittedBundle<Integer> sourceBundle = multiWindowedBundle(createdInts, 1, 2, 3);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(sourceBundle)),
        new Instant(29_919_235L));

    // The producer of filtered hands the source bundle back with an empty element list and
    // reports a keyed output bundle that was committed at pendingCommitTime.
    CommittedBundle<Integer> pendingBundle =
        bundleFactory
            .createKeyedBundle(StructuralKey.of("key", StringUtf8Coder.of()), filtered)
            .commit(pendingCommitTime);
    manager.updateWatermarks(
        sourceBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            sourceBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(pendingBundle)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();

    TransformWatermarks downstream = manager.getWatermarks(graph.getProducer(filteredTimesTwo));

    // Before the clock is moved, the reported time matches the clock.
    assertThat(downstream.getSynchronizedProcessingInputTime(), equalTo(clock.now()));

    // With the clock at the end of time, the reported time is the pending bundle's commit time.
    clock.set(BoundedWindow.TIMESTAMP_MAX_VALUE);
    assertThat(downstream.getSynchronizedProcessingInputTime(), equalTo(pendingCommitTime));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) Test(org.junit.Test)

Example 17 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateOutputWatermarkShouldBeMonotonic.

/**
 * Shows that the output watermark of an {@link AppliedPTransform} that consumes no input does not
 * move backwards: after an update whose final argument is instant 0, a second update whose final
 * argument is the earlier instant -250 must leave the output watermark no earlier than 0.
 */
@Test
public void updateOutputWatermarkShouldBeMonotonic() {
    Instant initialWatermark = new Instant(0L);

    // First update for the producer of createdInts, made with no input bundle.
    CommittedBundle<?> firstOutput =
        bundleFactory.createBundle(createdInts).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(firstOutput)),
        initialWatermark);
    manager.refreshAll();
    TransformWatermarks afterFirstUpdate = manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(afterFirstUpdate.getOutputWatermark(), equalTo(initialWatermark));

    // Second update for the same producer, this time with an instant earlier than the first.
    CommittedBundle<?> secondOutput =
        bundleFactory.createBundle(createdInts).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(secondOutput)),
        new Instant(-250L));
    manager.refreshAll();
    TransformWatermarks afterSecondUpdate = manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(afterSecondUpdate.getOutputWatermark(), not(earlierThan(initialWatermark)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) Test(org.junit.Test)

Example 18 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method synchronizedProcessingInputTimeIsHeldToUpstreamProcessingTimeTimers.

/**
 * Checks the synchronized processing input time reported for the producer of
 * {@code filteredTimesTwo} while the update for the producer of {@code filtered} carries a
 * processing-time timer set for instant 2048: the value equals {@code clock.now()} at first,
 * equals the timer's instant once the clock is at the end of time (both before and after
 * {@code extractFiredTimers()}), and is no earlier than {@code clock.now()} after the timer has
 * been reported as completed.
 */
@Test
public void synchronizedProcessingInputTimeIsHeldToUpstreamProcessingTimeTimers() {
    // Root step: the producer of createdInts reports a single output bundle.
    CommittedBundle<Integer> sourceBundle = multiWindowedBundle(createdInts, 1, 2, 3);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(sourceBundle)),
        new Instant(40_900L));
    manager.refreshAll();

    // The producer of filtered reports an output bundle and sets a processing-time timer.
    CommittedBundle<Integer> filteredOutput = multiWindowedBundle(filtered, 2, 4);
    Instant timerInstant = new Instant(2048L);
    TimerData pendingTimer =
        TimerData.of(StateNamespaces.global(), timerInstant, TimeDomain.PROCESSING_TIME);
    manager.updateWatermarks(
        sourceBundle,
        TimerUpdate.builder(StructuralKey.of("key", StringUtf8Coder.of()))
            .setTimer(pendingTimer)
            .build(),
        result(
            graph.getProducer(filtered),
            sourceBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(filteredOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();

    TransformWatermarks downstream = manager.getWatermarks(graph.getProducer(filteredTimesTwo));
    assertThat(downstream.getSynchronizedProcessingInputTime(), equalTo(clock.now()));

    clock.set(BoundedWindow.TIMESTAMP_MAX_VALUE);
    assertThat(downstream.getSynchronizedProcessingInputTime(), equalTo(timerInstant));

    manager.extractFiredTimers();
    // A processing-time timer that has fired but has not yet been completed still holds the
    // synchronized processing time.
    assertThat(downstream.getSynchronizedProcessingInputTime(), equalTo(timerInstant));

    // Report the timer as completed through a second update for the producer of filtered.
    CommittedBundle<Integer> secondSourceBundle = multiWindowedBundle(createdInts, 4, 8, 12);
    manager.updateWatermarks(
        secondSourceBundle,
        TimerUpdate.builder(StructuralKey.of("key", StringUtf8Coder.of()))
            .withCompletedTimers(Collections.singleton(pendingTimer))
            .build(),
        result(
            graph.getProducer(filtered),
            secondSourceBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    assertThat(downstream.getSynchronizedProcessingInputTime(), not(earlierThan(clock.now())));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) Test(org.junit.Test)

Example 19 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getWatermarkForMultiInputTransform.

/**
 * Demonstrates that getWatermark for a transform that takes multiple inputs is held to the
 * minimum watermark across all of its inputs.
 *
 * <p>The test drives the producers of {@code intsToFlatten} and {@code createdInts} one after the
 * other and, after each step, checks the input and output watermarks reported for the producer
 * of {@code flattened}. The statements are order-sensitive: watermark handles are fetched and
 * {@code refreshAll()} is called at specific points between updates.
 */
@Test
public void getWatermarkForMultiInputTransform() {
    // Step 1: only the producer of intsToFlatten reports output.
    CommittedBundle<Integer> secondPcollectionBundle = multiWindowedBundle(intsToFlatten, -1);
    manager.updateWatermarks(null, TimerUpdate.empty(), result(graph.getProducer(intsToFlatten), null, Collections.<CommittedBundle<?>>singleton(secondPcollectionBundle)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    // We didn't do anything for the first source, so we shouldn't have progressed the watermark
    TransformWatermarks firstSourceWatermark = manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(firstSourceWatermark.getOutputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
    // the Second Source output all of the elements so it should be done (with a watermark at the
    // end of time).
    TransformWatermarks secondSourceWatermark = manager.getWatermarks(graph.getProducer(intsToFlatten));
    assertThat(secondSourceWatermark.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    // We haven't consumed anything yet, so our watermark should be at the beginning of time
    TransformWatermarks transformWatermark = manager.getWatermarks(graph.getProducer(flattened));
    assertThat(transformWatermark.getInputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
    assertThat(transformWatermark.getOutputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
    // Step 2: the producer of flattened reports the intsToFlatten bundle with no elements left.
    CommittedBundle<Integer> flattenedBundleSecondCreate = multiWindowedBundle(flattened, -1);
    // We have finished processing the bundle from the second PCollection, but we haven't consumed
    // anything from the first PCollection yet; so our watermark shouldn't advance
    manager.updateWatermarks(secondPcollectionBundle, TimerUpdate.empty(), result(graph.getProducer(flattened), secondPcollectionBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(flattenedBundleSecondCreate)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    // The handle is fetched here, before the next update and refreshAll(), and asserted on after.
    TransformWatermarks transformAfterProcessing = manager.getWatermarks(graph.getProducer(flattened));
    // NOTE(review): this call repeats the previous updateWatermarks call with identical
    // arguments; confirm whether reporting the same bundle twice is intentional.
    manager.updateWatermarks(secondPcollectionBundle, TimerUpdate.empty(), result(graph.getProducer(flattened), secondPcollectionBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(flattenedBundleSecondCreate)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    assertThat(transformAfterProcessing.getInputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
    assertThat(transformAfterProcessing.getOutputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MIN_VALUE)));
    // Step 3: the producer of createdInts reports one element timestamped at instant 10000.
    Instant firstCollectionTimestamp = new Instant(10000);
    CommittedBundle<Integer> firstPcollectionBundle = timestampedBundle(createdInts, TimestampedValue.<Integer>of(5, firstCollectionTimestamp));
    // the source is done, but elements are still buffered. The source output watermark should be
    // past the end of the global window
    manager.updateWatermarks(null, TimerUpdate.empty(), result(graph.getProducer(createdInts), null, Collections.<CommittedBundle<?>>singleton(firstPcollectionBundle)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks firstSourceWatermarks = manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(firstSourceWatermarks.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    // We still haven't consumed any of the first source's input, so the watermark should still not
    // progress
    // NOTE(review): the assertions below only bound the watermarks by firstCollectionTimestamp,
    // and the next pair requires equality with it, so "not progress" looks outdated; confirm.
    TransformWatermarks flattenAfterSourcesProduced = manager.getWatermarks(graph.getProducer(flattened));
    assertThat(flattenAfterSourcesProduced.getInputWatermark(), not(laterThan(firstCollectionTimestamp)));
    assertThat(flattenAfterSourcesProduced.getOutputWatermark(), not(laterThan(firstCollectionTimestamp)));
    // We have buffered inputs, but since the PCollection has all of the elements (has a WM past the
    // end of the global window), we should have a watermark equal to the min among buffered
    // elements
    TransformWatermarks withBufferedElements = manager.getWatermarks(graph.getProducer(flattened));
    assertThat(withBufferedElements.getInputWatermark(), equalTo(firstCollectionTimestamp));
    assertThat(withBufferedElements.getOutputWatermark(), equalTo(firstCollectionTimestamp));
    // Step 4: the producer of flattened reports the createdInts bundle with no elements left.
    CommittedBundle<?> completedFlattenBundle = bundleFactory.createBundle(flattened).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.updateWatermarks(firstPcollectionBundle, TimerUpdate.empty(), result(graph.getProducer(flattened), firstPcollectionBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(completedFlattenBundle)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks afterConsumingAllInput = manager.getWatermarks(graph.getProducer(flattened));
    assertThat(afterConsumingAllInput.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    // NOTE(review): the output watermark is only checked to be no later than TIMESTAMP_MAX_VALUE,
    // while the input watermark above uses earlierThan; confirm the weaker bound is intended.
    assertThat(afterConsumingAllInput.getOutputWatermark(), not(laterThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) Test(org.junit.Test)

Example 20 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithKeyedWatermarkHolds.

/**
 * Shows that the output watermark of an {@link AppliedPTransform} is held to the watermark holds
 * supplied with its keyed bundles. Two keyed bundles ("Odd" and "Even") are reported for the
 * producer of {@code filtered} with holds of -1000 and 1234; the output watermark is then checked
 * as later updates for each key supply new holds.
 */
@Test
public void updateWatermarkWithKeyedWatermarkHolds() {
    Instant oddKeyHold = new Instant(-1000L);
    Instant evenKeyHold = new Instant(1234L);
    Instant laterEvenKeyHold = new Instant(5678L);

    // The producer of createdInts reports one bundle per key.
    CommittedBundle<Integer> oddBundle =
        bundleFactory
            .createKeyedBundle(StructuralKey.of("Odd", StringUtf8Coder.of()), createdInts)
            .add(WindowedValue.timestampedValueInGlobalWindow(1, new Instant(1_000_000L)))
            .add(WindowedValue.timestampedValueInGlobalWindow(3, new Instant(-1000L)))
            .commit(clock.now());
    CommittedBundle<Integer> evenBundle =
        bundleFactory
            .createKeyedBundle(StructuralKey.of("Even", StringUtf8Coder.of()), createdInts)
            .add(WindowedValue.timestampedValueInGlobalWindow(2, new Instant(1234L)))
            .commit(clock.now());
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(graph.getProducer(createdInts), null, ImmutableList.of(oddBundle, evenBundle)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);

    // The producer of filtered reports both bundles, each with its own hold and no outputs.
    manager.updateWatermarks(
        oddBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            oddBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        oddKeyHold);
    manager.updateWatermarks(
        evenBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            evenBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        evenKeyHold);
    manager.refreshAll();

    TransformWatermarks filteredWms = manager.getWatermarks(graph.getProducer(filtered));
    assertThat(
        filteredWms.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    assertThat(filteredWms.getOutputWatermark(), not(laterThan(oddKeyHold)));

    // An empty "Odd" bundle reported with the end-of-time instant: the "Even" hold remains.
    CommittedBundle<Integer> emptyOddBundle =
        bundleFactory
            .createKeyedBundle(StructuralKey.of("Odd", StringUtf8Coder.of()), createdInts)
            .commit(clock.now());
    manager.updateWatermarks(
        emptyOddBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            emptyOddBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    assertThat(filteredWms.getOutputWatermark(), equalTo(evenKeyHold));

    // An empty "Even" bundle reported with a later hold: the output watermark moves to it.
    CommittedBundle<Integer> emptyEvenBundle =
        bundleFactory
            .createKeyedBundle(StructuralKey.of("Even", StringUtf8Coder.of()), createdInts)
            .commit(clock.now());
    manager.updateWatermarks(
        emptyEvenBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            emptyEvenBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        laterEvenKeyHold);
    manager.refreshAll();
    assertThat(filteredWms.getOutputWatermark(), equalTo(laterEvenKeyHold));

    // The same "Even" bundle reported again with the end-of-time instant.
    manager.updateWatermarks(
        emptyEvenBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            emptyEvenBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    assertThat(
        filteredWms.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) Test(org.junit.Test)

Aggregations

TransformWatermarks (org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)22 Instant (org.joda.time.Instant)18 Test (org.junit.Test)18 ReadableInstant (org.joda.time.ReadableInstant)17 KV (org.apache.beam.sdk.values.KV)5 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)2 ImmutableList (com.google.common.collect.ImmutableList)1 Collection (java.util.Collection)1 FiredTimers (org.apache.beam.runners.direct.WatermarkManager.FiredTimers)1 TimerUpdate (org.apache.beam.runners.direct.WatermarkManager.TimerUpdate)1 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)1 PCollection (org.apache.beam.sdk.values.PCollection)1