Search in sources :

Example 1 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithLateData.

/**
 * Verifies that watermark updates remain monotonic when late data is introduced: once the
 * watermarks of the downstream transform have reached the source watermark, a later-arriving
 * element with an earlier timestamp must not move them backwards.
 */
@Test
public void updateWatermarkWithLateData() {
    Instant sourceWatermark = new Instant(1_000_000L);
    Instant earlyTimestamp = new Instant(1234L);
    Instant lateTimestamp = new Instant(-1000L);
    Instant advancedSourceWatermark = new Instant(2_000_000L);

    // The source produces two elements and reports sourceWatermark with its result.
    CommittedBundle<Integer> sourceOutput =
        timestampedBundle(
            createdInts,
            TimestampedValue.of(1, sourceWatermark),
            TimestampedValue.of(2, earlyTimestamp));
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(sourceOutput)),
        sourceWatermark);

    CommittedBundle<KV<String, Integer>> keyedOutput =
        timestampedBundle(
            keyed,
            TimestampedValue.of(KV.of("MyKey", 1), sourceWatermark),
            TimestampedValue.of(KV.of("MyKey", 2), earlyTimestamp));
    // Complete the on-time data. Afterwards the keyed watermarks should equal the source
    // watermark.
    manager.updateWatermarks(
        sourceOutput,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            sourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(keyedOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks keyedOnTime = manager.getWatermarks(graph.getProducer(keyed));
    assertThat(keyedOnTime.getInputWatermark(), equalTo(sourceWatermark));
    assertThat(keyedOnTime.getOutputWatermark(), equalTo(sourceWatermark));

    CommittedBundle<Integer> lateSourceOutput =
        timestampedBundle(createdInts, TimestampedValue.of(3, lateTimestamp));
    // The late element shows up in a downstream PCollection whose watermark has already advanced
    // past it; the watermark is not advanced beyond its current value until that late element
    // has been consumed.
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            sourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(lateSourceOutput)),
        advancedSourceWatermark);
    manager.refreshAll();
    TransformWatermarks sourceAfterLateData = manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(sourceAfterLateData.getOutputWatermark(), equalTo(advancedSourceWatermark));

    // The keyed watermarks must stay at (or beyond) their earlier value: the pending late element
    // keeps them from advancing, and monotonicity keeps them from moving backwards.
    TransformWatermarks keyedWithLatePending = manager.getWatermarks(graph.getProducer(keyed));
    assertThat(keyedWithLatePending.getInputWatermark(), not(earlierThan(sourceWatermark)));
    assertThat(keyedWithLatePending.getOutputWatermark(), not(earlierThan(sourceWatermark)));

    // Consume the late element downstream. No assertion follows; this only exercises the update.
    CommittedBundle<KV<String, Integer>> lateKeyedOutput =
        timestampedBundle(keyed, TimestampedValue.of(KV.of("MyKey", 3), lateTimestamp));
    manager.updateWatermarks(
        lateSourceOutput,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            lateSourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(lateKeyedOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 2 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithUnprocessedElements.

/**
 * Checks that elements handed back as unprocessed still constrain the downstream input
 * watermark: it must not be reported later than the timestamp of the earliest returned element.
 */
@Test
public void updateWatermarkWithUnprocessedElements() {
    Instant earliestUnprocessed = new Instant(-1000L);
    WindowedValue<Integer> consumed = WindowedValue.valueInGlobalWindow(1);
    WindowedValue<Integer> returnedEarlier =
        WindowedValue.timestampedValueInGlobalWindow(2, earliestUnprocessed);
    WindowedValue<Integer> returnedLater =
        WindowedValue.timestampedValueInGlobalWindow(3, new Instant(1234L));

    CommittedBundle<Integer> sourceOutput =
        bundleFactory
            .createBundle(createdInts)
            .add(consumed)
            .add(returnedEarlier)
            .add(returnedLater)
            .commit(clock.now());
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(sourceOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);

    CommittedBundle<KV<String, Integer>> keyedOutput =
        timestampedBundle(
            keyed, TimestampedValue.of(KV.of("MyKey", 1), BoundedWindow.TIMESTAMP_MIN_VALUE));
    // Only the first element was processed; the other two are returned as unprocessed.
    manager.updateWatermarks(
        sourceOutput,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            sourceOutput.withElements(ImmutableList.of(returnedEarlier, returnedLater)),
            Collections.<CommittedBundle<?>>singleton(keyedOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);

    TransformWatermarks keyedAfterPartialProcessing =
        manager.getWatermarks(graph.getProducer(keyed));
    // The two unprocessed elements are added back to the pending set.
    assertThat(
        keyedAfterPartialProcessing.getInputWatermark(), not(laterThan(earliestUnprocessed)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 3 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getWatermarkForUntouchedTransform.

/**
 * Verifies that the watermarks of an {@link AppliedPTransform} which has not yet processed any
 * elements are both reported as {@link BoundedWindow#TIMESTAMP_MIN_VALUE}.
 */
@Test
public void getWatermarkForUntouchedTransform() {
    // No updateWatermarks call precedes this lookup within the test body.
    TransformWatermarks untouched = manager.getWatermarks(graph.getProducer(createdInts));

    assertThat(
        untouched.getInputWatermark(),
        equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
    assertThat(
        untouched.getOutputWatermark(),
        equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) Test(org.junit.Test)

Example 4 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithDifferentWindowedValueInstances.

/**
 * Verifies that a bundle can be completed using a {@link CommittedBundle} whose
 * {@link WindowedValue} instances are separate objects from the ones originally reported as
 * output, and that the downstream input watermark then reaches
 * {@link BoundedWindow#TIMESTAMP_MAX_VALUE}.
 */
@Test
public void updateWatermarkWithDifferentWindowedValueInstances() {
    // Report the source output using one bundle instance...
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(
                bundleFactory
                    .createBundle(createdInts)
                    .add(WindowedValue.valueInGlobalWindow(1))
                    .commit(Instant.now()))),
        BoundedWindow.TIMESTAMP_MAX_VALUE);

    // ...then complete it using a separately constructed bundle built from the same value.
    CommittedBundle<Integer> createdBundle =
        bundleFactory
            .createBundle(createdInts)
            .add(WindowedValue.valueInGlobalWindow(1))
            .commit(Instant.now());
    // The downstream result has no unprocessed elements and no outputs; null is passed as the
    // final argument.
    manager.updateWatermarks(
        createdBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        null);
    manager.refreshAll();

    TransformWatermarks onTimeWatermarks = manager.getWatermarks(graph.getProducer(keyed));
    assertThat(onTimeWatermarks.getInputWatermark(), equalTo(BoundedWindow.TIMESTAMP_MAX_VALUE));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)

Example 5 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getWatermarksAfterHoldAndEmptyOutput.

/**
 * Verifies that when an upstream transform's watermarks are updated without producing any new
 * output, a downstream transform that has a watermark hold keeps its output watermark at that
 * hold.
 */
@Test
public void getWatermarksAfterHoldAndEmptyOutput() {
    Instant sourceReported = new Instant(12_000L);
    Instant filterHold = new Instant(10_000L);

    CommittedBundle<Integer> initialSourceOutput = multiWindowedBundle(createdInts, 1, 2);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(initialSourceOutput)),
        sourceReported);

    // The filter consumes everything and emits an empty bundle while reporting the earlier
    // instant.
    CommittedBundle<Integer> initialFilterOutput = multiWindowedBundle(filtered);
    manager.updateWatermarks(
        initialSourceOutput,
        TimerUpdate.empty(),
        result(
            graph.getProducer(filtered),
            initialSourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(initialFilterOutput)),
        filterHold);
    manager.refreshAll();
    TransformWatermarks filterAfterFirstPass = manager.getWatermarks(graph.getProducer(filtered));
    assertThat(filterAfterFirstPass.getInputWatermark(), not(earlierThan(sourceReported)));
    assertThat(filterAfterFirstPass.getOutputWatermark(), not(laterThan(filterHold)));

    // The source now reports an empty bundle together with the maximum timestamp.
    CommittedBundle<Integer> emptySourceOutput = multiWindowedBundle(createdInts);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(emptySourceOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks sourceAfterCompletion =
        manager.getWatermarks(graph.getProducer(createdInts));
    assertThat(
        sourceAfterCompletion.getOutputWatermark(),
        not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

    // The filter's input follows the source, while its output stays at or before the hold.
    TransformWatermarks filterAfterCompletion = manager.getWatermarks(graph.getProducer(filtered));
    assertThat(
        filterAfterCompletion.getInputWatermark(),
        not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    assertThat(filterAfterCompletion.getOutputWatermark(), not(laterThan(filterHold)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) Test(org.junit.Test)

Aggregations

TransformWatermarks (org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)22 Instant (org.joda.time.Instant)18 Test (org.junit.Test)18 ReadableInstant (org.joda.time.ReadableInstant)17 KV (org.apache.beam.sdk.values.KV)5 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)2 ImmutableList (com.google.common.collect.ImmutableList)1 Collection (java.util.Collection)1 FiredTimers (org.apache.beam.runners.direct.WatermarkManager.FiredTimers)1 TimerUpdate (org.apache.beam.runners.direct.WatermarkManager.TimerUpdate)1 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)1 PCollection (org.apache.beam.sdk.values.PCollection)1