Search in sources :

Example 6 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithWatermarkHolds.

/**
   * Verifies that a watermark hold reported with an update caps the output watermark of the
   * {@link AppliedPTransform} that reported it, even though its input watermark has advanced to
   * the end of time.
   */
@Test
public void updateWatermarkWithWatermarkHolds() {
    Instant lateTimestamp = new Instant(1_000_000L);
    Instant middleTimestamp = new Instant(1234L);
    Instant earlyTimestamp = new Instant(-1000L);

    // The root transform produces three timestamped ints; Long.MAX_VALUE is passed as its hold.
    CommittedBundle<Integer> sourceOutput =
        timestampedBundle(
            createdInts,
            TimestampedValue.of(1, lateTimestamp),
            TimestampedValue.of(2, middleTimestamp),
            TimestampedValue.of(3, earlyTimestamp));
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(sourceOutput)),
        new Instant(Long.MAX_VALUE));

    // The keyed transform consumes the whole source bundle but reports a hold of 500.
    CommittedBundle<KV<String, Integer>> keyedOutput =
        timestampedBundle(
            keyed,
            TimestampedValue.of(KV.of("MyKey", 1), lateTimestamp),
            TimestampedValue.of(KV.of("MyKey", 2), middleTimestamp),
            TimestampedValue.of(KV.of("MyKey", 3), earlyTimestamp));
    AppliedPTransform<?, ?, ?> keyedProducer = graph.getProducer(keyed);
    Instant holdInstant = new Instant(500L);
    manager.updateWatermarks(
        sourceOutput,
        TimerUpdate.empty(),
        result(
            keyedProducer,
            sourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(keyedOutput)),
        holdInstant);
    manager.refreshAll();

    TransformWatermarks keyedWms = manager.getWatermarks(keyedProducer);
    // Input is fully consumed, so the input watermark is at the maximum timestamp ...
    assertThat(keyedWms.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    // ... while the output watermark must not pass the reported hold.
    assertThat(keyedWms.getOutputWatermark(), not(laterThan(holdInstant)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 7 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method updateWatermarkWithHoldsShouldBeMonotonic.

/**
   * Demonstrates that updated output watermarks are monotonic in the presence of watermark holds
   * that become earlier than a previous watermark hold.
   *
   * <p>The keyed transform first reports a hold of 500; a second update then reports an earlier
   * hold of 250. The output watermark read after the second update must equal the one read after
   * the first.
   */
@Test
public void updateWatermarkWithHoldsShouldBeMonotonic() {
    CommittedBundle<Integer> createdBundle =
        timestampedBundle(
            createdInts,
            TimestampedValue.of(1, new Instant(1_000_000L)),
            TimestampedValue.of(2, new Instant(1234L)),
            TimestampedValue.of(3, new Instant(-1000L)));
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(
            graph.getProducer(createdInts),
            null,
            Collections.<CommittedBundle<?>>singleton(createdBundle)),
        new Instant(Long.MAX_VALUE));

    CommittedBundle<KV<String, Integer>> keyBundle =
        timestampedBundle(
            keyed,
            TimestampedValue.of(KV.of("MyKey", 1), new Instant(1_000_000L)),
            TimestampedValue.of(KV.of("MyKey", 2), new Instant(1234L)),
            TimestampedValue.of(KV.of("MyKey", 3), new Instant(-1000L)));
    // First update for the keyed transform: everything consumed, hold at 500.
    manager.updateWatermarks(
        createdBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(keyBundle)),
        new Instant(500L));
    manager.refreshAll();

    TransformWatermarks keyedWatermarks = manager.getWatermarks(graph.getProducer(keyed));
    assertThat(keyedWatermarks.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    assertThat(keyedWatermarks.getOutputWatermark(), not(laterThan(new Instant(500L))));
    Instant oldOutputWatermark = keyedWatermarks.getOutputWatermark();

    // Second update for the keyed transform: no new output, but a hold (250) that is earlier
    // than the previous one (500). Without this step nothing changes between the two reads of
    // the output watermark and the monotonicity assertion below would be vacuous.
    manager.updateWatermarks(
        createdBundle,
        TimerUpdate.empty(),
        result(
            graph.getProducer(keyed),
            createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList()),
        new Instant(250L));
    manager.refreshAll();

    TransformWatermarks updatedWatermarks = manager.getWatermarks(graph.getProducer(keyed));
    assertThat(updatedWatermarks.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    // We added a hold prior to the old watermark; we shouldn't progress (due to the earlier hold)
    // but the watermark is monotonic and should not backslide to the new, earlier hold
    assertThat(updatedWatermarks.getOutputWatermark(), equalTo(oldOutputWatermark));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 8 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getSynchronizedProcessingTimeOutputHeldToPendingTimers.

/**
   * Demonstrates that the Synchronized Processing Time output watermark cannot progress past
   * pending timers in the same set. This propagates to all downstream SynchronizedProcessingTimes.
   *
   * <p>Also demonstrate that the result is monotonic.
   *
   * <p>The scenario sets two processing-time timers (at 250 and 4096) on the {@code filtered}
   * transform, advances the clock, extracts the fired timer, completes it, and checks the
   * synchronized processing output time of {@code filtered} and {@code filteredTimesTwo} after
   * each step.
   */
// NOTE(review): the @Test annotation below is commented out, so JUnit will not run this method.
// Confirm whether the test is disabled on purpose before relying on it.
//  @Test
public void getSynchronizedProcessingTimeOutputHeldToPendingTimers() {
    // Produce the root bundle for createdInts, passing 1248 as the hold argument.
    CommittedBundle<Integer> createdBundle = multiWindowedBundle(createdInts, 1, 2, 4, 8);
    manager.updateWatermarks(null, TimerUpdate.empty(), result(graph.getProducer(createdInts), null, Collections.<CommittedBundle<?>>singleton(createdBundle)), new Instant(1248L));
    manager.refreshAll();
    TransformWatermarks filteredWms = manager.getWatermarks(graph.getProducer(filtered));
    TransformWatermarks filteredDoubledWms = manager.getWatermarks(graph.getProducer(filteredTimesTwo));
    // Baselines used by the monotonicity assertions further down.
    Instant initialFilteredWm = filteredWms.getSynchronizedProcessingOutputTime();
    Instant initialFilteredDoubledWm = filteredDoubledWms.getSynchronizedProcessingOutputTime();
    StructuralKey<String> key = StructuralKey.of("key", StringUtf8Coder.of());
    CommittedBundle<Integer> filteredBundle = multiWindowedBundle(filtered, 2, 8);
    // Two processing-time timers in the global namespace: one at 250 and one at 4096.
    TimerData pastTimer = TimerData.of(StateNamespaces.global(), new Instant(250L), TimeDomain.PROCESSING_TIME);
    TimerData futureTimer = TimerData.of(StateNamespaces.global(), new Instant(4096L), TimeDomain.PROCESSING_TIME);
    TimerUpdate timers = TimerUpdate.builder(key).setTimer(pastTimer).setTimer(futureTimer).build();
    // filtered consumes createdBundle, emits filteredBundle, and registers both timers.
    manager.updateWatermarks(createdBundle, timers, result(graph.getProducer(filtered), createdBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(filteredBundle)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    // Move the clock 250 past the instant captured here.
    Instant startTime = clock.now();
    clock.set(startTime.plus(250L));
    // We're held based on the past timer
    assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
    // And we're monotonic
    assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(earlierThan(initialFilteredWm)));
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(earlierThan(initialFilteredDoubledWm)));
    // Exactly one FiredTimers entry is expected, containing only the 250 timer.
    Collection<FiredTimers> firedTimers = manager.extractFiredTimers();
    assertThat(Iterables.getOnlyElement(firedTimers).getTimers(), contains(pastTimer));
    // Our timer has fired, but has not been completed, so it holds our synchronized processing WM
    assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(laterThan(startTime)));
    // Empty keyed bundles: one stands in as the input that delivered the timer to filtered, the
    // other as filtered's output, committed at filtered's current synchronized processing output time.
    CommittedBundle<Integer> filteredTimerBundle = bundleFactory.createKeyedBundle(key, filtered).commit(BoundedWindow.TIMESTAMP_MAX_VALUE);
    CommittedBundle<Integer> filteredTimerResult = bundleFactory.createKeyedBundle(key, filteredTimesTwo).commit(filteredWms.getSynchronizedProcessingOutputTime());
    // Complete the processing time timer
    manager.updateWatermarks(filteredTimerBundle, TimerUpdate.builder(key).withCompletedTimers(Collections.<TimerData>singleton(pastTimer)).build(), result(graph.getProducer(filtered), filteredTimerBundle.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>singleton(filteredTimerResult)), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    clock.set(startTime.plus(500L));
    assertThat(filteredWms.getSynchronizedProcessingOutputTime(), not(laterThan(clock.now())));
    // filtered should be held to the time at which the filteredTimerResult fired
    // NOTE(review): the comment above names "filtered", but the assertion below checks
    // filteredDoubledWms (the filteredTimesTwo transform) and uses not(earlierThan(...)), i.e. a
    // lower bound rather than a hold — confirm which was intended.
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), not(earlierThan(filteredTimerResult.getSynchronizedProcessingOutputWatermark())));
    // filteredTimesTwo consumes filteredTimerResult and produces no further output.
    manager.updateWatermarks(filteredTimerResult, TimerUpdate.empty(), result(graph.getProducer(filteredTimesTwo), filteredTimerResult.withElements(Collections.<WindowedValue<Integer>>emptyList()), Collections.<CommittedBundle<?>>emptyList()), BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), equalTo(clock.now()));
    // With the clock at Long.MAX_VALUE, both transforms are expected to report 4096, the
    // timestamp of futureTimer, which was set but never completed in this test.
    clock.set(new Instant(Long.MAX_VALUE));
    assertThat(filteredWms.getSynchronizedProcessingOutputTime(), equalTo(new Instant(4096)));
    assertThat(filteredDoubledWms.getSynchronizedProcessingOutputTime(), equalTo(new Instant(4096)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) TimerUpdate(org.apache.beam.runners.direct.WatermarkManager.TimerUpdate) FiredTimers(org.apache.beam.runners.direct.WatermarkManager.FiredTimers) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData)

Example 9 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getWatermarkMultiIdenticalInput.

/**
   * Verifies that when the same {@link PCollection} is fed into a Flatten twice, the Flatten's
   * input watermark stays at the pending element's timestamp after the produced bundle has been
   * completed once, and only reaches the maximum timestamp after it has been completed a second
   * time.
   */
@Test
public void getWatermarkMultiIdenticalInput() {
    // Build a pipeline in which one PCollection is both inputs of a Flatten.
    PCollection<Integer> source = p.apply(Create.of(1, 2, 3));
    PCollection<Integer> flattened =
        PCollectionList.of(source).and(source).apply(Flatten.<Integer>pCollections());

    DirectGraphVisitor visitor = new DirectGraphVisitor();
    p.traverseTopologically(visitor);
    DirectGraph directGraph = visitor.getGraph();
    AppliedPTransform<?, ?, ?> flattenTransform = directGraph.getProducer(flattened);
    WatermarkManager watermarkManager = WatermarkManager.create(clock, directGraph);

    CommittedBundle<Void> rootBundle =
        bundleFactory
            .<Void>createRootBundle()
            .add(WindowedValue.<Void>valueInGlobalWindow(null))
            .commit(clock.now());
    Instant elementTimestamp = new Instant(33536);
    CommittedBundle<Integer> createOutput =
        bundleFactory
            .createBundle(source)
            .add(WindowedValue.timestampedValueInGlobalWindow(1, elementTimestamp))
            .commit(clock.now());

    // Seed the manager with the root bundle as the initial input of the Create transform.
    AppliedPTransform<?, ?, ?> createTransform = directGraph.getProducer(source);
    Map<AppliedPTransform<?, ?, ?>, Collection<CommittedBundle<?>>> rootInputs =
        ImmutableMap.<AppliedPTransform<?, ?, ?>, Collection<CommittedBundle<?>>>builder()
            .put(createTransform, Collections.<CommittedBundle<?>>singleton(rootBundle))
            .build();
    watermarkManager.initialize(rootInputs);

    // Create consumes the root bundle and emits a single element stamped 33536.
    watermarkManager.updateWatermarks(
        rootBundle,
        TimerUpdate.empty(),
        CommittedResult.create(
            StepTransformResult.withoutHold(createTransform).build(),
            rootBundle.withElements(Collections.<WindowedValue<Void>>emptyList()),
            Collections.singleton(createOutput),
            EnumSet.allOf(OutputType.class)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    watermarkManager.refreshAll();
    TransformWatermarks flattenWatermarks = watermarkManager.getWatermarks(flattenTransform);
    assertThat(flattenWatermarks.getInputWatermark(), equalTo(elementTimestamp));

    // First completion of the bundle by the Flatten: the input watermark is still 33536.
    watermarkManager.updateWatermarks(
        createOutput,
        TimerUpdate.empty(),
        CommittedResult.create(
            StepTransformResult.withoutHold(flattenTransform).build(),
            createOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList(),
            EnumSet.allOf(OutputType.class)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    watermarkManager.refreshAll();
    assertThat(flattenWatermarks.getInputWatermark(), equalTo(elementTimestamp));

    // Second completion of the same bundle: the input watermark now reaches the maximum.
    watermarkManager.updateWatermarks(
        createOutput,
        TimerUpdate.empty(),
        CommittedResult.create(
            StepTransformResult.withoutHold(flattenTransform).build(),
            createOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>emptyList(),
            EnumSet.allOf(OutputType.class)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    watermarkManager.refreshAll();
    assertThat(flattenWatermarks.getInputWatermark(), equalTo(BoundedWindow.TIMESTAMP_MAX_VALUE));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) Test(org.junit.Test)

Example 10 with TransformWatermarks

use of org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks in project beam by apache.

the class WatermarkManagerTest method getWatermarkForMultiConsumedCollection.

/**
   * Verifies that each {@link AppliedPTransform} consuming a shared input {@link PCollection}
   * tracks its own pending elements: one consumer finishing the bundle does not advance the
   * watermarks of another consumer that has not processed it yet.
   */
@Test
public void getWatermarkForMultiConsumedCollection() {
    Instant lateTimestamp = new Instant(1_000_000L);
    Instant middleTimestamp = new Instant(1234L);
    Instant earlyTimestamp = new Instant(-1000L);

    // The source emits one bundle that both the keyed and filtered transforms will consume.
    CommittedBundle<Integer> sourceOutput =
        timestampedBundle(
            createdInts,
            TimestampedValue.of(1, lateTimestamp),
            TimestampedValue.of(2, middleTimestamp),
            TimestampedValue.of(3, earlyTimestamp));
    AppliedPTransform<?, ?, ?> createdProducer = graph.getProducer(createdInts);
    manager.updateWatermarks(
        null,
        TimerUpdate.empty(),
        result(createdProducer, null, Collections.<CommittedBundle<?>>singleton(sourceOutput)),
        new Instant(Long.MAX_VALUE));
    manager.refreshAll();
    TransformWatermarks sourceWms = manager.getWatermarks(createdProducer);
    assertThat(sourceWms.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

    // The keyed transform processes the whole bundle first.
    CommittedBundle<KV<String, Integer>> keyedOutput =
        timestampedBundle(
            keyed,
            TimestampedValue.of(KV.of("MyKey", 1), lateTimestamp),
            TimestampedValue.of(KV.of("MyKey", 2), middleTimestamp),
            TimestampedValue.of(KV.of("MyKey", 3), earlyTimestamp));
    AppliedPTransform<?, ?, ?> keyedProducer = graph.getProducer(keyed);
    manager.updateWatermarks(
        sourceOutput,
        TimerUpdate.empty(),
        result(
            keyedProducer,
            sourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(keyedOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks keyedWms = manager.getWatermarks(keyedProducer);
    assertThat(keyedWms.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    assertThat(keyedWms.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));

    // The filtered transform has not consumed the bundle yet, so it is still held at or before
    // the earliest element timestamp.
    AppliedPTransform<?, ?, ?> filteredProducer = graph.getProducer(filtered);
    TransformWatermarks filteredBefore = manager.getWatermarks(filteredProducer);
    assertThat(filteredBefore.getInputWatermark(), not(laterThan(earlyTimestamp)));
    assertThat(filteredBefore.getOutputWatermark(), not(laterThan(earlyTimestamp)));

    // Once filtered processes the bundle as well, its watermarks advance to the maximum.
    CommittedBundle<Integer> filteredOutput =
        timestampedBundle(filtered, TimestampedValue.of(2, middleTimestamp));
    manager.updateWatermarks(
        sourceOutput,
        TimerUpdate.empty(),
        result(
            filteredProducer,
            sourceOutput.withElements(Collections.<WindowedValue<Integer>>emptyList()),
            Collections.<CommittedBundle<?>>singleton(filteredOutput)),
        BoundedWindow.TIMESTAMP_MAX_VALUE);
    manager.refreshAll();
    TransformWatermarks filteredAfter = manager.getWatermarks(filteredProducer);
    assertThat(filteredAfter.getInputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
    assertThat(filteredAfter.getOutputWatermark(), not(earlierThan(BoundedWindow.TIMESTAMP_MAX_VALUE)));
}
Also used : TransformWatermarks(org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks) ReadableInstant(org.joda.time.ReadableInstant) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Aggregations

TransformWatermarks (org.apache.beam.runners.direct.WatermarkManager.TransformWatermarks)22 Instant (org.joda.time.Instant)18 Test (org.junit.Test)18 ReadableInstant (org.joda.time.ReadableInstant)17 KV (org.apache.beam.sdk.values.KV)5 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)2 ImmutableList (com.google.common.collect.ImmutableList)1 Collection (java.util.Collection)1 FiredTimers (org.apache.beam.runners.direct.WatermarkManager.FiredTimers)1 TimerUpdate (org.apache.beam.runners.direct.WatermarkManager.TimerUpdate)1 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)1 PCollection (org.apache.beam.sdk.values.PCollection)1