Search in sources :

Example 1 with SparkWatermarks

use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.

the class CreateStream method advance.

/**
 * Queues the watermarks for the next batch and moves the low watermark forward.
 *
 * <p>The synchronized processing time of the queued entry is the time of the last queued entry
 * (or {@code initialSystemTime} when {@code times.peekLast()} yields nothing) plus
 * {@code batchInterval}.
 *
 * @param newWatermark the high watermark of the queued entry; it also becomes the low watermark
 *     used by the next call.
 * @return this instance.
 */
private CreateStream<T> advance(Instant newWatermark) {
    // Work out the processing time the new entry has to build upon.
    final Instant previousTime;
    if (times.peekLast() != null) {
        previousTime = times.peekLast().getSynchronizedProcessingTime();
    } else {
        previousTime = initialSystemTime;
    }
    final Instant upcomingTime = previousTime.plus(batchInterval);
    // Guard against a batch interval that would not move the time forward.
    checkArgument(upcomingTime.isAfter(previousTime), "Synchronized processing time must always advance.");
    // The previous high watermark (held in lowWatermark) is the low bound of the new entry.
    times.offer(new SparkWatermarks(lowWatermark, newWatermark, upcomingTime));
    lowWatermark = newWatermark;
    return this;
}
Also used : SparkWatermarks(org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks) Instant(org.joda.time.Instant)

Example 2 with SparkWatermarks

use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.

the class GlobalWatermarkHolderTest method testLowHighWatermarksAdvance.

/**
 * Advances a single source through two legal watermark updates, checks the values exposed by
 * the holder, then expects an {@link IllegalStateException} for an update whose low watermark
 * is later than its high watermark.
 */
@Test
public void testLowHighWatermarksAdvance() {
    final JavaSparkContext sparkContext = SparkContextFactory.getSparkContext(options);
    final Instant epoch = new Instant(0);

    // First update: low and high watermark coincide.
    final Instant firstWatermark = epoch.plus(Duration.millis(5));
    GlobalWatermarkHolder.add(1, new SparkWatermarks(firstWatermark, firstWatermark, epoch));
    GlobalWatermarkHolder.advance(sparkContext);

    // Second update: low watermark trails the high watermark.
    final Instant expectedLow = epoch.plus(Duration.millis(10));
    final Instant expectedHigh = epoch.plus(Duration.millis(15));
    final Instant expectedProcessingTime = epoch.plus(Duration.millis(100));
    GlobalWatermarkHolder.add(1, new SparkWatermarks(expectedLow, expectedHigh, expectedProcessingTime));
    GlobalWatermarkHolder.advance(sparkContext);

    // The broadcast value must reflect the second update.
    final SparkWatermarks observed = GlobalWatermarkHolder.get().getValue().get(1);
    assertThat(observed.getLowWatermark(), equalTo(expectedLow));
    assertThat(observed.getHighWatermark(), equalTo(expectedHigh));
    assertThat(observed.getSynchronizedProcessingTime(), equalTo(expectedProcessingTime));

    // Third update is illegal: its low watermark is ahead of its high watermark.
    thrown.expect(IllegalStateException.class);
    thrown.expectMessage(RegexMatcher.matches("Low watermark " + INSTANT_PATTERN + " cannot be later then high watermark " + INSTANT_PATTERN));
    final Instant illegalLow = epoch.plus(Duration.millis(25));
    final Instant illegalHigh = epoch.plus(Duration.millis(20));
    final Instant laterProcessingTime = epoch.plus(Duration.millis(200));
    GlobalWatermarkHolder.add(1, new SparkWatermarks(illegalLow, illegalHigh, laterProcessingTime));
    GlobalWatermarkHolder.advance(sparkContext);
}
Also used : SparkWatermarks(org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks) Instant(org.joda.time.Instant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Test(org.junit.Test)

Example 3 with SparkWatermarks

use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.

the class GlobalWatermarkHolderTest method testSynchronizedTimeMonotonic.

/**
 * Submits the same watermarks twice for one source and expects the second advance to be
 * rejected with an {@link IllegalStateException}, because the synchronized processing time
 * did not move.
 */
@Test
public void testSynchronizedTimeMonotonic() {
    final JavaSparkContext sparkContext = SparkContextFactory.getSparkContext(options);
    final Instant epoch = new Instant(0);
    final Instant low = epoch.plus(Duration.millis(5));
    final Instant high = epoch.plus(Duration.millis(10));

    // Initial update.
    GlobalWatermarkHolder.add(1, new SparkWatermarks(low, high, epoch));
    GlobalWatermarkHolder.advance(sparkContext);

    thrown.expect(IllegalStateException.class);
    thrown.expectMessage("Synchronized processing time must advance.");

    // Repeating the very same values is acceptable for the watermarks themselves,
    // but the synchronized processing time is required to progress.
    GlobalWatermarkHolder.add(1, new SparkWatermarks(low, high, epoch));
    GlobalWatermarkHolder.advance(sparkContext);
}
Also used : SparkWatermarks(org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks) Instant(org.joda.time.Instant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Test(org.junit.Test)

Example 4 with SparkWatermarks

use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.

the class GlobalWatermarkHolderTest method testMultiSource.

/**
 * Registers watermarks for two sources, advances once, and checks that the holder exposes the
 * low and high watermark of each source under its own id.
 */
@Test
public void testMultiSource() {
    final JavaSparkContext sparkContext = SparkContextFactory.getSparkContext(options);
    final Instant epoch = new Instant(0);

    final Instant firstLow = epoch.plus(Duration.millis(5));
    final Instant firstHigh = epoch.plus(Duration.millis(10));
    final Instant secondLow = epoch.plus(Duration.millis(3));
    final Instant secondHigh = epoch.plus(Duration.millis(6));

    // Both sources are queued before a single advance.
    GlobalWatermarkHolder.add(1, new SparkWatermarks(firstLow, firstHigh, epoch));
    GlobalWatermarkHolder.add(2, new SparkWatermarks(secondLow, secondHigh, epoch));
    GlobalWatermarkHolder.advance(sparkContext);

    // Source 1 keeps its own watermarks.
    final SparkWatermarks first = GlobalWatermarkHolder.get().getValue().get(1);
    assertThat(first.getLowWatermark(), equalTo(firstLow));
    assertThat(first.getHighWatermark(), equalTo(firstHigh));

    // Source 2 keeps its own watermarks.
    final SparkWatermarks second = GlobalWatermarkHolder.get().getValue().get(2);
    assertThat(second.getLowWatermark(), equalTo(secondLow));
    assertThat(second.getHighWatermark(), equalTo(secondHigh));
}
Also used : SparkWatermarks(org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks) Instant(org.joda.time.Instant) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Test(org.junit.Test)

Example 5 with SparkWatermarks

use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.

the class SparkTimerInternals method forStreamFromSources.

/**
 * Builds the {@link TimerInternals} for a stream fed by the given sources.
 *
 * <p>When the broadcast is {@code null}, holds no entries, or shares no id with
 * {@code sourceIds}, the result uses {@code BoundedWindow.TIMESTAMP_MIN_VALUE} for both
 * watermarks and {@code new Instant(0)} as the synchronized processing time. Otherwise the
 * earliest low watermark and the earliest high watermark among the matching sources are used.
 * Every matching source must report the same synchronized processing time, otherwise the
 * {@code checkArgument} precondition fails.
 *
 * @param sourceIds ids of the sources feeding the stream.
 * @param broadcast per-source watermarks keyed by source id; may be {@code null}.
 * @return timer internals initialised from the selected watermarks and processing time.
 */
public static SparkTimerInternals forStreamFromSources(List<Integer> sourceIds, @Nullable Broadcast<Map<Integer, SparkWatermarks>> broadcast) {
    // Nothing was broadcast at all: fall back to the defaults.
    if (broadcast == null) {
        return new SparkTimerInternals(BoundedWindow.TIMESTAMP_MIN_VALUE, BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(0));
    }
    final Map<Integer, SparkWatermarks> watermarksBySource = broadcast.getValue();
    // The broadcast says nothing about any of the requested ids: same defaults.
    if (watermarksBySource.isEmpty() || Collections.disjoint(sourceIds, watermarksBySource.keySet())) {
        return new SparkTimerInternals(BoundedWindow.TIMESTAMP_MIN_VALUE, BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(0));
    }

    // Several streams may feed this one; the earliest watermark among them wins.
    Instant minLow = BoundedWindow.TIMESTAMP_MAX_VALUE;
    Instant minHigh = BoundedWindow.TIMESTAMP_MAX_VALUE;
    // Taken from the first matching source, then verified against every other one.
    Instant sharedProcessingTime = null;
    for (Integer id : sourceIds) {
        final SparkWatermarks current = watermarksBySource.get(id);
        if (current == null) {
            continue;
        }
        final Instant candidateLow = current.getLowWatermark();
        if (!minLow.isBefore(candidateLow)) {
            minLow = candidateLow;
        }
        final Instant candidateHigh = current.getHighWatermark();
        if (!minHigh.isBefore(candidateHigh)) {
            minHigh = candidateHigh;
        }
        if (sharedProcessingTime != null) {
            // Subsequent sources must agree with the first one.
            checkArgument(current.getSynchronizedProcessingTime().equals(sharedProcessingTime), "Synchronized time is expected to keep synchronized across sources.");
        } else {
            // First matching source sets the reference value.
            sharedProcessingTime = current.getSynchronizedProcessingTime();
        }
    }
    return new SparkTimerInternals(minLow, minHigh, sharedProcessingTime);
}
Also used : SparkWatermarks(org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks) Instant(org.joda.time.Instant)

Aggregations

SparkWatermarks (org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks)5 Instant (org.joda.time.Instant)5 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)3 Test (org.junit.Test)3