use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.
the class CreateStream method advance.
/**
 * Queues the watermarks for the next batch and moves the synchronized processing time forward
 * by one batch interval.
 *
 * <p>The queued entry uses the current {@code lowWatermark} as its low watermark and
 * {@code newWatermark} as its high watermark; afterwards {@code newWatermark} becomes the low
 * watermark for the following call.
 *
 * @param newWatermark the high watermark of the entry being queued.
 * @return this instance.
 */
private CreateStream<T> advance(Instant newWatermark) {
    // Continue from the most recently queued entry, or from the initial system time when
    // nothing has been queued yet.
    SparkWatermarks lastQueued = times.peekLast();
    Instant previousProcessingTime;
    if (lastQueued == null) {
        previousProcessingTime = initialSystemTime;
    } else {
        previousProcessingTime = lastQueued.getSynchronizedProcessingTime();
    }

    Instant advancedProcessingTime = previousProcessingTime.plus(batchInterval);
    checkArgument(
        advancedProcessingTime.isAfter(previousProcessingTime),
        "Synchronized processing time must always advance.");

    SparkWatermarks nextEntry =
        new SparkWatermarks(lowWatermark, newWatermark, advancedProcessingTime);
    times.offer(nextEntry);
    lowWatermark = newWatermark;
    return this;
}
use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.
the class GlobalWatermarkHolderTest method testLowHighWatermarksAdvance.
/**
 * Advances the watermarks of a single source twice with {@code low <= high}, verifies the values
 * exposed through the broadcast, and then expects an {@link IllegalStateException} when an
 * entry with {@code low > high} is advanced.
 */
@Test
public void testLowHighWatermarksAdvance() {
    final int sourceId = 1;
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    Instant epoch = new Instant(0);

    // First batch: low and high watermarks coincide.
    Instant firstWatermark = epoch.plus(Duration.millis(5));
    GlobalWatermarkHolder.add(
        sourceId, new SparkWatermarks(firstWatermark, firstWatermark, epoch));
    GlobalWatermarkHolder.advance(jsc);

    // Second batch: low watermark strictly behind the high watermark.
    Instant expectedLow = epoch.plus(Duration.millis(10));
    Instant expectedHigh = epoch.plus(Duration.millis(15));
    Instant expectedProcessingTime = epoch.plus(Duration.millis(100));
    GlobalWatermarkHolder.add(
        sourceId, new SparkWatermarks(expectedLow, expectedHigh, expectedProcessingTime));
    GlobalWatermarkHolder.advance(jsc);

    // The broadcast must now expose the values of the second batch.
    SparkWatermarks broadcasted = GlobalWatermarkHolder.get().getValue().get(sourceId);
    assertThat(broadcasted.getLowWatermark(), equalTo(expectedLow));
    assertThat(broadcasted.getHighWatermark(), equalTo(expectedHigh));
    assertThat(broadcasted.getSynchronizedProcessingTime(), equalTo(expectedProcessingTime));

    // Third batch: low watermark ahead of the high watermark, which must be rejected.
    thrown.expect(IllegalStateException.class);
    thrown.expectMessage(
        RegexMatcher.matches(
            "Low watermark " + INSTANT_PATTERN + " cannot be later then high watermark " + INSTANT_PATTERN));
    Instant illegalLow = epoch.plus(Duration.millis(25));
    Instant illegalHigh = epoch.plus(Duration.millis(20));
    GlobalWatermarkHolder.add(
        sourceId, new SparkWatermarks(illegalLow, illegalHigh, epoch.plus(Duration.millis(200))));
    GlobalWatermarkHolder.advance(jsc);
}
use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.
the class GlobalWatermarkHolderTest method testSynchronizedTimeMonotonic.
/**
 * Adds the same watermarks and the same synchronized processing time twice for one source and
 * expects the second advance to fail with an {@link IllegalStateException}.
 */
@Test
public void testSynchronizedTimeMonotonic() {
    final int sourceId = 1;
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    Instant epoch = new Instant(0);
    Instant low = epoch.plus(Duration.millis(5));
    Instant high = epoch.plus(Duration.millis(10));

    GlobalWatermarkHolder.add(sourceId, new SparkWatermarks(low, high, epoch));
    GlobalWatermarkHolder.advance(jsc);

    thrown.expect(IllegalStateException.class);
    thrown.expectMessage("Synchronized processing time must advance.");

    // Unchanged watermarks are acceptable on their own; the unchanged synchronized
    // processing time is what is expected to trigger the failure.
    GlobalWatermarkHolder.add(sourceId, new SparkWatermarks(low, high, epoch));
    GlobalWatermarkHolder.advance(jsc);
}
use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.
the class GlobalWatermarkHolderTest method testMultiSource.
/**
 * Registers watermarks for two different sources, advances once, and verifies that each source
 * reads back its own low and high watermarks from the broadcast.
 */
@Test
public void testMultiSource() {
    final int firstSourceId = 1;
    final int secondSourceId = 2;
    JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
    Instant epoch = new Instant(0);

    Instant firstLow = epoch.plus(Duration.millis(5));
    Instant firstHigh = epoch.plus(Duration.millis(10));
    Instant secondLow = epoch.plus(Duration.millis(3));
    Instant secondHigh = epoch.plus(Duration.millis(6));

    GlobalWatermarkHolder.add(firstSourceId, new SparkWatermarks(firstLow, firstHigh, epoch));
    GlobalWatermarkHolder.add(secondSourceId, new SparkWatermarks(secondLow, secondHigh, epoch));
    GlobalWatermarkHolder.advance(jsc);

    // Source 1 keeps its own watermarks.
    SparkWatermarks firstBroadcasted = GlobalWatermarkHolder.get().getValue().get(firstSourceId);
    assertThat(firstBroadcasted.getLowWatermark(), equalTo(firstLow));
    assertThat(firstBroadcasted.getHighWatermark(), equalTo(firstHigh));

    // Source 2 keeps its own watermarks.
    SparkWatermarks secondBroadcasted = GlobalWatermarkHolder.get().getValue().get(secondSourceId);
    assertThat(secondBroadcasted.getLowWatermark(), equalTo(secondLow));
    assertThat(secondBroadcasted.getHighWatermark(), equalTo(secondHigh));
}
use of org.apache.beam.runners.spark.util.GlobalWatermarkHolder.SparkWatermarks in project beam by apache.
the class SparkTimerInternals method forStreamFromSources.
/**
 * Creates the {@link TimerInternals} for a stream fed by the given sources.
 *
 * <p>When the broadcast is {@code null}, empty, or holds no entry for any of {@code sourceIds},
 * the result uses {@link BoundedWindow#TIMESTAMP_MIN_VALUE} for both watermarks and
 * {@code new Instant(0)} as the synchronized processing time. Otherwise the earliest low and
 * high watermarks among the matching sources are used, and all matching sources must report
 * the same synchronized processing time.
 *
 * @param sourceIds ids of the sources feeding the stream.
 * @param broadcast watermarks per source id; may be {@code null}.
 * @return timer internals initialised from the matching sources, or from the defaults.
 */
public static SparkTimerInternals forStreamFromSources(List<Integer> sourceIds, @Nullable Broadcast<Map<Integer, SparkWatermarks>> broadcast) {
    if (broadcast != null) {
        Map<Integer, SparkWatermarks> watermarksBySource = broadcast.getValue();
        boolean hasMatchingSource =
            !watermarksBySource.isEmpty()
                && !Collections.disjoint(sourceIds, watermarksBySource.keySet());
        if (hasMatchingSource) {
            // Several streams may feed this one; the earliest watermark of each kind wins.
            Instant minLow = BoundedWindow.TIMESTAMP_MAX_VALUE;
            Instant minHigh = BoundedWindow.TIMESTAMP_MAX_VALUE;
            // Taken from the first matching source and compared against all later ones.
            Instant sharedProcessingTime = null;

            for (Integer sourceId : sourceIds) {
                SparkWatermarks current = watermarksBySource.get(sourceId);
                if (current == null) {
                    continue;
                }

                Instant low = current.getLowWatermark();
                if (!minLow.isBefore(low)) {
                    minLow = low;
                }
                Instant high = current.getHighWatermark();
                if (!minHigh.isBefore(high)) {
                    minHigh = high;
                }

                Instant processingTime = current.getSynchronizedProcessingTime();
                if (sharedProcessingTime == null) {
                    sharedProcessingTime = processingTime;
                } else {
                    checkArgument(
                        processingTime.equals(sharedProcessingTime),
                        "Synchronized time is expected to keep synchronized across sources.");
                }
            }
            return new SparkTimerInternals(minLow, minHigh, sharedProcessingTime);
        }
    }

    // No usable broadcast for these source ids: fall back to the defaults.
    return new SparkTimerInternals(
        BoundedWindow.TIMESTAMP_MIN_VALUE, BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(0));
}
Aggregations