use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.
the class StatefulDoFnRunnerTest method advanceInputWatermark.
/**
 * Advances the input watermark of {@code timerInternals} to {@code newInputWatermark} and then
 * delivers every event-time timer handed back by {@code removeNextEventTimer()} to
 * {@code toTrigger}, until that method returns {@code null}.
 *
 * <p>Each delivered timer must live in a {@code StateNamespaces.WindowNamespace}; the window of
 * that namespace is passed on to {@code onTimer}.
 *
 * @param timerInternals in-memory timer store whose watermark is advanced and drained
 * @param newInputWatermark the watermark to advance to
 * @param toTrigger runner that receives each removed timer via {@code onTimer}
 * @throws Exception declared by the signature; propagated from the calls made here
 */
private static void advanceInputWatermark(
    InMemoryTimerInternals timerInternals, Instant newInputWatermark, DoFnRunner<?, ?> toTrigger)
    throws Exception {
  timerInternals.advanceInputWatermark(newInputWatermark);

  for (TimerInternals.TimerData fired = timerInternals.removeNextEventTimer();
      fired != null;
      fired = timerInternals.removeNextEventTimer()) {
    StateNamespace firedNamespace = fired.getNamespace();
    // Only window-scoped timers are supported here; anything else is rejected.
    checkArgument(firedNamespace instanceof StateNamespaces.WindowNamespace);
    BoundedWindow firedWindow = ((StateNamespaces.WindowNamespace) firedNamespace).getWindow();
    toTrigger.onTimer(fired.getTimerId(), firedWindow, fired.getTimestamp(), fired.getDomain());
  }
}
use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.
the class GroupIntoBatchesTest method testInStreamingMode.
/**
 * Runs {@code GroupIntoBatches} on a {@code TestStream} split into fixed windows and verifies
 * both the number of batches emitted per window and the size of each batch.
 *
 * <p>One element of {@code data} is added per {@code timestampInterval} seconds starting at
 * instant 0. The watermark is then advanced in steps: to one second before the window duration,
 * to one second after it, to {@code NUM_ELEMENTS} seconds, and finally to infinity.
 */
@Test
@Category({NeedsRunner.class, UsesTimersInParDo.class, UsesTestStream.class, UsesStatefulParDo.class})
public void testInStreamingMode() {
  int timestampInterval = 1;
  Instant startInstant = new Instant(0L);
  TestStream.Builder<KV<String, String>> streamBuilder =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
          .advanceWatermarkTo(startInstant);
  long offset = 0L;
  for (KV<String, String> element : data) {
    streamBuilder =
        streamBuilder.addElements(
            TimestampedValue.of(
                element,
                startInstant.plus(Duration.standardSeconds(offset * timestampInterval))));
    offset++;
  }
  final long windowDuration = 6;
  TestStream<KV<String, String>> stream =
      streamBuilder
          .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration - 1)))
          .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration + 1)))
          .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(NUM_ELEMENTS)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, String>> inputCollection =
      pipeline
          .apply(stream)
          .apply(
              Window.<KV<String, String>>into(
                      FixedWindows.of(Duration.standardSeconds(windowDuration)))
                  .withAllowedLateness(Duration.millis(ALLOWED_LATENESS)));

  // Side branch used purely for debugging: logs every element with its timestamp and window.
  inputCollection.apply(
      ParDo.of(
          new DoFn<KV<String, String>, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c, BoundedWindow window) {
              // SLF4J substitutes only "{}" placeholders; "%s" would be printed literally and
              // the timestamp/window arguments silently dropped.
              LOG.debug(
                  "*** ELEMENT: ({},{}) *** with timestamp {} in window {}",
                  c.element().getKey(),
                  c.element().getValue(),
                  c.timestamp().toString(),
                  window.toString());
            }
          }));

  PCollection<KV<String, Iterable<String>>> outputCollection =
      inputCollection
          .apply(GroupIntoBatches.<String, String>ofSize(BATCH_SIZE))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())));

  // elements have the same key and collection is divided into windows,
  // so Count.perKey values are the number of elements in windows
  PCollection<KV<String, Long>> countOutput =
      outputCollection.apply(
          "Count elements in windows after applying GroupIntoBatches",
          Count.<String, Iterable<String>>perKey());

  // NOTE(review): both checks below read results positionally from the iterator and therefore
  // depend on the iteration order of the asserted collection - confirm this is guaranteed.
  PAssert.that("Wrong number of elements in windows after GroupIntoBatches", countOutput)
      .satisfies(
          new SerializableFunction<Iterable<KV<String, Long>>, Void>() {
            @Override
            public Void apply(Iterable<KV<String, Long>> input) {
              Iterator<KV<String, Long>> inputIterator = input.iterator();
              // first element
              long count0 = inputIterator.next().getValue();
              // window duration is 6 and batch size is 5, so there should be 2 elements in the
              // window (flush because batchSize reached and for end of window reached)
              assertEquals("Wrong number of elements in first window", 2, count0);
              // second element
              long count1 = inputIterator.next().getValue();
              // collection is 10 elements, there is only 4 elements left, so there should be
              // only one element in the window (flush because end of window/collection reached)
              assertEquals("Wrong number of elements in second window", 1, count1);
              return null;
            }
          });

  PAssert.that("Incorrect output collection after GroupIntoBatches", outputCollection)
      .satisfies(
          new SerializableFunction<Iterable<KV<String, Iterable<String>>>, Void>() {
            @Override
            public Void apply(Iterable<KV<String, Iterable<String>>> input) {
              Iterator<KV<String, Iterable<String>>> inputIterator = input.iterator();
              // first element
              int size0 = Iterables.size(inputIterator.next().getValue());
              // window duration is 6 and batch size is 5, so output batch size should be 5
              // (flush because of batchSize reached)
              assertEquals("Wrong first element batch Size", 5, size0);
              // second element
              int size1 = Iterables.size(inputIterator.next().getValue());
              // there is only one element left in the window so batch size should be 1
              // (flush because of end of window reached)
              assertEquals("Wrong second element batch Size", 1, size1);
              // third element
              int size2 = Iterables.size(inputIterator.next().getValue());
              // collection is 10 elements, there is only 4 left, so batch size should be 4
              // (flush because end of collection reached)
              assertEquals("Wrong third element batch Size", 4, size2);
              return null;
            }
          });

  pipeline.run().waitUntilFinish();
}
use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.
the class ParDoTest method testOutOfBoundsEventTimeTimer.
/**
 * Sets an event-time timer one millisecond past the window's maximum timestamp and expects
 * running the pipeline to fail with a {@link RuntimeException} whose message mentions both
 * "event time timer" and "expiration".
 */
@Test
@Category({ValidatesRunner.class, UsesTimersInParDo.class})
public void testOutOfBoundsEventTimeTimer() throws Exception {
  final String timerId = "foo";

  DoFn<KV<String, Integer>, Integer> lateTimerFn =
      new DoFn<KV<String, Integer>, Integer>() {
        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void processElement(
            ProcessContext context, BoundedWindow window, @TimerId(timerId) Timer timer) {
          // Deliberately one past the last timestamp that belongs to the window.
          timer.set(window.maxTimestamp().plus(1L));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context) {
          // Intentionally empty.
        }
      };

  PCollection<KV<String, Integer>> singleElement = pipeline.apply(Create.of(KV.of("hello", 37)));
  singleElement.apply(ParDo.of(lateTimerFn));

  thrown.expect(RuntimeException.class);
  // Note that runners can reasonably vary their message - this matcher should be flexible
  // and can be evolved.
  thrown.expectMessage("event time timer");
  thrown.expectMessage("expiration");

  pipeline.run();
}
use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.
the class ParDoTest method testEventTimeTimerMultipleKeys.
/**
 * Tests that event time timers for multiple keys both fire. This particularly exercises
 * implementations that may GC in ways not simply governed by the watermark.
 *
 * <p>Every input element is re-emitted with {@code offset} added to its value, and each key's
 * timer (set to the window's maximum timestamp) emits one extra {@code timerOutput} record
 * keyed by the value read back from state.
 */
@Test
@Category({ValidatesRunner.class, UsesTimersInParDo.class})
public void testEventTimeTimerMultipleKeys() throws Exception {
  final String timerId = "foo";
  final String stateId = "sizzle";
  final int offset = 5000;
  final int timerOutput = 4093;

  DoFn<KV<String, Integer>, KV<String, Integer>> fn =
      new DoFn<KV<String, Integer>, KV<String, Integer>>() {
        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @TimerId(timerId) Timer timer,
            @StateId(stateId) ValueState<String> state,
            BoundedWindow window) {
          timer.set(window.maxTimestamp());
          // Remember the key so the timer callback can emit it.
          state.write(context.element().getKey());
          context.output(
              KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
          context.output(KV.of(state.read(), timerOutput));
        }
      };

  // Enough keys that we exercise interesting code paths
  int numKeys = 50;
  List<KV<String, Integer>> input = new ArrayList<>();
  List<KV<String, Integer>> expectedOutput = new ArrayList<>();
  // Primitive counter: a boxed Integer here would be unboxed and re-boxed on every iteration.
  for (int key = 0; key < numKeys; ++key) {
    String keyString = Integer.toString(key);
    // Each key should have just one final output at GC time
    expectedOutput.add(KV.of(keyString, timerOutput));
    for (int i = 0; i < 15; ++i) {
      // Each input should be output with the offset added
      input.add(KV.of(keyString, i));
      expectedOutput.add(KV.of(keyString, i + offset));
    }
  }
  // Randomize arrival order so keys are interleaved.
  Collections.shuffle(input);

  PCollection<KV<String, Integer>> output = pipeline.apply(Create.of(input)).apply(ParDo.of(fn));
  PAssert.that(output).containsInAnyOrder(expectedOutput);
  pipeline.run();
}
use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.
the class DoFnTesterTest method testSupportsWindowParameter.
/**
 * Feeds three elements into a {@code DoFnTester} wrapping {@code DoFnWithWindowParameter}, two
 * in a one-minute window and one in a four-minute window, and checks that the outputs peeked
 * per window pair each element with the window it was processed in.
 */
@Test
public void testSupportsWindowParameter() throws Exception {
  Instant timestamp = Instant.now();
  try (DoFnTester<Integer, KV<Integer, BoundedWindow>> fnTester =
      DoFnTester.of(new DoFnWithWindowParameter())) {
    // Both windows start at the same instant and differ only in length.
    BoundedWindow oneMinuteWindow =
        new IntervalWindow(timestamp, timestamp.plus(Duration.standardMinutes(1)));
    BoundedWindow fourMinuteWindow =
        new IntervalWindow(timestamp, timestamp.plus(Duration.standardMinutes(4)));

    fnTester.processWindowedElement(1, timestamp, oneMinuteWindow);
    fnTester.processWindowedElement(2, timestamp, oneMinuteWindow);
    fnTester.processWindowedElement(3, timestamp, fourMinuteWindow);
    fnTester.finishBundle();

    TimestampedValue<KV<Integer, BoundedWindow>> expectedOne =
        TimestampedValue.of(KV.of(1, oneMinuteWindow), timestamp);
    TimestampedValue<KV<Integer, BoundedWindow>> expectedTwo =
        TimestampedValue.of(KV.of(2, oneMinuteWindow), timestamp);
    TimestampedValue<KV<Integer, BoundedWindow>> expectedThree =
        TimestampedValue.of(KV.of(3, fourMinuteWindow), timestamp);

    assertThat(
        fnTester.peekOutputElementsInWindow(oneMinuteWindow),
        containsInAnyOrder(expectedOne, expectedTwo));
    assertThat(
        fnTester.peekOutputElementsInWindow(fourMinuteWindow),
        containsInAnyOrder(expectedThree));
  }
}
Aggregations