Search in sources :

Example 1 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class StatefulDoFnRunnerTest method advanceInputWatermark.

/**
 * Moves the input watermark of {@code timerInternals} forward to {@code newInputWatermark} and
 * forwards every timer subsequently handed back by {@code removeNextEventTimer()} to
 * {@code toTrigger}.
 *
 * <p>Each returned timer must be registered in a {@link StateNamespaces.WindowNamespace};
 * otherwise the {@code checkArgument} precondition fails.
 *
 * @param timerInternals in-memory timers whose watermark is advanced and then drained
 * @param newInputWatermark the watermark value to advance to
 * @param toTrigger runner that receives an {@code onTimer} call per drained timer
 * @throws Exception if advancing the watermark or delivering a timer fails
 */
private static void advanceInputWatermark(InMemoryTimerInternals timerInternals, Instant newInputWatermark, DoFnRunner<?, ?> toTrigger) throws Exception {
    timerInternals.advanceInputWatermark(newInputWatermark);
    // Drain until removeNextEventTimer() reports that nothing is left by returning null.
    for (TimerInternals.TimerData fired = timerInternals.removeNextEventTimer();
            fired != null;
            fired = timerInternals.removeNextEventTimer()) {
        StateNamespace firedNamespace = fired.getNamespace();
        checkArgument(firedNamespace instanceof StateNamespaces.WindowNamespace);
        StateNamespaces.WindowNamespace windowNamespace = (StateNamespaces.WindowNamespace) firedNamespace;
        BoundedWindow firedWindow = windowNamespace.getWindow();
        toTrigger.onTimer(fired.getTimerId(), firedWindow, fired.getTimestamp(), fired.getDomain());
    }
}
Also used : BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow)

Example 2 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class GroupIntoBatchesTest method testInStreamingMode.

/**
 * Feeds {@code data} through a {@link TestStream} with one element per {@code timestampInterval}
 * seconds, windows it into fixed windows of {@code windowDuration} seconds, applies
 * {@link GroupIntoBatches} and checks both how many batches are emitted per window and how large
 * each batch is.
 */
@Test
@Category({ NeedsRunner.class, UsesTimersInParDo.class, UsesTestStream.class, UsesStatefulParDo.class })
public void testInStreamingMode() {
    int timestampInterval = 1;
    Instant startInstant = new Instant(0L);
    TestStream.Builder<KV<String, String>> streamBuilder = TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).advanceWatermarkTo(startInstant);
    long offset = 0L;
    for (KV<String, String> element : data) {
        streamBuilder = streamBuilder.addElements(TimestampedValue.of(element, startInstant.plus(Duration.standardSeconds(offset * timestampInterval))));
        offset++;
    }
    final long windowDuration = 6;
    // Advance the watermark to just before the end of the first window, then just past it,
    // then to NUM_ELEMENTS seconds, and finally to infinity.
    TestStream<KV<String, String>> stream = streamBuilder
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration - 1)))
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration + 1)))
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(NUM_ELEMENTS)))
        .advanceWatermarkToInfinity();
    PCollection<KV<String, String>> inputCollection = pipeline.apply(stream).apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(windowDuration))).withAllowedLateness(Duration.millis(ALLOWED_LATENESS)));
    // Debug-only tap that logs every element together with its timestamp and window.
    inputCollection.apply(ParDo.of(new DoFn<KV<String, String>, Void>() {

        @ProcessElement
        public void processElement(ProcessContext c, BoundedWindow window) {
            // All four placeholders use the "{}" syntax; a printf-style "%s" is not substituted
            // by the logger and would leave the timestamp and window out of the message.
            LOG.debug("*** ELEMENT: ({},{}) *** with timestamp {} in window {}", c.element().getKey(), c.element().getValue(), c.timestamp(), window);
        }
    }));
    PCollection<KV<String, Iterable<String>>> outputCollection = inputCollection.apply(GroupIntoBatches.<String, String>ofSize(BATCH_SIZE)).setCoder(KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())));
    // elements have the same key and collection is divided into windows,
    // so Count.perKey values are the number of elements in windows
    PCollection<KV<String, Long>> countOutput = outputCollection.apply("Count elements in windows after applying GroupIntoBatches", Count.<String, Iterable<String>>perKey());
    // NOTE(review): both assertions below read results positionally from the iterator, so they
    // depend on the iteration order of the PAssert input - confirm that order is guaranteed.
    PAssert.that("Wrong number of elements in windows after GroupIntoBatches", countOutput).satisfies(new SerializableFunction<Iterable<KV<String, Long>>, Void>() {

        @Override
        public Void apply(Iterable<KV<String, Long>> input) {
            Iterator<KV<String, Long>> inputIterator = input.iterator();
            // first element
            long count0 = inputIterator.next().getValue();
            // window duration is 6 and batch size is 5, so there should be 2 elements in the
            // window (flush because batchSize reached and for end of window reached)
            assertEquals("Wrong number of elements in first window", 2, count0);
            // second element
            long count1 = inputIterator.next().getValue();
            // collection is 10 elements, there is only 4 elements left, so there should be only
            // one element in the window (flush because end of window/collection reached)
            assertEquals("Wrong number of elements in second window", 1, count1);
            return null;
        }
    });
    PAssert.that("Incorrect output collection after GroupIntoBatches", outputCollection).satisfies(new SerializableFunction<Iterable<KV<String, Iterable<String>>>, Void>() {

        @Override
        public Void apply(Iterable<KV<String, Iterable<String>>> input) {
            Iterator<KV<String, Iterable<String>>> inputIterator = input.iterator();
            // first element
            int size0 = Iterables.size(inputIterator.next().getValue());
            // window duration is 6 and batch size is 5, so output batch size should be 5
            // (flush because of batchSize reached)
            assertEquals("Wrong first element batch Size", 5, size0);
            // second element
            int size1 = Iterables.size(inputIterator.next().getValue());
            // there is only one element left in the window so batch size should be 1
            // (flush because of end of window reached)
            assertEquals("Wrong second element batch Size", 1, size1);
            // third element
            int size2 = Iterables.size(inputIterator.next().getValue());
            // collection is 10 elements, there is only 4 left, so batch size should be 4
            // (flush because end of collection reached)
            assertEquals("Wrong third element batch Size", 4, size2);
            return null;
        }
    });
    pipeline.run().waitUntilFinish();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Iterator(java.util.Iterator) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) UsesTestStream(org.apache.beam.sdk.testing.UsesTestStream) TestStream(org.apache.beam.sdk.testing.TestStream) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class ParDoTest method testOutOfBoundsEventTimeTimer.

/**
 * Builds a pipeline whose {@link DoFn} sets its timer to {@code window.maxTimestamp().plus(1L)}
 * and expects running it to fail with a {@link RuntimeException} whose message mentions both
 * "event time timer" and "expiration".
 */
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testOutOfBoundsEventTimeTimer() throws Exception {
    final String timerName = "foo";
    DoFn<KV<String, Integer>, Integer> outOfBoundsTimerFn = new DoFn<KV<String, Integer>, Integer>() {

        @TimerId(timerName)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void processElement(ProcessContext context, BoundedWindow window, @TimerId(timerName) Timer timer) {
            // Deliberately schedule the timer beyond the window's maximum timestamp.
            Instant beyondWindowEnd = window.maxTimestamp().plus(1L);
            timer.set(beyondWindowEnd);
        }

        @OnTimer(timerName)
        public void onTimer(OnTimerContext context) {
            // Intentionally empty: the test only cares about the failure when running the pipeline.
        }
    };
    PCollection<KV<String, Integer>> singleElement = pipeline.apply(Create.of(KV.of("hello", 37)));
    singleElement.apply(ParDo.of(outOfBoundsTimerFn));

    // Runners may word the failure differently, so the expected fragments are kept loose
    // and can be adjusted over time.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("event time timer");
    thrown.expectMessage("expiration");
    pipeline.run();
}
Also used : OnTimer(org.apache.beam.sdk.transforms.DoFn.OnTimer) Timer(org.apache.beam.sdk.state.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class ParDoTest method testEventTimeTimerMultipleKeys.

/**
 * Verifies that event time timers fire for every key when many keys are in play. The scenario is
 * aimed at implementations whose garbage collection is not driven purely by the watermark.
 *
 * <p>Each input value is re-emitted with {@code offset} added, and each key additionally emits
 * {@code timerOutput} once from its timer callback.
 */
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testEventTimeTimerMultipleKeys() throws Exception {
    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            KV<String, Integer> element = context.element();
            String elementKey = element.getKey();
            timer.set(window.maxTimestamp());
            // Remember the key so the timer callback can emit it later.
            state.write(elementKey);
            context.output(KV.of(elementKey, element.getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            String storedKey = state.read();
            context.output(KV.of(storedKey, timerOutput));
        }
    };

    // Use enough keys to reach the interesting code paths.
    int numKeys = 50;
    int valuesPerKey = 15;
    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    for (int keyIndex = 0; keyIndex < numKeys; keyIndex++) {
        String key = Integer.toString(keyIndex);
        // One final output per key, produced by the timer at GC time.
        expectedOutput.add(KV.of(key, timerOutput));
        for (int value = 0; value < valuesPerKey; value++) {
            // Every input value comes back out shifted by the offset.
            input.add(KV.of(key, value));
            expectedOutput.add(KV.of(key, value + offset));
        }
    }
    Collections.shuffle(input);

    PCollection<KV<String, Integer>> created = pipeline.apply(Create.of(input));
    PCollection<KV<String, Integer>> output = created.apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);
    pipeline.run();
}
Also used : ArrayList(java.util.ArrayList) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) OnTimer(org.apache.beam.sdk.transforms.DoFn.OnTimer) Timer(org.apache.beam.sdk.state.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 5 with BoundedWindow

use of org.apache.beam.sdk.transforms.windowing.BoundedWindow in project beam by apache.

the class DoFnTesterTest method testSupportsWindowParameter.

/**
 * Processes elements in two different {@link IntervalWindow}s through a {@link DoFnTester}
 * wrapping {@code DoFnWithWindowParameter} and checks that the outputs peeked for each window
 * pair every element with the window it was processed in.
 */
@Test
public void testSupportsWindowParameter() throws Exception {
    Instant timestamp = Instant.now();
    try (DoFnTester<Integer, KV<Integer, BoundedWindow>> fnTester = DoFnTester.of(new DoFnWithWindowParameter())) {
        // Two windows that start at the same instant but differ in length.
        Instant oneMinuteLater = timestamp.plus(Duration.standardMinutes(1));
        BoundedWindow shortWindow = new IntervalWindow(timestamp, oneMinuteLater);
        fnTester.processWindowedElement(1, timestamp, shortWindow);
        fnTester.processWindowedElement(2, timestamp, shortWindow);

        Instant fourMinutesLater = timestamp.plus(Duration.standardMinutes(4));
        BoundedWindow longWindow = new IntervalWindow(timestamp, fourMinutesLater);
        fnTester.processWindowedElement(3, timestamp, longWindow);
        fnTester.finishBundle();

        assertThat(
            fnTester.peekOutputElementsInWindow(shortWindow),
            containsInAnyOrder(
                TimestampedValue.of(KV.of(1, shortWindow), timestamp),
                TimestampedValue.of(KV.of(2, shortWindow), timestamp)));
        assertThat(
            fnTester.peekOutputElementsInWindow(longWindow),
            containsInAnyOrder(TimestampedValue.of(KV.of(3, longWindow), timestamp)));
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Instant(org.joda.time.Instant) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 54, Instant (org.joda.time.Instant): 27, Test (org.junit.Test): 26, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 21, KV (org.apache.beam.sdk.values.KV): 20, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 14, ArrayList (java.util.ArrayList): 7, TimerSpec (org.apache.beam.sdk.state.TimerSpec): 7, Timer (org.apache.beam.sdk.state.Timer): 6, Matchers.containsString (org.hamcrest.Matchers.containsString): 6, DoFn (org.apache.beam.sdk.transforms.DoFn): 5, StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 5, ImmutableList (com.google.common.collect.ImmutableList): 4, List (java.util.List): 4, ValueState (org.apache.beam.sdk.state.ValueState): 4, OnTimer (org.apache.beam.sdk.transforms.DoFn.OnTimer): 4, TimestampCombiner (org.apache.beam.sdk.transforms.windowing.TimestampCombiner): 4, PCollection (org.apache.beam.sdk.values.PCollection): 4, TupleTag (org.apache.beam.sdk.values.TupleTag): 4, Duration (org.joda.time.Duration): 4