Search in sources :

Example 1 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class StatefulDoFnRunnerTest method testGarbageCollect.

@Test
public void testGarbageCollect() throws Exception {
    // Put the input watermark at the timestamp used by the first element.
    timerInternals.advanceInputWatermark(new Instant(1L));

    MyDoFn fn = new MyDoFn();
    StateTag<ValueState<Integer>> stateTag = StateTags.tagForSpec(fn.stateId, fn.intState);
    DoFnRunner<KV<String, Integer>, Integer> runner =
        DoFnRunners.defaultStatefulDoFnRunner(
            fn,
            getDoFnRunner(fn),
            WINDOWING_STRATEGY,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, WINDOWING_STRATEGY),
            new StatefulDoFnRunner.StateInternalsStateCleaner<>(
                fn, stateInternals, (Coder) WINDOWING_STRATEGY.getWindowFn().windowCoder()));

    Instant firstWindowTime = new Instant(1);
    Instant secondWindowTime = firstWindowTime.plus(WINDOW_SIZE);

    // One element for key "hello" in WINDOW_1: the state for that window must read 1.
    runner.processElement(
        WindowedValue.of(KV.of("hello", 1), firstWindowTime, WINDOW_1, PaneInfo.NO_FIRING));
    ValueState<Integer> firstWindowState =
        stateInternals.state(windowNamespace(WINDOW_1), stateTag);
    assertEquals(1, (int) firstWindowState.read());

    // Two elements for key "hello" in WINDOW_2: the state for that window must read 2.
    for (int i = 0; i < 2; i++) {
        runner.processElement(
            WindowedValue.of(KV.of("hello", 1), secondWindowTime, WINDOW_2, PaneInfo.NO_FIRING));
    }
    ValueState<Integer> secondWindowState =
        stateInternals.state(windowNamespace(WINDOW_2), stateTag);
    assertEquals(2, (int) secondWindowState.read());

    // Move the watermark beyond the end of WINDOW_1 plus the allowed lateness.
    // The cleanup timer is set to window.maxTimestamp() + allowed lateness + 1 so that
    // state is still available when a user timer for window.maxTimestamp() fires;
    // the extra plus(1) puts the watermark past the GC horizon rather than on it.
    Instant pastFirstGcHorizon =
        WINDOW_1
            .maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastFirstGcHorizon, runner);

    // WINDOW_1 state is gone, WINDOW_2 state is untouched. The cells are looked up
    // again here on purpose instead of reusing the references obtained above.
    assertTrue(
        stateInternals.isEmptyForTesting(
            stateInternals.state(windowNamespace(WINDOW_1), stateTag)));
    assertEquals(2, (int) stateInternals.state(windowNamespace(WINDOW_2), stateTag).read());

    // Move the watermark beyond the end of WINDOW_2 plus the allowed lateness,
    // again one past the GC horizon.
    Instant pastSecondGcHorizon =
        WINDOW_2
            .maxTimestamp()
            .plus(ALLOWED_LATENESS)
            .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
            .plus(1);
    advanceInputWatermark(timerInternals, pastSecondGcHorizon, runner);

    assertTrue(
        stateInternals.isEmptyForTesting(
            stateInternals.state(windowNamespace(WINDOW_2), stateTag)));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Test(org.junit.Test)

Example 2 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testValueStateFixedWindows.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateFixedWindows() {
    final String stateId = "foo";

    // Outputs the integer currently held in state (0 when nothing is stored yet)
    // and then stores that integer plus one.
    DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
            Integer stored = state.read();
            int seenSoFar = (stored == null) ? 0 : stored;
            c.output(seenSoFar);
            state.write(seenSoFar + 1);
        }
    };

    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(20));

    // Three elements timestamped inside [0, 10) and two inside [10, 20), all for key "hello".
    PCollection<KV<String, Integer>> timestamped =
        pipeline.apply(
            Create.timestamped(
                // first window
                TimestampedValue.of(KV.of("hello", 7), new Instant(1)),
                TimestampedValue.of(KV.of("hello", 14), new Instant(2)),
                TimestampedValue.of(KV.of("hello", 21), new Instant(3)),
                // second window
                TimestampedValue.of(KV.of("hello", 28), new Instant(11)),
                TimestampedValue.of(KV.of("hello", 35), new Instant(13))));
    PCollection<KV<String, Integer>> windowed =
        timestamped.apply(
            Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    PCollection<Integer> output = windowed.apply("Stateful ParDo", ParDo.of(fn));

    // The expected counts restart from 0 in each window.
    PAssert.that(output).inWindow(firstWindow).containsInAnyOrder(0, 1, 2);
    PAssert.that(output).inWindow(secondWindow).containsInAnyOrder(0, 1);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testValueStateCoderInference.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateCoderInference() {
    final String stateId = "foo";

    // Register the coder for MyInteger; the state spec below is declared without one.
    MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

    // Outputs the MyInteger currently held in state (MyInteger(0) when nothing is
    // stored yet) and then stores a MyInteger whose value is one greater.
    DoFn<KV<String, Integer>, MyInteger> fn = new DoFn<KV<String, Integer>, MyInteger>() {

        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<MyInteger> state) {
            MyInteger stored = state.read();
            MyInteger current = (stored != null) ? stored : new MyInteger(0);
            c.output(current);
            state.write(new MyInteger(current.getValue() + 1));
        }
    };

    PCollection<KV<String, Integer>> input =
        pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
    PCollection<MyInteger> output = input.apply(ParDo.of(fn)).setCoder(myIntegerCoder);

    PAssert.that(output)
        .containsInAnyOrder(new MyInteger(0), new MyInteger(1), new MyInteger(2));
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testEventTimeTimerMultipleKeys.

/**
 * Tests that event time timers for multiple keys both fire. This particularly exercises
 * implementations that may GC in ways not simply governed by the watermark.
 */
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testEventTimeTimerMultipleKeys() throws Exception {
    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;

    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            KV<String, Integer> element = context.element();
            // Set the timer to the window's max timestamp and remember the key so
            // that onTimer can emit it.
            timer.set(window.maxTimestamp());
            state.write(element.getKey());
            context.output(KV.of(element.getKey(), element.getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            // Emit the key stored by processElement paired with the marker value.
            context.output(KV.of(state.read(), timerOutput));
        }
    };

    // Enough keys that we exercise interesting code paths
    int numKeys = 50;
    int elementsPerKey = 15;
    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    for (int key = 0; key < numKeys; ++key) {
        String keyName = Integer.toString(key);
        // Each key should have just one final output at GC time
        expectedOutput.add(KV.of(keyName, timerOutput));
        for (int value = 0; value < elementsPerKey; ++value) {
            // Each input should be output with the offset added
            input.add(KV.of(keyName, value));
            expectedOutput.add(KV.of(keyName, value + offset));
        }
    }
    Collections.shuffle(input);

    PCollection<KV<String, Integer>> keyedInput = pipeline.apply(Create.of(input));
    PCollection<KV<String, Integer>> output = keyedInput.apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);
    pipeline.run();
}
Also used : ArrayList(java.util.ArrayList) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) OnTimer(org.apache.beam.sdk.transforms.DoFn.OnTimer) Timer(org.apache.beam.sdk.state.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 5 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class SideInputHandler method addSideInputValue.

/**
 * Stores the given value in the internal side-input store for the given side input.
 *
 * <p>For every window of {@code value}, the reified elements are written to that window's
 * contents state and the window is added to the side input's available-windows state in the
 * global namespace. This might change the result of
 * {@link #isReady(PCollectionView, BoundedWindow)} for that side input.
 *
 * @param sideInput the view whose store receives the value
 * @param value the windowed iterable of elements to store
 */
public void addSideInputValue(PCollectionView<?> sideInput, WindowedValue<Iterable<?>> value) {
    @SuppressWarnings("unchecked")
    Coder<BoundedWindow> windowCoder =
        (Coder<BoundedWindow>)
            sideInput.getWindowingStrategyInternal().getWindowFn().windowCoder();

    // Reify: wrap each element via value.withValue(...) so it is stored as a WindowedValue.
    List<WindowedValue<?>> reifiedElements = new ArrayList<>();
    for (Object element : value.getValue()) {
        reifiedElements.add(value.withValue(element));
    }

    StateTag<ValueState<Iterable<WindowedValue<?>>>> contentsTag =
        sideInputContentsTags.get(sideInput);

    for (BoundedWindow window : value.getWindows()) {
        // Write the same reified list into this window's contents cell.
        ValueState<Iterable<WindowedValue<?>>> windowContents =
            stateInternals.state(StateNamespaces.window(windowCoder, window), contentsTag);
        windowContents.write(reifiedElements);

        // Record the window in the side input's available-windows state.
        stateInternals
            .state(StateNamespaces.global(), availableWindowsTags.get(sideInput))
            .add(window);
    }
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) SetCoder(org.apache.beam.sdk.coders.SetCoder) ValueState(org.apache.beam.sdk.state.ValueState) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ArrayList(java.util.ArrayList) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow)

Aggregations

ValueState (org.apache.beam.sdk.state.ValueState)18 Test (org.junit.Test)17 KV (org.apache.beam.sdk.values.KV)15 StateSpec (org.apache.beam.sdk.state.StateSpec)12 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)10 Matchers.containsString (org.hamcrest.Matchers.containsString)10 Category (org.junit.experimental.categories.Category)10 ArrayList (java.util.ArrayList)5 TupleTag (org.apache.beam.sdk.values.TupleTag)5 Instant (org.joda.time.Instant)5 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)3 ImmutableList (com.google.common.collect.ImmutableList)2 List (java.util.List)2 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)2 StateNamespace (org.apache.beam.runners.core.StateNamespace)2 StatefulParDo (org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo)2 Coder (org.apache.beam.sdk.coders.Coder)2 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)2