Search in sources:

Example 16 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

In class DoFnOperatorTest, method testStateGCForStatefulFn:

/**
 * Runs a stateful, timer-using {@link DoFn} inside a {@link DoFnOperator} and checks that the keyed
 * state reported by the test harness drops back to zero after the watermark is advanced to the end
 * of the window plus allowed lateness plus the runner's GC delay.
 *
 * <p>The same watermark advance is also expected to fire the user timer set at
 * {@code window.maxTimestamp()}, so the timer output (which reads the state) must be produced
 * before the state is cleaned up.
 *
 * @throws Exception if the test harness fails to open, process input, or close
 */
@Test
public void testStateGCForStatefulFn() throws Exception {
    // 10 ms fixed windows with no allowed lateness; written with Duration.millis to match window1 below.
    WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10))).withAllowedLateness(Duration.ZERO);
    final String timerId = "boo";
    final String stateId = "dazzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            // Arm the user timer at the end of the window and remember the key so onTimer can emit it.
            timer.set(window.maxTimestamp());
            state.write(context.element().getKey());
            context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            // Reads the state written in processElement; this only works if the state is still present.
            context.output(KV.of(state.read(), timerOutput));
        }
    };
    WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder = WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), windowingStrategy.getWindowFn().windowCoder());
    TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
    DoFnOperator<KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(),
    windowingStrategy,
    new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
    Collections.<PCollectionView<?>>emptyList(), /* side inputs */
    PipelineOptionsFactory.as(FlinkPipelineOptions.class), StringUtf8Coder.of());
    KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<KV<String, Integer>>, String>() {

        @Override
        public String getKey(WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
            return kvWindowedValue.getValue().getKey();
        }
    }, new CoderTypeInformation<>(StringUtf8Coder.of()));
    testHarness.open();
    // Close the harness even when an assertion below fails, so the opened operator is not leaked.
    try {
        testHarness.processWatermark(0);
        // nothing has been processed yet, so no keyed state is expected
        assertEquals(0, testHarness.numKeyedStateEntries());
        IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
        testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
        testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
        // processElement output: each value shifted by offset, timestamps unchanged
        assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
        // one element per key has been processed; the harness now reports two keyed state entries
        assertEquals(2, testHarness.numKeyedStateEntries());
        testHarness.getOutput().clear();
        // this should trigger both the window.maxTimestamp() timer and the GC timer
        // this tests that the GC timer fires after the user timer
        testHarness.processWatermark(window1.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS).getMillis());
        // onTimer output: the key read back from state, stamped at the window's max timestamp (9)
        assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
        // ensure the state was garbage collected
        assertEquals(0, testHarness.numKeyedStateEntries());
    } finally {
        testHarness.close();
    }
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) KV(org.apache.beam.sdk.values.KV) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) Test(org.junit.Test)

Example 17 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

In class ParDoTest, method testValueStateSameId:

/**
 * Checks that two back-to-back stateful {@link ParDo} transforms which declare the same state id
 * do not see each other's state, regardless of whether runner optimizations end up executing
 * them in similar contexts.
 *
 * <p>The first transform counts up from 0 and re-keys its output to {@code "sizzle"}; the second
 * starts at 13 and advances by 13. Each expected output sequence is only possible if the two
 * transforms keep independent values.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateSameId() {
    final String stateId = "foo";

    // Stage one: emit the running count (initially 0) under the key "sizzle", then bump it by 1.
    DoFn<KV<String, Integer>, KV<String, Integer>> countingFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext ctx, @StateId(stateId) ValueState<Integer> counter) {
            Integer stored = counter.read();
            int seen = (stored == null) ? 0 : stored;
            ctx.output(KV.of("sizzle", seen));
            counter.write(seen + 1);
        }
    };

    // Stage two: same state id, but the value starts at 13 and grows by 13 per element.
    DoFn<KV<String, Integer>, Integer> steppingFn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext ctx, @StateId(stateId) ValueState<Integer> counter) {
            Integer stored = counter.read();
            int total = (stored == null) ? 13 : stored;
            ctx.output(total);
            counter.write(total + 13);
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
    PCollection<KV<String, Integer>> intermediate = input.apply("First stateful ParDo", ParDo.of(countingFn));
    PCollection<Integer> output = intermediate.apply("Second stateful ParDo", ParDo.of(steppingFn));

    PAssert.that(intermediate).containsInAnyOrder(KV.of("sizzle", 0), KV.of("sizzle", 1), KV.of("sizzle", 2));
    PAssert.that(output).containsInAnyOrder(13, 26, 39);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 18 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

In class ParDoTest, method testValueStateSimple:

/**
 * Basic check of {@link ValueState} in a stateful {@link ParDo}: three elements sharing the key
 * {@code "hello"} are fed through a DoFn that emits the stored count (0 when nothing has been
 * written yet) and then stores the count plus one, so the outputs must be 0, 1 and 2.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateSimple() {
    final String stateId = "foo";

    DoFn<KV<String, Integer>, Integer> countingFn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext ctx, @StateId(stateId) ValueState<Integer> counter) {
            Integer stored = counter.read();
            // An unset state reads as null; treat that as a count of zero.
            int seen = (stored == null) ? 0 : stored;
            ctx.output(seen);
            counter.write(seen + 1);
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
    PCollection<Integer> output = input.apply(ParDo.of(countingFn));

    PAssert.that(output).containsInAnyOrder(0, 1, 2);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 19 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

In class ParDoTest, method testValueStateCoderInferenceFromInputCoder:

/**
 * Runs a stateful {@link ParDo} whose {@link ValueState} spec is declared via
 * {@code StateSpecs.value()} with no explicit coder, over an input whose coder is explicitly set
 * to a {@code KvCoder} of {@code StringUtf8Coder} and {@code MyIntegerCoder}.
 *
 * <p>There are no output assertions; the test passes when {@code pipeline.run()} completes.
 * NOTE(review): per the test name, the state coder is presumably derived from the input coder.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateCoderInferenceFromInputCoder() {
    final String stateId = "foo";
    MyIntegerCoder valueCoder = MyIntegerCoder.of();

    DoFn<KV<String, MyInteger>, MyInteger> countingFn = new DoFn<KV<String, MyInteger>, MyInteger>() {

        // Deliberately no coder argument here.
        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext ctx, @StateId(stateId) ValueState<MyInteger> counter) {
            MyInteger seen = MoreObjects.firstNonNull(counter.read(), new MyInteger(0));
            ctx.output(seen);
            counter.write(new MyInteger(seen.getValue() + 1));
        }
    };

    PCollection<KV<String, MyInteger>> input = pipeline.apply(
        Create.of(KV.of("hello", new MyInteger(42)), KV.of("hello", new MyInteger(97)), KV.of("hello", new MyInteger(84)))
            .withCoder(KvCoder.of(StringUtf8Coder.of(), valueCoder)));
    input.apply(ParDo.of(countingFn)).setCoder(valueCoder);

    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 20 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

In class ParDo, method finishSpecifyingStateSpecs:

/**
 * Completes every {@link StateSpec} declared on the given {@link DoFn}.
 *
 * <p>For each state declaration in the DoFn's signature, the declared field is read reflectively
 * from {@code fn}, offered the coders produced by {@code codersForStateSpecTypes}, and then
 * asked to finish specifying.
 *
 * @param fn the DoFn instance whose state spec fields are read
 * @param coderRegistry passed through to {@code codersForStateSpecTypes}
 * @param inputCoder passed through to {@code codersForStateSpecTypes}
 * @throws RuntimeException wrapping the {@link IllegalAccessException} if a state spec field
 *     cannot be read from {@code fn}
 */
private static void finishSpecifyingStateSpecs(DoFn<?, ?> fn, CoderRegistry coderRegistry, Coder<?> inputCoder) {
    for (DoFnSignature.StateDeclaration declaration : DoFnSignatures.getSignature(fn.getClass()).stateDeclarations().values()) {
        final StateSpec<?> spec;
        try {
            // Reflective field read is the only step here that can raise IllegalAccessException.
            spec = (StateSpec<?>) declaration.field().get(fn);
        } catch (IllegalAccessException e) {
            throw new RuntimeException(e);
        }
        spec.offerCoders(codersForStateSpecTypes(declaration, coderRegistry, inputCoder));
        spec.finishSpecifying();
    }
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec)28 Test (org.junit.Test)26 KV (org.apache.beam.sdk.values.KV)25 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)23 Matchers.containsString (org.hamcrest.Matchers.containsString)23 Category (org.junit.experimental.categories.Category)23 ValueState (org.apache.beam.sdk.state.ValueState)12 CombiningState (org.apache.beam.sdk.state.CombiningState)9 ArrayList (java.util.ArrayList)7 ImmutableList (com.google.common.collect.ImmutableList)5 List (java.util.List)5 TupleTagList (org.apache.beam.sdk.values.TupleTagList)5 HashSet (java.util.HashSet)4 BagState (org.apache.beam.sdk.state.BagState)4 DoFn (org.apache.beam.sdk.transforms.DoFn)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Instant (org.joda.time.Instant)4 Map (java.util.Map)3 Set (java.util.Set)3