Search in sources :

Example 6 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testCombiningStateCoderInference.

/**
 * Checks that a {@code CombiningState} whose accumulator type is {@code MyInteger} can be used
 * when no accumulator coder is passed to {@code StateSpecs.combining}; the only coder supplied
 * by this test is the {@code MyIntegerCoder} registered for {@code MyInteger} in the pipeline's
 * coder registry.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningStateCoderInference() {
    final String stateId = "foo";
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, MyIntegerCoder.of());

    DoFn<KV<String, Integer>, String> sumDetector = new DoFn<KV<String, Integer>, String>() {

        // 3 + 6 + 7, i.e. the total of every value fed in below.
        private static final int EXPECTED_SUM = 16;

        // No coder argument here: the spec is built from the CombineFn alone.
        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, MyInteger, Integer>> sumSpec = StateSpecs.combining(new Combine.CombineFn<Integer, MyInteger, Integer>() {

            @Override
            public MyInteger createAccumulator() {
                return new MyInteger(0);
            }

            @Override
            public MyInteger addInput(MyInteger accumulator, Integer input) {
                return new MyInteger(accumulator.getValue() + input);
            }

            @Override
            public MyInteger mergeAccumulators(Iterable<MyInteger> accumulators) {
                int total = 0;
                for (MyInteger partial : accumulators) {
                    total += partial.getValue();
                }
                return new MyInteger(total);
            }

            @Override
            public Integer extractOutput(MyInteger accumulator) {
                return accumulator.getValue();
            }
        });

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(stateId) CombiningState<Integer, MyInteger, Integer> sumState) {
            Integer addend = context.element().getValue();
            sumState.add(addend);
            // Emit only when the running sum matches the expected total.
            if (sumState.read() == EXPECTED_SUM) {
                context.output("right on");
            }
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 3), KV.of("hello", 6), KV.of("hello", 7)));
    PCollection<String> matches = input.apply(ParDo.of(sumDetector));

    // There should only be one moment at which the running sum is exactly 16:
    // no proper subset of {3, 6, 7} adds up to 16.
    PAssert.that(matches).containsInAnyOrder("right on");
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 7 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testValueStateCoderInference.

/**
 * Checks that a {@code ValueState<MyInteger>} declared with the no-argument
 * {@code StateSpecs.value()} can be used once a {@code MyIntegerCoder} is registered for
 * {@code MyInteger} in the pipeline's coder registry.
 *
 * <p>Each element outputs the value currently held in state (or {@code MyInteger(0)} when the
 * state reads as null) and then stores that value plus one, so three elements on one key are
 * expected to yield 0, 1 and 2.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateCoderInference() {
    final String stateId = "foo";
    MyIntegerCoder coder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, coder);

    DoFn<KV<String, Integer>, MyInteger> counterFn = new DoFn<KV<String, Integer>, MyInteger>() {

        // No coder argument: the spec carries no explicit coder.
        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> counterSpec = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(stateId) ValueState<MyInteger> counter) {
            MyInteger stored = counter.read();
            // Fall back to zero when nothing has been written for this key yet.
            MyInteger current = (stored != null) ? stored : new MyInteger(0);
            context.output(current);
            counter.write(new MyInteger(current.getValue() + 1));
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
    PCollection<MyInteger> counts = input.apply(ParDo.of(counterFn)).setCoder(coder);

    PAssert.that(counts).containsInAnyOrder(new MyInteger(0), new MyInteger(1), new MyInteger(2));
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 8 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testCombiningStateCoderInferenceFailure.

/**
 * Checks the failure path of coder inference for {@code CombiningState}: this test neither
 * registers a coder for the {@code MyInteger} accumulator type nor passes one to
 * {@code StateSpecs.combining}, and expects a {@code RuntimeException} carrying the
 * "Unable to infer a coder" message.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningStateCoderInferenceFailure() throws Exception {
    final String stateId = "foo";

    DoFn<KV<String, Integer>, String> sumDetector = new DoFn<KV<String, Integer>, String>() {

        // 3 + 6 + 7, i.e. the total of every value fed in below.
        private static final int EXPECTED_SUM = 16;

        // No coder argument here: the spec is built from the CombineFn alone.
        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, MyInteger, Integer>> sumSpec = StateSpecs.combining(new Combine.CombineFn<Integer, MyInteger, Integer>() {

            @Override
            public MyInteger createAccumulator() {
                return new MyInteger(0);
            }

            @Override
            public MyInteger addInput(MyInteger accumulator, Integer input) {
                return new MyInteger(accumulator.getValue() + input);
            }

            @Override
            public MyInteger mergeAccumulators(Iterable<MyInteger> accumulators) {
                int total = 0;
                for (MyInteger partial : accumulators) {
                    total += partial.getValue();
                }
                return new MyInteger(total);
            }

            @Override
            public Integer extractOutput(MyInteger accumulator) {
                return accumulator.getValue();
            }
        });

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(stateId) CombiningState<Integer, MyInteger, Integer> sumState) {
            Integer addend = context.element().getValue();
            sumState.add(addend);
            if (sumState.read() == EXPECTED_SUM) {
                context.output("right on");
            }
        }
    };

    // The expectation must be armed before the transform is applied, so it is in place
    // whether the failure surfaces while building the pipeline or while running it.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Unable to infer a coder for CombiningState and no Coder was specified.");

    pipeline
        .apply(Create.of(KV.of("hello", 3), KV.of("hello", 6), KV.of("hello", 7)))
        .apply(ParDo.of(sumDetector));
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 9 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testEventTimeTimerMultipleKeys.

/**
 * Tests that event time timers for multiple keys both fire. This particularly exercises
 * implementations that may GC in ways not simply governed by the watermark.
 *
 * <p>For every element the DoFn sets the event-time timer to the window's maximum timestamp,
 * records the element's key in state, and outputs the element with {@code offset} added to
 * its value. When the timer fires it outputs the key read back from state paired with
 * {@code timerOutput}.
 */
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testEventTimeTimerMultipleKeys() throws Exception {
    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            timer.set(window.maxTimestamp());
            // Remember the key so the timer callback can report which key fired.
            state.write(context.element().getKey());
            context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
            context.output(KV.of(state.read(), timerOutput));
        }
    };
    // Enough keys that we exercise interesting code paths
    int numKeys = 50;
    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    // Primitive counter: a boxed Integer loop variable would be unboxed and reboxed on
    // every increment for no benefit.
    for (int key = 0; key < numKeys; ++key) {
        // Render the key once instead of re-converting it for every element.
        final String keyName = String.valueOf(key);
        // Each key should have just one final output at GC time
        expectedOutput.add(KV.of(keyName, timerOutput));
        for (int i = 0; i < 15; ++i) {
            // Each input should be output with the offset added
            input.add(KV.of(keyName, i));
            expectedOutput.add(KV.of(keyName, i + offset));
        }
    }
    // Randomize arrival order so elements for different keys are interleaved.
    Collections.shuffle(input);
    PCollection<KV<String, Integer>> output = pipeline.apply(Create.of(input)).apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);
    pipeline.run();
}
Also used : ArrayList(java.util.ArrayList) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) OnTimer(org.apache.beam.sdk.transforms.DoFn.OnTimer) Timer(org.apache.beam.sdk.state.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 10 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testMapStateCoderInferenceFailure.

/**
 * Checks the failure path of coder inference for {@code MapState}: the map state is declared
 * with the no-argument {@code StateSpecs.map()} and this test does not register a coder for
 * {@code MyInteger}, so a {@code RuntimeException} carrying the "Unable to infer a coder"
 * message is expected.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapStateCoderInferenceFailure() throws Exception {
    final String stateId = "foo";
    final String countStateId = "count";
    // Used only for the output PCollection; it is not registered with the coder registry.
    Coder<MyInteger> outputValueCoder = MyIntegerCoder.of();

    DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>> mapFn = new DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>>() {

        // No coder arguments: the spec carries no explicit key or value coder.
        @StateId(stateId)
        private final StateSpec<MapState<String, MyInteger>> entriesSpec = StateSpecs.map();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> seenSpec = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext context, @StateId(stateId) MapState<String, MyInteger> entries, @StateId(countStateId) CombiningState<Integer, int[], Integer> seen) {
            KV<String, Integer> payload = context.element().getValue();
            entries.put(payload.getKey(), new MyInteger(payload.getValue()));
            seen.add(1);
            // Nothing to emit until at least four elements have been counted.
            if (seen.read() < 4) {
                return;
            }
            for (Map.Entry<String, MyInteger> entry : entries.entries().read()) {
                context.output(KV.of(entry.getKey(), entry.getValue()));
            }
        }
    };

    // The expectation must be armed before the transform is applied, so it is in place
    // whether the failure surfaces while building the pipeline or while running it.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Unable to infer a coder for MapState and no Coder was specified.");

    pipeline
        .apply(Create.of(
            KV.of("hello", KV.of("a", 97)),
            KV.of("hello", KV.of("b", 42)),
            KV.of("hello", KV.of("b", 42)),
            KV.of("hello", KV.of("c", 12))))
        .apply(ParDo.of(mapFn))
        .setCoder(KvCoder.of(StringUtf8Coder.of(), outputValueCoder));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec): 28, Test (org.junit.Test): 26, KV (org.apache.beam.sdk.values.KV): 25, StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 23, Matchers.containsString (org.hamcrest.Matchers.containsString): 23, Category (org.junit.experimental.categories.Category): 23, ValueState (org.apache.beam.sdk.state.ValueState): 12, CombiningState (org.apache.beam.sdk.state.CombiningState): 9, ArrayList (java.util.ArrayList): 7, ImmutableList (com.google.common.collect.ImmutableList): 5, List (java.util.List): 5, TupleTagList (org.apache.beam.sdk.values.TupleTagList): 5, HashSet (java.util.HashSet): 4, BagState (org.apache.beam.sdk.state.BagState): 4, DoFn (org.apache.beam.sdk.transforms.DoFn): 4, IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 4, TupleTag (org.apache.beam.sdk.values.TupleTag): 4, Instant (org.joda.time.Instant): 4, Map (java.util.Map): 3, Set (java.util.Set): 3