Search in sources :

Example 21 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testSetStateCoderInference.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetStateCoderInference() {
    final String stateId = "foo";
    final String countStateId = "count";
    Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);
    DoFn<KV<String, Integer>, Set<MyInteger>> fn = new DoFn<KV<String, Integer>, Set<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<SetState<MyInteger>> setState = StateSpecs.set();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) SetState<MyInteger> state, @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
            state.add(new MyInteger(c.element().getValue()));
            count.add(1);
            if (count.read() >= 4) {
                Set<MyInteger> set = Sets.newHashSet(state.read());
                c.output(set);
            }
        }
    };
    PCollection<Set<MyInteger>> output = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12))).apply(ParDo.of(fn)).setCoder(SetCoder.of(myIntegerCoder));
    PAssert.that(output).containsInAnyOrder(Sets.newHashSet(new MyInteger(97), new MyInteger(42), new MyInteger(12)));
    pipeline.run();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) SetState(org.apache.beam.sdk.state.SetState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 22 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testBagState.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testBagState() {
    final String stateId = "foo";
    DoFn<KV<String, Integer>, List<Integer>> fn = new DoFn<KV<String, Integer>, List<Integer>>() {

        @StateId(stateId)
        private final StateSpec<BagState<Integer>> bufferState = StateSpecs.bag(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) BagState<Integer> state) {
            Iterable<Integer> currentValue = state.read();
            state.add(c.element().getValue());
            if (Iterables.size(state.read()) >= 4) {
                List<Integer> sorted = Lists.newArrayList(currentValue);
                Collections.sort(sorted);
                c.output(sorted);
            }
        }
    };
    PCollection<List<Integer>> output = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 84), KV.of("hello", 12))).apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(Lists.newArrayList(12, 42, 84, 97));
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ArrayList(java.util.ArrayList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ImmutableList(com.google.common.collect.ImmutableList) BagState(org.apache.beam.sdk.state.BagState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 23 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testValueStateCoderInferenceFailure.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateCoderInferenceFailure() throws Exception {
    final String stateId = "foo";
    MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
    DoFn<KV<String, Integer>, MyInteger> fn = new DoFn<KV<String, Integer>, MyInteger>() {

        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<MyInteger> state) {
            MyInteger currentValue = MoreObjects.firstNonNull(state.read(), new MyInteger(0));
            c.output(currentValue);
            state.write(new MyInteger(currentValue.getValue() + 1));
        }
    };
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Unable to infer a coder for ValueState and no Coder was specified.");
    pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84))).apply(ParDo.of(fn)).setCoder(myIntegerCoder);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 24 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testValueStateTaggedOutput.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
    final String stateId = "foo";
    final TupleTag<Integer> evenTag = new TupleTag<Integer>() {
    };
    final TupleTag<Integer> oddTag = new TupleTag<Integer>() {
    };
    DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
            Integer currentValue = MoreObjects.firstNonNull(state.read(), 0);
            if (currentValue % 2 == 0) {
                c.output(currentValue);
            } else {
                c.output(oddTag, currentValue);
            }
            state.write(currentValue + 1);
        }
    };
    PCollectionTuple output = pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84), KV.of("goodbye", 33), KV.of("hello", 859), KV.of("goodbye", 83945))).apply(ParDo.of(fn).withOutputTags(evenTag, TupleTagList.of(oddTag)));
    PCollection<Integer> evens = output.get(evenTag);
    PCollection<Integer> odds = output.get(oddTag);
    // There are 0 and 2 from "hello" and just 0 from "goodbye"
    PAssert.that(evens).containsInAnyOrder(0, 2, 0);
    // There are 1 and 3 from "hello" and just "1" from "goodbye"
    PAssert.that(odds).containsInAnyOrder(1, 3, 1);
    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 25 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testMapState.

@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapState() {
    final String stateId = "foo";
    final String countStateId = "count";
    DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>> fn = new DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>>() {

        @StateId(stateId)
        private final StateSpec<MapState<String, Integer>> mapState = StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) MapState<String, Integer> state, @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
            KV<String, Integer> value = c.element().getValue();
            state.put(value.getKey(), value.getValue());
            count.add(1);
            if (count.read() >= 4) {
                Iterable<Map.Entry<String, Integer>> iterate = state.entries().read();
                for (Map.Entry<String, Integer> entry : iterate) {
                    c.output(KV.of(entry.getKey(), entry.getValue()));
                }
            }
        }
    };
    PCollection<KV<String, Integer>> output = pipeline.apply(Create.of(KV.of("hello", KV.of("a", 97)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("c", 12)))).apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec)28 Test (org.junit.Test)26 KV (org.apache.beam.sdk.values.KV)25 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)23 Matchers.containsString (org.hamcrest.Matchers.containsString)23 Category (org.junit.experimental.categories.Category)23 ValueState (org.apache.beam.sdk.state.ValueState)12 CombiningState (org.apache.beam.sdk.state.CombiningState)9 ArrayList (java.util.ArrayList)7 ImmutableList (com.google.common.collect.ImmutableList)5 List (java.util.List)5 TupleTagList (org.apache.beam.sdk.values.TupleTagList)5 HashSet (java.util.HashSet)4 BagState (org.apache.beam.sdk.state.BagState)4 DoFn (org.apache.beam.sdk.transforms.DoFn)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Instant (org.joda.time.Instant)4 Map (java.util.Map)3 Set (java.util.Set)3