Search in sources :

Example 26 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

From the class ParDoTest, method testValueStateDedup.

/**
 * Feeds a shuffled input in which each of 50 keys appears 15 times with the same value
 * ({@code 1000 + key}) through a {@link DoFn} that records in a {@link ValueState} whether it
 * has already emitted, and asserts that the output holds each distinct value exactly once.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateDedup() {
    final String stateId = "foo";
    DoFn<KV<Integer, Integer>, Integer> onePerKey = new DoFn<KV<Integer, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> seenSpec = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> seenState) {
            // An unset state cell reads as null; treat that as "seen zero times".
            Integer timesSeen = MoreObjects.firstNonNull(seenState.read(), 0);
            if (timesSeen != 0) {
                // Something was already emitted for this state cell: drop the duplicate.
                return;
            }
            seenState.write(timesSeen + 1);
            c.output(c.element().getValue());
        }
    };

    final int numKeys = 50;
    // Enough repetitions per key that deduplication is actually exercised.
    final int copiesPerKey = 15;

    List<KV<Integer, Integer>> input = new ArrayList<>();
    // Exactly one value is expected per key, with no duplicates.
    Set<Integer> expectedOutput = new HashSet<>();
    for (int key = 0; key < numKeys; ++key) {
        int value = 1000 + key;
        expectedOutput.add(value);
        int remaining = copiesPerKey;
        while (remaining-- > 0) {
            input.add(KV.of(key, value));
        }
    }
    Collections.shuffle(input);

    PCollection<KV<Integer, Integer>> keyedInput = pipeline.apply(Create.of(input));
    PCollection<Integer> output = keyedInput.apply(ParDo.of(onePerKey));
    PAssert.that(output).containsInAnyOrder(expectedOutput);
    pipeline.run();
}
Also used : ArrayList(java.util.ArrayList) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) HashSet(java.util.HashSet) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 27 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

From the class ParDoTest, method testSetState.

/**
 * Runs a {@link DoFn} that adds every incoming value to a {@link SetState} and bumps a
 * {@link CombiningState} counter; once the counter reads 4 or more it emits a copy of the set.
 * Four elements under the key {@code "hello"} (one value repeated) are fed in, and a single
 * output set {@code {97, 42, 12}} is asserted.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetState() {
    final String stateId = "foo";
    final String countStateId = "count";
    DoFn<KV<String, Integer>, Set<Integer>> fn = new DoFn<KV<String, Integer>, Set<Integer>>() {

        @StateId(stateId)
        private final StateSpec<SetState<Integer>> setState = StateSpecs.set(VarIntCoder.of());

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) SetState<Integer> state, @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
            state.add(c.element().getValue());
            count.add(1);
            if (count.read() < 4) {
                // Fewer than four elements counted so far: nothing to emit yet.
                return;
            }
            Set<Integer> snapshot = Sets.newHashSet(state.read());
            c.output(snapshot);
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
    PCollection<Set<Integer>> output = input.apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(Sets.newHashSet(97, 42, 12));
    pipeline.run();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) SetState(org.apache.beam.sdk.state.SetState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 28 with StateSpec

Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.

From the class ParDoTest, method testCoderInferenceOfList.

/**
 * Registers {@code MyIntegerCoder} for {@code MyInteger} in the pipeline's coder registry and
 * then runs a {@link DoFn} whose {@code ValueState<List<MyInteger>>} is declared through
 * {@code StateSpecs.value()} with no explicit coder. The test makes no assertion on the
 * output; it only requires that the pipeline runs.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCoderInferenceOfList() {
    final String stateId = "foo";
    MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);
    DoFn<KV<String, Integer>, List<MyInteger>> fn = new DoFn<KV<String, Integer>, List<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<ValueState<List<MyInteger>>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<List<MyInteger>> state) {
            MyInteger myInteger = new MyInteger(c.element().getValue());
            List<MyInteger> previous = state.read();
            List<MyInteger> updated;
            if (previous == null) {
                // Nothing stored yet: start the list with this element alone.
                updated = Collections.singletonList(myInteger);
            } else {
                // Append to a fresh immutable copy of what was stored.
                updated = ImmutableList.<MyInteger>builder().addAll(previous).add(myInteger).build();
            }
            c.output(updated);
            state.write(updated);
        }
    };

    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
    PCollection<List<MyInteger>> lists = input.apply(ParDo.of(fn));
    // The output coder is set explicitly; only the state coder is left unspecified.
    lists.setCoder(ListCoder.of(myIntegerCoder));
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) ArrayList(java.util.ArrayList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ImmutableList(com.google.common.collect.ImmutableList) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec)28 Test (org.junit.Test)26 KV (org.apache.beam.sdk.values.KV)25 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)23 Matchers.containsString (org.hamcrest.Matchers.containsString)23 Category (org.junit.experimental.categories.Category)23 ValueState (org.apache.beam.sdk.state.ValueState)12 CombiningState (org.apache.beam.sdk.state.CombiningState)9 ArrayList (java.util.ArrayList)7 ImmutableList (com.google.common.collect.ImmutableList)5 List (java.util.List)5 TupleTagList (org.apache.beam.sdk.values.TupleTagList)5 HashSet (java.util.HashSet)4 BagState (org.apache.beam.sdk.state.BagState)4 DoFn (org.apache.beam.sdk.transforms.DoFn)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Instant (org.joda.time.Instant)4 Map (java.util.Map)3 Set (java.util.Set)3