Search in sources :

Example 16 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testValueStateTaggedOutput.

/**
 * Runs a stateful {@code DoFn} that keeps a per-key integer counter in {@code ValueState} and
 * emits the counter's current value to the main output when it is even, or to {@code oddTag}
 * when it is odd, then checks the contents of both outputs.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
    final String stateId = "foo";
    final TupleTag<Integer> evenTag = new TupleTag<Integer>() {
    };
    final TupleTag<Integer> oddTag = new TupleTag<Integer>() {
    };

    DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> counter) {
            // Unwritten state reads back as null; treat that as a count of zero.
            Integer stored = counter.read();
            int count = (stored == null) ? 0 : stored;

            if (count % 2 != 0) {
                c.output(oddTag, count);
            } else {
                // Untagged output goes to the main output, which is registered as evenTag below.
                c.output(count);
            }
            counter.write(count + 1);
        }
    };

    // Four elements are keyed "hello" and two are keyed "goodbye".
    PCollection<KV<String, Integer>> input =
        pipeline.apply(
            Create.of(
                KV.of("hello", 42),
                KV.of("hello", 97),
                KV.of("hello", 84),
                KV.of("goodbye", 33),
                KV.of("hello", 859),
                KV.of("goodbye", 83945)));
    PCollectionTuple results =
        input.apply(ParDo.of(fn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

    // Even counts: 0 and 2 for "hello", plus a single 0 for "goodbye".
    PAssert.that(results.get(evenTag)).containsInAnyOrder(0, 2, 0);
    // Odd counts: 1 and 3 for "hello", plus a single 1 for "goodbye".
    PAssert.that(results.get(oddTag)).containsInAnyOrder(1, 3, 1);

    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 17 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testValueStateDedup.

/**
 * Feeds many duplicate elements per key through a stateful {@code DoFn} that outputs only while
 * its per-key {@code ValueState} marker is unset (or zero), then asserts that exactly one value
 * per key reaches the output.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateDedup() {
    final String stateId = "foo";

    DoFn<KV<Integer, Integer>, Integer> onePerKey = new DoFn<KV<Integer, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> seenSpec = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> seenState) {
            // Unwritten state reads back as null; treat that as "not seen yet".
            Integer stored = seenState.read();
            int timesSeen = (stored == null) ? 0 : stored;
            if (timesSeen != 0) {
                // This key has already produced its single output.
                return;
            }
            seenState.write(timesSeen + 1);
            c.output(c.element().getValue());
        }
    };

    final int numKeys = 50;
    final int copiesPerKey = 15;

    // Enough repeated elements that deduplication is actually exercised.
    List<KV<Integer, Integer>> input = new ArrayList<>();
    // Exactly one value per key is expected to survive.
    Set<Integer> expectedOutput = new HashSet<>();

    for (int key = 0; key < numKeys; key++) {
        int value = 1000 + key;
        expectedOutput.add(value);
        for (int copy = 0; copy < copiesPerKey; copy++) {
            input.add(KV.of(key, value));
        }
    }
    Collections.shuffle(input);

    PCollection<KV<Integer, Integer>> keyed = pipeline.apply(Create.of(input));
    PCollection<Integer> deduped = keyed.apply(ParDo.of(onePerKey));

    PAssert.that(deduped).containsInAnyOrder(expectedOutput);
    pipeline.run();
}
Also used : ArrayList(java.util.ArrayList) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) HashSet(java.util.HashSet) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 18 with ValueState

use of org.apache.beam.sdk.state.ValueState in project beam by apache.

the class ParDoTest method testCoderInferenceOfList.

/**
 * Registers {@code MyIntegerCoder} for {@code MyInteger} in the pipeline's coder registry and
 * runs a stateful {@code DoFn} whose {@code ValueState<List<MyInteger>>} spec is declared with
 * no explicit coder ({@code StateSpecs.value()}). The test makes no assertions on the output;
 * it only requires the pipeline to run.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCoderInferenceOfList() {
    final String stateId = "foo";

    MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

    DoFn<KV<String, Integer>, List<MyInteger>> fn = new DoFn<KV<String, Integer>, List<MyInteger>>() {

        // No coder is passed here; the state coder is left to be resolved elsewhere.
        @StateId(stateId)
        private final StateSpec<ValueState<List<MyInteger>>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<List<MyInteger>> state) {
            MyInteger latest = new MyInteger(c.element().getValue());
            List<MyInteger> previous = state.read();

            // Append the new element to whatever was stored, or start a one-element list.
            List<MyInteger> updated;
            if (previous == null) {
                updated = Collections.singletonList(latest);
            } else {
                updated = ImmutableList.<MyInteger>builder().addAll(previous).add(latest).build();
            }

            c.output(updated);
            state.write(updated);
        }
    };

    PCollection<List<MyInteger>> lists =
        pipeline
            .apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)))
            .apply(ParDo.of(fn));
    // The output coder is set explicitly.
    lists.setCoder(ListCoder.of(myIntegerCoder));

    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) ArrayList(java.util.ArrayList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ImmutableList(com.google.common.collect.ImmutableList) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

ValueState (org.apache.beam.sdk.state.ValueState)18 Test (org.junit.Test)17 KV (org.apache.beam.sdk.values.KV)15 StateSpec (org.apache.beam.sdk.state.StateSpec)12 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)10 Matchers.containsString (org.hamcrest.Matchers.containsString)10 Category (org.junit.experimental.categories.Category)10 ArrayList (java.util.ArrayList)5 TupleTag (org.apache.beam.sdk.values.TupleTag)5 Instant (org.joda.time.Instant)5 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)3 ImmutableList (com.google.common.collect.ImmutableList)2 List (java.util.List)2 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)2 StateNamespace (org.apache.beam.runners.core.StateNamespace)2 StatefulParDo (org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo)2 Coder (org.apache.beam.sdk.coders.Coder)2 AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform)2