Search in sources :

Example 6 with CombiningState

use of org.apache.beam.sdk.state.CombiningState in project beam by apache.

the class ParDoTest method testCombiningState.

/**
 * Runs a stateful {@link DoFn} that folds every incoming value into a mean-combining
 * {@link CombiningState} and emits {@code "right on"} whenever the value read back from the
 * state lies within {@code EPSILON} of 0.5. The pipeline output is expected to consist of
 * exactly one such element.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningState() {
    final String meanStateId = "foo";
    DoFn<KV<String, Double>, String> meanFn = new DoFn<KV<String, Double>, String>() {

        // Tolerance used when comparing the running mean against the 0.5 target.
        private static final double EPSILON = 0.0001;

        @StateId(meanStateId)
        private final StateSpec<CombiningState<Double, CountSum<Double>, Double>> combiningState = StateSpecs.combining(new Mean.CountSumCoder<Double>(), Mean.<Double>of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(meanStateId) CombiningState<Double, CountSum<Double>, Double> state) {
            Double incoming = c.element().getValue();
            state.add(incoming);
            // Read the state back after the add so the comparison sees the updated mean.
            double distanceFromTarget = Math.abs(state.read() - 0.5);
            if (distanceFromTarget < EPSILON) {
                c.output("right on");
            }
        }
    };
    PCollection<KV<String, Double>> input = pipeline.apply(Create.of(KV.of("hello", 0.3), KV.of("hello", 0.6), KV.of("hello", 0.6)));
    PCollection<String> output = input.apply(ParDo.of(meanFn));
    // Only one reading of the state is expected to land within EPSILON of 0.5.
    PAssert.that(output).containsInAnyOrder("right on");
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) CountSum(org.apache.beam.sdk.transforms.Mean.CountSum) CombiningState(org.apache.beam.sdk.state.CombiningState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 7 with CombiningState

use of org.apache.beam.sdk.state.CombiningState in project beam by apache.

the class CopyOnAccessInMemoryStateInternalsTest method testAccumulatorCombiningStateWithUnderlying.

/**
 * Exercises a long-summing combining state across two layers of
 * {@link CopyOnAccessInMemoryStateInternals}: a base instance created with a {@code null}
 * underlying, and a second instance layered on top of it.
 *
 * <p>The assertions check that the layered instance starts from the base's value, that adding
 * to the layered state does not change what the base state reads, and that fetching the state
 * from the base again yields the same reading.
 *
 * @throws CannotProvideCoderException declared because the coder lookups on the registry and
 *     on the combine function may throw it
 */
@Test
public void testAccumulatorCombiningStateWithUnderlying() throws CannotProvideCoderException {
    StateNamespace ns = new StateNamespaceForTest("foo");
    CoderRegistry coders = pipeline.getCoderRegistry();
    CombineFn<Long, long[], Long> longSum = Sum.ofLongs();
    StateTag<CombiningState<Long, long[], Long>> sumTag = StateTags.combiningValue("summer", longSum.getAccumulatorCoder(coders, coders.getCoder(Long.class)), longSum);

    // Base layer: starts at zero and accumulates a single 1.
    CopyOnAccessInMemoryStateInternals<String> base = CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
    GroupingState<Long, Long> baseSum = base.state(ns, sumTag);
    assertThat(baseSum.read(), equalTo(0L));
    baseSum.add(1L);
    assertThat(baseSum.read(), equalTo(1L));

    // Layered instance: sees the base's value, then diverges after its own add.
    CopyOnAccessInMemoryStateInternals<String> layered = CopyOnAccessInMemoryStateInternals.withUnderlying(key, base);
    GroupingState<Long, Long> layeredSum = layered.state(ns, sumTag);
    assertThat(layeredSum.read(), equalTo(1L));
    layeredSum.add(4L);
    assertThat(layeredSum.read(), equalTo(5L));

    // The base must still report its own value, both via the old handle and a fresh lookup.
    assertThat(baseSum.read(), equalTo(1L));
    GroupingState<Long, Long> baseSumAgain = base.state(ns, sumTag);
    assertThat(baseSum.read(), equalTo(baseSumAgain.read()));
}
Also used : CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) CombiningState(org.apache.beam.sdk.state.CombiningState) StateNamespace(org.apache.beam.runners.core.StateNamespace) StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) Test(org.junit.Test)

Example 8 with CombiningState

use of org.apache.beam.sdk.state.CombiningState in project beam by apache.

the class ParDoTest method testSetStateCoderInference.

/**
 * Runs a stateful {@link DoFn} whose {@link SetState} is declared with {@code StateSpecs.set()}
 * and no explicit element coder, after registering a coder for {@code MyInteger} on the
 * pipeline's coder registry.
 *
 * <p>Each element is wrapped in a {@code MyInteger} and added to the set, while a second
 * combining state is incremented by one per element. Once that counter reads at least 4, the
 * set contents are emitted; the expected output is the single set {97, 42, 12}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetStateCoderInference() {
    final String setStateId = "foo";
    final String counterStateId = "count";
    Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);
    DoFn<KV<String, Integer>, Set<MyInteger>> collectFn = new DoFn<KV<String, Integer>, Set<MyInteger>>() {

        @StateId(setStateId)
        private final StateSpec<SetState<MyInteger>> setState = StateSpecs.set();

        @StateId(counterStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(setStateId) SetState<MyInteger> state, @StateId(counterStateId) CombiningState<Integer, int[], Integer> count) {
            MyInteger wrapped = new MyInteger(c.element().getValue());
            state.add(wrapped);
            count.add(1);
            int seenSoFar = count.read();
            if (seenSoFar >= 4) {
                // Copy the state contents into a fresh set before emitting them.
                c.output(Sets.newHashSet(state.read()));
            }
        }
    };
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
    PCollection<Set<MyInteger>> output = input.apply(ParDo.of(collectFn)).setCoder(SetCoder.of(myIntegerCoder));
    Set<MyInteger> expected = Sets.newHashSet(new MyInteger(97), new MyInteger(42), new MyInteger(12));
    PAssert.that(output).containsInAnyOrder(expected);
    pipeline.run();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) SetState(org.apache.beam.sdk.state.SetState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 9 with CombiningState

use of org.apache.beam.sdk.state.CombiningState in project beam by apache.

the class ParDoTest method testMapState.

/**
 * Runs a stateful {@link DoFn} that stores each incoming inner key/value pair in a
 * {@link MapState} and increments a combining counter state by one per element.
 *
 * <p>Once the counter reads at least 4, every entry currently in the map is emitted as a
 * {@link KV}. The expected output is ("a", 97), ("b", 42) and ("c", 12) in any order.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapState() {
    final String mapStateId = "foo";
    final String counterStateId = "count";
    DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>> collectFn = new DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>>() {

        @StateId(mapStateId)
        private final StateSpec<MapState<String, Integer>> mapState = StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());

        @StateId(counterStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(mapStateId) MapState<String, Integer> state, @StateId(counterStateId) CombiningState<Integer, int[], Integer> count) {
            KV<String, Integer> pair = c.element().getValue();
            state.put(pair.getKey(), pair.getValue());
            count.add(1);
            int seenSoFar = count.read();
            if (seenSoFar >= 4) {
                // Emit one KV per entry currently held in the map state.
                for (Map.Entry<String, Integer> stored : state.entries().read()) {
                    c.output(KV.of(stored.getKey(), stored.getValue()));
                }
            }
        }
    };
    PCollection<KV<String, KV<String, Integer>>> input = pipeline.apply(Create.of(KV.of("hello", KV.of("a", 97)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("c", 12))));
    PCollection<KV<String, Integer>> output = input.apply(ParDo.of(collectFn));
    PAssert.that(output).containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 10 with CombiningState

use of org.apache.beam.sdk.state.CombiningState in project beam by apache.

the class ParDoTest method testSetState.

/**
 * Runs a stateful {@link DoFn} that adds each incoming integer to a {@link SetState} declared
 * with an explicit {@code VarIntCoder}, and increments a combining counter state by one per
 * element.
 *
 * <p>Once the counter reads at least 4, the set contents are emitted; the expected output is
 * the single set {97, 42, 12}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetState() {
    final String setStateId = "foo";
    final String counterStateId = "count";
    DoFn<KV<String, Integer>, Set<Integer>> collectFn = new DoFn<KV<String, Integer>, Set<Integer>>() {

        @StateId(setStateId)
        private final StateSpec<SetState<Integer>> setState = StateSpecs.set(VarIntCoder.of());

        @StateId(counterStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(setStateId) SetState<Integer> state, @StateId(counterStateId) CombiningState<Integer, int[], Integer> count) {
            Integer incoming = c.element().getValue();
            state.add(incoming);
            count.add(1);
            int seenSoFar = count.read();
            if (seenSoFar >= 4) {
                // Copy the state contents into a fresh set before emitting them.
                c.output(Sets.newHashSet(state.read()));
            }
        }
    };
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
    PCollection<Set<Integer>> output = input.apply(ParDo.of(collectFn));
    Set<Integer> expected = Sets.newHashSet(97, 42, 12);
    PAssert.that(output).containsInAnyOrder(expected);
    pipeline.run();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) SetState(org.apache.beam.sdk.state.SetState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

CombiningState (org.apache.beam.sdk.state.CombiningState)10 Test (org.junit.Test)10 StateSpec (org.apache.beam.sdk.state.StateSpec)9 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)9 KV (org.apache.beam.sdk.values.KV)9 Matchers.containsString (org.hamcrest.Matchers.containsString)9 Category (org.junit.experimental.categories.Category)9 HashSet (java.util.HashSet)3 Map (java.util.Map)3 Set (java.util.Set)3 MapState (org.apache.beam.sdk.state.MapState)3 SetState (org.apache.beam.sdk.state.SetState)3 UsesMapState (org.apache.beam.sdk.testing.UsesMapState)3 UsesSetState (org.apache.beam.sdk.testing.UsesSetState)3 StateNamespace (org.apache.beam.runners.core.StateNamespace)1 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)1 CoderRegistry (org.apache.beam.sdk.coders.CoderRegistry)1 CountSum (org.apache.beam.sdk.transforms.Mean.CountSum)1