Search in sources :

Example 1 with MapState

use of org.apache.beam.sdk.state.MapState in project beam by apache.

the class ParDoTest method testMapStateCoderInference.

/**
 * Runs a stateful {@code DoFn} whose {@code MapState} spec is declared without explicit coders,
 * after registering {@code MyIntegerCoder} for {@code MyInteger} in the pipeline's coder registry.
 *
 * <p>Each element is stored in the map under its inner key. Once the running count reaches four
 * (the number of input elements) the map entries are emitted. The two identical
 * {@code ("b", 42)} inputs are expected to produce a single output entry.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapStateCoderInference() {
    final String stateId = "foo";
    final String countStateId = "count";
    Coder<MyInteger> valueCoder = MyIntegerCoder.of();
    // NOTE(review): presumably this registration is what lets the coder-less
    // StateSpecs.map() below resolve a coder for MyInteger -- confirm.
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, valueCoder);

    DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>> fn =
        new DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>>() {

            // Map spec deliberately created without key/value coders.
            @StateId(stateId)
            private final StateSpec<MapState<String, MyInteger>> mapSpec = StateSpecs.map();

            // Counts how many elements have been processed so far.
            @StateId(countStateId)
            private final StateSpec<CombiningState<Integer, int[], Integer>> countSpec =
                StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

            @ProcessElement
            public void processElement(
                    ProcessContext context,
                    @StateId(stateId) MapState<String, MyInteger> seenValues,
                    @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
                KV<String, Integer> keyedValue = context.element().getValue();
                seenValues.put(keyedValue.getKey(), new MyInteger(keyedValue.getValue()));
                seenCount.add(1);
                // Emit nothing until the fourth element has been recorded.
                if (seenCount.read() < 4) {
                    return;
                }
                for (Map.Entry<String, MyInteger> entry : seenValues.entries().read()) {
                    context.output(KV.of(entry.getKey(), entry.getValue()));
                }
            }
        };

    PCollection<KV<String, KV<String, Integer>>> input =
        pipeline.apply(
            Create.of(
                KV.of("hello", KV.of("a", 97)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("c", 12))));
    PCollection<KV<String, MyInteger>> output =
        input.apply(ParDo.of(fn)).setCoder(KvCoder.of(StringUtf8Coder.of(), valueCoder));

    PAssert.that(output)
        .containsInAnyOrder(
            KV.of("a", new MyInteger(97)),
            KV.of("b", new MyInteger(42)),
            KV.of("c", new MyInteger(12)));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 2 with MapState

use of org.apache.beam.sdk.state.MapState in project beam by apache.

the class ParDoTest method testMapStateCoderInferenceFailure.

/**
 * Builds a stateful {@code DoFn} whose {@code MapState} spec is declared without explicit
 * coders and, in this method, without registering any coder for {@code MyInteger} on the
 * pipeline.
 *
 * <p>The test expects a {@link RuntimeException} whose message contains
 * "Unable to infer a coder for MapState and no Coder was specified." to be thrown after the
 * expectation is armed, i.e. while the transform is applied or the pipeline is run.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapStateCoderInferenceFailure() throws Exception {
    final String stateId = "foo";
    final String countStateId = "count";
    // Only used for the output collection's coder; it is not registered with the pipeline here.
    Coder<MyInteger> valueCoder = MyIntegerCoder.of();

    DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>> fn =
        new DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>>() {

            // Map spec deliberately created without key/value coders.
            @StateId(stateId)
            private final StateSpec<MapState<String, MyInteger>> mapSpec = StateSpecs.map();

            // Counts how many elements have been processed so far.
            @StateId(countStateId)
            private final StateSpec<CombiningState<Integer, int[], Integer>> countSpec =
                StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

            @ProcessElement
            public void processElement(
                    ProcessContext context,
                    @StateId(stateId) MapState<String, MyInteger> seenValues,
                    @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
                KV<String, Integer> keyedValue = context.element().getValue();
                seenValues.put(keyedValue.getKey(), new MyInteger(keyedValue.getValue()));
                seenCount.add(1);
                // Emit nothing until the fourth element has been recorded.
                if (seenCount.read() < 4) {
                    return;
                }
                for (Map.Entry<String, MyInteger> entry : seenValues.entries().read()) {
                    context.output(KV.of(entry.getKey(), entry.getValue()));
                }
            }
        };

    // Arm the expectation before touching the pipeline.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Unable to infer a coder for MapState and no Coder was specified.");

    // The resulting collection is intentionally not kept; no output assertions are made.
    pipeline
        .apply(
            Create.of(
                KV.of("hello", KV.of("a", 97)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("c", 12))))
        .apply(ParDo.of(fn))
        .setCoder(KvCoder.of(StringUtf8Coder.of(), valueCoder));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with MapState

use of org.apache.beam.sdk.state.MapState in project beam by apache.

the class ParDoTest method testMapState.

/**
 * Runs a stateful {@code DoFn} that keeps a {@code MapState<String, Integer>} declared with
 * explicit {@code StringUtf8Coder} / {@code VarIntCoder} coders.
 *
 * <p>Each element is stored in the map under its inner key. Once the running count reaches four
 * (the number of input elements) the map entries are emitted. The two identical
 * {@code ("b", 42)} inputs are expected to produce a single output entry.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapState() {
    final String stateId = "foo";
    final String countStateId = "count";

    DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>> fn =
        new DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>>() {

            // Map spec with both coders supplied explicitly.
            @StateId(stateId)
            private final StateSpec<MapState<String, Integer>> mapSpec =
                StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());

            // Counts how many elements have been processed so far.
            @StateId(countStateId)
            private final StateSpec<CombiningState<Integer, int[], Integer>> countSpec =
                StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

            @ProcessElement
            public void processElement(
                    ProcessContext context,
                    @StateId(stateId) MapState<String, Integer> seenValues,
                    @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
                KV<String, Integer> keyedValue = context.element().getValue();
                seenValues.put(keyedValue.getKey(), keyedValue.getValue());
                seenCount.add(1);
                // Emit nothing until the fourth element has been recorded.
                if (seenCount.read() < 4) {
                    return;
                }
                for (Map.Entry<String, Integer> entry : seenValues.entries().read()) {
                    context.output(KV.of(entry.getKey(), entry.getValue()));
                }
            }
        };

    PCollection<KV<String, KV<String, Integer>>> input =
        pipeline.apply(
            Create.of(
                KV.of("hello", KV.of("a", 97)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("b", 42)),
                KV.of("hello", KV.of("c", 12))));
    PCollection<KV<String, Integer>> output = input.apply(ParDo.of(fn));

    PAssert.that(output)
        .containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with MapState

use of org.apache.beam.sdk.state.MapState in project beam by apache.

the class CopyOnAccessInMemoryStateInternalsTest method testMapStateWithUnderlying.

/**
 * Exercises a {@code MapState} obtained from a {@code CopyOnAccessInMemoryStateInternals} that
 * is layered on top of another instance.
 *
 * <p>Asserts that an entry written to the underlying map is readable through the layered map,
 * that an entry written through the layered map is not visible in the underlying map, and that
 * re-fetching the underlying state yields the same entries as the handle already held.
 */
@Test
public void testMapStateWithUnderlying() {
    CopyOnAccessInMemoryStateInternals<String> baseInternals =
        CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
    StateNamespace ns = new StateNamespaceForTest("foo");
    StateTag<MapState<String, Integer>> mapTag =
        StateTags.map("foo", StringUtf8Coder.of(), VarIntCoder.of());

    // Start from an empty map in the base internals and add one entry.
    MapState<String, Integer> baseMap = baseInternals.state(ns, mapTag);
    assertThat(baseMap.entries().read(), emptyIterable());
    baseMap.put("hello", 1);
    assertThat(baseMap.get("hello").read(), equalTo(1));

    // Layer a second instance on top of the base one; it should see the existing entry.
    CopyOnAccessInMemoryStateInternals<String> layeredInternals =
        CopyOnAccessInMemoryStateInternals.withUnderlying(key, baseInternals);
    MapState<String, Integer> layeredMap = layeredInternals.state(ns, mapTag);
    assertThat(layeredMap.get("hello").read(), equalTo(1));

    // A write through the layered map shows up there...
    layeredMap.put("world", 4);
    assertThat(layeredMap.get("hello").read(), equalTo(1));
    assertThat(layeredMap.get("world").read(), equalTo(4));
    // ...but the base map is left as it was.
    assertThat(baseMap.get("hello").read(), equalTo(1));
    assertNull(baseMap.get("world").read());

    // Fetching the base state again must agree with the handle we already hold.
    MapState<String, Integer> refetchedBaseMap = baseInternals.state(ns, mapTag);
    assertThat(baseMap.entries().read(), equalTo(refetchedBaseMap.entries().read()));
}
Also used : StateNamespaceForTest(org.apache.beam.runners.core.StateNamespaceForTest) MapState(org.apache.beam.sdk.state.MapState) StateNamespace(org.apache.beam.runners.core.StateNamespace) Test(org.junit.Test)

Aggregations

MapState (org.apache.beam.sdk.state.MapState): 4, Test (org.junit.Test): 4, Map (java.util.Map): 3, CombiningState (org.apache.beam.sdk.state.CombiningState): 3, StateSpec (org.apache.beam.sdk.state.StateSpec): 3, UsesMapState (org.apache.beam.sdk.testing.UsesMapState): 3, StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 3, KV (org.apache.beam.sdk.values.KV): 3, Matchers.containsString (org.hamcrest.Matchers.containsString): 3, Category (org.junit.experimental.categories.Category): 3, StateNamespace (org.apache.beam.runners.core.StateNamespace): 1, StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest): 1