Search in sources :

Example 16 with BagState

Use of org.apache.beam.sdk.state.BagState in project beam by apache.

The method testBagState of the class ParDoTest.

/**
 * Verifies that a stateful {@code DoFn} can accumulate elements in a {@code BagState}.
 *
 * <p>Each input value is added to the bag; once the bag holds at least four values, its
 * contents are emitted as a single sorted list. The pipeline feeds four values under the same
 * key and expects exactly one output list containing all of them.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testBagState() {
    final String stateId = "foo";
    DoFn<KV<String, Integer>, List<Integer>> fn = new DoFn<KV<String, Integer>, List<Integer>>() {

        @StateId(stateId)
        private final StateSpec<BagState<Integer>> bufferState = StateSpecs.bag(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) BagState<Integer> state) {
            state.add(c.element().getValue());
            // Read AFTER adding so the emitted list is built from contents that are known to
            // include the current element. The result then does not depend on whether an
            // Iterable obtained before the add() would reflect later additions.
            Iterable<Integer> currentValue = state.read();
            if (Iterables.size(currentValue) >= 4) {
                List<Integer> sorted = Lists.newArrayList(currentValue);
                Collections.sort(sorted);
                c.output(sorted);
            }
        }
    };
    PCollection<List<Integer>> output = pipeline
            .apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 84), KV.of("hello", 12)))
            .apply(ParDo.of(fn));
    PAssert.that(output).containsInAnyOrder(Lists.newArrayList(12, 42, 84, 97));
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) ArrayList(java.util.ArrayList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ImmutableList(com.google.common.collect.ImmutableList) BagState(org.apache.beam.sdk.state.BagState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

BagState (org.apache.beam.sdk.state.BagState)16 Test (org.junit.Test)16 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)11 StateNamespace (org.apache.beam.runners.core.StateNamespace)10 ImmutableList (com.google.common.collect.ImmutableList)4 ArrayList (java.util.ArrayList)4 List (java.util.List)4 StateSpec (org.apache.beam.sdk.state.StateSpec)4 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)4 KV (org.apache.beam.sdk.values.KV)4 TupleTagList (org.apache.beam.sdk.values.TupleTagList)4 Matchers.containsString (org.hamcrest.Matchers.containsString)4 Category (org.junit.experimental.categories.Category)4 ByteArrayInputStream (java.io.ByteArrayInputStream)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataInputStream (java.io.DataInputStream)1 DataOutputStream (java.io.DataOutputStream)1 ByteBuffer (java.nio.ByteBuffer)1 DirectStepContext (org.apache.beam.runners.direct.DirectExecutionContext.DirectStepContext)1 FlinkKeyGroupStateInternals (org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkKeyGroupStateInternals)1