Search in sources :

Example 1 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.

/**
 * Checks that creating a stateful ParDo evaluator schedules a cleanup callback for every window
 * that appears in the input bundle, and that running a callback clears the state of exactly that
 * window while leaving the other window's state untouched.
 */
@Test
public void windowCleanupScheduled() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. The elements of this PCollection are never
    // actually processed: the input bundle is fabricated by hand further down.
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    TupleTag<Integer> mainOutput = new TupleTag<>();
    PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        // Intentionally empty: only the presence of declared state matters for this test.
        @ProcessElement
        public void process(ProcessContext c) {
        }
    }).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
    // Diamond operator instead of a raw type, so the assignment is not an unchecked conversion.
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory<>(mockEvaluationContext);
    AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We mustn't care how it is accomplished.
    // NOTE(review): stateInternals is presumably what mockStepContext hands out; it is wired up
    // outside this method -- confirm in the test fixture.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // A single bundle with one element in each of the two windows; cleanup should be registered
    // for the state of both windows merely by having the evaluator created.
    CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
    // Merely creating the evaluator should suffice to register the cleanup callback
    factory.forApplication(producingTransform, inputBundle);
    ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the first window, and only the first window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    // The same captor is reused; getValue() below is used to obtain the callback captured by
    // this second verification.
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the second window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.anyString(org.mockito.Matchers.anyString) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) StateNamespace(org.apache.beam.runners.core.StateNamespace) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 2 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testValueStateFixedWindows.

/**
 * Runs a stateful {@code DoFn} that keeps an integer counter in {@code ValueState} over input
 * windowed into 10 ms fixed windows. The assertions expect the counter to start from zero in
 * each window: outputs 0, 1, 2 for the three elements of the first window and 0, 1 for the two
 * elements of the second.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateFixedWindows() {
    final String stateId = "foo";

    // Emits the value currently stored (0 when nothing is stored yet), then stores value + 1.
    DoFn<KV<String, Integer>, Integer> countingFn = new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
            Integer seenSoFar = MoreObjects.firstNonNull(state.read(), 0);
            c.output(seenSoFar);
            state.write(seenSoFar + 1);
        }
    };

    // Three elements timestamped inside [0, 10) and two inside [10, 20), all under one key.
    PCollection<KV<String, Integer>> timestampedInput = pipeline.apply(Create.timestamped(
        TimestampedValue.of(KV.of("hello", 7), new Instant(1)),
        TimestampedValue.of(KV.of("hello", 14), new Instant(2)),
        TimestampedValue.of(KV.of("hello", 21), new Instant(3)),
        TimestampedValue.of(KV.of("hello", 28), new Instant(11)),
        TimestampedValue.of(KV.of("hello", 35), new Instant(13))));
    PCollection<KV<String, Integer>> windowedInput = timestampedInput.apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    PCollection<Integer> counts = windowedInput.apply("Stateful ParDo", ParDo.of(countingFn));

    IntervalWindow windowOne = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow windowTwo = new IntervalWindow(new Instant(10), new Instant(20));
    PAssert.that(counts).inWindow(windowOne).containsInAnyOrder(0, 1, 2);
    PAssert.that(counts).inWindow(windowTwo).containsInAnyOrder(0, 1);

    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 3 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testSetStateCoderInferenceFailure.

/**
 * Declares a {@code SetState<MyInteger>} via {@code StateSpecs.set()} without passing a coder,
 * and registers no coder for {@code MyInteger} in this method. The test expects a
 * {@link RuntimeException} whose message reports that no coder could be inferred for the
 * SetState.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetStateCoderInferenceFailure() throws Exception {
    final String stateId = "foo";
    final String countStateId = "count";
    Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();

    // Collects every element into the set state and emits the whole set once four elements
    // (or more) have been counted.
    DoFn<KV<String, Integer>, Set<MyInteger>> collectingFn = new DoFn<KV<String, Integer>, Set<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<SetState<MyInteger>> setState = StateSpecs.set();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) SetState<MyInteger> state, @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
            state.add(new MyInteger(c.element().getValue()));
            count.add(1);
            if (count.read() < 4) {
                return;
            }
            Set<MyInteger> collected = Sets.newHashSet(state.read());
            c.output(collected);
        }
    };

    // The expectation has to be armed before the pipeline is assembled and run.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Unable to infer a coder for SetState and no Coder was specified.");

    PCollection<KV<String, Integer>> keyedInput = pipeline.apply(Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
    keyedInput.apply(ParDo.of(collectingFn)).setCoder(SetCoder.of(myIntegerCoder));
    pipeline.run();
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) SetState(org.apache.beam.sdk.state.SetState) UsesSetState(org.apache.beam.sdk.testing.UsesSetState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 4 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testMapStateCoderInference.

/**
 * Declares a {@code MapState<String, MyInteger>} via {@code StateSpecs.map()} without passing
 * coders, after registering a coder for {@code MyInteger} on the pipeline's coder registry. The
 * assertions expect one output entry per distinct inner key ("a", "b", "c").
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapStateCoderInference() {
    final String stateId = "foo";
    final String countStateId = "count";
    Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

    // Stores each inner key/value pair in the map state and emits every map entry once four
    // elements (or more) have been counted.
    DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>> mapFillingFn = new DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<MapState<String, MyInteger>> mapState = StateSpecs.map();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState = StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) MapState<String, MyInteger> state, @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
            KV<String, Integer> pair = c.element().getValue();
            state.put(pair.getKey(), new MyInteger(pair.getValue()));
            count.add(1);
            if (count.read() < 4) {
                return;
            }
            for (Map.Entry<String, MyInteger> stored : state.entries().read()) {
                c.output(KV.of(stored.getKey(), stored.getValue()));
            }
        }
    };

    PCollection<KV<String, KV<String, Integer>>> nestedInput = pipeline.apply(Create.of(KV.of("hello", KV.of("a", 97)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("b", 42)), KV.of("hello", KV.of("c", 12))));
    PCollection<KV<String, MyInteger>> entries = nestedInput.apply(ParDo.of(mapFillingFn)).setCoder(KvCoder.of(StringUtf8Coder.of(), myIntegerCoder));

    PAssert.that(entries).containsInAnyOrder(KV.of("a", new MyInteger(97)), KV.of("b", new MyInteger(42)), KV.of("c", new MyInteger(12)));
    pipeline.run();
}
Also used : UsesMapState(org.apache.beam.sdk.testing.UsesMapState) MapState(org.apache.beam.sdk.state.MapState) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Map(java.util.Map) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 5 with StateSpec

use of org.apache.beam.sdk.state.StateSpec in project beam by apache.

the class ParDoTest method testCombiningStateCoderInference.

/**
 * Declares a {@code CombiningState} whose accumulator type is {@code MyInteger}, built with
 * {@code StateSpecs.combining(...)} from a hand-written summing {@code CombineFn} and no explicit
 * accumulator coder; a coder for {@code MyInteger} is registered on the pipeline beforehand.
 * The assertions expect exactly one "right on" output for the inputs 3, 6 and 7.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningStateCoderInference() {
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, MyIntegerCoder.of());
    final String stateId = "foo";

    // Adds each element's value to the combining state and emits a marker string whenever the
    // combined value equals EXPECTED_SUM.
    DoFn<KV<String, Integer>, String> summingFn = new DoFn<KV<String, Integer>, String>() {

        private static final int EXPECTED_SUM = 16;

        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, MyInteger, Integer>> combiningState = StateSpecs.combining(new Combine.CombineFn<Integer, MyInteger, Integer>() {

            /** Starts every accumulation from zero. */
            @Override
            public MyInteger createAccumulator() {
                return new MyInteger(0);
            }

            /** Returns a new accumulator holding the old total plus the input. */
            @Override
            public MyInteger addInput(MyInteger accumulator, Integer input) {
                int updatedTotal = accumulator.getValue() + input;
                return new MyInteger(updatedTotal);
            }

            /** Returns a new accumulator holding the total of all given accumulators. */
            @Override
            public MyInteger mergeAccumulators(Iterable<MyInteger> accumulators) {
                int total = 0;
                for (MyInteger partial : accumulators) {
                    total = total + partial.getValue();
                }
                return new MyInteger(total);
            }

            /** Unwraps the accumulated total. */
            @Override
            public Integer extractOutput(MyInteger accumulator) {
                return accumulator.getValue();
            }
        });

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) CombiningState<Integer, MyInteger, Integer> state) {
            state.add(c.element().getValue());
            Integer runningSum = state.read();
            if (runningSum == EXPECTED_SUM) {
                c.output("right on");
            }
        }
    };

    PCollection<KV<String, Integer>> keyedInput = pipeline.apply(Create.of(KV.of("hello", 3), KV.of("hello", 6), KV.of("hello", 7)));
    PCollection<String> markers = keyedInput.apply(ParDo.of(summingFn));

    // 3 + 6 + 7 = 16, so the sum should equal EXPECTED_SUM at exactly one moment.
    PAssert.that(markers).containsInAnyOrder("right on");
    pipeline.run();
}
Also used : StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) KV(org.apache.beam.sdk.values.KV) StateSpec(org.apache.beam.sdk.state.StateSpec) CombiningState(org.apache.beam.sdk.state.CombiningState) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

StateSpec (org.apache.beam.sdk.state.StateSpec)47 Test (org.junit.Test)38 KV (org.apache.beam.sdk.values.KV)35 Matchers.containsString (org.hamcrest.Matchers.containsString)24 Category (org.junit.experimental.categories.Category)24 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)23 ValueState (org.apache.beam.sdk.state.ValueState)21 DoFn (org.apache.beam.sdk.transforms.DoFn)19 ArrayList (java.util.ArrayList)10 CombiningState (org.apache.beam.sdk.state.CombiningState)10 Pipeline (org.apache.beam.sdk.Pipeline)9 List (java.util.List)8 TupleTag (org.apache.beam.sdk.values.TupleTag)8 Instant (org.joda.time.Instant)8 BagState (org.apache.beam.sdk.state.BagState)7 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)7 TupleTagList (org.apache.beam.sdk.values.TupleTagList)7 MapState (org.apache.beam.sdk.state.MapState)6 SetState (org.apache.beam.sdk.state.SetState)6 Timer (org.apache.beam.sdk.state.Timer)6