use of org.apache.beam.sdk.state.CombiningState in project beam by apache.
the class ParDoTest method testCombiningState.
/**
 * Runs three values for a single key through a stateful {@code DoFn} whose state is a
 * mean-combining {@code CombiningState}, and expects exactly one {@code "right on"} output.
 *
 * <p>The {@code DoFn} adds each value to the state, reads the running mean back, and emits
 * {@code "right on"} whenever that mean lies within the tolerance of 0.5.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningState() {
    final String meanStateId = "foo";

    DoFn<KV<String, Double>, String> meanFn = new DoFn<KV<String, Double>, String>() {
        // Maximum distance from 0.5 at which the running mean still counts as a hit.
        private static final double TOLERANCE = 0.0001;

        @StateId(meanStateId)
        private final StateSpec<CombiningState<Double, CountSum<Double>, Double>> combiningState =
                StateSpecs.combining(new Mean.CountSumCoder<Double>(), Mean.<Double>of());

        @ProcessElement
        public void processElement(
                ProcessContext c,
                @StateId(meanStateId) CombiningState<Double, CountSum<Double>, Double> state) {
            state.add(c.element().getValue());

            Double runningMean = state.read();
            boolean closeEnough = Math.abs(runningMean - 0.5) < TOLERANCE;
            if (closeEnough) {
                c.output("right on");
            }
        }
    };

    PCollection<KV<String, Double>> input =
            pipeline.apply(
                    Create.of(KV.of("hello", 0.3), KV.of("hello", 0.6), KV.of("hello", 0.6)));
    PCollection<String> output = input.apply(ParDo.of(meanFn));

    // Of the running means over these three values, only the mean of all three lands on 0.5.
    PAssert.that(output).containsInAnyOrder("right on");
    pipeline.run();
}
use of org.apache.beam.sdk.state.CombiningState in project beam by apache.
the class CopyOnAccessInMemoryStateInternalsTest method testAccumulatorCombiningStateWithUnderlying.
/**
 * Checks how a summing combining state behaves when one
 * {@code CopyOnAccessInMemoryStateInternals} is layered over another.
 *
 * <p>The assertions establish that the layered state starts from the value already present in
 * the underlying internals, that adding to the layered state does not change what the underlying
 * state reads, and that re-fetching the state from the underlying internals reads the same value
 * as the handle obtained earlier.
 */
@Test
public void testAccumulatorCombiningStateWithUnderlying() throws CannotProvideCoderException {
    StateNamespace ns = new StateNamespaceForTest("foo");
    CombineFn<Long, long[], Long> summingFn = Sum.ofLongs();
    CoderRegistry registry = pipeline.getCoderRegistry();
    StateTag<CombiningState<Long, long[], Long>> sumTag =
            StateTags.combiningValue(
                    "summer",
                    summingFn.getAccumulatorCoder(registry, registry.getCoder(Long.class)),
                    summingFn);

    // Base layer: created with no underlying internals of its own.
    CopyOnAccessInMemoryStateInternals<String> baseInternals =
            CopyOnAccessInMemoryStateInternals.withUnderlying(key, null);
    GroupingState<Long, Long> baseSum = baseInternals.state(ns, sumTag);
    assertThat(baseSum.read(), equalTo(0L));

    baseSum.add(1L);
    assertThat(baseSum.read(), equalTo(1L));

    // Second layer: built on top of the base layer.
    CopyOnAccessInMemoryStateInternals<String> layeredInternals =
            CopyOnAccessInMemoryStateInternals.withUnderlying(key, baseInternals);
    GroupingState<Long, Long> layeredSum = layeredInternals.state(ns, sumTag);
    assertThat(layeredSum.read(), equalTo(1L));

    layeredSum.add(4L);
    assertThat(layeredSum.read(), equalTo(5L));
    // The addition above must not be visible through the base layer's state.
    assertThat(baseSum.read(), equalTo(1L));

    GroupingState<Long, Long> refetchedBaseSum = baseInternals.state(ns, sumTag);
    Long viaOriginalHandle = baseSum.read();
    Long viaRefetchedHandle = refetchedBaseSum.read();
    assertThat(viaOriginalHandle, equalTo(viaRefetchedHandle));
}
use of org.apache.beam.sdk.state.CombiningState in project beam by apache.
the class ParDoTest method testSetStateCoderInference.
/**
 * Exercises a {@code SetState} declared without an explicit element coder
 * ({@code StateSpecs.set()}), after registering a coder for {@code MyInteger} with the
 * pipeline's coder registry.
 *
 * <p>The {@code DoFn} wraps each incoming value in a {@code MyInteger}, adds it to the set, and
 * counts elements in a second, summing state. Once the count reaches four it emits the contents
 * of the set.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetStateCoderInference() {
    final String setStateId = "foo";
    final String seenCountStateId = "count";

    Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();
    pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

    DoFn<KV<String, Integer>, Set<MyInteger>> collectFn =
            new DoFn<KV<String, Integer>, Set<MyInteger>>() {
                @StateId(setStateId)
                private final StateSpec<SetState<MyInteger>> setState = StateSpecs.set();

                @StateId(seenCountStateId)
                private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
                        StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

                @ProcessElement
                public void processElement(
                        ProcessContext c,
                        @StateId(setStateId) SetState<MyInteger> state,
                        @StateId(seenCountStateId) CombiningState<Integer, int[], Integer> count) {
                    state.add(new MyInteger(c.element().getValue()));
                    count.add(1);

                    // Nothing is emitted until four elements have been counted.
                    if (count.read() < 4) {
                        return;
                    }
                    Set<MyInteger> collected = Sets.newHashSet(state.read());
                    c.output(collected);
                }
            };

    PCollection<KV<String, Integer>> input =
            pipeline.apply(
                    Create.of(
                            KV.of("hello", 97),
                            KV.of("hello", 42),
                            KV.of("hello", 42),
                            KV.of("hello", 12)));
    PCollection<Set<MyInteger>> output =
            input.apply(ParDo.of(collectFn)).setCoder(SetCoder.of(myIntegerCoder));

    // 42 is supplied twice but is expected only once in the emitted set.
    PAssert.that(output).containsInAnyOrder(Sets.newHashSet(new MyInteger(97), new MyInteger(42), new MyInteger(12)));
    pipeline.run();
}
use of org.apache.beam.sdk.state.CombiningState in project beam by apache.
the class ParDoTest method testMapState.
/**
 * Exercises {@code MapState} inside a stateful {@code DoFn}.
 *
 * <p>Each element's value is itself a key/value pair that is put into the map, while a second,
 * summing state counts the elements. Once the count reaches four, every entry read from the map
 * is emitted as a {@code KV}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapState() {
    final String mapStateId = "foo";
    final String seenCountStateId = "count";

    DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>> mapFn =
            new DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>>() {
                @StateId(mapStateId)
                private final StateSpec<MapState<String, Integer>> mapState =
                        StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());

                @StateId(seenCountStateId)
                private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
                        StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

                @ProcessElement
                public void processElement(
                        ProcessContext c,
                        @StateId(mapStateId) MapState<String, Integer> state,
                        @StateId(seenCountStateId) CombiningState<Integer, int[], Integer> count) {
                    KV<String, Integer> pair = c.element().getValue();
                    state.put(pair.getKey(), pair.getValue());
                    count.add(1);

                    // Nothing is emitted until four elements have been counted.
                    if (count.read() < 4) {
                        return;
                    }
                    for (Map.Entry<String, Integer> stored : state.entries().read()) {
                        c.output(KV.of(stored.getKey(), stored.getValue()));
                    }
                }
            };

    PCollection<KV<String, KV<String, Integer>>> input =
            pipeline.apply(
                    Create.of(
                            KV.of("hello", KV.of("a", 97)),
                            KV.of("hello", KV.of("b", 42)),
                            KV.of("hello", KV.of("b", 42)),
                            KV.of("hello", KV.of("c", 12))));
    PCollection<KV<String, Integer>> output = input.apply(ParDo.of(mapFn));

    // The ("b", 42) pair is supplied twice but is expected only once in the output.
    PAssert.that(output).containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12));
    pipeline.run();
}
use of org.apache.beam.sdk.state.CombiningState in project beam by apache.
the class ParDoTest method testSetState.
/**
 * Exercises {@code SetState} with an explicitly supplied {@code VarIntCoder} inside a stateful
 * {@code DoFn}.
 *
 * <p>Each incoming value is added to the set while a second, summing state counts the elements.
 * Once the count reaches four, the contents of the set are emitted as a single {@code Set}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class })
public void testSetState() {
    final String setStateId = "foo";
    final String seenCountStateId = "count";

    DoFn<KV<String, Integer>, Set<Integer>> collectFn =
            new DoFn<KV<String, Integer>, Set<Integer>>() {
                @StateId(setStateId)
                private final StateSpec<SetState<Integer>> setState =
                        StateSpecs.set(VarIntCoder.of());

                @StateId(seenCountStateId)
                private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
                        StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

                @ProcessElement
                public void processElement(
                        ProcessContext c,
                        @StateId(setStateId) SetState<Integer> state,
                        @StateId(seenCountStateId) CombiningState<Integer, int[], Integer> count) {
                    state.add(c.element().getValue());
                    count.add(1);

                    // Nothing is emitted until four elements have been counted.
                    if (count.read() < 4) {
                        return;
                    }
                    Set<Integer> collected = Sets.newHashSet(state.read());
                    c.output(collected);
                }
            };

    PCollection<KV<String, Integer>> input =
            pipeline.apply(
                    Create.of(
                            KV.of("hello", 97),
                            KV.of("hello", 42),
                            KV.of("hello", 42),
                            KV.of("hello", 12)));
    PCollection<Set<Integer>> output = input.apply(ParDo.of(collectFn));

    // 42 is supplied twice but is expected only once in the emitted set.
    PAssert.that(output).containsInAnyOrder(Sets.newHashSet(97, 42, 12));
    pipeline.run();
}
Aggregations