Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ParDoTest, method testCombiningStateCoderInference.
/**
 * Runs a stateful {@link DoFn} whose {@link CombiningState} uses {@link MyInteger} as its
 * accumulator type. The state spec is built without an explicit accumulator coder; the only coder
 * supplied for {@link MyInteger} is the one registered on the pipeline's coder registry.
 *
 * <p>The DoFn sums the values seen for a key and emits {@code "right on"} whenever the running
 * sum equals 16.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningStateCoderInference() {
  final String stateId = "foo";
  pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, MyIntegerCoder.of());

  DoFn<KV<String, Integer>, String> fn =
      new DoFn<KV<String, Integer>, String>() {
        private static final int EXPECTED_SUM = 16;

        // Note: no coder argument is passed to StateSpecs.combining here.
        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, MyInteger, Integer>> combiningState =
            StateSpecs.combining(
                new Combine.CombineFn<Integer, MyInteger, Integer>() {
                  @Override
                  public MyInteger createAccumulator() {
                    return new MyInteger(0);
                  }

                  @Override
                  public MyInteger addInput(MyInteger accumulator, Integer input) {
                    int updated = accumulator.getValue() + input;
                    return new MyInteger(updated);
                  }

                  @Override
                  public MyInteger mergeAccumulators(Iterable<MyInteger> accumulators) {
                    int total = 0;
                    for (MyInteger partial : accumulators) {
                      total += partial.getValue();
                    }
                    return new MyInteger(total);
                  }

                  @Override
                  public Integer extractOutput(MyInteger accumulator) {
                    return accumulator.getValue();
                  }
                });

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(stateId) CombiningState<Integer, MyInteger, Integer> sumState) {
          sumState.add(context.element().getValue());
          Integer runningSum = sumState.read();
          if (runningSum == EXPECTED_SUM) {
            context.output("right on");
          }
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(Create.of(KV.of("hello", 3), KV.of("hello", 6), KV.of("hello", 7)));
  PCollection<String> output = input.apply(ParDo.of(fn));

  // All three elements share one key and 3 + 6 + 7 = 16, so the running sum is exactly 16 only
  // once: after the last element has been added.
  PAssert.that(output).containsInAnyOrder("right on");
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ParDoTest, method testValueStateCoderInference.
/**
 * Runs a stateful {@link DoFn} whose {@link ValueState} holds a {@link MyInteger}. The state spec
 * is created with {@code StateSpecs.value()} and no coder argument; a {@link MyIntegerCoder} is
 * registered for {@link MyInteger} on the pipeline's coder registry beforehand.
 *
 * <p>For each element the DoFn emits the stored value (or a {@code MyInteger} of 0 when nothing is
 * stored yet) and then writes back that value plus one.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateCoderInference() {
  final String stateId = "foo";
  MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
  pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

  DoFn<KV<String, Integer>, MyInteger> fn =
      new DoFn<KV<String, Integer>, MyInteger>() {
        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(stateId) ValueState<MyInteger> counter) {
          MyInteger stored = counter.read();
          MyInteger fallback = new MyInteger(0);
          MyInteger current = MoreObjects.firstNonNull(stored, fallback);
          context.output(current);
          int next = current.getValue() + 1;
          counter.write(new MyInteger(next));
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
  PCollection<MyInteger> output = input.apply(ParDo.of(fn)).setCoder(myIntegerCoder);

  // Three elements on the same key: the emitted counter values are 0, 1 and 2. The input
  // integers themselves are never read by the DoFn.
  PAssert.that(output).containsInAnyOrder(new MyInteger(0), new MyInteger(1), new MyInteger(2));
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ParDoTest, method testCombiningStateCoderInferenceFailure.
/**
 * Builds the same summing {@link CombiningState} DoFn as the coder-inference success case, but
 * this method registers no coder for {@link MyInteger} and the state spec names none. The test
 * expects a {@link RuntimeException} whose message reports that no coder could be inferred for the
 * {@code CombiningState}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCombiningStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";

  DoFn<KV<String, Integer>, String> fn =
      new DoFn<KV<String, Integer>, String>() {
        private static final int EXPECTED_SUM = 16;

        // Note: no coder argument is passed to StateSpecs.combining here.
        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, MyInteger, Integer>> combiningState =
            StateSpecs.combining(
                new Combine.CombineFn<Integer, MyInteger, Integer>() {
                  @Override
                  public MyInteger createAccumulator() {
                    return new MyInteger(0);
                  }

                  @Override
                  public MyInteger addInput(MyInteger accumulator, Integer input) {
                    int updated = accumulator.getValue() + input;
                    return new MyInteger(updated);
                  }

                  @Override
                  public MyInteger mergeAccumulators(Iterable<MyInteger> accumulators) {
                    int total = 0;
                    for (MyInteger partial : accumulators) {
                      total += partial.getValue();
                    }
                    return new MyInteger(total);
                  }

                  @Override
                  public Integer extractOutput(MyInteger accumulator) {
                    return accumulator.getValue();
                  }
                });

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(stateId) CombiningState<Integer, MyInteger, Integer> sumState) {
          sumState.add(context.element().getValue());
          Integer runningSum = sumState.read();
          if (runningSum == EXPECTED_SUM) {
            context.output("right on");
          }
        }
      };

  // Expectations must be installed before the transform is applied and the pipeline is run.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for CombiningState and no Coder was specified.");

  pipeline
      .apply(Create.of(KV.of("hello", 3), KV.of("hello", 6), KV.of("hello", 7)))
      .apply(ParDo.of(fn));
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ParDoTest, method testEventTimeTimerMultipleKeys.
/**
 * Tests that event time timers for multiple keys both fire. This particularly exercises
 * implementations that may GC in ways not simply governed by the watermark.
 *
 * <p>For every input element the DoFn sets an event-time timer to the window's maximum timestamp,
 * stores the element's key in a {@link ValueState}, and emits the element with {@code offset}
 * added to its value. When the timer fires it emits the stored key paired with
 * {@code timerOutput}.
 *
 * <p>The DoFn declares {@code @StateId} state in addition to its timer, so the test is tagged with
 * both {@link UsesTimersInParDo} and {@link UsesStatefulParDo}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesTimersInParDo.class })
public void testEventTimeTimerMultipleKeys() throws Exception {
  final String timerId = "foo";
  final String stateId = "sizzle";
  final int offset = 5000;
  final int timerOutput = 4093;

  DoFn<KV<String, Integer>, KV<String, Integer>> fn =
      new DoFn<KV<String, Integer>, KV<String, Integer>>() {
        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @TimerId(timerId) Timer timer,
            @StateId(stateId) ValueState<String> state,
            BoundedWindow window) {
          // Every element of a key (re)sets the same timer to the end of the window.
          timer.set(window.maxTimestamp());
          // Remember the key so that onTimer can include it in its output.
          state.write(context.element().getKey());
          context.output(
              KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
          context.output(KV.of(state.read(), timerOutput));
        }
      };

  // Enough keys that we exercise interesting code paths
  int numKeys = 50;
  List<KV<String, Integer>> input = new ArrayList<>();
  List<KV<String, Integer>> expectedOutput = new ArrayList<>();

  // A primitive counter avoids unboxing and reboxing an Integer on every iteration.
  for (int key = 0; key < numKeys; ++key) {
    String keyName = Integer.toString(key);

    // Each key should have just one final output at GC time
    expectedOutput.add(KV.of(keyName, timerOutput));

    for (int i = 0; i < 15; ++i) {
      // Each input should be output with the offset added
      input.add(KV.of(keyName, i));
      expectedOutput.add(KV.of(keyName, i + offset));
    }
  }

  // Randomize arrival order so elements of different keys are interleaved.
  Collections.shuffle(input);

  PCollection<KV<String, Integer>> output =
      pipeline.apply(Create.of(input)).apply(ParDo.of(fn));
  PAssert.that(output).containsInAnyOrder(expectedOutput);
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
Class ParDoTest, method testMapStateCoderInferenceFailure.
/**
 * Builds a stateful {@link DoFn} with a {@link MapState} from {@code String} to {@link MyInteger}
 * created via {@code StateSpecs.map()} with no coder arguments. This method registers no coder for
 * {@link MyInteger}; only the output {@code PCollection} is given an explicit coder. The test
 * expects a {@link RuntimeException} whose message reports that no coder could be inferred for the
 * {@code MapState}.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class })
public void testMapStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";
  final String countStateId = "count";
  Coder<MyInteger> myIntegerCoder = MyIntegerCoder.of();

  DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>> fn =
      new DoFn<KV<String, KV<String, Integer>>, KV<String, MyInteger>>() {
        @StateId(stateId)
        private final StateSpec<MapState<String, MyInteger>> mapState = StateSpecs.map();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
            StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(stateId) MapState<String, MyInteger> entriesState,
            @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
          KV<String, Integer> payload = context.element().getValue();
          entriesState.put(payload.getKey(), new MyInteger(payload.getValue()));
          seenCount.add(1);

          // Nothing is emitted until at least four elements have been counted.
          if (seenCount.read() < 4) {
            return;
          }
          for (Map.Entry<String, MyInteger> entry : entriesState.entries().read()) {
            context.output(KV.of(entry.getKey(), entry.getValue()));
          }
        }
      };

  // Expectations must be installed before the transform is applied and the pipeline is run.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for MapState and no Coder was specified.");

  pipeline
      .apply(
          Create.of(
              KV.of("hello", KV.of("a", 97)),
              KV.of("hello", KV.of("b", 42)),
              KV.of("hello", KV.of("b", 42)),
              KV.of("hello", KV.of("c", 12))))
      .apply(ParDo.of(fn))
      .setCoder(KvCoder.of(StringUtf8Coder.of(), myIntegerCoder));
  pipeline.run();
}
Aggregations