Use of org.apache.beam.sdk.state.StateSpec in project beam by apache.
From the class ParDoTest, method testSetStateCoderInference.
/**
 * Runs a stateful {@link DoFn} whose {@link SetState} is declared with {@code StateSpecs.set()},
 * i.e. without an explicit element coder, after a coder for {@code MyInteger} has been registered
 * with the pipeline's coder registry.
 *
 * <p>Every input value is added to the set and counted; once four elements have been counted a
 * copy of the set is emitted. The input contains 42 twice, and the assertion expects it to show
 * up only once in the emitted set.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class})
public void testSetStateCoderInference() {
  final String stateId = "foo";
  final String countStateId = "count";

  // Register the coder up front so it is available to the coder registry for MyInteger.
  Coder<MyInteger> elementCoder = MyIntegerCoder.of();
  pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, elementCoder);

  DoFn<KV<String, Integer>, Set<MyInteger>> collectDistinctFn =
      new DoFn<KV<String, Integer>, Set<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<SetState<MyInteger>> setState = StateSpecs.set();

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
            StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(stateId) SetState<MyInteger> seen,
            @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
          seen.add(new MyInteger(context.element().getValue()));
          seenCount.add(1);
          if (seenCount.read() < 4) {
            // Not all four inputs have been processed yet; nothing to emit.
            return;
          }
          Set<MyInteger> snapshot = Sets.newHashSet(seen.read());
          context.output(snapshot);
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(
          Create.of(
              KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
  PCollection<Set<MyInteger>> output =
      input.apply(ParDo.of(collectDistinctFn)).setCoder(SetCoder.of(elementCoder));

  Set<MyInteger> expected =
      Sets.newHashSet(new MyInteger(97), new MyInteger(42), new MyInteger(12));
  PAssert.that(output).containsInAnyOrder(expected);
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in project beam by apache.
From the class ParDoTest, method testBagState.
/**
 * Exercises {@link BagState} in a stateful {@link DoFn}: each element's value is appended to the
 * bag, and once the bag holds at least four values they are emitted as one sorted list.
 *
 * <p>All four inputs share the key {@code "hello"}, and the assertion expects the single output
 * {@code [12, 42, 84, 97]}.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testBagState() {
  final String stateId = "foo";

  DoFn<KV<String, Integer>, List<Integer>> fn =
      new DoFn<KV<String, Integer>, List<Integer>>() {

        @StateId(stateId)
        private final StateSpec<BagState<Integer>> bufferState = StateSpecs.bag(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) BagState<Integer> state) {
          state.add(c.element().getValue());
          // Read only after the add. An iterable obtained before the add is not guaranteed to
          // reflect the newly added element, so sorting it could drop the fourth value.
          Iterable<Integer> currentValue = state.read();
          if (Iterables.size(currentValue) >= 4) {
            List<Integer> sorted = Lists.newArrayList(currentValue);
            Collections.sort(sorted);
            c.output(sorted);
          }
        }
      };

  PCollection<List<Integer>> output =
      pipeline
          .apply(
              Create.of(
                  KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 84), KV.of("hello", 12)))
          .apply(ParDo.of(fn));

  PAssert.that(output).containsInAnyOrder(Lists.newArrayList(12, 42, 84, 97));
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in project beam by apache.
From the class ParDoTest, method testValueStateCoderInferenceFailure.
/**
 * Expects pipeline construction or execution to fail with a {@link RuntimeException} when a
 * {@link ValueState} is declared with {@code StateSpecs.value()} (no explicit coder) and this
 * test does not register a coder for {@code MyInteger} with the pipeline.
 *
 * <p>The output coder is still set explicitly, so the only missing coder is the one for the
 * state cell.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateCoderInferenceFailure() throws Exception {
  final String stateId = "foo";
  MyIntegerCoder outputCoder = MyIntegerCoder.of();

  DoFn<KV<String, Integer>, MyInteger> countingFn =
      new DoFn<KV<String, Integer>, MyInteger>() {

        @StateId(stateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(stateId) ValueState<MyInteger> counter) {
          MyInteger previous = MoreObjects.firstNonNull(counter.read(), new MyInteger(0));
          context.output(previous);
          MyInteger next = new MyInteger(previous.getValue() + 1);
          counter.write(next);
        }
      };

  // Arm the expected-exception rule before any transform is applied.
  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for ValueState and no Coder was specified.");

  PCollection<KV<String, Integer>> input =
      pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
  input.apply(ParDo.of(countingFn)).setCoder(outputCoder);

  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in project beam by apache.
From the class ParDoTest, method testValueStateTaggedOutput.
/**
 * Combines {@link ValueState} with tagged outputs: the DoFn keeps a counter in state that starts
 * at 0 and is incremented after every element, emitting the pre-increment counter to the main
 * (even) output when it is even and to the odd-tagged output otherwise.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateTaggedOutput() {
  final String stateId = "foo";

  // Anonymous subclasses, as in the original, so each tag is a distinct TupleTag subclass.
  final TupleTag<Integer> evenTag = new TupleTag<Integer>() {};
  final TupleTag<Integer> oddTag = new TupleTag<Integer>() {};

  DoFn<KV<String, Integer>, Integer> parityFn =
      new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState =
            StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context, @StateId(stateId) ValueState<Integer> counter) {
          Integer seenSoFar = MoreObjects.firstNonNull(counter.read(), 0);
          if (seenSoFar % 2 != 0) {
            context.output(oddTag, seenSoFar);
          } else {
            // Main output; it is bound to evenTag by withOutputTags below.
            context.output(seenSoFar);
          }
          counter.write(seenSoFar + 1);
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(
          Create.of(
              KV.of("hello", 42),
              KV.of("hello", 97),
              KV.of("hello", 84),
              KV.of("goodbye", 33),
              KV.of("hello", 859),
              KV.of("goodbye", 83945)));
  PCollectionTuple output =
      input.apply(ParDo.of(parityFn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

  PCollection<Integer> evens = output.get(evenTag);
  PCollection<Integer> odds = output.get(oddTag);

  // "hello" has four elements (counters 0..3) and "goodbye" has two (counters 0..1).
  // Even counters: 0 and 2 from "hello", 0 from "goodbye".
  PAssert.that(evens).containsInAnyOrder(0, 2, 0);
  // Odd counters: 1 and 3 from "hello", 1 from "goodbye".
  PAssert.that(odds).containsInAnyOrder(1, 3, 1);
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in project beam by apache.
From the class ParDoTest, method testMapState.
/**
 * Exercises {@link MapState} in a stateful {@link DoFn}: each element's inner key/value pair is
 * put into the map and counted; once four elements have been counted every map entry is emitted
 * as a {@link KV}.
 *
 * <p>The input contains the pair {@code ("b", 42)} twice, and the assertion expects exactly one
 * output per distinct map key.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesMapState.class})
public void testMapState() {
  final String stateId = "foo";
  final String countStateId = "count";

  DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>> dumpMapFn =
      new DoFn<KV<String, KV<String, Integer>>, KV<String, Integer>>() {

        @StateId(stateId)
        private final StateSpec<MapState<String, Integer>> mapState =
            StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
            StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @StateId(stateId) MapState<String, Integer> entriesByKey,
            @StateId(countStateId) CombiningState<Integer, int[], Integer> seenCount) {
          KV<String, Integer> pair = context.element().getValue();
          entriesByKey.put(pair.getKey(), pair.getValue());
          seenCount.add(1);
          if (seenCount.read() < 4) {
            // Fewer than four elements processed so far; keep accumulating.
            return;
          }
          for (Map.Entry<String, Integer> entry : entriesByKey.entries().read()) {
            context.output(KV.of(entry.getKey(), entry.getValue()));
          }
        }
      };

  PCollection<KV<String, KV<String, Integer>>> input =
      pipeline.apply(
          Create.of(
              KV.of("hello", KV.of("a", 97)),
              KV.of("hello", KV.of("b", 42)),
              KV.of("hello", KV.of("b", 42)),
              KV.of("hello", KV.of("c", 12))));
  PCollection<KV<String, Integer>> output = input.apply(ParDo.of(dumpMapFn));

  PAssert.that(output).containsInAnyOrder(KV.of("a", 97), KV.of("b", 42), KV.of("c", 12));
  pipeline.run();
}
Aggregations