Use of org.apache.beam.sdk.state.ValueState in project beam by apache.
The class ParDoTest, method testValueStateTaggedOutput.
/**
 * Exercises a stateful {@link DoFn} that emits to both its main output and an additional tagged
 * output.
 *
 * <p>For every element the DoFn reads an integer from a {@link ValueState} (treating an unset
 * state as 0), emits that integer to the main output when it is even or to {@code oddTag} when it
 * is odd, and then stores the integer plus one back into the state. The element's own value is
 * never emitted; only the stored counter is.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateTaggedOutput() {
  final String stateId = "foo";
  final TupleTag<Integer> evenTag = new TupleTag<Integer>() {};
  final TupleTag<Integer> oddTag = new TupleTag<Integer>() {};

  DoFn<KV<String, Integer>, Integer> fn =
      new DoFn<KV<String, Integer>, Integer>() {
        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(ProcessContext c, @StateId(stateId) ValueState<Integer> state) {
          // An unset state reads as null; start counting from zero in that case.
          Integer stored = state.read();
          int counter = (stored == null) ? 0 : stored;

          if (counter % 2 != 0) {
            // Odd counters go to the additional tagged output.
            c.output(oddTag, counter);
          } else {
            // Even counters go to the main output (evenTag in withOutputTags below).
            c.output(counter);
          }

          state.write(counter + 1);
        }
      };

  PCollectionTuple results =
      pipeline
          .apply(
              Create.of(
                  KV.of("hello", 42),
                  KV.of("hello", 97),
                  KV.of("hello", 84),
                  KV.of("goodbye", 33),
                  KV.of("hello", 859),
                  KV.of("goodbye", 83945)))
          .apply(ParDo.of(fn).withOutputTags(evenTag, TupleTagList.of(oddTag)));

  PCollection<Integer> evenCounters = results.get(evenTag);
  PCollection<Integer> oddCounters = results.get(oddTag);

  // Four "hello" elements yield counters 0..3 and two "goodbye" elements yield 0..1,
  // so the even side sees 0 and 2 from "hello" plus 0 from "goodbye".
  PAssert.that(evenCounters).containsInAnyOrder(0, 2, 0);
  // The odd side sees 1 and 3 from "hello" plus 1 from "goodbye".
  PAssert.that(oddCounters).containsInAnyOrder(1, 3, 1);

  pipeline.run();
}
Use of org.apache.beam.sdk.state.ValueState in project beam by apache.
The class ParDoTest, method testValueStateDedup.
/**
 * Exercises a stateful {@link DoFn} that emits an element's value only while its
 * {@link ValueState} marker is unset or zero.
 *
 * <p>The input holds 50 distinct keys, each repeated 15 times with the value {@code 1000 + key},
 * shuffled before being fed to the pipeline. The expected output is the set of distinct values,
 * each exactly once.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testValueStateDedup() {
  final String stateId = "foo";

  DoFn<KV<Integer, Integer>, Integer> onePerKey =
      new DoFn<KV<Integer, Integer>, Integer>() {
        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> seenSpec = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateId) ValueState<Integer> seenState) {
          // An unset state reads as null, which counts as "not seen yet".
          Integer stored = seenState.read();
          int seenCount = (stored == null) ? 0 : stored;
          if (seenCount != 0) {
            // The marker is already set: drop the element.
            return;
          }
          seenState.write(seenCount + 1);
          c.output(c.element().getValue());
        }
      };

  int numKeys = 50;
  int copiesPerKey = 15;
  int valueOffset = 1000;

  // A big enough list that we can see some deduping
  List<KV<Integer, Integer>> duplicatedInput = new ArrayList<>();
  // The output should have no dupes
  Set<Integer> distinctValues = new HashSet<>();

  for (int key = 0; key < numKeys; key++) {
    int value = valueOffset + key;
    distinctValues.add(value);
    for (int copy = 0; copy < copiesPerKey; copy++) {
      duplicatedInput.add(KV.of(key, value));
    }
  }
  Collections.shuffle(duplicatedInput);

  PCollection<Integer> deduped =
      pipeline.apply(Create.of(duplicatedInput)).apply(ParDo.of(onePerKey));

  PAssert.that(deduped).containsInAnyOrder(distinctValues);
  pipeline.run();
}
Use of org.apache.beam.sdk.state.ValueState in project beam by apache.
The class ParDoTest, method testCoderInferenceOfList.
/**
 * Runs a stateful {@link DoFn} whose {@code ValueState<List<MyInteger>>} spec is declared via
 * {@code StateSpecs.value()} with no explicit coder.
 *
 * <p>A {@code MyIntegerCoder} is registered for {@code MyInteger} on the pipeline's coder registry
 * before the DoFn is applied; presumably the state coder is inferred from that registration
 * (confirm against the StateSpecs documentation). The test makes no {@code PAssert}: it only
 * runs the pipeline.
 */
@Test
@Category({ ValidatesRunner.class, UsesStatefulParDo.class })
public void testCoderInferenceOfList() {
  final String stateId = "foo";
  MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
  pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

  DoFn<KV<String, Integer>, List<MyInteger>> fn =
      new DoFn<KV<String, Integer>, List<MyInteger>>() {
        @StateId(stateId)
        private final StateSpec<ValueState<List<MyInteger>>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateId) ValueState<List<MyInteger>> state) {
          MyInteger incoming = new MyInteger(c.element().getValue());
          List<MyInteger> previous = state.read();

          List<MyInteger> updated;
          if (previous == null) {
            // Nothing stored yet: the list starts with just this element.
            updated = Collections.singletonList(incoming);
          } else {
            // Append to a fresh immutable copy of what was stored.
            updated =
                ImmutableList.<MyInteger>builder().addAll(previous).add(incoming).build();
          }

          c.output(updated);
          state.write(updated);
        }
      };

  // The output coder is set explicitly; only the state spec is left without one.
  pipeline
      .apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)))
      .apply(ParDo.of(fn))
      .setCoder(ListCoder.of(myIntegerCoder));

  pipeline.run();
}
Aggregations