Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
From the class ParDoTest, method testValueStateDedup.
/**
 * Checks that a stateful {@code DoFn} backed by a {@code ValueState} emits the value for each key
 * only once.
 *
 * <p>The input holds 15 copies of one {@code KV} for each of 50 keys, shuffled; the expected
 * output is the set of the 50 distinct values.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateDedup() {
  final String stateId = "foo";

  DoFn<KV<Integer, Integer>, Integer> onePerKey =
      new DoFn<KV<Integer, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<Integer>> seenSpec =
            StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateId) ValueState<Integer> seenState) {
          // read() may yield null (hence firstNonNull); a missing value counts as 0.
          Integer timesSeen = MoreObjects.firstNonNull(seenState.read(), 0);
          if (timesSeen != 0) {
            // Something was already emitted for this state cell: drop the duplicate.
            return;
          }
          seenState.write(timesSeen + 1);
          c.output(c.element().getValue());
        }
      };

  final int numKeys = 50;
  // Enough repeats of every element that deduplication is actually exercised.
  final int copiesPerKey = 15;

  List<KV<Integer, Integer>> input = new ArrayList<>();
  // Each distinct value is expected exactly once in the output.
  Set<Integer> expectedOutput = new HashSet<>();
  for (int key = 0; key < numKeys; key++) {
    int value = 1000 + key;
    expectedOutput.add(value);
    for (int copy = 0; copy < copiesPerKey; copy++) {
      input.add(KV.of(key, value));
    }
  }
  Collections.shuffle(input);

  PCollection<KV<Integer, Integer>> keyed = pipeline.apply(Create.of(input));
  PCollection<Integer> deduped = keyed.apply(ParDo.of(onePerKey));

  PAssert.that(deduped).containsInAnyOrder(expectedOutput);
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
From the class ParDoTest, method testSetState.
/**
 * Exercises {@code SetState} alongside an integer-summing {@code CombiningState}.
 *
 * <p>Every element's value is added to the set state and the counter is bumped by one; once the
 * counter reads 4 or more, a copy of the set contents is emitted. The four input elements share
 * the key {@code "hello"} and contain the value 42 twice, so the single expected output is the
 * set {97, 42, 12}.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesSetState.class})
public void testSetState() {
  final String stateId = "foo";
  final String countStateId = "count";

  DoFn<KV<String, Integer>, Set<Integer>> fn =
      new DoFn<KV<String, Integer>, Set<Integer>>() {

        @StateId(stateId)
        private final StateSpec<SetState<Integer>> setState = StateSpecs.set(VarIntCoder.of());

        @StateId(countStateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> countState =
            StateSpecs.combiningFromInputInternal(VarIntCoder.of(), Sum.ofIntegers());

        @ProcessElement
        public void processElement(
            ProcessContext c,
            @StateId(stateId) SetState<Integer> state,
            @StateId(countStateId) CombiningState<Integer, int[], Integer> count) {
          state.add(c.element().getValue());
          count.add(1);
          if (count.read() < 4) {
            // Fewer than four elements recorded so far: nothing to emit yet.
            return;
          }
          // Emit a copy of whatever the set state currently holds.
          c.output(Sets.newHashSet(state.read()));
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(
          Create.of(KV.of("hello", 97), KV.of("hello", 42), KV.of("hello", 42), KV.of("hello", 12)));
  PCollection<Set<Integer>> output = input.apply(ParDo.of(fn));

  PAssert.that(output).containsInAnyOrder(Sets.newHashSet(97, 42, 12));
  pipeline.run();
}
Use of org.apache.beam.sdk.state.StateSpec in the Apache Beam project.
From the class ParDoTest, method testCoderInferenceOfList.
/**
 * Runs a stateful {@code DoFn} whose {@code ValueState<List<MyInteger>>} spec is declared via
 * {@code StateSpecs.value()} without an explicit coder, after registering {@code MyIntegerCoder}
 * for {@code MyInteger} in the pipeline's coder registry.
 *
 * <p>There is no {@code PAssert} here; the test only builds and runs the pipeline. Presumably it
 * passes when the state coder can be inferred from the registry - confirm against the runner.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testCoderInferenceOfList() {
  final String stateId = "foo";
  MyIntegerCoder myIntegerCoder = MyIntegerCoder.of();
  pipeline.getCoderRegistry().registerCoderForClass(MyInteger.class, myIntegerCoder);

  DoFn<KV<String, Integer>, List<MyInteger>> fn =
      new DoFn<KV<String, Integer>, List<MyInteger>>() {

        @StateId(stateId)
        private final StateSpec<ValueState<List<MyInteger>>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(stateId) ValueState<List<MyInteger>> state) {
          MyInteger myInteger = new MyInteger(c.element().getValue());
          List<MyInteger> currentValue = state.read();

          // Start a one-element list on first use, otherwise append to the stored list.
          List<MyInteger> newValue;
          if (currentValue == null) {
            newValue = Collections.singletonList(myInteger);
          } else {
            newValue =
                ImmutableList.<MyInteger>builder().addAll(currentValue).add(myInteger).build();
          }

          c.output(newValue);
          state.write(newValue);
        }
      };

  PCollection<KV<String, Integer>> input =
      pipeline.apply(Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84)));
  PCollection<List<MyInteger>> lists = input.apply(ParDo.of(fn));
  // The output coder is set explicitly; only the state spec above omits its coder.
  lists.setCoder(ListCoder.of(myIntegerCoder));

  pipeline.run();
}
Aggregations