use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnOperatorTest method testExactlyOnceBufferingKeyed.
@Test
public void testExactlyOnceBufferingKeyed() throws Exception {
FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
options.setMaxBundleSize(2L);
options.setCheckpointingInterval(1L);
TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
StringUtf8Coder keyCoder = StringUtf8Coder.of();
KvToByteBufferKeySelector<String, String> keySelector = new KvToByteBufferKeySelector<>(keyCoder, new SerializablePipelineOptions(options));
KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder = WindowedValue.getValueOnlyCoder(kvCoder);
DoFn<KV<String, String>, KV<String, String>> doFn = new DoFn<KV<String, String>, KV<String, String>>() {
@StartBundle
public void startBundle() {
numStartBundleCalled++;
}
@ProcessElement
// Use RequiresStableInput to force buffering elements
@RequiresStableInput
public void processElement(ProcessContext context) {
context.output(context.element());
}
@FinishBundle
public void finishBundle(FinishBundleContext context) {
context.output(KV.of("key3", "finishBundle"), BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
}
};
DoFnOperator.MultiOutputOutputManagerFactory<KV<String, String>> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, WindowedValue.getFullCoder(kvCoder, GlobalWindow.Coder.INSTANCE), new SerializablePipelineOptions(options));
Supplier<DoFnOperator<KV<String, String>, KV<String, String>>> doFnOperatorSupplier = () -> new DoFnOperator<>(doFn, "stepName", windowedValueCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), outputManagerFactory, WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
options, keyCoder, keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = doFnOperatorSupplier.get();
OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, keySelector.getProducedType());
testHarness.open();
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "a"))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "b"))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key2", "c"))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key2", "d"))));
assertThat(Iterables.size(testHarness.getOutput()), is(0));
OperatorSubtaskState backup = testHarness.snapshot(0, 0);
doFnOperator.notifyCheckpointComplete(0L);
assertThat(numStartBundleCalled, is(1));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(KV.of("key", "a")), WindowedValue.valueInGlobalWindow(KV.of("key", "b")), WindowedValue.valueInGlobalWindow(KV.of("key2", "c")), WindowedValue.valueInGlobalWindow(KV.of("key2", "d")), WindowedValue.valueInGlobalWindow(KV.of("key3", "finishBundle"))));
doFnOperator = doFnOperatorSupplier.get();
testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, keySelector.getProducedType());
// restore from the snapshot
testHarness.initializeState(backup);
testHarness.open();
doFnOperator.notifyCheckpointComplete(0L);
assertThat(numStartBundleCalled, is(2));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(KV.of("key", "a")), WindowedValue.valueInGlobalWindow(KV.of("key", "b")), WindowedValue.valueInGlobalWindow(KV.of("key2", "c")), WindowedValue.valueInGlobalWindow(KV.of("key2", "d")), WindowedValue.valueInGlobalWindow(KV.of("key3", "finishBundle"))));
// repeat to see if elements are evicted
doFnOperator.notifyCheckpointComplete(1L);
assertThat(numStartBundleCalled, is(2));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(KV.of("key", "a")), WindowedValue.valueInGlobalWindow(KV.of("key", "b")), WindowedValue.valueInGlobalWindow(KV.of("key2", "c")), WindowedValue.valueInGlobalWindow(KV.of("key2", "d")), WindowedValue.valueInGlobalWindow(KV.of("key3", "finishBundle"))));
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnOperatorTest method testStateRestore.
@Test
public void testStateRestore() throws Exception {
DoFn<KV<String, Long>, KV<String, Long>> filterElementsEqualToCountFn = new DoFn<KV<String, Long>, KV<String, Long>>() {
@StateId("counter")
private final StateSpec<ValueState<Long>> counterSpec = StateSpecs.value(VarLongCoder.of());
@ProcessElement
public void processElement(ProcessContext context, @StateId("counter") ValueState<Long> count) {
long currentCount = Optional.ofNullable(count.read()).orElse(0L);
currentCount = currentCount + 1;
count.write(currentCount);
KV<String, Long> currentElement = context.element();
if (currentCount == currentElement.getValue()) {
context.output(currentElement);
}
}
};
WindowingStrategy<Object, GlobalWindow> windowingStrategy = WindowingStrategy.globalDefault();
TupleTag<KV<String, Long>> outputTag = new TupleTag<>("main-output");
StringUtf8Coder keyCoder = StringUtf8Coder.of();
KvToByteBufferKeySelector<String, Long> keySelector = new KvToByteBufferKeySelector<>(keyCoder, null);
KvCoder<String, Long> coder = KvCoder.of(keyCoder, VarLongCoder.of());
FullWindowedValueCoder<KV<String, Long>> kvCoder = WindowedValue.getFullCoder(coder, windowingStrategy.getWindowFn().windowCoder());
CoderTypeInformation<ByteBuffer> keyCoderInfo = new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());
OneInputStreamOperatorTestHarness<WindowedValue<KV<String, Long>>, WindowedValue<KV<String, Long>>> testHarness = createTestHarness(windowingStrategy, filterElementsEqualToCountFn, kvCoder, kvCoder, keyCoder, outputTag, keyCoderInfo, keySelector);
testHarness.open();
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
testHarness.close();
testHarness = createTestHarness(windowingStrategy, filterElementsEqualToCountFn, kvCoder, kvCoder, keyCoder, outputTag, keyCoderInfo, keySelector);
testHarness.initializeState(snapshot);
testHarness.open();
// after restore: counter = 2
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 4L))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));
testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("a", 100L))));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.valueInGlobalWindow(KV.of("a", 4L)), WindowedValue.valueInGlobalWindow(KV.of("a", 5L))));
testHarness.close();
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnOperatorTest method testFailOnRequiresStableInputAndDisabledCheckpointing.
@Test(expected = IllegalStateException.class)
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
StringUtf8Coder keyCoder = StringUtf8Coder.of();
KvToByteBufferKeySelector<String, String> keySelector = new KvToByteBufferKeySelector<>(keyCoder, null);
KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder = WindowedValue.getValueOnlyCoder(kvCoder);
DoFn<KV<String, String>, KV<String, String>> doFn = new DoFn<KV<String, String>, KV<String, String>>() {
@ProcessElement
// Use RequiresStableInput to force buffering elements
@RequiresStableInput
public void processElement(ProcessContext context) {
context.output(context.element());
}
};
FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
DoFnOperator.MultiOutputOutputManagerFactory<KV<String, String>> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, WindowedValue.getFullCoder(kvCoder, GlobalWindow.Coder.INSTANCE), new SerializablePipelineOptions(options));
// should make the DoFnOperator creation fail
options.setCheckpointingInterval(-1L);
new DoFnOperator<>(doFn, "stepName", windowedValueCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), outputManagerFactory, WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
options, keyCoder, keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class DoFnOperatorTest method testTimersRestore.
@Test
public void testTimersRestore() throws Exception {
final Instant timerTimestamp = new Instant(1000);
final String outputMessage = "Timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
private static final String EVENT_TIMER_ID = "eventTimer";
@TimerId(EVENT_TIMER_ID)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(EVENT_TIMER_ID) Timer timer) {
timer.set(timerTimestamp);
}
@OnTimer(EVENT_TIMER_ID)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(outputMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
TupleTag<String> outputTag = new TupleTag<>("main-output");
final CoderTypeSerializer<WindowedValue<String>> outputSerializer = new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
CoderTypeInformation<ByteBuffer> keyCoderInfo = new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, keyCoder, outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.open();
testHarness.processWatermark(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register a timer
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// snapshot and restore
final OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
testHarness.close();
testHarness = createTestHarness(windowingStrategy, fn, inputCoder, outputCoder, VarIntCoder.of(), outputTag, keyCoderInfo, keySelector);
testHarness.setup(outputSerializer);
testHarness.initializeState(snapshot);
testHarness.open();
// this must fire the timer
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(outputMessage, timerTimestamp, window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.
the class GroupByNullKeyTest method testProgram.
// suppress since toString() of Void is called and key is deliberately null
@SuppressWarnings("ObjectToString")
@Test
public void testProgram() throws Exception {
Pipeline p = FlinkTestPipeline.createForStreaming();
p.getOptions().as(FlinkPipelineOptions.class).setParallelism(1);
PCollection<String> output = p.apply(Create.of(Arrays.asList(KV.of(0, "user1"), KV.of(1, "user1"), KV.of(2, "user1"), KV.of(10, "user2"), KV.of(1, "user2"), KV.of(15000, "user2"), KV.of(12000, "user2"), KV.of(25000, "user3")))).apply(ParDo.of(new ExtractUserAndTimestamp())).apply(Window.<String>into(FixedWindows.of(Duration.standardHours(1))).triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO).discardingFiredPanes()).apply(ParDo.of(new DoFn<String, KV<Void, String>>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
String elem = c.element();
c.output(KV.of(null, elem));
}
})).apply(GroupByKey.create()).apply(ParDo.of(new DoFn<KV<Void, Iterable<String>>, String>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
KV<Void, Iterable<String>> elem = c.element();
StringBuilder str = new StringBuilder();
str.append("k: " + elem.getKey() + " v:");
for (String v : elem.getValue()) {
str.append(" " + v);
}
c.output(str.toString());
}
}));
output.apply(TextIO.write().to(resultPath));
p.run();
}
Aggregations