Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class FlinkStateInternalsTest, method testWatermarkHoldsPersistence.
/**
 * Verifies that watermark holds are stored per key and per namespace, that the minimum hold
 * reported by {@code minWatermarkHoldMs()} spans all keys, and that this minimum is still
 * reported after a fresh {@code FlinkStateInternals} is created over the same keyed state
 * backend.
 */
@Test
public void testWatermarkHoldsPersistence() throws Exception {
  KeyedStateBackend<ByteBuffer> keyedStateBackend = createStateBackend();
  // Declared with its type argument rather than as a raw type, so the compiler keeps checking
  // the generic signatures of the calls below.
  FlinkStateInternals<String> stateInternals =
      new FlinkStateInternals<>(
          keyedStateBackend,
          StringUtf8Coder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  StateTag<WatermarkHoldState> stateTag =
      StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);
  WatermarkHoldState globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
  WatermarkHoldState fixedWindow =
      stateInternals.state(
          StateNamespaces.window(
              IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))),
          stateTag);

  // Before any hold is added, the minimum hold is expected to be Long.MAX_VALUE.
  Instant noHold = new Instant(Long.MAX_VALUE);
  assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));

  // Each earlier hold, in either namespace, lowers the reported minimum.
  Instant high = new Instant(10);
  globalWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(high.getMillis()));

  Instant middle = new Instant(5);
  fixedWindow.add(middle);
  assertThat(stateInternals.minWatermarkHoldMs(), is(middle.getMillis()));

  Instant low = new Instant(1);
  globalWindow.add(low);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Try to overwrite with later hold (should not succeed)
  globalWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
  fixedWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Watermark hold should be computed across all keys
  ByteBuffer firstKey = keyedStateBackend.getCurrentKey();
  changeKey(keyedStateBackend);
  ByteBuffer secondKey = keyedStateBackend.getCurrentKey();
  assertThat(firstKey, is(Matchers.not(secondKey)));
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // ..but be tracked per key / window
  assertThat(globalWindow.read(), is(Matchers.nullValue()));
  assertThat(fixedWindow.read(), is(Matchers.nullValue()));
  globalWindow.add(middle);
  fixedWindow.add(high);
  assertThat(globalWindow.read(), is(middle));
  assertThat(fixedWindow.read(), is(high));

  // Old key should give previous results
  keyedStateBackend.setCurrentKey(firstKey);
  assertThat(globalWindow.read(), is(low));
  assertThat(fixedWindow.read(), is(middle));

  // Discard watermark view and recover it
  stateInternals =
      new FlinkStateInternals<>(
          keyedStateBackend,
          StringUtf8Coder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
  fixedWindow =
      stateInternals.state(
          StateNamespaces.window(
              IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))),
          stateTag);

  // Watermark hold across all keys should be unchanged
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Check the holds for the second key and clear them
  keyedStateBackend.setCurrentKey(secondKey);
  assertThat(globalWindow.read(), is(middle));
  assertThat(fixedWindow.read(), is(high));
  globalWindow.clear();
  fixedWindow.clear();

  // Check the holds for the first key and clear them
  keyedStateBackend.setCurrentKey(firstKey);
  assertThat(globalWindow.read(), is(low));
  assertThat(fixedWindow.read(), is(middle));

  // The global-window hold of the first key still pins the minimum until it is cleared too.
  fixedWindow.clear();
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
  globalWindow.clear();
  assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class AbstractParDoP, method isCooperativenessAllowed.
/**
 * Looks up the Jet "processors cooperative" setting carried by the given options.
 *
 * @param serializablePipelineOptions wrapper around the pipeline options to inspect
 * @return the value of {@code getJetProcessorsCooperative()} on the options viewed as
 *     {@code JetPipelineOptions}
 */
private static Boolean isCooperativenessAllowed(SerializablePipelineOptions serializablePipelineOptions) {
  return serializablePipelineOptions
      .get()
      .as(JetPipelineOptions.class)
      .getJetProcessorsCooperative();
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class DoFnOperatorTest, method testExactlyOnceBuffering.
/**
 * Checks that a {@code @RequiresStableInput} DoFn produces no output until a checkpoint has
 * completed, and that an operator restored from the snapshot emits the same elements again once
 * that checkpoint is acknowledged.
 */
@Test
public void testExactlyOnceBuffering() throws Exception {
  FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
  pipelineOptions.setMaxBundleSize(2L);
  pipelineOptions.setCheckpointingInterval(1L);

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> elementCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  numStartBundleCalled = 0;
  DoFn<String, String> stableInputFn =
      new DoFn<String, String>() {
        @StartBundle
        public void startBundle(StartBundleContext context) {
          numStartBundleCalled += 1;
        }

        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutput,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(pipelineOptions));

  // A supplier is used because the test needs a second, identically configured operator
  // for the restore step further down.
  Supplier<DoFnOperator<String, String>> operatorFactory =
      () ->
          new DoFnOperator<>(
              stableInputFn,
              "stepName",
              elementCoder,
              Collections.emptyMap(),
              mainOutput,
              Collections.emptyList(),
              outputFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              pipelineOptions,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> operator = operatorFactory.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("a")));
  harness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("b")));

  // Nothing is emitted and no bundle is started before a checkpoint completes.
  assertThat(Iterables.size(harness.getOutput()), is(0));
  assertThat(numStartBundleCalled, is(0));

  // Take a snapshot, then acknowledge it; only now do the elements show up in the output.
  OperatorSubtaskState checkpoint = harness.snapshot(0, 0);
  operator.notifyCheckpointComplete(0L);

  assertThat(numStartBundleCalled, is(1));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          WindowedValue.valueInGlobalWindow("finishBundle")));

  operator = operatorFactory.get();
  harness = new OneInputStreamOperatorTestHarness<>(operator);

  // restore from the snapshot
  harness.initializeState(checkpoint);
  harness.open();

  operator.notifyCheckpointComplete(0L);

  assertThat(numStartBundleCalled, is(2));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          WindowedValue.valueInGlobalWindow("finishBundle")));

  // repeat to see if elements are evicted
  operator.notifyCheckpointComplete(1L);

  assertThat(numStartBundleCalled, is(2));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          WindowedValue.valueInGlobalWindow("finishBundle")));
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class DoFnOperatorTest, method testLateDroppingForStatefulFn.
/**
 * Checks that a stateful DoFn under 10 ms fixed windows still receives elements of the window
 * [0, 10) while the watermark is at 0 or 9, and produces no output for such an element once the
 * watermark has reached 10.
 */
@Test
public void testLateDroppingForStatefulFn() throws Exception {
  WindowingStrategy<Object, IntervalWindow> windowing =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));

  DoFn<Integer, String> statefulFn =
      new DoFn<Integer, String>() {
        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(context.element().toString());
        }
      };

  VarIntCoder intCoder = VarIntCoder.of();
  Coder<WindowedValue<Integer>> windowedInputCoder =
      WindowedValue.getFullCoder(intCoder, windowing.getWindowFn().windowCoder());
  Coder<WindowedValue<String>> windowedOutputCoder =
      WindowedValue.getFullCoder(StringUtf8Coder.of(), windowing.getWindowFn().windowCoder());

  // The element value itself serves as the key.
  KeySelector<WindowedValue<Integer>, ByteBuffer> byEncodedValue =
      element -> FlinkKeyUtils.encodeKey(element.getValue(), intCoder);

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  DoFnOperator.MultiOutputOutputManagerFactory<String> outputFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutput,
          windowedOutputCoder,
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  DoFnOperator<Integer, String> operator =
      new DoFnOperator<>(
          statefulFn,
          "stepName",
          windowedInputCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          outputFactory,
          windowing,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          FlinkPipelineOptions.defaults(),
          intCoder, /* key coder */
          byEncodedValue,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          operator,
          byEncodedValue,
          new CoderTypeInformation<>(
              FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

  harness.open();

  harness.processWatermark(0);

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), Duration.millis(10));

  // this should not be late
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("13", new Instant(0), firstWindow, PaneInfo.NO_FIRING)));

  harness.getOutput().clear();

  harness.processWatermark(9);

  // this should still not be considered late
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));

  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("17", new Instant(0), firstWindow, PaneInfo.NO_FIRING)));

  harness.getOutput().clear();

  harness.processWatermark(10);

  // this should now be considered late
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));

  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());

  harness.close();
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class DoFnOperatorTest, method testBundleKeyed.
/**
 * Exercises bundling on a keyed operator: a bundle is finished after two elements, the
 * {@code finishBundle} output produced during {@code snapshot()} is buffered, and after
 * restoring from that snapshot the buffered element is emitted ahead of newly processed input.
 */
@Test
public void testBundleKeyed() throws Exception {
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector<String, String> byKey =
      new KvToByteBufferKeySelector<>(
          stringCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  KvCoder<String, String> pairCoder = KvCoder.of(stringCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> inputCoder =
      WindowedValue.getValueOnlyCoder(pairCoder);

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
  pipelineOptions.setMaxBundleSize(2L);
  pipelineOptions.setMaxBundleTimeMills(10L);

  DoFn<KV<String, String>, String> valueExtractingFn =
      new DoFn<KV<String, String>, String>() {
        @ProcessElement
        public void processElement(ProcessContext ctx) {
          // Change output type of element to test that we do not depend on the input keying
          ctx.output(ctx.element().getValue());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutput,
          WindowedValue.getFullCoder(pairCoder.getValueCoder(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(pipelineOptions));

  DoFnOperator<KV<String, String>, String> operator =
      new DoFnOperator<>(
          valueExtractingFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          outputFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          pipelineOptions,
          stringCoder,
          byKey,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<String>>
      harness =
          new KeyedOneInputStreamOperatorTestHarness<>(
              operator, byKey, byKey.getProducedType());

  harness.open();

  harness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "a"))));
  harness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "b"))));
  harness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "c"))));

  // The bundle is finished after the second element, so "finishBundle" precedes "c".
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          WindowedValue.valueInGlobalWindow("finishBundle"),
          WindowedValue.valueInGlobalWindow("c")));

  // Take a snapshot
  OperatorSubtaskState checkpoint = harness.snapshot(0, 0);

  // Finish bundle element will be buffered as part of finishing a bundle in snapshot()
  PushedBackElementsHandler<KV<Integer, WindowedValue<?>>> pushedBack =
      operator.outputManager.pushedBackElementsHandler;
  assertThat(pushedBack, instanceOf(NonKeyedPushedBackElementsHandler.class));
  List<KV<Integer, WindowedValue<?>>> buffered =
      pushedBack.getElements().collect(Collectors.toList());
  assertThat(
      buffered, contains(KV.of(0, WindowedValue.valueInGlobalWindow("finishBundle"))));

  harness.close();

  // Build a second, identically configured operator to restore the snapshot into.
  operator =
      new DoFnOperator<>(
          valueExtractingFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          outputFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          pipelineOptions,
          stringCoder,
          byKey,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  harness =
      new KeyedOneInputStreamOperatorTestHarness<>(operator, byKey, byKey.getProducedType());

  // Restore snapshot
  harness.initializeState(checkpoint);
  harness.open();

  // startBundle will output the buffered elements.
  harness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "d"))));

  // check finishBundle by timeout
  harness.setProcessingTime(10);

  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          // The first finishBundle is restored from the checkpoint
          WindowedValue.valueInGlobalWindow("finishBundle"),
          WindowedValue.valueInGlobalWindow("d"),
          WindowedValue.valueInGlobalWindow("finishBundle")));

  harness.close();
}
Aggregations