Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class DoFnOperatorTest, method testBundle.
/**
 * Checks when {@code DoFnOperator} finishes bundles: after the configured maximum bundle size
 * (2 elements), while a snapshot is drawn, and after the configured maximum bundle time
 * (processing time 10). Output emitted by {@code finishBundle} during the snapshot is expected
 * to be buffered and replayed once the snapshot is restored into a fresh operator.
 */
@Test
public void testBundle() throws Exception {
  FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
  options.setMaxBundleSize(2L);
  options.setMaxBundleTimeMills(10L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");
  WindowedValue.ValueOnlyWindowedValueCoder<String> inputCoder =
      WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());

  // Identity function that additionally emits a marker element whenever a bundle is finished.
  IdentityDoFn<String> doFn =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle(FinishBundleContext context) {
          context.output("finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(options));

  // Values that show up repeatedly in the expected output below.
  WindowedValue<String> finishBundleMarker = WindowedValue.valueInGlobalWindow("finishBundle");
  WindowedValue<String> elementD = WindowedValue.valueInGlobalWindow("d");

  DoFnOperator<String, String> doFnOperator =
      new DoFnOperator<>(
          doFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> firstHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);
  firstHarness.open();

  // Three elements with a maximum bundle size of two: the first bundle is finished after "b".
  for (String element : new String[] {"a", "b", "c"}) {
    firstHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow(element)));
  }
  assertThat(
      stripStreamRecordFromWindowedValue(firstHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          finishBundleMarker,
          WindowedValue.valueInGlobalWindow("c")));

  // Take a snapshot of the operator state.
  OperatorSubtaskState snapshot = firstHarness.snapshot(0, 0);

  // The marker emitted while the bundle is finished inside snapshot() must end up in the
  // pushed-back buffer.
  PushedBackElementsHandler<KV<Integer, WindowedValue<?>>> pushedBackElementsHandler =
      doFnOperator.outputManager.pushedBackElementsHandler;
  assertThat(pushedBackElementsHandler, instanceOf(NonKeyedPushedBackElementsHandler.class));
  List<KV<Integer, WindowedValue<?>>> bufferedElements =
      pushedBackElementsHandler.getElements().collect(Collectors.toList());
  assertThat(
      bufferedElements, contains(KV.of(0, WindowedValue.valueInGlobalWindow("finishBundle"))));

  firstHarness.close();

  // Build a second operator with the same configuration and restore the snapshot into it.
  DoFnOperator<String, String> newDoFnOperator =
      new DoFnOperator<>(
          doFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          outputManagerFactory,
          WindowingStrategy.globalDefault(),
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          options,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> restoredHarness =
      new OneInputStreamOperatorTestHarness<>(newDoFnOperator);
  restoredHarness.initializeState(snapshot);
  restoredHarness.open();

  // Starting a bundle emits the buffered elements first.
  restoredHarness.processElement(new StreamRecord<>(elementD));

  // Advancing processing time finishes the open bundle by timeout.
  restoredHarness.setProcessingTime(10);
  assertThat(
      stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
      contains(finishBundleMarker, elementD, finishBundleMarker));

  // No bundle will be created when sending the MAX watermark
  // (unless pushed back items are emitted), so the output stays unchanged.
  restoredHarness.close();
  assertThat(
      stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
      contains(finishBundleMarker, elementD, finishBundleMarker));

  // close() already cleans up; invoke cleanUp() once more to verify that no new bundle is
  // created afterwards.
  newDoFnOperator.cleanUp();
  assertThat(
      stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
      contains(finishBundleMarker, elementD, finishBundleMarker));
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class DoFnOperatorTest, method testCheckpointBufferingWithMultipleBundles.
/**
 * Verifies that elements emitted while a checkpoint is being taken are buffered instead of being
 * sent downstream, including the case where additional bundles are started and finished from
 * within the bundle-finished callback. The buffered elements must be flushed both when the
 * original operator keeps running and when a fresh operator is restored from the snapshot.
 */
@Test
public void testCheckpointBufferingWithMultipleBundles() throws Exception {
  FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
  options.setMaxBundleSize(10L);
  options.setCheckpointingInterval(1L);

  TupleTag<String> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder coder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(coder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(coder, GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(options));

  // Both the original and the restored operator are built from the same configuration.
  Supplier<DoFnOperator<String, String>> doFnOperatorSupplier =
      () ->
          new DoFnOperator<>(
              new IdentityDoFn<>(),
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              new HashMap<>(), /* side-input mapping */
              Collections.emptyList(), /* side inputs */
              options,
              null,
              null,
              DoFnSchemaInformation.create(),
              Collections.emptyMap());

  DoFnOperator<String, String> doFnOperator = doFnOperatorSupplier.get();
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness =
      new OneInputStreamOperatorTestHarness<>(doFnOperator);

  testHarness.open();

  // start a bundle
  testHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // This callback will be executed in the snapshotState function in the course of
  // finishing the currently active bundle. Everything emitted in the callback should
  // be buffered and not sent downstream.
  doFnOperator.setBundleFinishedCallback(
      () -> {
        try {
          // Clear this early for the test here because we want to finish the bundle from within
          // the callback which would otherwise cause an infinite recursion
          doFnOperator.setBundleFinishedCallback(null);
          testHarness.processElement(
              new StreamRecord<>(WindowedValue.valueInGlobalWindow("trigger another bundle")));
          doFnOperator.invokeFinishBundle();
          testHarness.processElement(
              new StreamRecord<>(
                  WindowedValue.valueInGlobalWindow(
                      "check that the previous element is not flushed")));
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

  OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);

  // Check that we have only the element which was emitted before the snapshot
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(WindowedValue.valueInGlobalWindow("regular element")));

  // Check that we would flush the buffered elements when continuing to run
  testHarness.processWatermark(Long.MAX_VALUE);
  assertThat(
      stripStreamRecordFromWindowedValue(testHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("regular element"),
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed")));

  testHarness.close();

  // Check that we would flush the buffered elements when restoring from a checkpoint
  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> testHarness2 =
      new OneInputStreamOperatorTestHarness<>(doFnOperatorSupplier.get());

  testHarness2.initializeState(snapshot);
  testHarness2.open();

  testHarness2.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("after restore")));

  assertThat(
      stripStreamRecordFromWindowedValue(testHarness2.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("trigger another bundle"),
          WindowedValue.valueInGlobalWindow("check that the previous element is not flushed"),
          WindowedValue.valueInGlobalWindow("after restore")));

  // Release the restored operator as well; the first harness is closed above, this one was not.
  testHarness2.close();
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class FlinkStateInternalsTest, method testGlobalWindowWatermarkHoldClear.
/**
 * Adds a watermark hold in the global namespace and checks that {@code clearGlobalState()}
 * removes it, i.e. that reading the hold afterwards yields {@code null}.
 */
@Test
public void testGlobalWindowWatermarkHoldClear() throws Exception {
  KeyedStateBackend<ByteBuffer> backend = createStateBackend();
  SerializablePipelineOptions pipelineOptions =
      new SerializablePipelineOptions(FlinkPipelineOptions.defaults());
  FlinkStateInternals<String> internals =
      new FlinkStateInternals<>(backend, StringUtf8Coder.of(), pipelineOptions);

  StateTag<WatermarkHoldState> holdTag =
      StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);
  Instant holdTimestamp = Instant.now();

  // Register a hold in the global namespace ...
  WatermarkHoldState hold = internals.state(StateNamespaces.global(), holdTag);
  hold.add(holdTimestamp);

  // ... and make sure clearing the global state drops it again.
  internals.clearGlobalState();
  assertThat(hold.read(), is((Instant) null));
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class FlinkDoFnFunction, method open.
/**
 * Prepares this function for execution: registers the pipeline options as the default for
 * {@code FileSystems}, invokes setup on the wrapped {@code DoFn}, creates the metric container
 * and builds the {@code DoFnRunner}, which is wrapped with metrics updating unless metrics are
 * disabled in the {@code FlinkPipelineOptions}.
 *
 * @param parameters the configuration passed in by the caller; not read by this method
 */
@Override
public void open(Configuration parameters) {
  // Note that the SerializablePipelineOptions already initialize FileSystems in the readObject()
  // deserialization method. However, this is a hack, and we want to properly initialize the
  // options where they are needed.
  PipelineOptions options = serializedOptions.get();
  FileSystems.setDefaultPipelineOptions(options);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);

  // Resolve the runtime context once and share it between the metric container and the
  // side-input reader.
  final RuntimeContext runtimeContext = getRuntimeContext();
  metricContainer = new FlinkMetricContainer(runtimeContext);

  // setup DoFnRunner
  final DoFnRunners.OutputManager outputManager;
  if (outputMap.size() == 1) {
    outputManager = new DoFnOutputManager();
  } else {
    // it has some additional outputs
    outputManager = new MultiDoFnOutputManager(outputMap);
  }

  final List<TupleTag<?>> additionalOutputTags = Lists.newArrayList(outputMap.keySet());

  DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          options,
          doFn,
          new FlinkSideInputReader(sideInputs, runtimeContext),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          new FlinkNoOpStepContext(),
          inputCoder,
          outputCoderMap,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  // Reuse the options resolved above instead of unwrapping serializedOptions a second time.
  if (!options.as(FlinkPipelineOptions.class).getDisableMetrics()) {
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, metricContainer);
  }

  this.collectorAware = (CollectorAware) outputManager;
  this.doFnRunner = doFnRunner;
}
Use of org.apache.beam.runners.core.construction.SerializablePipelineOptions in the Apache Beam project.
Class FlinkStreamingPortablePipelineTranslator, method addGBK.
/**
 * Wires a group-by-key step onto {@code inputDataStream}: every element is converted into a
 * keyed work item, the stream is keyed with a {@code WorkItemKeySelector}, and a
 * {@code WindowDoFnOperator} driven by a buffering {@code SystemReduceFn} is applied to it.
 *
 * @param inputDataStream stream of windowed key/value pairs to group
 * @param windowingStrategy windowing strategy; supplies the window coder and is handed to the
 *     operator
 * @param windowedInputCoder coder of the input elements; its value coder is cast to a
 *     {@code KvCoder}
 * @param operatorName name given to the operator and to the resulting transformation
 * @param context translation context that provides the pipeline options
 * @return the stream produced by applying the operator to the keyed work-item stream
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
    DataStream<WindowedValue<KV<K, V>>> inputDataStream,
    WindowingStrategy<?, ?> windowingStrategy,
    WindowedValueCoder<KV<K, V>> windowedInputCoder,
    String operatorName,
    StreamingTranslationContext context) {
  KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();

  // Coder and type information for the intermediate keyed work items.
  SingletonKeyedWorkItemCoder<K, V> workItemCoder =
      SingletonKeyedWorkItemCoder.of(
          keyCoder, valueCoder, windowingStrategy.getWindowFn().windowCoder());
  WindowedValue.FullWindowedValueCoder<KeyedWorkItem<K, V>> windowedWorkItemCoder =
      WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());
  CoderTypeInformation<WindowedValue<KeyedWorkItem<K, V>>> workItemTypeInfo =
      new CoderTypeInformation<>(windowedWorkItemCoder, context.getPipelineOptions());

  // Turn each input element into a keyed work item and key the stream.
  DataStream<WindowedValue<KeyedWorkItem<K, V>>> workItemStream =
      inputDataStream
          .flatMap(
              new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(
                  context.getPipelineOptions()))
          .returns(workItemTypeInfo)
          .name("ToKeyedWorkItem");
  WorkItemKeySelector<K, V> keySelector =
      new WorkItemKeySelector<>(
          keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
  KeyedStream<WindowedValue<KeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream =
      workItemStream.keyBy(keySelector);

  // The reduce function buffers all values of a key; the output carries them as an Iterable.
  SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn =
      SystemReduceFn.buffering(valueCoder);
  Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(valueCoder);
  Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
      WindowedValue.getFullCoder(
          KvCoder.of(keyCoder, accumulatorCoder),
          windowingStrategy.getWindowFn().windowCoder());
  TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
      new CoderTypeInformation<>(outputCoder, context.getPipelineOptions());

  TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");
  DoFnOperator.MultiOutputOutputManagerFactory<KV<K, Iterable<V>>> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainTag, outputCoder, new SerializablePipelineOptions(context.getPipelineOptions()));

  WindowDoFnOperator<K, V, Iterable<V>> doFnOperator =
      new WindowDoFnOperator<>(
          reduceFn,
          operatorName,
          windowedWorkItemCoder,
          mainTag,
          Collections.emptyList(),
          outputManagerFactory,
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          context.getPipelineOptions(),
          keyCoder,
          keySelector);

  return keyedWorkItemStream.transform(operatorName, outputTypeInfo, doFnOperator);
}
Aggregations