use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testFailOnRequiresStableInputAndDisabledCheckpointing.
/**
 * Verifies that constructing a {@link DoFnOperator} for a DoFn whose process method is annotated
 * with {@code @RequiresStableInput} throws {@link IllegalStateException} when the checkpointing
 * interval is set to {@code -1}.
 *
 * <p>The expectation is scoped to the constructor call alone, so an {@link IllegalStateException}
 * raised by any of the setup statements fails the test instead of silently satisfying it.
 */
@Test
public void testFailOnRequiresStableInputAndDisabledCheckpointing() {
  TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");

  StringUtf8Coder keyCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector<String, String> keySelector =
      new KvToByteBufferKeySelector<>(keyCoder, null);
  KvCoder<String, String> kvCoder = KvCoder.of(keyCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> windowedValueCoder =
      WindowedValue.getValueOnlyCoder(kvCoder);

  DoFn<KV<String, String>, KV<String, String>> doFn =
      new DoFn<KV<String, String>, KV<String, String>>() {
        @ProcessElement
        // Use RequiresStableInput to force buffering elements
        @RequiresStableInput
        public void processElement(ProcessContext context) {
          context.output(context.element());
        }
      };

  FlinkPipelineOptions options = FlinkPipelineOptions.defaults();
  DoFnOperator.MultiOutputOutputManagerFactory<KV<String, String>> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          outputTag,
          WindowedValue.getFullCoder(kvCoder, GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(options));

  // should make the DoFnOperator creation fail
  options.setCheckpointingInterval(-1L);

  // Only the constructor is expected to throw; everything above must succeed.
  assertThrows(
      IllegalStateException.class,
      () ->
          new DoFnOperator<>(
              doFn,
              "stepName",
              windowedValueCoder,
              Collections.emptyMap(),
              outputTag,
              Collections.emptyList(),
              outputManagerFactory,
              WindowingStrategy.globalDefault(),
              /* side-input mapping */ new HashMap<>(),
              /* side inputs */ Collections.emptyList(),
              options,
              keyCoder,
              keySelector,
              DoFnSchemaInformation.create(),
              Collections.emptyMap()));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method keyedParDoSideInputCheckpointing.
/**
 * Runs the shared {@code sideInputCheckpointing} scenario against a keyed two-input test harness.
 *
 * <p>The supplied factory builds an identity {@link DoFnOperator} over strings in 100 ms fixed
 * windows, with two side inputs ({@code view1}, {@code view2}) and a key selector that encodes
 * each element's value with the string coder.
 */
@Test
public void keyedParDoSideInputCheckpointing() throws Exception {
  sideInputCheckpointing(
      () -> {
        StringUtf8Coder stringCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> windowedCoder =
            WindowedValue.getFullCoder(stringCoder, IntervalWindow.getCoder());
        TupleTag<String> mainOutput = new TupleTag<>("main-output");

        // Key every element by its own (encoded) value.
        KeySelector<WindowedValue<String>, ByteBuffer> byValue =
            element -> FlinkKeyUtils.encodeKey(element.getValue(), stringCoder);

        ImmutableMap<Integer, PCollectionView<?>> viewsByIndex =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator.MultiOutputOutputManagerFactory<String> managerFactory =
            new DoFnOperator.MultiOutputOutputManagerFactory<>(
                mainOutput,
                windowedCoder,
                new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

        DoFnOperator<String, String> operator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                windowedCoder,
                Collections.emptyMap(),
                mainOutput,
                Collections.emptyList(),
                managerFactory,
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                /* side-input mapping */ viewsByIndex,
                /* side inputs */ ImmutableList.of(view1, view2),
                FlinkPipelineOptions.defaults(),
                stringCoder,
                byValue,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            operator,
            byValue,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(
                FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
      });
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testBundleKeyed.
/**
 * Exercises bundling on a keyed {@link DoFnOperator} configured with a max bundle size of 2 and a
 * max bundle time of 10.
 *
 * <p>The test feeds three elements, checks the emitted output, snapshots the operator, and
 * asserts that the element produced by {@code @FinishBundle} during the snapshot is held in the
 * pushed-back elements handler. It then restores a fresh operator from the snapshot and checks
 * that the buffered element is emitted before new output and that advancing processing time
 * finishes the open bundle.
 */
@Test
public void testBundleKeyed() throws Exception {
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  KvToByteBufferKeySelector<String, String> byKey =
      new KvToByteBufferKeySelector<>(
          stringCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  KvCoder<String, String> inputCoder = KvCoder.of(stringCoder, StringUtf8Coder.of());
  WindowedValue.ValueOnlyWindowedValueCoder<KV<String, String>> inputWindowedCoder =
      WindowedValue.getValueOnlyCoder(inputCoder);
  TupleTag<String> mainOutput = new TupleTag<>("main-output");

  FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
  pipelineOptions.setMaxBundleSize(2L);
  pipelineOptions.setMaxBundleTimeMills(10L);

  DoFn<KV<String, String>, String> valueEmittingFn =
      new DoFn<KV<String, String>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          // Change output type of element to test that we do not depend on the input keying
          c.output(c.element().getValue());
        }

        @FinishBundle
        public void finishBundle(FinishBundleContext bundleContext) {
          bundleContext.output(
              "finishBundle", BoundedWindow.TIMESTAMP_MIN_VALUE, GlobalWindow.INSTANCE);
        }
      };

  DoFnOperator.MultiOutputOutputManagerFactory<String> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutput,
          WindowedValue.getFullCoder(inputCoder.getValueCoder(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(pipelineOptions));

  // ---- First run: process elements and take a snapshot ----
  DoFnOperator<KV<String, String>, String> firstOperator =
      new DoFnOperator<>(
          valueEmittingFn,
          "stepName",
          inputWindowedCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          managerFactory,
          WindowingStrategy.globalDefault(),
          /* side-input mapping */ new HashMap<>(),
          /* side inputs */ Collections.emptyList(),
          pipelineOptions,
          stringCoder,
          byKey,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<String>>
      firstHarness =
          new KeyedOneInputStreamOperatorTestHarness<>(
              firstOperator, byKey, byKey.getProducedType());
  firstHarness.open();

  firstHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "a"))));
  firstHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "b"))));
  firstHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "c"))));

  // "finishBundle" is expected right after the second element; "c" follows it.
  assertThat(
      stripStreamRecordFromWindowedValue(firstHarness.getOutput()),
      contains(
          WindowedValue.valueInGlobalWindow("a"),
          WindowedValue.valueInGlobalWindow("b"),
          WindowedValue.valueInGlobalWindow("finishBundle"),
          WindowedValue.valueInGlobalWindow("c")));

  // Take a snapshot
  OperatorSubtaskState checkpoint = firstHarness.snapshot(0, 0);

  // Finish bundle element will be buffered as part of finishing a bundle in snapshot()
  PushedBackElementsHandler<KV<Integer, WindowedValue<?>>> bufferHandler =
      firstOperator.outputManager.pushedBackElementsHandler;
  assertThat(bufferHandler, instanceOf(NonKeyedPushedBackElementsHandler.class));
  List<KV<Integer, WindowedValue<?>>> buffered =
      bufferHandler.getElements().collect(Collectors.toList());
  assertThat(
      buffered, contains(KV.of(0, WindowedValue.valueInGlobalWindow("finishBundle"))));

  firstHarness.close();

  // ---- Second run: restore a fresh operator from the snapshot ----
  DoFnOperator<KV<String, String>, String> restoredOperator =
      new DoFnOperator<>(
          valueEmittingFn,
          "stepName",
          inputWindowedCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          managerFactory,
          WindowingStrategy.globalDefault(),
          /* side-input mapping */ new HashMap<>(),
          /* side inputs */ Collections.emptyList(),
          pipelineOptions,
          stringCoder,
          byKey,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<String>>
      restoredHarness =
          new KeyedOneInputStreamOperatorTestHarness<>(
              restoredOperator, byKey, byKey.getProducedType());

  // Restore snapshot
  restoredHarness.initializeState(checkpoint);
  restoredHarness.open();

  // startBundle will output the buffered elements.
  restoredHarness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow(KV.of("key", "d"))));

  // check finishBundle by timeout
  restoredHarness.setProcessingTime(10);

  assertThat(
      stripStreamRecordFromWindowedValue(restoredHarness.getOutput()),
      contains(
          // The first finishBundle is restored from the checkpoint
          WindowedValue.valueInGlobalWindow("finishBundle"),
          WindowedValue.valueInGlobalWindow("d"),
          WindowedValue.valueInGlobalWindow("finishBundle")));

  restoredHarness.close();
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method testBundleProcessingExceptionIsFatalDuringCheckpointing.
/**
 * Verifies that a {@link RuntimeException} thrown from {@code @FinishBundle} while a snapshot is
 * being taken surfaces as an {@link Error} rather than as a regular exception.
 */
@Test
public void testBundleProcessingExceptionIsFatalDuringCheckpointing() throws Exception {
  FlinkPipelineOptions pipelineOptions = FlinkPipelineOptions.defaults();
  pipelineOptions.setMaxBundleSize(10L);
  pipelineOptions.setCheckpointingInterval(1L);

  TupleTag<String> mainOutput = new TupleTag<>("main-output");
  StringUtf8Coder stringCoder = StringUtf8Coder.of();
  WindowedValue.ValueOnlyWindowedValueCoder<String> inputWindowedCoder =
      WindowedValue.getValueOnlyCoder(stringCoder);

  DoFnOperator.MultiOutputOutputManagerFactory<String> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory<>(
          mainOutput,
          WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE),
          new SerializablePipelineOptions(pipelineOptions));

  // Identity DoFn whose bundle finalization always blows up.
  IdentityDoFn<String> failingOnFinishBundle =
      new IdentityDoFn<String>() {
        @FinishBundle
        public void finishBundle() {
          throw new RuntimeException("something went wrong here");
        }
      };

  DoFnOperator<String, String> operator =
      new DoFnOperator<>(
          failingOnFinishBundle,
          "stepName",
          inputWindowedCoder,
          Collections.emptyMap(),
          mainOutput,
          Collections.emptyList(),
          managerFactory,
          WindowingStrategy.globalDefault(),
          /* side-input mapping */ new HashMap<>(),
          /* side inputs */ Collections.emptyList(),
          pipelineOptions,
          null,
          null,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<String>, WindowedValue<String>> harness =
      new OneInputStreamOperatorTestHarness<>(operator);
  harness.open();

  // start a bundle
  harness.processElement(
      new StreamRecord<>(WindowedValue.valueInGlobalWindow("regular element")));

  // Make sure we throw Error, not a regular Exception.
  // A regular exception would just cause the checkpoint to fail.
  assertThrows(Error.class, () -> harness.snapshot(0, 0));
}
use of org.apache.beam.sdk.coders.StringUtf8Coder in project beam by apache.
the class DoFnOperatorTest method keyedParDoPushbackDataCheckpointing.
/**
 * Runs the shared {@code pushbackDataCheckpointing} scenario against a keyed two-input test
 * harness.
 *
 * <p>The supplied factory builds an identity {@link DoFnOperator} over strings in 100 ms fixed
 * windows, with two side inputs ({@code view1}, {@code view2}) and a key selector that encodes
 * each element's value with the string coder.
 */
@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        StringUtf8Coder stringCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> windowedCoder =
            WindowedValue.getFullCoder(stringCoder, IntervalWindow.getCoder());
        TupleTag<String> mainOutput = new TupleTag<>("main-output");

        // Key every element by its own (encoded) value.
        KeySelector<WindowedValue<String>, ByteBuffer> byValue =
            element -> FlinkKeyUtils.encodeKey(element.getValue(), stringCoder);

        ImmutableMap<Integer, PCollectionView<?>> viewsByIndex =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator.MultiOutputOutputManagerFactory<String> managerFactory =
            new DoFnOperator.MultiOutputOutputManagerFactory<>(
                mainOutput,
                windowedCoder,
                new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

        DoFnOperator<String, String> operator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                windowedCoder,
                Collections.emptyMap(),
                mainOutput,
                Collections.emptyList(),
                managerFactory,
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                /* side-input mapping */ viewsByIndex,
                /* side inputs */ ImmutableList.of(view1, view2),
                FlinkPipelineOptions.defaults(),
                stringCoder,
                byValue,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            operator,
            byValue,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(
                FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
      });
}
Aggregations