use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class ProcessBundleBenchmark method testLargeBundle.
/**
 * Pushes 1,000 empty byte-array elements (in the global window) through a single bundle of the
 * trivial transform and counts every value that arrives on any remote output.
 *
 * <p>Each remote output coder reported by the descriptor gets its own receiver; all receivers
 * bump the same counter, which is expected to read 3,000 once the bundle has been closed.
 */
@Benchmark
// Use several threads since we expect contention during bundle processing.
@Threads(16)
public void testLargeBundle(TrivialTransform trivialTransform) throws Exception {
  AtomicInteger seenOutputs = new AtomicInteger();
  Map<String, RemoteOutputReceiver<?>> receiversByKey = new HashMap<>();
  Map<String, ? super Coder<WindowedValue<?>>> codersByKey =
      trivialTransform.descriptor.getRemoteOutputCoders();

  // Register one counting receiver per remote output.
  for (Entry<String, ? super Coder<WindowedValue<?>>> coderEntry : codersByKey.entrySet()) {
    receiversByKey.put(
        coderEntry.getKey(),
        RemoteOutputReceiver.of(
            (Coder) coderEntry.getValue(),
            (FnDataReceiver<? super WindowedValue<?>>)
                (WindowedValue<?> output) -> seenOutputs.incrementAndGet()));
  }

  // Closing the bundle (end of the try block) happens before the count is checked.
  try (RemoteBundle activeBundle =
      trivialTransform.processor.newBundle(receiversByKey, BundleProgressHandler.ignored())) {
    for (int sent = 0; sent < 1_000; sent++) {
      Iterables.getOnlyElement(activeBundle.getInputReceivers().values())
          .accept(valueInGlobalWindow(new byte[0]));
    }
  }

  assertEquals(3_000, seenOutputs.getAndSet(0));
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class BeamFnDataOutboundAggregatorTest method testConfiguredTimeLimitExceptionPropagation.
/**
 * Verifies that, with a 1 ms buffer time limit configured, an exception raised while flushing is
 * surfaced to the caller.
 *
 * <p>Two independent aggregators are exercised, both wired to an outbound consumer that always
 * throws: the first must fail when the next element is accepted, the second when the outbound
 * streams are finished.
 */
@Test
public void testConfiguredTimeLimitExceptionPropagation() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options
      .as(ExperimentalOptions.class)
      .setExperiments(Arrays.asList("data_buffer_time_limit_ms=1"));

  // Outbound consumer that rejects everything handed to it.
  Consumer<Elements> alwaysFailing =
      elements -> {
        throw new RuntimeException("");
      };

  // Scenario 1: the failure must resurface when the next element is processed.
  BeamFnDataOutboundAggregator firstAggregator =
      new BeamFnDataOutboundAggregator(
          options,
          endpoint::getInstructionId,
          TestStreams.withOnNext(alwaysFailing).build(),
          false);
  FnDataReceiver<byte[]> firstReceiver = registerOutputLocation(firstAggregator, endpoint, CODER);
  firstAggregator.start();
  // This element is emitted once the configured time limit has elapsed.
  firstReceiver.accept(new byte[1]);
  // Wait until the flush future has completed.
  while (!firstAggregator.flushFuture.isDone()) {
    Thread.sleep(1);
  }
  try {
    firstReceiver.accept(new byte[1]);
    fail();
  } catch (Exception expected) {
    // The exception from the flush thread reached the main thread, as intended.
  }

  // Scenario 2: the failure must resurface when the aggregator is closed.
  BeamFnDataOutboundAggregator secondAggregator =
      new BeamFnDataOutboundAggregator(
          options,
          endpoint::getInstructionId,
          TestStreams.withOnNext(alwaysFailing).build(),
          false);
  FnDataReceiver<byte[]> secondReceiver =
      registerOutputLocation(secondAggregator, endpoint, CODER);
  secondAggregator.start();
  secondReceiver.accept(new byte[1]);
  // Wait until the flush future has completed.
  while (!secondAggregator.flushFuture.isDone()) {
    Thread.sleep(1);
  }
  try {
    secondAggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();
    fail();
  } catch (Exception expected) {
    // The exception from the flush thread reached the main thread, as intended.
  }
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class DoFnOperatorTest method testLateDroppingForStatefulFn.
/**
 * Checks late-data dropping for a stateful {@code DoFn} running in 10 ms fixed windows.
 *
 * <p>Elements for the window starting at 0 are emitted while the watermark is at 0 and at 9;
 * once the watermark reaches 10 the same kind of element produces no output.
 */
@Test
public void testLateDroppingForStatefulFn() throws Exception {
  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));

  // Declares a state cell and echoes each element as a string.
  DoFn<Integer, String> statefulFn =
      new DoFn<Integer, String>() {
        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context) {
          context.output(context.element().toString());
        }
      };

  VarIntCoder keyCoder = VarIntCoder.of();
  Coder<WindowedValue<Integer>> inputCoder =
      WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
  Coder<WindowedValue<String>> outputCoder =
      WindowedValue.getFullCoder(
          StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
  // The element value itself serves as the key.
  KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
      windowedValue -> FlinkKeyUtils.encodeKey(windowedValue.getValue(), keyCoder);
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<Integer, String> operator =
      new DoFnOperator<>(
          statefulFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(
              outputTag,
              outputCoder,
              new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          FlinkPipelineOptions.defaults(),
          keyCoder, /* key coder */
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          operator,
          keySelector,
          new CoderTypeInformation<>(
              FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
  harness.open();

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), Duration.millis(10));

  // Watermark 0: the element is on time and passes through.
  harness.processWatermark(0);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("13", new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  harness.getOutput().clear();

  // Watermark 9: still not considered late.
  harness.processWatermark(9);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("17", new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  harness.getOutput().clear();

  // Watermark 10: the element is now late and nothing is emitted.
  harness.processWatermark(10);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());

  harness.close();
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class DoFnOperatorTest method testWatermarkContract.
/**
 * Checks that Flink watermarks are translated into Beam watermarks correctly when firing timers.
 *
 * <p>The two systems differ by one: a Beam watermark {@code T} promises no future element with a
 * timestamp {@code < T}, whereas a Flink watermark {@code T} promises no future element with a
 * timestamp {@code <= T}. Timer firing has to account for that difference.
 *
 * <p>The timer API in general and processing-time timers are not the subject here; {@code
 * ParDoTest} has generic tests for those.
 */
@Test
public void testWatermarkContract() throws Exception {
  final Instant timerTimestamp = new Instant(1000);
  final Instant timerOutputTimestamp = timerTimestamp.minus(Duration.millis(1));
  final String eventTimeMessage = "Event timer fired: ";
  final String processingTimeMessage = "Processing timer fired";
  final String eventTimerId = "eventTimer";
  final String eventTimerId2 = "eventTimer2";
  final String processingTimerId = "processingTimer";

  WindowingStrategy<Object, IntervalWindow> windowingStrategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));

  DoFn<Integer, String> timerFn =
      new DoFn<Integer, String>() {
        @TimerId(eventTimerId)
        private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @TimerId(eventTimerId2)
        private final TimerSpec eventTimer2 = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @TimerId(processingTimerId)
        private final TimerSpec processingTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);

        // Every element arms all three timers; no output is produced here.
        @ProcessElement
        public void processElement(
            ProcessContext context,
            @TimerId(eventTimerId) Timer eventTimer,
            @TimerId(eventTimerId2) Timer eventTimerWithOutputTimestamp,
            @TimerId(processingTimerId) Timer processingTimer) {
          eventTimer.set(timerTimestamp);
          eventTimerWithOutputTimestamp
              .withOutputTimestamp(timerOutputTimestamp)
              .set(timerTimestamp);
          processingTimer.offset(Duration.millis(timerTimestamp.getMillis())).setRelative();
        }

        @OnTimer(eventTimerId)
        public void onEventTime(OnTimerContext context) {
          assertEquals(
              "Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
          context.outputWithTimestamp(eventTimeMessage + eventTimerId, context.timestamp());
        }

        @OnTimer(eventTimerId2)
        public void onEventTime2(OnTimerContext context) {
          assertEquals(
              "Timer timestamp must match set timestamp.",
              timerTimestamp,
              context.fireTimestamp());
          context.output(eventTimeMessage + eventTimerId2);
        }

        @OnTimer(processingTimerId)
        public void onProcessingTime(OnTimerContext context) {
          // See SimpleDoFnRunner#onTimer
          assertEquals(
              "Timer timestamp must match current input watermark",
              timerTimestamp.plus(Duration.millis(1)),
              context.timestamp());
          context.outputWithTimestamp(processingTimeMessage, context.timestamp());
        }
      };

  VarIntCoder keyCoder = VarIntCoder.of();
  WindowedValue.FullWindowedValueCoder<Integer> inputCoder =
      WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
  WindowedValue.FullWindowedValueCoder<String> outputCoder =
      WindowedValue.getFullCoder(
          StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
  // The element value itself serves as the key.
  KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector =
      windowedValue -> FlinkKeyUtils.encodeKey(windowedValue.getValue(), keyCoder);
  TupleTag<String> outputTag = new TupleTag<>("main-output");

  DoFnOperator<Integer, String> operator =
      new DoFnOperator<>(
          timerFn,
          "stepName",
          inputCoder,
          Collections.emptyMap(),
          outputTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(
              outputTag,
              outputCoder,
              new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
          windowingStrategy,
          new HashMap<>(), /* side-input mapping */
          Collections.emptyList(), /* side inputs */
          FlinkPipelineOptions.defaults(),
          keyCoder, /* key coder */
          keySelector,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          operator,
          keySelector,
          new CoderTypeInformation<>(
              FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
  harness.setup(
      new CoderTypeSerializer<>(
          outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())));
  harness.open();

  harness.processWatermark(0);
  harness.setProcessingTime(0);

  IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10_000));

  // Processing one element arms the timers; nothing is emitted yet.
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), window, PaneInfo.NO_FIRING)));
  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());

  // Reaching the timer timestamp does not yet fire the timers (in vanilla Flink it would).
  harness.processWatermark(timerTimestamp.getMillis());
  harness.setProcessingTime(timerTimestamp.getMillis());
  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());
  assertThat(
      operator.keyedStateInternals.minWatermarkHoldMs(), is(timerOutputTimestamp.getMillis()));

  // One millisecond past the timer timestamp, both event-time timers must fire.
  harness.processWatermark(timerTimestamp.getMillis() + 1);
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      containsInAnyOrder(
          WindowedValue.of(
              eventTimeMessage + eventTimerId, timerTimestamp, window, PaneInfo.NO_FIRING),
          WindowedValue.of(
              eventTimeMessage + eventTimerId2,
              timerOutputTimestamp,
              window,
              PaneInfo.NO_FIRING)));
  harness.getOutput().clear();

  // Advancing processing time the same way must fire the processing-time timer.
  harness.setProcessingTime(timerTimestamp.getMillis() + 1);
  // See SimpleDoFnRunner#onTimer
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(
          WindowedValue.of(
              processingTimeMessage,
              timerTimestamp.plus(Duration.millis(1)),
              window,
              PaneInfo.NO_FIRING)));

  harness.close();
}
use of org.apache.beam.sdk.coders.Coder in project beam by apache.
the class DoFnOperatorTest method keyedParDoPushbackDataCheckpointing.
/**
 * Runs {@code pushbackDataCheckpointing} with a factory that builds a keyed two-input test
 * harness around an identity {@code DoFn} with two side inputs ({@code view1}, {@code view2}).
 */
@Test
public void keyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        Coder<WindowedValue<String>> valueCoder =
            WindowedValue.getFullCoder(keyCoder, IntervalWindow.getCoder());
        TupleTag<String> mainOutputTag = new TupleTag<>("main-output");
        // The element value itself serves as the key.
        KeySelector<WindowedValue<String>, ByteBuffer> keySelector =
            windowedValue -> FlinkKeyUtils.encodeKey(windowedValue.getValue(), keyCoder);
        ImmutableMap<Integer, PCollectionView<?>> sideInputsByIndex =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> operator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                valueCoder,
                Collections.emptyMap(),
                mainOutputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(
                    mainOutputTag,
                    valueCoder,
                    new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputsByIndex, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                FlinkPipelineOptions.defaults(),
                keyCoder,
                keySelector,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new KeyedTwoInputStreamOperatorTestHarness<>(
            operator,
            keySelector,
            // we use a dummy key for the second input since it is considered to be broadcast
            null,
            new CoderTypeInformation<>(
                FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
      });
}
Aggregations