use of org.apache.beam.sdk.coders.VarIntCoder in project beam by apache.
the class TransformTranslatorTest method testSplitBySameKey.
/**
 * Checks that {@code TransformTranslator.splitBySameKey} turns a flat sequence of encoded
 * (key-with-timestamp, windowed value) pairs into one iterator per key, and that each iterator
 * yields the expected {@code WindowedValue<KV<Integer, Integer>>} elements in input order.
 */
@Test
public void testSplitBySameKey() {
  VarIntCoder coder = VarIntCoder.of();
  WindowedValue.WindowedValueCoder<Integer> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder, GlobalWindow.Coder.INSTANCE);
  Instant now = Instant.now();
  List<GlobalWindow> window = Arrays.asList(GlobalWindow.INSTANCE);
  PaneInfo paneInfo = PaneInfo.NO_FIRING;

  // Key 1 carries values 1 and 2. Tuple2 is instantiated with the diamond operator rather than
  // as a raw type so the element type of the list is checked by the compiler.
  List<Tuple2<ByteArray, byte[]>> firstKey =
      Arrays.asList(
          new Tuple2<>(
              new ByteArray(CoderHelpers.toByteArrayWithTs(1, coder, now)),
              CoderHelpers.toByteArray(WindowedValue.of(1, now, window, paneInfo), wvCoder)),
          new Tuple2<>(
              new ByteArray(
                  CoderHelpers.toByteArrayWithTs(1, coder, now.plus(Duration.millis(1)))),
              CoderHelpers.toByteArray(
                  WindowedValue.of(2, now.plus(Duration.millis(1)), window, paneInfo), wvCoder)));

  // Key 2 carries values 3 and 4.
  List<Tuple2<ByteArray, byte[]>> secondKey =
      Arrays.asList(
          new Tuple2<>(
              new ByteArray(CoderHelpers.toByteArrayWithTs(2, coder, now)),
              CoderHelpers.toByteArray(WindowedValue.of(3, now, window, paneInfo), wvCoder)),
          new Tuple2<>(
              new ByteArray(
                  CoderHelpers.toByteArrayWithTs(2, coder, now.plus(Duration.millis(2)))),
              CoderHelpers.toByteArray(
                  WindowedValue.of(4, now.plus(Duration.millis(2)), window, paneInfo), wvCoder)));

  Iterable<Tuple2<ByteArray, byte[]>> concat = Iterables.concat(firstKey, secondKey);
  Iterator<Iterator<WindowedValue<KV<Integer, Integer>>>> keySplit;
  keySplit = TransformTranslator.splitBySameKey(concat.iterator(), coder, wvCoder);

  // Exactly two per-key iterators are consumed: index 0 is key 1, index 1 is key 2.
  for (int i = 0; i < 2; i++) {
    Iterator<WindowedValue<KV<Integer, Integer>>> iter = keySplit.next();
    List<WindowedValue<KV<Integer, Integer>>> list = new ArrayList<>();
    Iterators.addAll(list, iter);
    if (i == 0) {
      // first key
      assertEquals(
          Arrays.asList(
              WindowedValue.of(KV.of(1, 1), now, window, paneInfo),
              WindowedValue.of(KV.of(1, 2), now.plus(Duration.millis(1)), window, paneInfo)),
          list);
    } else {
      // second key
      assertEquals(
          Arrays.asList(
              WindowedValue.of(KV.of(2, 3), now, window, paneInfo),
              WindowedValue.of(KV.of(2, 4), now.plus(Duration.millis(2)), window, paneInfo)),
          list);
    }
  }
}
use of org.apache.beam.sdk.coders.VarIntCoder in project beam by apache.
the class DoFnOperatorTest method testTimersRestore.
/**
 * Registers an event-time timer, snapshots the operator, restores the snapshot into a newly
 * created harness, and checks that advancing the watermark past the timer's timestamp produces
 * the timer's output.
 */
@Test
public void testTimersRestore() throws Exception {
  final Instant fireAt = new Instant(1000);
  final String firedMessage = "Timer fired";

  WindowingStrategy<Object, IntervalWindow> strategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));

  // The DoFn sets one event-time timer per element and emits a message when it fires.
  DoFn<Integer, String> timerFn =
      new DoFn<Integer, String>() {
        private static final String EVENT_TIMER_ID = "eventTimer";

        @TimerId(EVENT_TIMER_ID)
        private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void processElement(ProcessContext ctx, @TimerId(EVENT_TIMER_ID) Timer timer) {
          timer.set(fireAt);
        }

        @OnTimer(EVENT_TIMER_ID)
        public void onEventTime(OnTimerContext ctx) {
          assertEquals("Timer timestamp must match set timestamp.", fireAt, ctx.timestamp());
          ctx.outputWithTimestamp(firedMessage, ctx.timestamp());
        }
      };

  TupleTag<String> mainTag = new TupleTag<>("main-output");
  VarIntCoder keyCoder = VarIntCoder.of();
  WindowedValue.FullWindowedValueCoder<Integer> elementCoder =
      WindowedValue.getFullCoder(keyCoder, strategy.getWindowFn().windowCoder());
  WindowedValue.FullWindowedValueCoder<String> resultCoder =
      WindowedValue.getFullCoder(StringUtf8Coder.of(), strategy.getWindowFn().windowCoder());
  final CoderTypeSerializer<WindowedValue<String>> resultSerializer =
      new CoderTypeSerializer<>(
          resultCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  CoderTypeInformation<ByteBuffer> keyTypeInfo =
      new CoderTypeInformation<>(
          FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults());
  KeySelector<WindowedValue<Integer>, ByteBuffer> byEncodedKey =
      element -> FlinkKeyUtils.encodeKey(element.getValue(), keyCoder);

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      createTestHarness(
          strategy,
          timerFn,
          elementCoder,
          resultCoder,
          keyCoder,
          mainTag,
          keyTypeInfo,
          byEncodedKey);
  harness.setup(resultSerializer);
  harness.open();
  harness.processWatermark(0);

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), Duration.millis(10_000));

  // Processing the element registers the timer; nothing is emitted yet.
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(13, new Instant(0), firstWindow, PaneInfo.NO_FIRING)));
  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());

  // Take a snapshot, shut the harness down, and restore into a new one.
  final OperatorSubtaskState checkpoint = harness.snapshot(0, 0);
  harness.close();

  harness =
      createTestHarness(
          strategy,
          timerFn,
          elementCoder,
          resultCoder,
          VarIntCoder.of(),
          mainTag,
          keyTypeInfo,
          byEncodedKey);
  harness.setup(resultSerializer);
  harness.initializeState(checkpoint);
  harness.open();

  // Moving the watermark one millisecond past the timer timestamp must fire the restored timer.
  harness.processWatermark(fireAt.getMillis() + 1);
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of(firedMessage, fireAt, firstWindow, PaneInfo.NO_FIRING)));

  harness.close();
}
use of org.apache.beam.sdk.coders.VarIntCoder in project beam by apache.
the class DoFnOperatorTest method testLateDroppingForStatefulFn.
/**
 * Feeds elements for one 10 ms fixed window into a stateful {@code DoFn} while advancing the
 * watermark, and checks that elements are emitted at watermarks 0 and 9 but produce no output
 * once the watermark reaches 10.
 */
@Test
public void testLateDroppingForStatefulFn() throws Exception {
  WindowingStrategy<Object, IntervalWindow> strategy =
      WindowingStrategy.of(FixedWindows.of(Duration.millis(10)));

  // Declares a state cell and echoes each element as a string.
  DoFn<Integer, String> statefulFn =
      new DoFn<Integer, String>() {
        @StateId("state")
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext ctx) {
          ctx.output(ctx.element().toString());
        }
      };

  TupleTag<String> mainTag = new TupleTag<>("main-output");
  VarIntCoder intCoder = VarIntCoder.of();
  Coder<WindowedValue<Integer>> elementCoder =
      WindowedValue.getFullCoder(intCoder, strategy.getWindowFn().windowCoder());
  Coder<WindowedValue<String>> resultCoder =
      WindowedValue.getFullCoder(StringUtf8Coder.of(), strategy.getWindowFn().windowCoder());
  KeySelector<WindowedValue<Integer>, ByteBuffer> byEncodedKey =
      element -> FlinkKeyUtils.encodeKey(element.getValue(), intCoder);

  DoFnOperator<Integer, String> operator =
      new DoFnOperator<>(
          statefulFn,
          "stepName",
          elementCoder,
          Collections.emptyMap(),
          mainTag,
          Collections.emptyList(),
          new DoFnOperator.MultiOutputOutputManagerFactory<>(
              mainTag,
              resultCoder,
              new SerializablePipelineOptions(FlinkPipelineOptions.defaults())),
          strategy,
          /* side-input mapping */ new HashMap<>(),
          /* side inputs */ Collections.emptyList(),
          FlinkPipelineOptions.defaults(),
          /* key coder */ intCoder,
          byEncodedKey,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> harness =
      new KeyedOneInputStreamOperatorTestHarness<>(
          operator,
          byEncodedKey,
          new CoderTypeInformation<>(
              FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
  harness.open();

  final Instant elementTime = new Instant(0);
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), Duration.millis(10));

  // Watermark 0: the element is on time and is emitted.
  harness.processWatermark(0);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(13, elementTime, firstWindow, PaneInfo.NO_FIRING)));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("13", elementTime, firstWindow, PaneInfo.NO_FIRING)));
  harness.getOutput().clear();

  // Watermark 9: the element is still not treated as late.
  harness.processWatermark(9);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, elementTime, firstWindow, PaneInfo.NO_FIRING)));
  assertThat(
      stripStreamRecordFromWindowedValue(harness.getOutput()),
      contains(WindowedValue.of("17", elementTime, firstWindow, PaneInfo.NO_FIRING)));
  harness.getOutput().clear();

  // Watermark 10: the same element is now late and must be dropped.
  harness.processWatermark(10);
  harness.processElement(
      new StreamRecord<>(WindowedValue.of(17, elementTime, firstWindow, PaneInfo.NO_FIRING)));
  assertThat(stripStreamRecordFromWindowedValue(harness.getOutput()), emptyIterable());

  harness.close();
}
use of org.apache.beam.sdk.coders.VarIntCoder in project beam by apache.
the class DoFnOperatorTest method testWatermarkContract.
/**
 * This test specifically verifies that we correctly map Flink watermarks to Beam watermarks. In
 * Beam, a watermark {@code T} guarantees there will not be elements with a timestamp {@code < T}
 * in the future. In Flink, a watermark {@code T} guarantees there will not be elements with a
 * timestamp {@code <= T} in the future. We have to make sure to take this into account when
 * firing timers.
 *
 * <p>This does not test the timer API in general or processing-time timers because there are
 * generic tests for this in {@code ParDoTest}.
 *
 * <p>The test registers two event-time timers (one with an explicit output timestamp) and one
 * processing-time timer, then checks that none of them fire when the watermark and processing
 * time equal the timer timestamp, and that they do fire once each is advanced by one more
 * millisecond.
 */
@Test
public void testWatermarkContract() throws Exception {
// Timestamp at which both event-time timers are set; also used as the processing-timer offset.
final Instant timerTimestamp = new Instant(1000);
// Output timestamp for the second event-time timer: one millisecond before it fires.
final Instant timerOutputTimestamp = timerTimestamp.minus(Duration.millis(1));
final String eventTimeMessage = "Event timer fired: ";
final String processingTimeMessage = "Processing timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
// Timer ids are final locals initialized from literals so they can be used in annotations.
final String eventTimerId = "eventTimer";
final String eventTimerId2 = "eventTimer2";
final String processingTimerId = "processingTimer";
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
@TimerId(eventTimerId)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(eventTimerId2)
private final TimerSpec eventTimer2 = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(processingTimerId)
private final TimerSpec processingTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);
// Sets all three timers for every element; produces no output itself.
@ProcessElement
public void processElement(ProcessContext context, @TimerId(eventTimerId) Timer eventTimer, @TimerId(eventTimerId2) Timer eventTimerWithOutputTimestamp, @TimerId(processingTimerId) Timer processingTimer) {
eventTimer.set(timerTimestamp);
eventTimerWithOutputTimestamp.withOutputTimestamp(timerOutputTimestamp).set(timerTimestamp);
processingTimer.offset(Duration.millis(timerTimestamp.getMillis())).setRelative();
}
// First event timer: emits its message at the timer's own timestamp.
@OnTimer(eventTimerId)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(eventTimeMessage + eventTimerId, context.timestamp());
}
// Second event timer: checks the fire timestamp and emits via output() without an explicit
// timestamp; the test later expects this element at timerTimestamp - 1.
@OnTimer(eventTimerId2)
public void onEventTime2(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.fireTimestamp());
context.output(eventTimeMessage + eventTimerId2);
}
// Processing timer: expected context timestamp is timerTimestamp + 1, which is the watermark
// value the test has advanced to by the time this timer fires.
@OnTimer(processingTimerId)
public void onProcessingTime(OnTimerContext context) {
assertEquals(// See SimpleDoFnRunner#onTimer
"Timer timestamp must match current input watermark", timerTimestamp.plus(Duration.millis(1)), context.timestamp());
context.outputWithTimestamp(processingTimeMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
// Elements are keyed by their own value, encoded with the key coder.
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
TupleTag<String> outputTag = new TupleTag<>("main-output");
DoFnOperator<Integer, String> doFnOperator = new DoFnOperator<>(fn, "stepName", inputCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), windowingStrategy, new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), keyCoder, /* key coder */
keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup(new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())));
testHarness.open();
// Start with both the watermark and the processing time at 0.
testHarness.processWatermark(0);
testHarness.setProcessingTime(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register the three timers above (two event-time, one processing-time)
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// this does not yet fire the timers (in vanilla Flink it would)
testHarness.processWatermark(timerTimestamp.getMillis());
testHarness.setProcessingTime(timerTimestamp.getMillis());
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// while the timers are pending, the minimum watermark hold must equal the output timestamp
// given to the second event timer
assertThat(doFnOperator.keyedStateInternals.minWatermarkHoldMs(), is(timerOutputTimestamp.getMillis()));
// this must fire the event timers
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), containsInAnyOrder(WindowedValue.of(eventTimeMessage + eventTimerId, timerTimestamp, window1, PaneInfo.NO_FIRING), WindowedValue.of(eventTimeMessage + eventTimerId2, timerTimestamp.minus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
// this must fire the processing timer
testHarness.setProcessingTime(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(// See SimpleDoFnRunner#onTimer
processingTimeMessage, timerTimestamp.plus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
use of org.apache.beam.sdk.coders.VarIntCoder in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupWithKeyedInput.
/**
 * Opens an {@code ExecutableStageDoFnOperator} configured with a key coder and inspects its
 * internal runner chain: the outer runner must be a {@code DoFnRunnerWithMetricsUpdate} whose
 * delegate is a {@code StatefulDoFnRunner}.
 */
@Test
@SuppressWarnings("unchecked")
public void testEnsureStateCleanupWithKeyedInput() throws Exception {
  VarIntCoder intKeyCoder = VarIntCoder.of();
  TupleTag<Integer> mainTag = new TupleTag<>("main-output");

  // Raw-typed construction is kept as in the original test; the method-level
  // @SuppressWarnings("unchecked") covers it.
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> managerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainTag,
          VarIntCoder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  ExecutableStageDoFnOperator<Integer, Integer> stageOperator =
      getOperator(
          mainTag,
          Collections.emptyList(),
          managerFactory,
          WindowingStrategy.globalDefault(),
          intKeyCoder,
          WindowedValue.getFullCoder(intKeyCoder, GlobalWindow.Coder.INSTANCE));

  KeyedOneInputStreamOperatorTestHarness<Integer, WindowedValue<Integer>, WindowedValue<Integer>>
      harness =
          new KeyedOneInputStreamOperatorTestHarness(
              stageOperator,
              val -> val,
              new CoderTypeInformation<>(intKeyCoder, FlinkPipelineOptions.defaults()));

  // Stub the bundle factory so that opening the operator gets a mock bundle with one receiver.
  RemoteBundle mockBundle = Mockito.mock(RemoteBundle.class);
  when(mockBundle.getInputReceivers())
      .thenReturn(
          ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder()
              .put("input", Mockito.mock(FnDataReceiver.class))
              .build());
  when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any()))
      .thenReturn(mockBundle);

  harness.open();

  Object outerRunner = Whitebox.getInternalState(stageOperator, "doFnRunner");
  assertThat(outerRunner, instanceOf(DoFnRunnerWithMetricsUpdate.class));

  // The wrapped delegate is expected to be the StatefulDoFnRunner that handles state clearing.
  Object innerRunner = Whitebox.getInternalState(outerRunner, "delegate");
  assertThat(innerRunner, instanceOf(StatefulDoFnRunner.class));
}
Aggregations