Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.
In class DoFnOperator, method initializeState:
/**
 * Initializes this operator's state after delegating to the superclass.
 *
 * <p>In order, this method:
 *
 * <ol>
 *   <li>creates the handler for pushed-back elements (keyed when a {@code keySelector} is set,
 *       otherwise backed by operator list state);
 *   <li>resets the input, side-input and output watermarks to
 *       {@code BoundedWindow.TIMESTAMP_MIN_VALUE};
 *   <li>installs the side-input reader and derives {@code pushedBackWatermark} from the
 *       pushed-back elements;
 *   <li>when a {@code keyCoder} is present, sets up keyed state internals and timers;
 *   <li>creates the output manager.
 * </ol>
 *
 * @param context the state initialization context, forwarded to the superclass
 * @throws Exception if the superclass or any state backend access fails
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
  super.initializeState(context);

  // List state descriptor under which pushed-back elements are stored.
  ListStateDescriptor<WindowedValue<InputT>> pushedBackDescriptor =
      new ListStateDescriptor<>(
          "pushed-back-elements",
          new CoderTypeSerializer<>(windowedInputCoder, serializedOptions));

  if (keySelector == null) {
    // No key selector: keep pushed-back elements in operator (non-keyed) list state.
    ListState<WindowedValue<InputT>> operatorListState =
        getOperatorStateBackend().getListState(pushedBackDescriptor);
    pushedBackElementsHandler = NonKeyedPushedBackElementsHandler.create(operatorListState);
  } else {
    pushedBackElementsHandler =
        KeyedPushedBackElementsHandler.create(
            keySelector, getKeyedStateBackend(), pushedBackDescriptor);
  }

  // All watermarks start at the minimum timestamp.
  long minTimestampMillis = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
  currentInputWatermark = minTimestampMillis;
  currentSideInputWatermark = minTimestampMillis;
  currentOutputWatermark = minTimestampMillis;

  // Default reader; replaced below when side inputs are actually present.
  sideInputReader = NullSideInputReader.of(sideInputs);
  if (sideInputs.isEmpty()) {
    pushedBackWatermark = Long.MAX_VALUE;
  } else {
    FlinkBroadcastStateInternals broadcastStateInternals =
        new FlinkBroadcastStateInternals<>(
            getContainingTask().getIndexInSubtaskGroup(),
            getOperatorStateBackend(),
            serializedOptions);
    sideInputHandler = new SideInputHandler(sideInputs, broadcastStateInternals);
    sideInputReader = sideInputHandler;

    // Smallest timestamp among pushed-back elements, or Long.MAX_VALUE if there are none.
    pushedBackWatermark =
        pushedBackElementsHandler
            .getElements()
            .mapToLong(element -> element.getTimestamp().getMillis())
            .min()
            .orElse(Long.MAX_VALUE);
  }

  // StatefulPardo or WindowDoFn
  if (keyCoder != null) {
    keyedStateInternals =
        new FlinkStateInternals<>(
            (KeyedStateBackend) getKeyedStateBackend(), keyCoder, serializedOptions);
    // Only register the timer service if one has not been set already.
    if (timerService == null) {
      timerService =
          getInternalTimerService(
              "beam-timer", new CoderTypeSerializer<>(timerCoder, serializedOptions), this);
    }
    timerInternals = new FlinkTimerInternals();
    timeServiceManagerCompat = getTimeServiceManagerCompat();
  }

  outputManager =
      outputManagerFactory.create(
          output, getLockToAcquireForStateAccessDuringBundles(), getOperatorStateBackend());
}
Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.
In class FlinkStateInternalsTest, method testWatermarkHoldsPersistence:
/**
 * Checks that watermark holds are tracked per key and per window, that
 * {@code minWatermarkHoldMs()} reports the minimum hold across all keys, and that this minimum
 * is still reported after a new {@code FlinkStateInternals} is created on the same keyed state
 * backend.
 *
 * @throws Exception if creating the state backend or accessing state fails
 */
@Test
public void testWatermarkHoldsPersistence() throws Exception {
  KeyedStateBackend<ByteBuffer> keyedStateBackend = createStateBackend();
  // Parameterized with the key type of StringUtf8Coder instead of a raw type, so that the
  // generic state(...) lookups below are type-checked.
  FlinkStateInternals<String> stateInternals =
      new FlinkStateInternals<>(
          keyedStateBackend,
          StringUtf8Coder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  StateTag<WatermarkHoldState> stateTag =
      StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);
  WatermarkHoldState globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
  WatermarkHoldState fixedWindow =
      stateInternals.state(
          StateNamespaces.window(
              IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))),
          stateTag);

  // With no holds set, the minimum hold is Long.MAX_VALUE.
  Instant noHold = new Instant(Long.MAX_VALUE);
  assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));

  // Each earlier hold lowers the reported minimum.
  Instant high = new Instant(10);
  globalWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(high.getMillis()));

  Instant middle = new Instant(5);
  fixedWindow.add(middle);
  assertThat(stateInternals.minWatermarkHoldMs(), is(middle.getMillis()));

  Instant low = new Instant(1);
  globalWindow.add(low);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Try to overwrite with later hold (should not succeed)
  globalWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
  fixedWindow.add(high);
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Watermark hold should be computed across all keys
  ByteBuffer firstKey = keyedStateBackend.getCurrentKey();
  changeKey(keyedStateBackend);
  ByteBuffer secondKey = keyedStateBackend.getCurrentKey();
  assertThat(firstKey, is(Matchers.not(secondKey)));
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // ..but be tracked per key / window
  assertThat(globalWindow.read(), is(Matchers.nullValue()));
  assertThat(fixedWindow.read(), is(Matchers.nullValue()));
  globalWindow.add(middle);
  fixedWindow.add(high);
  assertThat(globalWindow.read(), is(middle));
  assertThat(fixedWindow.read(), is(high));

  // Old key should give previous results
  keyedStateBackend.setCurrentKey(firstKey);
  assertThat(globalWindow.read(), is(low));
  assertThat(fixedWindow.read(), is(middle));

  // Discard watermark view and recover it
  stateInternals =
      new FlinkStateInternals<>(
          keyedStateBackend,
          StringUtf8Coder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
  fixedWindow =
      stateInternals.state(
          StateNamespaces.window(
              IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))),
          stateTag);

  // Watermark hold across all keys should be unchanged
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));

  // Check the holds for the second key and clear them
  keyedStateBackend.setCurrentKey(secondKey);
  assertThat(globalWindow.read(), is(middle));
  assertThat(fixedWindow.read(), is(high));
  globalWindow.clear();
  fixedWindow.clear();

  // Check the holds for the first key and clear them
  keyedStateBackend.setCurrentKey(firstKey);
  assertThat(globalWindow.read(), is(low));
  assertThat(fixedWindow.read(), is(middle));

  // Clearing the later hold leaves the minimum untouched; clearing the last hold removes it.
  fixedWindow.clear();
  assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
  globalWindow.clear();
  assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));
}
Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.
In class FlinkStateInternalsTest, method testGlobalWindowWatermarkHoldClear:
/**
 * Adds a watermark hold in the global namespace, calls {@code clearGlobalState()}, and asserts
 * that reading the hold afterwards yields {@code null}.
 *
 * @throws Exception if creating the state backend or accessing state fails
 */
@Test
public void testGlobalWindowWatermarkHoldClear() throws Exception {
  KeyedStateBackend<ByteBuffer> backend = createStateBackend();
  FlinkStateInternals<String> internals =
      new FlinkStateInternals<>(
          backend,
          StringUtf8Coder.of(),
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
  StateTag<WatermarkHoldState> holdTag =
      StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);

  // Set a hold at the current time in the global namespace.
  Instant holdTimestamp = Instant.now();
  WatermarkHoldState globalHold = internals.state(StateNamespaces.global(), holdTag);
  globalHold.add(holdTimestamp);

  internals.clearGlobalState();

  // After clearing global state the hold must read back as null.
  assertThat(globalHold.read(), is((Instant) null));
}
Aggregations