Search in sources :

Example 1 with FlinkStateInternals

Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.

In the class DoFnOperator, the method initializeState.

/**
 * Initializes this operator's state after delegating to the superclass.
 *
 * <p>In order, this selects the handler for pushed-back elements, resets the three
 * watermark fields, wires up the side-input reader, creates the keyed state and timer
 * internals when a key coder is present, and finally creates the output manager.
 *
 * @param context the state initialization context, forwarded to the superclass
 * @throws Exception propagated from any of the calls made during initialization
 */
@Override
public void initializeState(StateInitializationContext context) throws Exception {
    super.initializeState(context);

    // Pushed-back elements live in keyed state when a key selector is set,
    // and in operator list state otherwise.
    ListStateDescriptor<WindowedValue<InputT>> bufferDescriptor = new ListStateDescriptor<>("pushed-back-elements", new CoderTypeSerializer<>(windowedInputCoder, serializedOptions));
    if (keySelector == null) {
        ListState<WindowedValue<InputT>> bufferState = getOperatorStateBackend().getListState(bufferDescriptor);
        pushedBackElementsHandler = NonKeyedPushedBackElementsHandler.create(bufferState);
    } else {
        pushedBackElementsHandler = KeyedPushedBackElementsHandler.create(keySelector, getKeyedStateBackend(), bufferDescriptor);
    }

    // Every watermark starts at the minimum representable timestamp.
    long minTimestampMillis = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
    currentInputWatermark = minTimestampMillis;
    currentSideInputWatermark = minTimestampMillis;
    currentOutputWatermark = minTimestampMillis;

    // Default to the null reader; it is replaced below when side inputs exist.
    sideInputReader = NullSideInputReader.of(sideInputs);
    if (sideInputs.isEmpty()) {
        pushedBackWatermark = Long.MAX_VALUE;
    } else {
        FlinkBroadcastStateInternals broadcastState = new FlinkBroadcastStateInternals<>(getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend(), serializedOptions);
        sideInputHandler = new SideInputHandler(sideInputs, broadcastState);
        sideInputReader = sideInputHandler;
        // The pushed-back watermark is the earliest timestamp among the buffered
        // elements, or Long.MAX_VALUE when nothing is buffered.
        pushedBackWatermark = pushedBackElementsHandler.getElements().mapToLong(element -> element.getTimestamp().getMillis()).min().orElse(Long.MAX_VALUE);
    }

    // StatefulPardo or WindowDoFn
    boolean keyed = keyCoder != null;
    if (keyed) {
        keyedStateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(), keyCoder, serializedOptions);
        // Only look up the timer service when none has been assigned yet.
        if (timerService == null) {
            timerService = getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder, serializedOptions), this);
        }
        timerInternals = new FlinkTimerInternals();
        timeServiceManagerCompat = getTimeServiceManagerCompat();
    }

    outputManager = outputManagerFactory.create(output, getLockToAcquireForStateAccessDuringBundles(), getOperatorStateBackend());
}
Also used : FlinkBroadcastStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals) MetricName(org.apache.beam.sdk.metrics.MetricName) InternalTimeServiceManager(org.apache.flink.streaming.api.operators.InternalTimeServiceManager) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) Joiner(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) TimerInternals(org.apache.beam.runners.core.TimerInternals) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Map(java.util.Map) InternalTimerService(org.apache.flink.streaming.api.operators.InternalTimerService) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) FlinkBroadcastStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) InternalTimer(org.apache.flink.streaming.api.operators.InternalTimer) OutputTag(org.apache.flink.util.OutputTag) Serializable(java.io.Serializable) Workarounds(org.apache.beam.runners.flink.translation.utils.Workarounds) Stream(java.util.stream.Stream) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) VoidNamespace(org.apache.flink.runtime.state.VoidNamespace) KV(org.apache.beam.sdk.values.KV) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) 
LinkedHashMap(java.util.LinkedHashMap) InternalPriorityQueue(org.apache.flink.runtime.state.InternalPriorityQueue) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) Output(org.apache.flink.streaming.api.operators.Output) StateInternals(org.apache.beam.runners.core.StateInternals) SideInputReader(org.apache.beam.runners.core.SideInputReader) DoFn(org.apache.beam.sdk.transforms.DoFn) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) IOException(java.io.IOException) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) MapState(org.apache.flink.api.common.state.MapState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) FileSystems(org.apache.beam.sdk.io.FileSystems) TimeDomain(org.apache.beam.sdk.state.TimeDomain) SplittableParDoViaKeyedWorkItems(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems) StateSpec(org.apache.beam.sdk.state.StateSpec) ScheduledFuture(java.util.concurrent.ScheduledFuture) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) LoggerFactory(org.slf4j.LoggerFactory) StepContext(org.apache.beam.runners.core.StepContext) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) 
DoFnRunners(org.apache.beam.runners.core.DoFnRunners) ByteBuffer(java.nio.ByteBuffer) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ListState(org.apache.flink.api.common.state.ListState) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) CheckpointStats(org.apache.beam.runners.flink.translation.utils.CheckpointStats) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) KeySelector(org.apache.flink.api.java.functions.KeySelector) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) List(java.util.List) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) Optional(java.util.Optional) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StateAndTimerBundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler) Coder(org.apache.beam.sdk.coders.Coder) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) ProcessFnRunner(org.apache.beam.runners.core.ProcessFnRunner) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) 
DoFnRunnerWithMetricsUpdate(org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate) OutputStream(java.io.OutputStream) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Triggerable(org.apache.flink.streaming.api.operators.Triggerable) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner) InMemoryBundleFinalizer(org.apache.beam.runners.core.InMemoryBundleFinalizer) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) BufferingDoFnRunner(org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner) InputStream(java.io.InputStream) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer)

Example 2 with FlinkStateInternals

Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.

In the class FlinkStateInternalsTest, the method testWatermarkHoldsPersistence.

/**
 * Exercises watermark holds across two keys and two namespaces (the global namespace
 * and one interval window), including re-creating the {@code FlinkStateInternals} on
 * the same backend.
 *
 * <p>The assertions check that {@code minWatermarkHoldMs()} reports the smallest hold
 * added so far regardless of the current key, that {@code read()} on each hold state
 * depends on the current key and namespace, and that the reported minimum returns to
 * {@code Long.MAX_VALUE} once every hold has been cleared.
 *
 * @throws Exception propagated from the state backend or the state operations
 */
@Test
public void testWatermarkHoldsPersistence() throws Exception {
    KeyedStateBackend<ByteBuffer> keyedStateBackend = createStateBackend();
    // Parameterized (not raw) so the declaration matches the String key coder passed in.
    FlinkStateInternals<String> stateInternals = new FlinkStateInternals<>(keyedStateBackend, StringUtf8Coder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    StateTag<WatermarkHoldState> stateTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);
    WatermarkHoldState globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
    WatermarkHoldState fixedWindow = stateInternals.state(StateNamespaces.window(IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))), stateTag);
    // With no holds set, the minimum hold is reported as Long.MAX_VALUE.
    Instant noHold = new Instant(Long.MAX_VALUE);
    assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));
    // Each earlier hold added below lowers the reported minimum.
    Instant high = new Instant(10);
    globalWindow.add(high);
    assertThat(stateInternals.minWatermarkHoldMs(), is(high.getMillis()));
    Instant middle = new Instant(5);
    fixedWindow.add(middle);
    assertThat(stateInternals.minWatermarkHoldMs(), is(middle.getMillis()));
    Instant low = new Instant(1);
    globalWindow.add(low);
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    // Try to overwrite with later hold (should not succeed)
    globalWindow.add(high);
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    fixedWindow.add(high);
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    // Watermark hold should be computed across all keys
    ByteBuffer firstKey = keyedStateBackend.getCurrentKey();
    changeKey(keyedStateBackend);
    ByteBuffer secondKey = keyedStateBackend.getCurrentKey();
    assertThat(firstKey, is(Matchers.not(secondKey)));
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    // ..but be tracked per key / window
    assertThat(globalWindow.read(), is(Matchers.nullValue()));
    assertThat(fixedWindow.read(), is(Matchers.nullValue()));
    globalWindow.add(middle);
    fixedWindow.add(high);
    assertThat(globalWindow.read(), is(middle));
    assertThat(fixedWindow.read(), is(high));
    // Old key should give previous results
    keyedStateBackend.setCurrentKey(firstKey);
    assertThat(globalWindow.read(), is(low));
    assertThat(fixedWindow.read(), is(middle));
    // Discard watermark view and recover it
    stateInternals = new FlinkStateInternals<>(keyedStateBackend, StringUtf8Coder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    globalWindow = stateInternals.state(StateNamespaces.global(), stateTag);
    fixedWindow = stateInternals.state(StateNamespaces.window(IntervalWindow.getCoder(), new IntervalWindow(new Instant(0), new Instant(10))), stateTag);
    // Watermark hold across all keys should be unchanged
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    // Check the holds for the second key and clear them
    keyedStateBackend.setCurrentKey(secondKey);
    assertThat(globalWindow.read(), is(middle));
    assertThat(fixedWindow.read(), is(high));
    globalWindow.clear();
    fixedWindow.clear();
    // Check the holds for the first key and clear them
    keyedStateBackend.setCurrentKey(firstKey);
    assertThat(globalWindow.read(), is(low));
    assertThat(fixedWindow.read(), is(middle));
    // Clearing the fixed-window hold (5) leaves the global hold (1) as the minimum.
    fixedWindow.clear();
    assertThat(stateInternals.minWatermarkHoldMs(), is(low.getMillis()));
    // Clearing the last remaining hold brings the minimum back to "no hold".
    globalWindow.clear();
    assertThat(stateInternals.minWatermarkHoldMs(), is(noHold.getMillis()));
}
Also used : Instant(org.joda.time.Instant) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) ByteBuffer(java.nio.ByteBuffer) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) StateInternalsTest(org.apache.beam.runners.core.StateInternalsTest) Test(org.junit.Test)

Example 3 with FlinkStateInternals

Use of org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals in the Apache Beam project.

In the class FlinkStateInternalsTest, the method testGlobalWindowWatermarkHoldClear.

/**
 * Adds a watermark hold in the global namespace, calls {@code clearGlobalState()},
 * and asserts that reading the hold afterwards yields {@code null}.
 *
 * @throws Exception propagated from the state backend or the state operations
 */
@Test
public void testGlobalWindowWatermarkHoldClear() throws Exception {
    KeyedStateBackend<ByteBuffer> backend = createStateBackend();
    SerializablePipelineOptions options = new SerializablePipelineOptions(FlinkPipelineOptions.defaults());
    FlinkStateInternals<String> internals = new FlinkStateInternals<>(backend, StringUtf8Coder.of(), options);
    StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.EARLIEST);

    // Register a hold at the current time in the global namespace.
    Instant holdTimestamp = Instant.now();
    WatermarkHoldState globalHold = internals.state(StateNamespaces.global(), holdTag);
    globalHold.add(holdTimestamp);

    // The hold must no longer be readable after clearing global state.
    internals.clearGlobalState();
    assertThat(globalHold.read(), is((Instant) null));
}
Also used : Instant(org.joda.time.Instant) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) ByteBuffer(java.nio.ByteBuffer) WatermarkHoldState(org.apache.beam.sdk.state.WatermarkHoldState) StateInternalsTest(org.apache.beam.runners.core.StateInternalsTest) Test(org.junit.Test)

Aggregations

ByteBuffer (java.nio.ByteBuffer)3 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)3 FlinkStateInternals (org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals)3 Instant (org.joda.time.Instant)3 StateInternalsTest (org.apache.beam.runners.core.StateInternalsTest)2 SuppressFBWarnings (edu.umd.cs.findbugs.annotations.SuppressFBWarnings)1 IOException (java.io.IOException)1 InputStream (java.io.InputStream)1 OutputStream (java.io.OutputStream)1 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 ScheduledFuture (java.util.concurrent.ScheduledFuture)1 Lock (java.util.concurrent.locks.Lock)1