Search in sources:

Example 6 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project twister2 by DSC-SPIDAL.

The class GroupByWindowFunction, method flatMap.

/**
 * Runs group-also-by-window for a single key and forwards every produced pane to
 * {@code collector}.
 *
 * <p>A {@link ReduceFnRunner} is built on in-memory state and timers for the key, fed all of
 * the key's values, and then the input watermark and both processing-time clocks are advanced
 * to {@code BoundedWindow.TIMESTAMP_MAX_VALUE} before the eligible timers are fired.
 *
 * <p>NOTE(review): any exception raised along the way is only logged (message only, INFO
 * level) and is not propagated to the caller - confirm this best-effort behaviour is intended.
 *
 * @param kIteratorKV the key together with its windowed values
 * @param collector sink that receives each windowed output
 */
@Override
public void flatMap(KV<K, Iterable<WindowedValue<V>>> kIteratorKV, RecordCollector<WindowedValue<KV<K, Iterable<V>>>> collector) {
    try {
        K groupKey = kIteratorKV.getKey();
        Iterable<WindowedValue<V>> groupedValues = kIteratorKV.getValue();

        // Start both processing-time clocks at "now".
        InMemoryTimerInternals timers = new InMemoryTimerInternals();
        timers.advanceProcessingTime(Instant.now());
        timers.advanceSynchronizedProcessingTime(Instant.now());

        StateInternals keyState = InMemoryStateInternals.forKey(groupKey);
        GABWOutputWindowedValue<K, V> sink = new GABWOutputWindowedValue<>();
        ExecutableTriggerStateMachine triggerMachine = ExecutableTriggerStateMachine.create(TriggerStateMachines.stateMachineForTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger())));
        ReduceFnRunner<K, V, Iterable<V>, W> runner = new ReduceFnRunner<>(groupKey, windowingStrategy, triggerMachine, keyState, timers, sink, new UnsupportedSideInputReader("GroupAlsoByWindow"), reduceFn, null);

        // Feed every grouped value through the runner.
        runner.processElements(groupedValues);

        // Push the input watermark to its maximum so pending windows can complete.
        timers.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);

        // Push both processing-time clocks to their maximum so remaining timers become eligible.
        timers.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
        timers.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
        fireEligibleTimers(timers, runner);
        runner.persist();

        // Hand everything the runner emitted to the downstream collector.
        for (Iterator<WindowedValue<KV<K, Iterable<V>>>> it = sink.getOutputs().iterator(); it.hasNext();) {
            collector.collect(it.next());
        }
    } catch (Exception e) {
        LOG.info(e.getMessage());
    }
}
Also used : ReduceFnRunner(org.apache.beam.runners.core.ReduceFnRunner) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) KV(org.apache.beam.sdk.values.KV) UnsupportedSideInputReader(org.apache.beam.runners.core.UnsupportedSideInputReader) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals)

Example 7 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

The class SplittableProcessElementsEvaluatorFactory, method createEvaluator.

/**
 * Builds the evaluator for a splittable {@code ProcessElements} application.
 *
 * <p>A new {@code ProcessFn} is obtained through a {@link DoFnLifecycleManager}, wrapped in a
 * ParDo evaluator created by {@code delegateFactory}, and then wired with state and timer
 * factories backed by the step context and with a process-element invoker that writes to the
 * evaluator's output manager.
 *
 * @param application the applied transform being evaluated
 * @param inputBundle the bundle whose key selects the execution context
 * @return the ParDo evaluator wrapped so that the lifecycle manager is involved in its teardown
 *     (see {@code DoFnLifecycleManagerRemovingTransformEvaluator.wrapping})
 * @throws Exception if any of the delegated construction steps fails
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> processElements = application.getTransform();

    // Register a fresh ProcessFn with the lifecycle manager, then work with the instance the
    // manager hands back.
    DoFnLifecycleManager lifecycleManager = DoFnLifecycleManager.of(processElements.newProcessFn(processElements.getFn()));
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> fn = (ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) lifecycleManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get();

    String step = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext directStepContext = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(step);
    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> evaluator = delegateFactory.createParDoEvaluator(application, inputBundle.getKey(), processElements.getSideInputs(), processElements.getMainOutputTag(), processElements.getAdditionalOutputTags().getAll(), directStepContext, fn, lifecycleManager);

    // Both factories ignore the requested key and return the step context's own internals.
    fn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return directStepContext.timerInternals();
        }
    });
    fn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) directStepContext.stateInternals();
        }
    });

    // Everything the invoker emits is routed to the ParDo evaluator's output manager.
    final OutputManager evaluatorOutputs = evaluator.getOutputManager();
    OutputWindowedValue<OutputT> outputSink = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            evaluatorOutputs.output(processElements.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            evaluatorOutputs.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // The invoker runs on a dedicated single daemon thread.
    // NOTE(review): 10000 and ten seconds are presumably the output-count and time bounds of
    // the invoker - confirm against its constructor.
    OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT> invoker = new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
        processElements.getFn(),
        evaluationContext.getPipelineOptions(),
        outputSink,
        evaluationContext.createSideInputReader(processElements.getSideInputs()),
        Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()),
        10000,
        Duration.standardSeconds(10));
    fn.setProcessElementInvoker(invoker);

    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(evaluator, lifecycleManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 8 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

The class DoFnOperator, method open.

/**
 * Builds the runner chain for this operator and schedules the periodic finish-bundle check.
 *
 * <p>The steps run in a fixed order: resolve the DoFn and invoke its setup, create the base
 * runner, optionally wrap it for stable input, apply the operator-specific wrapper, optionally
 * wrap it for metrics, reset the bundle bookkeeping, schedule the bundle timeout timer, and
 * finally choose the pushback runner and create the bundle finalizer.
 *
 * @throws Exception if any of the construction steps fails
 */
@Override
public void open() throws Exception {
    // WindowDoFnOperator needs state and timers in order to build its DoFn,
    // so this must wait until StateInternals and TimerInternals are ready.
    // This is called after initializeState().
    this.doFn = getDoFn();
    FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
    doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
    StepContext stepContext = new FlinkStepContext();
    // Base runner; every wrapper below is layered on top of it.
    doFnRunner = DoFnRunners.simpleRunner(options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation, sideInputMapping);
    if (requiresStableInput) {
        // put this in front of the root FnRunner before any additional wrappers
        doFnRunner = bufferingDoFnRunner = BufferingDoFnRunner.create(doFnRunner, "stable-input-buffer", windowedInputCoder, windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(), getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
    }
    doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
    earlyBindStateIfNeeded();
    // Metrics wrapping is skipped entirely when the options disable metrics.
    if (!options.getDisableMetrics()) {
        flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
        String checkpointMetricNamespace = options.getReportCheckpointDuration();
        // Checkpoint statistics are only created when a namespace has been configured.
        if (checkpointMetricNamespace != null) {
            MetricName checkpointMetric = MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
            checkpointStats = new CheckpointStats(() -> flinkMetricContainer.getMetricsContainer(stepName).getDistribution(checkpointMetric));
        }
    }
    // Reset the bundle bookkeeping: no elements seen yet, last finish time is "now".
    elementCount = 0L;
    lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();
    // Schedule timer to check timeout of finish bundle.
    // The check runs at half the maximum bundle time, but never with a period below 1.
    long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
    checkFinishBundleTimer = getProcessingTimeService().scheduleAtFixedRate(timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);
    // A splittable ProcessFn gets a dedicated ProcessFnRunner; any other DoFn gets the simple pushback runner.
    if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
        pushbackDoFnRunner = new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
    } else {
        pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
    }
    bundleFinalizer = new InMemoryBundleFinalizer();
    pendingFinalizations = new LinkedHashMap<>();
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) InternalTimeServiceManager(org.apache.flink.streaming.api.operators.InternalTimeServiceManager) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer) Joiner(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Joiner) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) TimerInternals(org.apache.beam.runners.core.TimerInternals) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Map(java.util.Map) InternalTimerService(org.apache.flink.streaming.api.operators.InternalTimerService) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) OperatorStateBackend(org.apache.flink.runtime.state.OperatorStateBackend) FlinkBroadcastStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals) StateSnapshotContext(org.apache.flink.runtime.state.StateSnapshotContext) InternalTimer(org.apache.flink.streaming.api.operators.InternalTimer) OutputTag(org.apache.flink.util.OutputTag) Serializable(java.io.Serializable) Workarounds(org.apache.beam.runners.flink.translation.utils.Workarounds) Stream(java.util.stream.Stream) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) OneInputStreamOperator(org.apache.flink.streaming.api.operators.OneInputStreamOperator) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) VoidNamespace(org.apache.flink.runtime.state.VoidNamespace) KV(org.apache.beam.sdk.values.KV) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) InternalPriorityQueue(org.apache.flink.runtime.state.InternalPriorityQueue) 
CoderTypeSerializer(org.apache.beam.runners.flink.translation.types.CoderTypeSerializer) TupleTag(org.apache.beam.sdk.values.TupleTag) Output(org.apache.flink.streaming.api.operators.Output) StateInternals(org.apache.beam.runners.core.StateInternals) SideInputReader(org.apache.beam.runners.core.SideInputReader) DoFn(org.apache.beam.sdk.transforms.DoFn) TwoInputStreamOperator(org.apache.flink.streaming.api.operators.TwoInputStreamOperator) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) IOException(java.io.IOException) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) MapState(org.apache.flink.api.common.state.MapState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) FileSystems(org.apache.beam.sdk.io.FileSystems) TimeDomain(org.apache.beam.sdk.state.TimeDomain) SplittableParDoViaKeyedWorkItems(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems) StateSpec(org.apache.beam.sdk.state.StateSpec) ScheduledFuture(java.util.concurrent.ScheduledFuture) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) CheckpointingMode(org.apache.flink.streaming.api.CheckpointingMode) LoggerFactory(org.slf4j.LoggerFactory) StepContext(org.apache.beam.runners.core.StepContext) StringSerializer(org.apache.flink.api.common.typeutils.base.StringSerializer) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) ByteBuffer(java.nio.ByteBuffer) 
DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ListState(org.apache.flink.api.common.state.ListState) ChainingStrategy(org.apache.flink.streaming.api.operators.ChainingStrategy) CheckpointStats(org.apache.beam.runners.flink.translation.utils.CheckpointStats) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) KeySelector(org.apache.flink.api.java.functions.KeySelector) StreamTask(org.apache.flink.streaming.runtime.tasks.StreamTask) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) List(java.util.List) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) Optional(java.util.Optional) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StreamConfig(org.apache.flink.streaming.api.graph.StreamConfig) StateAndTimerBundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandlers.StateAndTimerBundleCheckpointHandler) Coder(org.apache.beam.sdk.coders.Coder) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) ProcessFnRunner(org.apache.beam.runners.core.ProcessFnRunner) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) FlinkStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) DoFnRunnerWithMetricsUpdate(org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate) 
OutputStream(java.io.OutputStream) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Triggerable(org.apache.flink.streaming.api.operators.Triggerable) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner) InMemoryBundleFinalizer(org.apache.beam.runners.core.InMemoryBundleFinalizer) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) Instant(org.joda.time.Instant) BufferingDoFnRunner(org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner) InputStream(java.io.InputStream) StateInitializationContext(org.apache.flink.runtime.state.StateInitializationContext) StepContext(org.apache.beam.runners.core.StepContext) CheckpointStats(org.apache.beam.runners.flink.translation.utils.CheckpointStats) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) MetricName(org.apache.beam.sdk.metrics.MetricName) InMemoryBundleFinalizer(org.apache.beam.runners.core.InMemoryBundleFinalizer) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner) BufferingDoFnRunner(org.apache.beam.runners.flink.translation.wrappers.streaming.stableinput.BufferingDoFnRunner) FlinkMetricContainer(org.apache.beam.runners.flink.metrics.FlinkMetricContainer)

Example 9 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

The class StreamingGroupAlsoByWindowViaWindowSetFn, method processElement.

/**
 * Processes one keyed work item through a freshly built {@link ReduceFnRunner}.
 *
 * <p>The runner is given the key's state internals and the step context's timer internals. It
 * first consumes the work item's elements, then its timers, and finally persists its state.
 *
 * @param keyedWorkItem the key with its pending elements and timers
 * @param options pipeline options handed to the runner
 * @param stepContext supplies the timer internals
 * @param sideInputReader side input access handed to the runner
 * @param output receiver for the grouped results
 * @throws Exception if the runner fails while processing or persisting
 */
@Override
public void processElement(KeyedWorkItem<K, InputT> keyedWorkItem, PipelineOptions options, StepContext stepContext, SideInputReader sideInputReader, OutputWindowedValue<KV<K, OutputT>> output) throws Exception {
    K workKey = keyedWorkItem.key();
    StateInternals keyedState = stateInternalsFactory.stateInternalsForKey(workKey);
    ExecutableTriggerStateMachine triggerMachine = ExecutableTriggerStateMachine.create(TriggerStateMachines.stateMachineForTrigger(triggerProto));
    ReduceFnRunner<K, InputT, OutputT, W> runner = new ReduceFnRunner<K, InputT, OutputT, W>(
        workKey,
        windowingStrategy,
        triggerMachine,
        keyedState,
        stepContext.timerInternals(),
        output,
        sideInputReader,
        reduceFn,
        options);

    // Elements first, then timers, then write the resulting state back.
    runner.processElements(keyedWorkItem.elementsIterable());
    runner.onTimers(keyedWorkItem.timersIterable());
    runner.persist();
}
Also used : ReduceFnRunner(org.apache.beam.runners.core.ReduceFnRunner) StateInternals(org.apache.beam.runners.core.StateInternals)

Example 10 with StateInternals

use of org.apache.beam.runners.core.StateInternals in project beam by apache.

The class SimpleParDoFn, method processSystemTimer.

/**
 * Handles a system timer; only the cleanup timer owned by this class is acted upon.
 *
 * <p>For a cleanup timer the method validates its domain, namespace and timestamp, notifies
 * the runner that the window has expired, and then clears every state cell declared on the
 * DoFn within the timer's namespace. Any other timer id is ignored.
 *
 * @param timer the timer that fired
 * @throws Exception if a validation fails or the runner or state access throws
 */
private void processSystemTimer(TimerData timer) throws Exception {
    // Only the cleanup timer owned by this class is handled here.
    if (!timer.getTimerId().equals(CLEANUP_TIMER_ID)) {
        return;
    }

    checkState(timer.getDomain().equals(TimeDomain.EVENT_TIME), "%s received cleanup timer with domain not EVENT_TIME: %s", this, timer);
    checkState(timer.getNamespace() instanceof WindowNamespace, "%s received cleanup timer not for a %s: %s", this, WindowNamespace.class.getSimpleName(), timer);

    WindowNamespace expiredNamespace = (WindowNamespace) timer.getNamespace();
    BoundedWindow expiredWindow = expiredNamespace.getWindow();
    Instant cleanupTime = earliestAllowableCleanupTime(expiredWindow, fnInfo.getWindowingStrategy());
    checkState(!cleanupTime.isAfter(timer.getTimestamp()), "%s received state cleanup timer for window %s " + " that is before the appropriate cleanup time %s", this, expiredWindow, cleanupTime);

    fnRunner.onWindowExpiration(expiredWindow, timer.getOutputTimestamp(), this.stepContext.stateInternals().getKey());

    // The window has expired: clear each declared state cell in the timer's namespace.
    for (StateDeclaration declaration : fnSignature.stateDeclarations().values()) {
        StateTag<?> stateTag;
        try {
            // The StateSpec is read from the declaring field of the DoFn instance.
            stateTag = StateTags.tagForSpec(declaration.id(), (StateSpec) declaration.field().get(fnInfo.getDoFn()));
        } catch (IllegalAccessException e) {
            throw new RuntimeException(String.format("Error accessing %s for %s", StateSpec.class.getName(), fnInfo.getDoFn().getClass().getName()), e);
        }
        StateInternals userState = userStepContext.stateInternals();
        userState.state(timer.getNamespace(), stateTag).clear();
    }
}
Also used : Instant(org.joda.time.Instant) StateSpec(org.apache.beam.sdk.state.StateSpec) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) StateInternals(org.apache.beam.runners.core.StateInternals) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration)

Aggregations

StateInternals (org.apache.beam.runners.core.StateInternals)15 InMemoryTimerInternals (org.apache.beam.runners.core.InMemoryTimerInternals)9 WindowedValue (org.apache.beam.sdk.util.WindowedValue)8 ReduceFnRunner (org.apache.beam.runners.core.ReduceFnRunner)7 InMemoryStateInternals (org.apache.beam.runners.core.InMemoryStateInternals)6 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)6 TimerInternals (org.apache.beam.runners.core.TimerInternals)6 KV (org.apache.beam.sdk.values.KV)6 UnsupportedSideInputReader (org.apache.beam.runners.core.UnsupportedSideInputReader)4 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 Instant (org.joda.time.Instant)4 StepContext (org.apache.beam.runners.core.StepContext)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Collection (java.util.Collection)2 DoFnRunners (org.apache.beam.runners.core.DoFnRunners)2 StateNamespace (org.apache.beam.runners.core.StateNamespace)2 WindowNamespace (org.apache.beam.runners.core.StateNamespaces.WindowNamespace)2 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)2