Use of org.apache.beam.runners.core.StateInternals in the twister2 project by DSC-SPIDAL.
The class GroupByWindowFunction, method flatMap:
@Override
public void flatMap(
    KV<K, Iterable<WindowedValue<V>>> kIteratorKV,
    RecordCollector<WindowedValue<KV<K, Iterable<V>>>> collector) {
  try {
    K key = kIteratorKV.getKey();
    Iterable<WindowedValue<V>> values = kIteratorKV.getValue();
    InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
    timerInternals.advanceProcessingTime(Instant.now());
    timerInternals.advanceSynchronizedProcessingTime(Instant.now());
    StateInternals stateInternals = InMemoryStateInternals.forKey(key);
    GABWOutputWindowedValue<K, V> outputter = new GABWOutputWindowedValue<>();
    ReduceFnRunner<K, V, Iterable<V>, W> reduceFnRunner =
        new ReduceFnRunner<>(
            key,
            windowingStrategy,
            ExecutableTriggerStateMachine.create(
                TriggerStateMachines.stateMachineForTrigger(
                    TriggerTranslation.toProto(windowingStrategy.getTrigger()))),
            stateInternals,
            timerInternals,
            outputter,
            new UnsupportedSideInputReader("GroupAlsoByWindow"),
            reduceFn,
            null);
    // Process the grouped values.
    reduceFnRunner.processElements(values);
    // Finish any pending windows by advancing the input watermark to infinity.
    timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
    // Finally, advance the processing time to infinity to fire any timers.
    timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    fireEligibleTimers(timerInternals, reduceFnRunner);
    reduceFnRunner.persist();
    Iterator<WindowedValue<KV<K, Iterable<V>>>> outputs = outputter.getOutputs().iterator();
    while (outputs.hasNext()) {
      collector.collect(outputs.next());
    }
  } catch (Exception e) {
    LOG.info(e.getMessage());
  }
}
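The snippet above backs ReduceFnRunner with InMemoryStateInternals.forKey(key). Below is a minimal sketch of that StateInternals contract, reading and writing a ValueState under the global namespace, assuming the runners-core in-memory implementation used above; the key "user-1", the tag id "elementCount", and the class name are placeholders, not code from any of the projects quoted here.

import org.apache.beam.runners.core.InMemoryStateInternals;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.StateTag;
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.state.ValueState;

public class StateInternalsSketch {
  public static void main(String[] args) {
    // Keyed in-memory state; "user-1" and "elementCount" are placeholder names.
    StateInternals stateInternals = InMemoryStateInternals.forKey("user-1");
    StateTag<ValueState<Long>> countTag = StateTags.value("elementCount", VarLongCoder.of());
    ValueState<Long> count = stateInternals.state(StateNamespaces.global(), countTag);
    Long current = count.read(); // null until something has been written
    count.write(current == null ? 1L : current + 1);
    System.out.println("count = " + count.read());
  }
}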
Use of org.apache.beam.runners.core.StateInternals in the beam project by Apache.
The class SplittableProcessElementsEvaluatorFactory, method createEvaluator:
@SuppressWarnings({"unchecked", "rawtypes"})
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
    createEvaluator(
        AppliedPTransform<
                PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>,
                PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>>
            application,
        CommittedBundle<InputT> inputBundle) throws Exception {
  final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform =
      application.getTransform();
  ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
      transform.newProcessFn(transform.getFn());
  DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(processFn);
  processFn =
      (ProcessFn<InputT, OutputT, RestrictionT, TrackerT>)
          fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get();
  String stepName = evaluationContext.getStepName(application);
  final DirectExecutionContext.DirectStepContext stepContext =
      evaluationContext
          .getExecutionContext(application, inputBundle.getKey())
          .getStepContext(stepName);
  final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator =
      delegateFactory.createParDoEvaluator(
          application, inputBundle.getKey(), transform.getSideInputs(),
          transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(),
          stepContext, processFn, fnManager);
  processFn.setStateInternalsFactory(
      new StateInternalsFactory<String>() {
        @SuppressWarnings({"unchecked", "rawtypes"})
        @Override
        public StateInternals stateInternalsForKey(String key) {
          return (StateInternals) stepContext.stateInternals();
        }
      });
  processFn.setTimerInternalsFactory(
      new TimerInternalsFactory<String>() {
        @Override
        public TimerInternals timerInternalsForKey(String key) {
          return stepContext.timerInternals();
        }
      });
  OutputWindowedValue<OutputT> outputWindowedValue =
      new OutputWindowedValue<OutputT>() {
        private final OutputManager outputManager = parDoEvaluator.getOutputManager();
        @Override
        public void outputWindowedValue(
            OutputT output, Instant timestamp,
            Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          outputManager.output(
              transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }
        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp,
            Collection<? extends BoundedWindow> windows, PaneInfo pane) {
          outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };
  processFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
          transform.getFn(), evaluationContext.getPipelineOptions(), outputWindowedValue,
          evaluationContext.createSideInputReader(transform.getSideInputs()),
          // DirectRunner.
          Executors.newSingleThreadScheduledExecutor(
              new ThreadFactoryBuilder()
                  .setThreadFactory(MoreExecutors.platformThreadFactory())
                  .setDaemon(true)
                  .setNameFormat("direct-splittable-process-element-checkpoint-executor")
                  .build()),
          10000,
          Duration.standardSeconds(10)));
  return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
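The evaluator above binds the ProcessFn's per-key state to the DirectRunner step context. For contrast, here is a minimal sketch of the same StateInternalsFactory contract backed by InMemoryStateInternals, a test-style stand-in rather than the DirectRunner implementation; the class name and the per-key map are assumptions for illustration.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.beam.runners.core.InMemoryStateInternals;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateInternalsFactory;

public class InMemoryStateInternalsFactory implements StateInternalsFactory<String> {
  private final Map<String, StateInternals> perKey = new ConcurrentHashMap<>();

  @Override
  public StateInternals stateInternalsForKey(String key) {
    // Return the same StateInternals for repeated calls with the same key,
    // so state written for one element is visible to later elements of that key.
    return perKey.computeIfAbsent(key, InMemoryStateInternals::forKey);
  }
}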
Use of org.apache.beam.runners.core.StateInternals in the beam project by Apache.
The class DoFnOperator, method open:
@Override
public void open() throws Exception {
  // WindowDoFnOperator needs state and timers to obtain its DoFn,
  // so it must wait until StateInternals and TimerInternals are ready.
  // This is called after initializeState().
  this.doFn = getDoFn();
  FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
  doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
  StepContext stepContext = new FlinkStepContext();
  doFnRunner =
      DoFnRunners.simpleRunner(
          options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags,
          stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation,
          sideInputMapping);
  if (requiresStableInput) {
    // Put this in front of the root FnRunner, before any additional wrappers.
    doFnRunner =
        bufferingDoFnRunner =
            BufferingDoFnRunner.create(
                doFnRunner, "stable-input-buffer", windowedInputCoder,
                windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(),
                getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
  }
  doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
  earlyBindStateIfNeeded();
  if (!options.getDisableMetrics()) {
    flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
    doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
    String checkpointMetricNamespace = options.getReportCheckpointDuration();
    if (checkpointMetricNamespace != null) {
      MetricName checkpointMetric =
          MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
      checkpointStats =
          new CheckpointStats(
              () ->
                  flinkMetricContainer
                      .getMetricsContainer(stepName)
                      .getDistribution(checkpointMetric));
    }
  }
  elementCount = 0L;
  lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();
  // Schedule a timer to check for finish-bundle timeouts.
  long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
  checkFinishBundleTimer =
      getProcessingTimeService()
          .scheduleAtFixedRate(
              timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);
  if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
    pushbackDoFnRunner = new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
  } else {
    pushbackDoFnRunner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
  }
  bundleFinalizer = new InMemoryBundleFinalizer();
  pendingFinalizations = new LinkedHashMap<>();
}
Use of org.apache.beam.runners.core.StateInternals in the beam project by Apache.
The class StreamingGroupAlsoByWindowViaWindowSetFn, method processElement:
@Override
public void processElement(
    KeyedWorkItem<K, InputT> keyedWorkItem, PipelineOptions options, StepContext stepContext,
    SideInputReader sideInputReader, OutputWindowedValue<KV<K, OutputT>> output) throws Exception {
  K key = keyedWorkItem.key();
  StateInternals stateInternals = stateInternalsFactory.stateInternalsForKey(key);
  ReduceFnRunner<K, InputT, OutputT, W> reduceFnRunner =
      new ReduceFnRunner<K, InputT, OutputT, W>(
          key, windowingStrategy,
          ExecutableTriggerStateMachine.create(
              TriggerStateMachines.stateMachineForTrigger(triggerProto)),
          stateInternals, stepContext.timerInternals(), output, sideInputReader, reduceFn, options);
  reduceFnRunner.processElements(keyedWorkItem.elementsIterable());
  reduceFnRunner.onTimers(keyedWorkItem.timersIterable());
  reduceFnRunner.persist();
}
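processElement consumes a KeyedWorkItem that bundles a key with its buffered elements and fired timers. The following is a minimal sketch of building such an item with the KeyedWorkItems helpers; the key and values are placeholders, and the WindowedValue import location varies across Beam versions, so treat this as an assumption-laden illustration rather than code from the project above.

import java.util.Arrays;
import org.apache.beam.runners.core.KeyedWorkItem;
import org.apache.beam.runners.core.KeyedWorkItems;
import org.apache.beam.sdk.util.WindowedValue;

public class KeyedWorkItemSketch {
  public static void main(String[] args) {
    // An elements-only work item; timersWorkItem / workItem cover the timer cases.
    KeyedWorkItem<String, Integer> workItem =
        KeyedWorkItems.elementsWorkItem(
            "user-1",
            Arrays.asList(
                WindowedValue.valueInGlobalWindow(1),
                WindowedValue.valueInGlobalWindow(2)));
    System.out.println(workItem.key());
    workItem.elementsIterable().forEach(wv -> System.out.println(wv.getValue()));
  }
}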
Use of org.apache.beam.runners.core.StateInternals in the beam project by Apache.
The class SimpleParDoFn, method processSystemTimer:
private void processSystemTimer(TimerData timer) throws Exception {
  // Timer owned by this class, for cleaning up state in expired windows.
  if (timer.getTimerId().equals(CLEANUP_TIMER_ID)) {
    checkState(
        timer.getDomain().equals(TimeDomain.EVENT_TIME),
        "%s received cleanup timer with domain not EVENT_TIME: %s",
        this,
        timer);
    checkState(
        timer.getNamespace() instanceof WindowNamespace,
        "%s received cleanup timer not for a %s: %s",
        this,
        WindowNamespace.class.getSimpleName(),
        timer);
    BoundedWindow window = ((WindowNamespace) timer.getNamespace()).getWindow();
    Instant targetTime = earliestAllowableCleanupTime(window, fnInfo.getWindowingStrategy());
    checkState(
        !targetTime.isAfter(timer.getTimestamp()),
        "%s received state cleanup timer for window %s that is before the appropriate cleanup time %s",
        this,
        window,
        targetTime);
    fnRunner.onWindowExpiration(
        window, timer.getOutputTimestamp(), this.stepContext.stateInternals().getKey());
    // This timer is for an expired window, so clean up its state.
    for (StateDeclaration stateDecl : fnSignature.stateDeclarations().values()) {
      StateTag<?> tag;
      try {
        tag = StateTags.tagForSpec(stateDecl.id(), (StateSpec) stateDecl.field().get(fnInfo.getDoFn()));
      } catch (IllegalAccessException e) {
        throw new RuntimeException(
            String.format(
                "Error accessing %s for %s",
                StateSpec.class.getName(), fnInfo.getDoFn().getClass().getName()),
            e);
      }
      StateInternals stateInternals = userStepContext.stateInternals();
      org.apache.beam.sdk.state.State state = stateInternals.state(timer.getNamespace(), tag);
      state.clear();
    }
  }
}
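The cleanup path above clears every declared state cell in the expired window's namespace. Below is a minimal sketch of addressing and clearing window-namespaced state directly through StateInternals; the coder, window, tag id, and class name are assumptions chosen to keep the example self-contained, not part of SimpleParDoFn.

import org.apache.beam.runners.core.InMemoryStateInternals;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateNamespace;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.StateTag;
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.state.BagState;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;

public class WindowStateCleanupSketch {
  public static void main(String[] args) {
    StateInternals stateInternals = InMemoryStateInternals.forKey("key");
    // State lives under a per-window namespace; GlobalWindow keeps the sketch self-contained.
    StateNamespace windowNamespace =
        StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
    StateTag<BagState<String>> bufferTag = StateTags.bag("buffer", StringUtf8Coder.of());
    BagState<String> buffer = stateInternals.state(windowNamespace, bufferTag);
    buffer.add("element");
    System.out.println(buffer.read());
    // When the window expires, the runner clears each declared cell, as in the loop above.
    buffer.clear();
    System.out.println(buffer.isEmpty().read());
  }
}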