Use of org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn in the Apache Beam project.
Class DoFnOperator, method open.
/**
 * Initializes this operator's runtime state and assembles the {@code DoFnRunner} chain.
 *
 * <p>After delegating to {@code super.open()}, the method performs these steps in order:
 * <ol>
 * <li>sets the input, side-input and output watermarks to
 * {@code BoundedWindow.TIMESTAMP_MIN_VALUE};</li>
 * <li>if side inputs are configured, creates the {@code SideInputHandler} and, unless it is
 * already set, the state internals used for pushed-back values;</li>
 * <li>creates the output manager from {@code outputManagerFactory};</li>
 * <li>if {@code keyCoder} is non-null, creates keyed state internals, the timer service and
 * the timer internals;</li>
 * <li>obtains the DoFn via {@code getDoFn()}, invokes its setup, and builds a simple runner
 * that is then wrapped for late-data dropping or stateful execution, and optionally for
 * metrics;</li>
 * <li>wraps the resulting runner in a {@code SimplePushbackSideInputDoFnRunner}.</li>
 * </ol>
 *
 * <p>The ordering matters: {@code getDoFn()} is deliberately called only after the state and
 * timer internals have been created.
 *
 * @throws Exception declared by the signature; anything thrown by {@code super.open()} or by
 *     the initialization steps propagates to the caller
 */
@Override
public void open() throws Exception {
super.open();
// Start all three watermarks at the minimum representable timestamp.
setCurrentInputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
setCurrentSideInputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
setCurrentOutputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
// Default reader; replaced by the SideInputHandler below when side inputs exist.
sideInputReader = NullSideInputReader.of(sideInputs);
if (!sideInputs.isEmpty()) {
// Bag state tag named "pushed-back-values", encoded with the main input coder.
// NOTE(review): presumably holds elements waiting for their side input - confirm at the use site.
pushedBackTag = StateTags.bag("pushed-back-values", inputCoder);
// Side-input state is built from this subtask's index and the operator state backend.
FlinkBroadcastStateInternals sideInputStateInternals = new FlinkBroadcastStateInternals<>(getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
// pushbackStateInternals may already have been set (the original note names initializeState);
// only create it here when it is still null.
if (pushbackStateInternals == null) {
if (keyCoder != null) {
// With a key coder: key-group state on the keyed state backend.
pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder, getKeyedStateBackend());
} else {
// Without a key coder: split state on the operator state backend.
pushbackStateInternals = new FlinkSplitStateInternals<Object>(getOperatorStateBackend());
}
}
// No pushed-back watermark is recorded at this point.
pushedBackWatermark = Optional.absent();
}
outputManager = outputManagerFactory.create(output);
// A non-null keyCoder indicates a stateful ParDo or a WindowDoFn.
if (keyCoder != null) {
stateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(), keyCoder);
// Timer service is obtained under the name "beam-timer"; this operator is passed as the third argument.
// NOTE(review): presumably 'this' acts as the timer callback - confirm against getInternalTimerService.
timerService = (HeapInternalTimerService<?, TimerInternals.TimerData>) getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder), this);
timerInternals = new FlinkTimerInternals();
}
// WindowDoFnOperator needs state and timers in order to produce its DoFn,
// so getDoFn() must only be called once StateInternals and TimerInternals are ready.
this.doFn = getDoFn();
doFnInvoker = DoFnInvokers.invokerFor(doFn);
doFnInvoker.invokeSetup();
org.apache.beam.runners.core.StepContext stepContext = createStepContext();
// Base runner; the branches below may wrap it with additional behavior.
doFnRunner = DoFnRunners.simpleRunner(serializedOptions.getPipelineOptions(), doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
if (doFn instanceof GroupAlsoByWindowViaWindowSetNewDoFn) {
// When the doFn is of this type, we know it came from WindowDoFnOperator and
// InputT = KeyedWorkItem<K, V>
// OutputT = KV<K, V>
//
// for some K, V
// The raw (DoFnRunner) cast is used because K and V are not known in this method.
doFnRunner = DoFnRunners.lateDataDroppingRunner((DoFnRunner) doFnRunner, stepContext, windowingStrategy);
} else if (keyCoder != null) {
// It is a stateful DoFn: wrap the runner with a cleanup timer and a state cleaner.
StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
// We don't know the window type, hence the raw Coder and the suppressed warnings.
@SuppressWarnings({ "unchecked", "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
@SuppressWarnings({ "unchecked", "rawtypes" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
}
// Wrap the runner with the metrics-updating decorator only when the Flink pipeline options enable metrics.
if ((serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
}
// Outermost wrapper. Note that this method assigns sideInputHandler only when sideInputs is
// non-empty; otherwise whatever value the field already holds is passed through.
pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
Aggregations