Search in sources :

Example 1 with GroupAlsoByWindowViaWindowSetNewDoFn

Use of org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn in the Apache Beam project.

From the class DoFnOperator, method open.

/**
 * Prepares this operator for processing.
 *
 * <p>In order, this method: calls {@code super.open()}, resets the input, side-input and output
 * watermarks to {@code BoundedWindow.TIMESTAMP_MIN_VALUE}, sets up side-input handling and
 * push-back state when {@code sideInputs} is non-empty, creates the output manager, creates state
 * and timer internals when {@code keyCoder} is non-null, obtains the {@code DoFn} via
 * {@code getDoFn()} and invokes its setup, and finally builds the chain of {@code DoFnRunner}
 * wrappers that ends in {@code pushbackDoFnRunner}.
 *
 * <p>The statement order matters: {@code getDoFn()} is deliberately called only after the state
 * and timer internals have been assigned (see the comment at that call).
 *
 * @throws Exception propagated from {@code super.open()} or from any of the initialization steps
 */
@Override
public void open() throws Exception {
    super.open();
    // Start all three watermarks at the minimum timestamp.
    setCurrentInputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
    setCurrentSideInputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
    setCurrentOutputWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis());
    // Default reader from NullSideInputReader; replaced below when side inputs exist.
    sideInputReader = NullSideInputReader.of(sideInputs);
    if (!sideInputs.isEmpty()) {
        // Bag state tag under which pushed-back elements are kept, encoded with inputCoder.
        pushedBackTag = StateTags.bag("pushed-back-values", inputCoder);
        // Side-input state is built on the operator state backend, together with this
        // subtask's index.
        FlinkBroadcastStateInternals sideInputStateInternals = new FlinkBroadcastStateInternals<>(getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
        sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
        sideInputReader = sideInputHandler;
        // pushbackStateInternals may already have been set elsewhere (presumably by
        // initializeState -- TODO confirm); only create it here if it is still null.
        if (pushbackStateInternals == null) {
            if (keyCoder != null) {
                // Keyed case: push-back state is built on the keyed state backend.
                pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder, getKeyedStateBackend());
            } else {
                // Non-keyed case: push-back state is built on the operator state backend.
                pushbackStateInternals = new FlinkSplitStateInternals<Object>(getOperatorStateBackend());
            }
        }
        // Start with an absent pushed-back watermark.
        pushedBackWatermark = Optional.absent();
    }
    outputManager = outputManagerFactory.create(output);
    // A non-null keyCoder means this is a stateful ParDo or a WindowDoFn; only then are the
    // state internals, timer service and timer internals created.
    if (keyCoder != null) {
        stateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(), keyCoder);
        // Timer service is registered under the name "beam-timer", with this operator passed
        // as the last argument.
        timerService = (HeapInternalTimerService<?, TimerInternals.TimerData>) getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder), this);
        timerInternals = new FlinkTimerInternals();
    }
    // WindowDoFnOperator needs state and timers in order to produce its DoFn, so getDoFn()
    // must not be called before StateInternals and TimerInternals are ready.
    this.doFn = getDoFn();
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();
    org.apache.beam.runners.core.StepContext stepContext = createStepContext();
    // Base runner; the branches below may wrap it in further runners.
    doFnRunner = DoFnRunners.simpleRunner(serializedOptions.getPipelineOptions(), doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
    if (doFn instanceof GroupAlsoByWindowViaWindowSetNewDoFn) {
        // When the doFn is this, we know it came from WindowDoFnOperator and
        //   InputT = KeyedWorkItem<K, V>
        //   OutputT = KV<K, V>
        //
        // for some K, V
        // Wrap the base runner in the late-data-dropping runner.
        doFnRunner = DoFnRunners.lateDataDroppingRunner((DoFnRunner) doFnRunner, stepContext, windowingStrategy);
    } else if (keyCoder != null) {
        // Keyed but not a GroupAlsoByWindow DoFn: it is a stateful DoFn, so wrap the base
        // runner with a cleanup timer and a state cleaner.
        StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
        // The window type is not known here, hence the raw Coder and suppressed warnings.
        @SuppressWarnings({ "unchecked", "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked", "rawtypes" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
        doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
    }
    // Add the metrics-updating wrapper only when metrics are enabled in the Flink options.
    if ((serializedOptions.getPipelineOptions().as(FlinkPipelineOptions.class)).getEnableMetrics()) {
        doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, getRuntimeContext());
    }
    // Outermost wrapper. Note: sideInputHandler is only assigned in this method when
    // sideInputs is non-empty.
    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
Also used : FlinkBroadcastStateInternals(org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) Coder(org.apache.beam.sdk.coders.Coder) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) GroupAlsoByWindowViaWindowSetNewDoFn(org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn) DoFnRunner(org.apache.beam.runners.core.DoFnRunner) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) PushbackSideInputDoFnRunner(org.apache.beam.runners.core.PushbackSideInputDoFnRunner) SimplePushbackSideInputDoFnRunner(org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner)

Aggregations

DoFnRunner (org.apache.beam.runners.core.DoFnRunner)1 GroupAlsoByWindowViaWindowSetNewDoFn (org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn)1 PushbackSideInputDoFnRunner (org.apache.beam.runners.core.PushbackSideInputDoFnRunner)1 SideInputHandler (org.apache.beam.runners.core.SideInputHandler)1 SimplePushbackSideInputDoFnRunner (org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner)1 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)1 TimerData (org.apache.beam.runners.core.TimerInternals.TimerData)1 FlinkBroadcastStateInternals (org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals)1 Coder (org.apache.beam.sdk.coders.Coder)1 KeyedStateBackend (org.apache.flink.runtime.state.KeyedStateBackend)1