Search in sources :

Example 1 with ProcessFn

Use of org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn in the Apache Beam project.

Class ApexParDoOperator, method setup.

/**
 * Initializes this operator before it starts processing.
 *
 * <p>In order, this method: records whether tuple tracing is enabled, chooses the side-input
 * reader, maps every additional output tag to its output port, builds the {@code DoFnRunner}
 * (wrapping it in a stateful runner when keyed state internals are present), invokes the
 * {@code DoFn}'s setup hook, creates the push-back runner, and finally, when the {@code DoFn}
 * is a {@code ProcessFn}, installs its state/timer factories and process-element invoker.
 *
 * @param context the operator context; it is not read by this method
 */
@Override
public void setup(OperatorContext context) {
    this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);

    // Begin with the null reader and swap in a real handler only when side inputs exist.
    SideInputReader reader = NullSideInputReader.of(sideInputs);
    if (!sideInputs.isEmpty()) {
        sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
        reader = sideInputHandler;
    }

    // Tag at position N is paired with the port at position N.
    for (int tagIndex = 0; tagIndex < additionalOutputTags.size(); tagIndex++) {
        @SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> outputPort = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[tagIndex];
        additionalOutputPortMapping.put(additionalOutputTags.get(tagIndex), outputPort);
    }

    // Step context that hands out this operator's current-key state and timer internals.
    NoOpStepContext keyedStepContext = new NoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return currentKeyTimerInternals;
        }
    };

    DoFnRunner<InputT, OutputT> runner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn, reader, this, mainOutputTag, additionalOutputTags, keyedStepContext, windowingStrategy);
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();

    // With keyed state internals available, decorate the simple runner with the stateful one.
    if (this.currentKeyStateInternals != null) {
        StatefulDoFnRunner.CleanupTimer timerCleanup = new StatefulDoFnRunner.TimeInternalsCleanupTimer(keyedStepContext.timerInternals(), windowingStrategy);
        @SuppressWarnings({ "rawtypes" }) Coder coderForWindows = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> stateCleanup = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, keyedStepContext.stateInternals(), coderForWindows);
        runner = DoFnRunners.defaultStatefulDoFnRunner(doFn, runner, windowingStrategy, timerCleanup, stateCleanup);
    }
    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(runner, sideInputs, sideInputHandler);

    // Everything below applies to ProcessFn only.
    if (!(doFn instanceof ProcessFn)) {
        return;
    }

    @SuppressWarnings("unchecked") StateInternalsFactory<String> keyedStateFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
    @SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> processFn = (ProcessFn) doFn;
    processFn.setStateInternalsFactory(keyedStateFactory);
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            // The key argument is ignored; the operator's current-key timer internals are returned.
            return currentKeyTimerInternals;
        }
    });

    // Callback that forwards emitted values to this operator's output(...) method.
    OutputWindowedValue<OutputT> emitter = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // NOTE(review): 10000 and the 10-second duration are presumably the invoker's output-count
    // and time bounds; confirm against the invoker's constructor.
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn, pipelineOptions.get(), emitter, reader, Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10)));
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) NoOpStepContext(org.apache.beam.runners.apex.translation.utils.NoOpStepContext) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DefaultOutputPort(com.datatorrent.api.DefaultOutputPort) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) TimerInternalsFactory(org.apache.beam.runners.core.TimerInternalsFactory) Instant(org.joda.time.Instant) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) Collection(java.util.Collection)

Example 2 with ProcessFn

Use of org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn in the Apache Beam project.

Class SplittableProcessElementsEvaluatorFactory, method createEvaluator.

/**
 * Builds the evaluator for a {@code ProcessElements} transform application.
 *
 * <p>A {@code ProcessFn} is obtained through a {@code DoFnLifecycleManager}, a delegate
 * {@code ParDoEvaluator} is created for it, and the function is then given state/timer factories
 * backed by the step context and a process-element invoker that writes through the delegate's
 * output manager.
 *
 * @param application the applied transform to evaluate
 * @param inputBundle the input bundle; its key selects the execution context and is passed to
 *     the delegate evaluator
 * @return the delegate evaluator wrapped by {@code DoFnLifecycleManagerRemovingTransformEvaluator}
 * @throws Exception propagated from the calls made while assembling the evaluator
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();

    // Create a ProcessFn, register it with a lifecycle manager, then work with the instance
    // that the manager hands back.
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> prototypeFn = transform.newProcessFn(transform.getFn());
    DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(prototypeFn);
    final ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn = ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) fnManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get());

    String nameOfStep = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext stepContext = evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(nameOfStep);
    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> parDoEvaluator = delegateFactory.createParDoEvaluator(application, inputBundle.getKey(), transform.getSideInputs(), transform.getMainOutputTag(), transform.getAdditionalOutputTags().getAll(), stepContext, processFn, fnManager);

    // Both factories ignore the key argument and answer from the step context.
    processFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) stepContext.stateInternals();
        }
    });
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return stepContext.timerInternals();
        }
    });

    // Callback that routes emitted values to the delegate evaluator's output manager.
    final OutputManager evaluatorOutputs = parDoEvaluator.getOutputManager();
    OutputWindowedValue<OutputT> emitter = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            evaluatorOutputs.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            evaluatorOutputs.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // A single-thread scheduled executor with a named daemon thread is created on every call.
    // NOTE(review): the remark that used to sit here was cut down to "DirectRunner." - confirm
    // its intent. 10000 and the 10-second duration are presumably the invoker's output-count and
    // time bounds; confirm against the invoker's constructor.
    OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT> invoker = new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
        transform.getFn(),
        evaluationContext.getPipelineOptions(),
        emitter,
        evaluationContext.createSideInputReader(transform.getSideInputs()),
        Executors.newSingleThreadScheduledExecutor(new ThreadFactoryBuilder().setThreadFactory(MoreExecutors.platformThreadFactory()).setDaemon(true).setNameFormat("direct-splittable-process-element-checkpoint-executor").build()),
        10000,
        Duration.standardSeconds(10));
    processFn.setProcessElementInvoker(invoker);

    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Aggregations

Collection (java.util.Collection)2 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)2 ProcessFn (org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn)2 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)2 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)2 TupleTag (org.apache.beam.sdk.values.TupleTag)2 Instant (org.joda.time.Instant)2 DefaultOutputPort (com.datatorrent.api.DefaultOutputPort)1 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)1 NoOpStepContext (org.apache.beam.runners.apex.translation.utils.NoOpStepContext)1 OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)1 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)1 KeyedWorkItemCoder (org.apache.beam.runners.core.KeyedWorkItemCoder)1 NullSideInputReader (org.apache.beam.runners.core.NullSideInputReader)1 SideInputHandler (org.apache.beam.runners.core.SideInputHandler)1 SideInputReader (org.apache.beam.runners.core.SideInputReader)1 StateInternals (org.apache.beam.runners.core.StateInternals)1 StateInternalsFactory (org.apache.beam.runners.core.StateInternalsFactory)1 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)1