Search in sources :

Example 1 with RestrictionTracker

Use of org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker in the Apache Beam project.

From the class ApexParDoOperator, method setup:

/**
 * Wires up everything this operator needs before it starts processing.
 *
 * <p>In order, this method: records whether tuple tracing is enabled, selects the side-input
 * reader, maps each additional output tag to the port at the same index, builds the
 * {@code DoFnRunner} (wrapping it in a stateful runner when keyed state internals are present),
 * invokes the DoFn's setup hook, creates the pushback runner, and, when the DoFn is a
 * {@code ProcessFn}, hands it its state/timer factories and process-element invoker.
 *
 * @param context operator context supplied by the caller; not read by this method
 */
@Override
public void setup(OperatorContext context) {
    this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);

    // Start from the null reader; switch to a real handler only when side inputs exist.
    SideInputReader reader = NullSideInputReader.of(sideInputs);
    if (!sideInputs.isEmpty()) {
        sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
        reader = sideInputHandler;
    }

    // The tag at position n is served by the port at position n.
    for (int tagIndex = 0; tagIndex < additionalOutputTags.size(); tagIndex++) {
        @SuppressWarnings("unchecked")
        DefaultOutputPort<ApexStreamTuple<?>> tagPort = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[tagIndex];
        additionalOutputPortMapping.put(additionalOutputTags.get(tagIndex), tagPort);
    }

    // Step context that exposes the operator's current-key state and timer internals.
    NoOpStepContext keyedStepContext = new NoOpStepContext() {

        @Override
        public StateInternals stateInternals() {
            return currentKeyStateInternals;
        }

        @Override
        public TimerInternals timerInternals() {
            return currentKeyTimerInternals;
        }
    };

    DoFnRunner<InputT, OutputT> runner = DoFnRunners.simpleRunner(
            pipelineOptions.get(), doFn, reader, this, mainOutputTag, additionalOutputTags, keyedStepContext, windowingStrategy);
    doFnInvoker = DoFnInvokers.invokerFor(doFn);
    doFnInvoker.invokeSetup();

    // Keyed state internals present: layer the stateful runner (cleanup timer + state cleaner) on top.
    if (this.currentKeyStateInternals != null) {
        StatefulDoFnRunner.CleanupTimer cleanupTimer =
                new StatefulDoFnRunner.TimeInternalsCleanupTimer(keyedStepContext.timerInternals(), windowingStrategy);
        @SuppressWarnings({ "rawtypes" })
        Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
        @SuppressWarnings({ "unchecked" })
        StatefulDoFnRunner.StateCleaner<?> stateCleaner =
                new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, keyedStepContext.stateInternals(), windowCoder);
        runner = DoFnRunners.defaultStatefulDoFnRunner(doFn, runner, windowingStrategy, cleanupTimer, stateCleaner);
    }
    pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(runner, sideInputs, sideInputHandler);

    // Everything below applies only to splittable-DoFn processing.
    if (!(doFn instanceof ProcessFn)) {
        return;
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> processFn = (ProcessFn) doFn;
    @SuppressWarnings("unchecked")
    StateInternalsFactory<String> keyedStateFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
    processFn.setStateInternalsFactory(keyedStateFactory);

    // Every key resolves to the operator's current-key timer internals.
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return currentKeyTimerInternals;
        }
    });

    // NOTE(review): 10000 and the 10-second Duration are presumably the per-call output-count and
    // time bounds suggested by the invoker's name — confirm against its constructor.
    processFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            doFn,
            pipelineOptions.get(),
            new OutputWindowedValue<OutputT>() {

                @Override
                public void outputWindowedValue(OutputT value, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    // Main-output elements are routed through the operator's own output method.
                    output(mainOutputTag, WindowedValue.of(value, timestamp, windows, pane));
                }

                @Override
                public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT value, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
                    output(tag, WindowedValue.of(value, timestamp, windows, pane));
                }
            },
            reader,
            Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
            10000,
            Duration.standardSeconds(10)));
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) ApexStreamTuple(org.apache.beam.runners.apex.translation.utils.ApexStreamTuple) ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) NullSideInputReader(org.apache.beam.runners.core.NullSideInputReader) NoOpStepContext(org.apache.beam.runners.apex.translation.utils.NoOpStepContext) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) DefaultOutputPort(com.datatorrent.api.DefaultOutputPort) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KeyedWorkItemCoder(org.apache.beam.runners.core.KeyedWorkItemCoder) ListCoder(org.apache.beam.sdk.coders.ListCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) TimerInternalsFactory(org.apache.beam.runners.core.TimerInternalsFactory) Instant(org.joda.time.Instant) StateInternalsFactory(org.apache.beam.runners.core.StateInternalsFactory) Collection(java.util.Collection)

Example 2 with RestrictionTracker

Use of org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker in the Apache Beam project.

From the class DoFnSignatures, method analyzeExtraParameter:

/**
 * Classifies a single extra method parameter by its raw type and returns the matching
 * {@code Parameter} descriptor.
 *
 * <p>Recognized, in the order they are tested: {@code DoFn.ProcessContext},
 * {@code DoFn.OnTimerContext}, any {@code BoundedWindow} subtype, any {@code RestrictionTracker}
 * subtype, {@code Timer}, and any {@code State} subtype. Validation problems are reported through
 * {@code methodErrors} or a parameter-scoped reporter derived from it.
 *
 * @param methodErrors reporter for method-level problems (e.g. duplicate window parameters)
 * @param fnContext supplies the timer and state declarations looked up by id
 * @param methodContext supplies what has already been seen on this method (window, tracker,
 *     timer and state parameters)
 * @param fnClass not read by this method
 * @param param the parameter being analyzed
 * @param inputT input element type, used to derive the expected context types
 * @param outputT output element type, used to derive the expected context types
 * @return the descriptor for the recognized parameter kind; the trailing {@code return null}
 *     is marked unreachable in the code because the preceding call is expected to throw
 */
private static Parameter analyzeExtraParameter(ErrorReporter methodErrors, FnAnalysisContext fnContext, MethodAnalysisContext methodContext, TypeDescriptor<? extends DoFn<?, ?>> fnClass, ParameterDescription param, TypeDescriptor<?> inputT, TypeDescriptor<?> outputT) {
    TypeDescriptor<?> wantedProcessContextT = doFnProcessContextTypeOf(inputT, outputT);
    TypeDescriptor<?> wantedOnTimerContextT = doFnOnTimerContextTypeOf(inputT, outputT);
    TypeDescriptor<?> declaredT = param.getType();
    Class<?> erasedType = declaredT.getRawType();
    ErrorReporter errors = methodErrors.forParameter(param);

    // Context parameters must carry exactly the type derived from inputT/outputT.
    if (erasedType.equals(DoFn.ProcessContext.class)) {
        errors.checkArgument(
                declaredT.equals(wantedProcessContextT),
                "ProcessContext argument must have type %s",
                formatType(wantedProcessContextT));
        return Parameter.processContext();
    }

    if (erasedType.equals(DoFn.OnTimerContext.class)) {
        errors.checkArgument(
                declaredT.equals(wantedOnTimerContextT),
                "OnTimerContext argument must have type %s",
                formatType(wantedOnTimerContextT));
        return Parameter.onTimerContext();
    }

    // At most one window parameter per method; reported at method level.
    if (BoundedWindow.class.isAssignableFrom(erasedType)) {
        methodErrors.checkArgument(
                !methodContext.hasWindowParameter(),
                "Multiple %s parameters",
                BoundedWindow.class.getSimpleName());
        return Parameter.boundedWindow((TypeDescriptor<? extends BoundedWindow>) declaredT);
    }

    // At most one restriction tracker parameter per method; reported at method level.
    if (RestrictionTracker.class.isAssignableFrom(erasedType)) {
        methodErrors.checkArgument(
                !methodContext.hasRestrictionTrackerParameter(),
                "Multiple %s parameters",
                RestrictionTracker.class.getSimpleName());
        return Parameter.restrictionTracker(declaredT);
    }

    if (erasedType.equals(Timer.class)) {
        // m.getParameters() is not available until Java 8
        String timerId = getTimerId(param.getAnnotations());
        errors.checkArgument(
                timerId != null,
                "%s missing %s annotation",
                Timer.class.getSimpleName(),
                TimerId.class.getSimpleName());
        errors.checkArgument(
                !methodContext.getTimerParameters().containsKey(timerId),
                "duplicate %s: \"%s\"",
                TimerId.class.getSimpleName(),
                timerId);
        TimerDeclaration declaredTimer = fnContext.getTimerDeclarations().get(timerId);
        errors.checkArgument(
                declaredTimer != null,
                "reference to undeclared %s: \"%s\"",
                TimerId.class.getSimpleName(),
                timerId);
        // The timer's declaring class must be the class that declares the referencing method.
        errors.checkArgument(
                declaredTimer.field().getDeclaringClass().equals(param.getMethod().getDeclaringClass()),
                "%s %s declared in a different class %s."
                        + " Timers may be referenced only in the lexical scope where they are declared.",
                TimerId.class.getSimpleName(),
                timerId,
                declaredTimer.field().getDeclaringClass().getName());
        return Parameter.timerParameter(declaredTimer);
    }

    if (State.class.isAssignableFrom(erasedType)) {
        // m.getParameters() is not available until Java 8
        String stateId = getStateId(param.getAnnotations());
        errors.checkArgument(
                stateId != null,
                "missing %s annotation",
                DoFn.StateId.class.getSimpleName());
        errors.checkArgument(
                !methodContext.getStateParameters().containsKey(stateId),
                "duplicate %s: \"%s\"",
                DoFn.StateId.class.getSimpleName(),
                stateId);
        // By static typing this is already a well-formed State subclass
        TypeDescriptor<? extends State> declaredStateT = (TypeDescriptor<? extends State>) param.getType();
        StateDeclaration declaredState = fnContext.getStateDeclarations().get(stateId);
        errors.checkArgument(
                declaredState != null,
                "reference to undeclared %s: \"%s\"",
                DoFn.StateId.class.getSimpleName(),
                stateId);
        errors.checkArgument(
                declaredState.stateType().equals(declaredStateT),
                "reference to %s %s with different type %s",
                StateId.class.getSimpleName(),
                stateId,
                formatType(declaredState.stateType()));
        // The state's declaring class must be the class that declares the referencing method.
        errors.checkArgument(
                declaredState.field().getDeclaringClass().equals(param.getMethod().getDeclaringClass()),
                "%s %s declared in a different class %s."
                        + " State may be referenced only in the class where it is declared.",
                StateId.class.getSimpleName(),
                stateId,
                declaredState.field().getDeclaringClass().getName());
        return Parameter.stateParameter(declaredState);
    }

    // No recognized kind matched: report the parameter as invalid.
    List<String> allowedParamTypes = Arrays.asList(
            formatType(new TypeDescriptor<BoundedWindow>() {
            }),
            formatType(new TypeDescriptor<RestrictionTracker<?>>() {
            }));
    errors.throwIllegalArgument(
            "%s is not a valid context parameter. Should be one of %s",
            formatType(declaredT),
            allowedParamTypes);
    // Unreachable
    return null;
}
Also used : RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) TimerId(org.apache.beam.sdk.transforms.DoFn.TimerId) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) DoFn(org.apache.beam.sdk.transforms.DoFn) Timer(org.apache.beam.sdk.state.Timer) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) State(org.apache.beam.sdk.state.State) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration)

Aggregations

RestrictionTracker (org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker)2 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)2 DefaultOutputPort (com.datatorrent.api.DefaultOutputPort)1 ImmutableList (com.google.common.collect.ImmutableList)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 List (java.util.List)1 ApexStreamTuple (org.apache.beam.runners.apex.translation.utils.ApexStreamTuple)1 NoOpStepContext (org.apache.beam.runners.apex.translation.utils.NoOpStepContext)1 KeyedWorkItemCoder (org.apache.beam.runners.core.KeyedWorkItemCoder)1 NullSideInputReader (org.apache.beam.runners.core.NullSideInputReader)1 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)1 SideInputHandler (org.apache.beam.runners.core.SideInputHandler)1 SideInputReader (org.apache.beam.runners.core.SideInputReader)1 ProcessFn (org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn)1 StateInternalsFactory (org.apache.beam.runners.core.StateInternalsFactory)1 StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner)1 TimerInternalsFactory (org.apache.beam.runners.core.TimerInternalsFactory)1 Coder (org.apache.beam.sdk.coders.Coder)1 KvCoder (org.apache.beam.sdk.coders.KvCoder)1