Search in sources :

Example 1 with OutputManager

use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.

the class SimpleParDoFn method reallyStartBundle.

/**
 * Begins a new bundle: builds the {@link OutputManager} that hands every output to the receiver
 * registered for its tag, fetches the {@code DoFnInfo} from the instance manager, creates the
 * runner through {@code runnerFactory} and calls {@code startBundle()} on it.
 *
 * <p>{@code checkState} rejects the call when {@code fnRunner} is still set, i.e. a previous
 * bundle was started and not properly finished.
 *
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
private void reallyStartBundle() throws Exception {
    checkState(fnRunner == null, "bundle already started (or not properly finished)");
    OutputManager outputManager = new OutputManager() {

        // Receivers created on demand for tags that have no declared receiver index.
        final Map<TupleTag<?>, OutputReceiver> undeclaredOutputs = new HashMap<>();

        /** Returns the declared receiver for {@code tag}, else a previously created undeclared one, else null. */
        @Nullable
        private Receiver getReceiverOrNull(TupleTag<?> tag) {
            Integer declaredIndex = outputTupleTagsToReceiverIndices.get(tag);
            if (declaredIndex == null) {
                return undeclaredOutputs.get(tag);
            }
            return receivers[declaredIndex];
        }

        /** Creates, counts and remembers a receiver for a tag seen for the first time. */
        private OutputReceiver registerUndeclaredReceiver(TupleTag<?> tag) {
            // TODO: plumb through the operationName, so that we can
            // name implicit outputs after it.
            String implicitName = "implicit-" + tag.getId();
            // TODO: plumb through the counter prefix, so we can
            // make it available to the OutputReceiver class in case
            // it wants to use it in naming output counterFactory.  (It
            // doesn't today.)
            OutputReceiver created = new OutputReceiver();
            ElementCounter counter = new DataflowOutputCounter(implicitName, counterFactory, stepContext.getNameContext());
            created.addOutputCounter(counter);
            undeclaredOutputs.put(tag, created);
            return created;
        }

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
            outputsPerElementTracker.onOutput();
            Receiver target = getReceiverOrNull(tag);
            if (target == null) {
                // A new undeclared output.
                target = registerUndeclaredReceiver(tag);
            }
            try {
                target.process(output);
            } catch (RuntimeException | Error unchecked) {
                // Unchecked throwables propagate unchanged rather than being wrapped again.
                throw unchecked;
            } catch (Exception checked) {
                // Checked exceptions cannot cross this interface, so wrap them.
                throw new RuntimeException(checked);
            }
        }
    };
    fnInfo = (DoFnInfo) doFnInstanceManager.get();
    fnSignature = DoFnSignatures.getSignature(fnInfo.getDoFn().getClass());
    fnRunner = runnerFactory.createRunner(
        fnInfo.getDoFn(),
        options,
        mainOutputTag,
        sideOutputTags,
        fnInfo.getSideInputViews(),
        sideInputReader,
        fnInfo.getInputCoder(),
        fnInfo.getOutputCoders(),
        fnInfo.getWindowingStrategy(),
        stepContext,
        userStepContext,
        outputManager,
        doFnSchemaInformation,
        sideInputMapping);
    fnRunner.startBundle();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) WindowedValue(org.apache.beam.sdk.util.WindowedValue) HashMap(java.util.HashMap) Map(java.util.Map) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager) ElementCounter(org.apache.beam.runners.dataflow.worker.util.common.worker.ElementCounter)

Example 2 with OutputManager

use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.

the class ProcessBundleHandler method createDoFnRunner.

/**
 * Converts a {@link org.apache.beam.fn.v1.BeamFnApi.FunctionSpec} into a {@link DoFnRunner}.
 *
 * <p>The spec's payload is unpacked as a {@code BytesValue} and deserialized into a
 * {@code DoFnInfo}. The ids in {@code outputMap} (parsed as longs) must equal the ids in the
 * DoFnInfo's output map; consumers are then regrouped by {@link TupleTag} and fed by the
 * runner's {@link OutputManager}.
 *
 * @param functionSpec spec whose data holds the serialized DoFnInfo
 * @param outputMap consumers keyed by the string form of the output id
 * @return a simple runner around the deserialized DoFn
 * @throws IllegalArgumentException if the payload cannot be unpacked or the output ids differ
 */
private <InputT, OutputT> DoFnRunner<InputT, OutputT> createDoFnRunner(BeamFnApi.FunctionSpec functionSpec, Map<String, Collection<ThrowingConsumer<WindowedValue<OutputT>>>> outputMap) {
    ByteString payload;
    try {
        payload = functionSpec.getData().unpack(BytesValue.class).getValue();
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException(String.format("Unable to unwrap DoFn %s", functionSpec), e);
    }
    DoFnInfo<?, ?> doFnInfo = (DoFnInfo<?, ?>) SerializableUtils.deserializeFromByteArray(payload.toByteArray(), "DoFnInfo");

    // Both sides must describe exactly the same set of output ids.
    HashSet<Long> transformOutputIds = new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong));
    checkArgument(
        Objects.equals(transformOutputIds, doFnInfo.getOutputMap().keySet()),
        "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
        outputMap.keySet(),
        doFnInfo.getOutputMap());

    // Re-key the consumers from string output id to the TupleTag the DoFn emits to.
    ImmutableMultimap.Builder<TupleTag<?>, ThrowingConsumer<WindowedValue<OutputT>>> consumersByTag = ImmutableMultimap.builder();
    for (Map.Entry<Long, TupleTag<?>> idAndTag : doFnInfo.getOutputMap().entrySet()) {
        TupleTag<?> tag = idAndTag.getValue();
        String outputId = Long.toString(idAndTag.getKey());
        consumersByTag.putAll(tag, outputMap.get(outputId));
    }
    @SuppressWarnings({ "unchecked", "rawtypes" }) final Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tagBasedOutputMap = (Map) consumersByTag.build().asMap();

    OutputManager outputManager = new OutputManager() {

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
            try {
                Collection<ThrowingConsumer<WindowedValue<?>>> consumers = tagBasedOutputMap.get(tag);
                /* A missing entry is a normal case, e.g., if a DoFn has output but that output
                 * is not consumed. Such output is dropped. */
                if (consumers != null) {
                    for (ThrowingConsumer<WindowedValue<?>> consumer : consumers) {
                        consumer.accept(output);
                    }
                }
            } catch (Throwable t) {
                throw new RuntimeException(t);
            }
        }
    };

    @SuppressWarnings({ "unchecked", "rawtypes", "deprecation" }) DoFnRunner<InputT, OutputT> runner = DoFnRunners.simpleRunner(
        PipelineOptionsFactory.create(), /* TODO */
        (DoFn) doFnInfo.getDoFn(),
        NullSideInputReader.empty(), /* TODO */
        outputManager,
        (TupleTag) doFnInfo.getOutputMap().get(doFnInfo.getMainOutput()),
        new ArrayList<>(doFnInfo.getOutputMap().values()),
        new FakeStepContext(),
        (WindowingStrategy) doFnInfo.getWindowingStrategy());
    return runner;
}
Also used : ByteString(com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) FakeStepContext(org.apache.beam.fn.harness.fake.FakeStepContext) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HashSet(java.util.HashSet) DoFnInfo(org.apache.beam.runners.dataflow.util.DoFnInfo) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) Collection(java.util.Collection) ThrowingConsumer(org.apache.beam.fn.harness.fn.ThrowingConsumer) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 3 with OutputManager

use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Builds the evaluator for a splittable {@code ProcessElements} transform.
 *
 * <p>A new {@code ProcessFn} is registered with a {@code DoFnLifecycleManager}, a ParDo evaluator
 * is created for it through {@code delegateFactory}, and the fn is then wired with state
 * internals, timer internals and a process-element invoker that writes through the ParDo
 * evaluator's {@link OutputManager}.
 *
 * @param application the applied transform being evaluated
 * @param inputBundle the bundle whose key selects the execution context
 * @return the ParDo evaluator wrapped so that the lifecycle manager is handled on completion
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform = application.getTransform();

    // Hand a fresh ProcessFn to the lifecycle manager, then use the instance it returns.
    DoFnLifecycleManager lifecycleManager = DoFnLifecycleManager.of(transform.newProcessFn(transform.getFn()));
    ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
        (ProcessFn<InputT, OutputT, RestrictionT, TrackerT>) lifecycleManager.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get();

    String stepName = evaluationContext.getStepName(application);
    final DirectExecutionContext.DirectStepContext stepContext =
        evaluationContext.getExecutionContext(application, inputBundle.getKey()).getStepContext(stepName);

    final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>> delegate =
        delegateFactory.createParDoEvaluator(
            application,
            inputBundle.getKey(),
            transform.getSideInputs(),
            transform.getMainOutputTag(),
            transform.getAdditionalOutputTags().getAll(),
            stepContext,
            processFn,
            lifecycleManager);

    // State and timers come from the step context; the key argument is not consulted.
    processFn.setStateInternalsFactory(new StateInternalsFactory<String>() {

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public StateInternals stateInternalsForKey(String key) {
            return (StateInternals) stepContext.stateInternals();
        }
    });
    processFn.setTimerInternalsFactory(new TimerInternalsFactory<String>() {

        @Override
        public TimerInternals timerInternalsForKey(String key) {
            return stepContext.timerInternals();
        }
    });

    // Everything the ProcessFn emits is forwarded to the ParDo evaluator's output manager.
    final OutputManager outputManager = delegate.getOutputManager();
    OutputWindowedValue<OutputT> windowedOutput = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    // NOTE(review): 10000 and the 10 second duration look like per-invocation output/time
    // bounds of the invoker; confirm against OutputAndTimeBoundedSplittableProcessElementInvoker.
    processFn.setProcessElementInvoker(
        new OutputAndTimeBoundedSplittableProcessElementInvoker<InputT, OutputT, RestrictionT, TrackerT>(
            transform.getFn(),
            evaluationContext.getPipelineOptions(),
            windowedOutput,
            evaluationContext.createSideInputReader(transform.getSideInputs()),
            // Single daemon thread dedicated to this invoker.
            Executors.newSingleThreadScheduledExecutor(
                new ThreadFactoryBuilder()
                    .setThreadFactory(MoreExecutors.platformThreadFactory())
                    .setDaemon(true)
                    .setNameFormat("direct-splittable-process-element-checkpoint-executor")
                    .build()),
            10000,
            Duration.standardSeconds(10)));
    return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(delegate, lifecycleManager);
}
Also used : ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) TupleTag(org.apache.beam.sdk.values.TupleTag) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) ElementAndRestriction(org.apache.beam.runners.core.construction.ElementAndRestriction) OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) Instant(org.joda.time.Instant) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 4 with OutputManager

use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.

the class StatefulDoFnRunnerTest method testOutput.

/**
 * Shared test body: feeds elements with descending timestamps for key "hello" into a runner
 * built by {@code runnerFactory}, for two windows in turn, and checks both the final value of
 * the state cell and the exact list of captured outputs.
 *
 * @param ordered passed to {@code MyDoFn.create}; when true the input watermark is advanced
 *     past {@code ALLOWED_LATENESS} before asserting, and outputs are expected in ascending
 *     timestamp order, otherwise in arrival order
 * @param runnerFactory builds the runner under test from the DoFn and the capturing
 *     {@link OutputManager}
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
private void testOutput(boolean ordered, BiFunction<MyDoFn, OutputManager, DoFnRunner<KV<String, Integer>, Integer>> runnerFactory) throws Exception {
    timerInternals.advanceInputWatermark(new Instant(1L));
    MyDoFn fn = MyDoFn.create(ordered);
    StateTag<ValueState<Integer>> stateTag = StateTags.tagForSpec(MyDoFn.STATE_ID, fn.intState());
    // Every (tag, value) the runner emits is appended to this list via the output manager.
    List<KV<TupleTag<?>, WindowedValue<?>>> outputs = new ArrayList<>();
    OutputManager output = asOutputManager(outputs);
    DoFnRunner<KV<String, Integer>, Integer> runner = runnerFactory.apply(fn, output);
    Instant elementTime = new Instant(5);
    // write two elements, with descending timestamps
    runner.processElement(WindowedValue.of(KV.of("hello", 1), elementTime, WINDOW_1, PaneInfo.NO_FIRING));
    runner.processElement(WindowedValue.of(KV.of("hello", 2), elementTime.minus(Duration.millis(1)), WINDOW_1, PaneInfo.NO_FIRING));
    if (ordered) {
        // move forward in time so that the input might get flushed
        advanceInputWatermark(timerInternals, elementTime.plus(Duration.millis(ALLOWED_LATENESS + 1)), runner);
    }
    // 1 + 2 = 3 is expected in state regardless of processing order.
    assertEquals(3, (int) stateInternals.state(windowNamespace(WINDOW_1), stateTag).read());
    assertEquals(2, outputs.size());
    // The expected values are consistent with MyDoFn emitting a running sum per window
    // (MyDoFn is not shown here, so confirm against its definition).
    if (ordered) {
        // Ordered: the earlier-timestamped element (value 2) is seen first: 2, then 2 + 1.
        assertEquals(Arrays.asList(KV.of(outputTag, WindowedValue.of(2, elementTime.minus(Duration.millis(1)), WINDOW_1, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(3, elementTime, WINDOW_1, PaneInfo.NO_FIRING))), outputs);
    } else {
        // Unordered: arrival order is kept: 1, then 1 + 2.
        assertEquals(Arrays.asList(KV.of(outputTag, WindowedValue.of(1, elementTime, WINDOW_1, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(3, elementTime.minus(Duration.millis(1)), WINDOW_1, PaneInfo.NO_FIRING))), outputs);
    }
    // Reset the capture list so the second window is asserted in isolation.
    outputs.clear();
    // another window
    elementTime = elementTime.plus(Duration.millis(WINDOW_SIZE));
    runner.processElement(WindowedValue.of(KV.of("hello", 1), elementTime, WINDOW_2, PaneInfo.NO_FIRING));
    runner.processElement(WindowedValue.of(KV.of("hello", 2), elementTime.minus(Duration.millis(1)), WINDOW_2, PaneInfo.NO_FIRING));
    runner.processElement(WindowedValue.of(KV.of("hello", 3), elementTime.minus(Duration.millis(2)), WINDOW_2, PaneInfo.NO_FIRING));
    if (ordered) {
        // move forward in time so that the input might get flushed
        advanceInputWatermark(timerInternals, elementTime.plus(Duration.millis(ALLOWED_LATENESS + 1)), runner);
    }
    // 1 + 2 + 3 = 6 is expected in the second window's state.
    assertEquals(6, (int) stateInternals.state(windowNamespace(WINDOW_2), stateTag).read());
    assertEquals(3, outputs.size());
    if (ordered) {
        // Ordered by timestamp: values 3, 2, 1 give running sums 3, 5, 6.
        assertEquals(Arrays.asList(KV.of(outputTag, WindowedValue.of(3, elementTime.minus(Duration.millis(2)), WINDOW_2, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(5, elementTime.minus(Duration.millis(1)), WINDOW_2, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(6, elementTime, WINDOW_2, PaneInfo.NO_FIRING))), outputs);
    } else {
        // Arrival order: values 1, 2, 3 give running sums 1, 3, 6.
        assertEquals(Arrays.asList(KV.of(outputTag, WindowedValue.of(1, elementTime, WINDOW_2, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(3, elementTime.minus(Duration.millis(1)), WINDOW_2, PaneInfo.NO_FIRING)), KV.of(outputTag, WindowedValue.of(6, elementTime.minus(Duration.millis(2)), WINDOW_2, PaneInfo.NO_FIRING))), outputs);
    }
}
Also used : ValueState(org.apache.beam.sdk.state.ValueState) Instant(org.joda.time.Instant) ArrayList(java.util.ArrayList) KV(org.apache.beam.sdk.values.KV) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Example 5 with OutputManager

use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.

the class SplittableProcessElementsEvaluatorFactory method createEvaluator.

/**
 * Builds the evaluator for a splittable {@code ProcessElements} transform.
 *
 * <p>The evaluator itself comes from {@code delegateFactory}. The {@code ProcessFn} held by its
 * runner is then wired with state internals, timer internals, a side input reader and a
 * process-element invoker that writes through the ParDo evaluator's {@link OutputManager}.
 *
 * @param application the applied transform being evaluated
 * @param inputBundle the bundle supplying the input PCollection and key
 * @return the evaluator created by the delegate factory
 * @throws Exception declared by this method; the visible code does not show which call raises it
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator(AppliedPTransform<PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>, PCollectionTuple, ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>> application, CommittedBundle<InputT> inputBundle) throws Exception {
    final ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> transform = application.getTransform();

    final DoFnLifecycleManagerRemovingTransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> evaluator =
        delegateFactory.createEvaluator(
            (AppliedPTransform) application,
            (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>) inputBundle.getPCollection(),
            inputBundle.getKey(),
            application.getTransform().getSideInputs(),
            application.getTransform().getMainOutputTag(),
            application.getTransform().getAdditionalOutputTags().getAll(),
            DoFnSchemaInformation.create(),
            application.getTransform().getSideInputMapping());

    // Reach through the delegate to the ProcessFn instance that will actually run.
    final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> parDoEvaluator = evaluator.getParDoEvaluator();
    final ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> processFn =
        (ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>)
            ProcessFnRunner.class.cast(parDoEvaluator.getFnRunner()).getFn();

    // State and timers come from the step context; the key argument is not consulted.
    final DirectExecutionContext.DirectStepContext stepContext = parDoEvaluator.getStepContext();
    processFn.setStateInternalsFactory(key -> stepContext.stateInternals());
    processFn.setTimerInternalsFactory(key -> stepContext.timerInternals());

    // Everything the ProcessFn emits is forwarded to the ParDo evaluator's output manager.
    final OutputManager outputManager = parDoEvaluator.getOutputManager();
    OutputWindowedValue<OutputT> windowedOutput = new OutputWindowedValue<OutputT>() {

        @Override
        public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
            outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
    };

    SideInputReader sideInputReader = evaluationContext.createSideInputReader(transform.getSideInputs());
    processFn.setSideInputReader(sideInputReader);

    // NOTE(review): 100 and the 1 second duration look like per-invocation output/time bounds
    // of the invoker; confirm against OutputAndTimeBoundedSplittableProcessElementInvoker.
    processFn.setProcessElementInvoker(
        new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
            transform.getFn(),
            options,
            windowedOutput,
            sideInputReader,
            ses,
            100,
            Duration.standardSeconds(1),
            stepContext::bundleFinalizer));
    return evaluator;
}
Also used : OutputWindowedValue(org.apache.beam.runners.core.OutputWindowedValue) ProcessFn(org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) SideInputReader(org.apache.beam.runners.core.SideInputReader) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) OutputManager(org.apache.beam.runners.core.DoFnRunners.OutputManager)

Aggregations

OutputManager (org.apache.beam.runners.core.DoFnRunners.OutputManager)7 TupleTag (org.apache.beam.sdk.values.TupleTag)6 WindowedValue (org.apache.beam.sdk.util.WindowedValue)4 Collection (java.util.Collection)3 Map (java.util.Map)3 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)3 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 PCollection (org.apache.beam.sdk.values.PCollection)3 Instant (org.joda.time.Instant)3 HashMap (java.util.HashMap)2 DoFnRunner (org.apache.beam.runners.core.DoFnRunner)2 OutputWindowedValue (org.apache.beam.runners.core.OutputWindowedValue)2 ProcessFn (org.apache.beam.runners.core.SplittableParDoViaKeyedWorkItems.ProcessFn)2 PaneInfo (org.apache.beam.sdk.transforms.windowing.PaneInfo)2 KV (org.apache.beam.sdk.values.KV)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMultimap (com.google.common.collect.ImmutableMultimap)1 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 ByteString (com.google.protobuf.ByteString)1 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1