use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.
the class SimpleParDoFn method reallyStartBundle.
/**
 * Begins a bundle: wires up the routing of DoFn outputs to receivers, obtains the DoFn from the
 * instance manager, creates its runner and calls {@code startBundle()} on it.
 *
 * <p>The initial state check fails when {@code fnRunner} is still set from an earlier bundle.
 *
 * @throws Exception whatever obtaining the DoFn, creating the runner or starting the bundle throws
 */
private void reallyStartBundle() throws Exception {
  checkState(fnRunner == null, "bundle already started (or not properly finished)");

  OutputManager outputManager =
      new OutputManager() {
        // Receivers created lazily for tags that have no entry in
        // outputTupleTagsToReceiverIndices.
        final Map<TupleTag<?>, OutputReceiver> undeclaredOutputs = new HashMap<>();

        /**
         * Looks up the receiver for {@code tag}: a declared one if the tag has a receiver index,
         * otherwise a previously registered undeclared one, otherwise {@code null}.
         */
        @Nullable
        private Receiver getReceiverOrNull(TupleTag<?> tag) {
          Integer declaredIndex = outputTupleTagsToReceiverIndices.get(tag);
          if (declaredIndex == null) {
            return undeclaredOutputs.get(tag);
          }
          return receivers[declaredIndex];
        }

        /** Creates a receiver for a tag seen for the first time, registers it and returns it. */
        private Receiver newUndeclaredReceiver(TupleTag<?> tag) {
          // TODO: plumb through the operationName, so that we can
          // name implicit outputs after it.
          String outputName = "implicit-" + tag.getId();
          // TODO: plumb through the counter prefix, so we can
          // make it available to the OutputReceiver class in case
          // it wants to use it in naming output counterFactory. (It
          // doesn't today.)
          OutputReceiver created = new OutputReceiver();
          ElementCounter counter =
              new DataflowOutputCounter(outputName, counterFactory, stepContext.getNameContext());
          created.addOutputCounter(counter);
          undeclaredOutputs.put(tag, created);
          return created;
        }

        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
          outputsPerElementTracker.onOutput();

          Receiver target = getReceiverOrNull(tag);
          if (target == null) {
            // A new undeclared output.
            target = newUndeclaredReceiver(tag);
          }

          try {
            target.process(output);
          } catch (RuntimeException | Error e) {
            // Unchecked throwables are rethrown unchanged rather than wrapped again.
            throw e;
          } catch (Exception e) {
            // Checked exceptions from the receiver are surfaced as unchecked.
            throw new RuntimeException(e);
          }
        }
      };

  fnInfo = (DoFnInfo) doFnInstanceManager.get();
  fnSignature = DoFnSignatures.getSignature(fnInfo.getDoFn().getClass());
  fnRunner =
      runnerFactory.createRunner(
          fnInfo.getDoFn(),
          options,
          mainOutputTag,
          sideOutputTags,
          fnInfo.getSideInputViews(),
          sideInputReader,
          fnInfo.getInputCoder(),
          fnInfo.getOutputCoders(),
          fnInfo.getWindowingStrategy(),
          stepContext,
          userStepContext,
          outputManager,
          doFnSchemaInformation,
          sideInputMapping);
  fnRunner.startBundle();
}
use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.
the class ProcessBundleHandler method createDoFnRunner.
/**
 * Converts a {@link org.apache.beam.fn.v1.BeamFnApi.FunctionSpec} into a {@link DoFnRunner}.
 *
 * <p>The spec's data is unpacked as a {@code BytesValue} and deserialized into a {@code DoFnInfo}.
 * The keys of {@code outputMap}, parsed as longs, must equal the key set of the DoFnInfo output
 * map. Each output tag is then associated with the consumers registered under its id, and the
 * runner's {@link OutputManager} forwards every emitted value to all consumers of its tag.
 *
 * @param functionSpec spec whose data holds the serialized {@code DoFnInfo}
 * @param outputMap consumers keyed by the decimal string form of the output id
 * @return a runner for the deserialized DoFn that forwards outputs to the given consumers
 * @throws IllegalArgumentException if the spec's data cannot be unpacked, or (presumably, via
 *     {@code checkArgument}) if the two output maps disagree on their ids
 */
private <InputT, OutputT> DoFnRunner<InputT, OutputT> createDoFnRunner(
    BeamFnApi.FunctionSpec functionSpec,
    Map<String, Collection<ThrowingConsumer<WindowedValue<OutputT>>>> outputMap) {
  ByteString serializedFn;
  try {
    serializedFn = functionSpec.getData().unpack(BytesValue.class).getValue();
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(
        String.format("Unable to unwrap DoFn %s", functionSpec), e);
  }

  DoFnInfo<?, ?> doFnInfo =
      (DoFnInfo<?, ?>)
          SerializableUtils.deserializeFromByteArray(serializedFn.toByteArray(), "DoFnInfo");

  checkArgument(
      Objects.equals(
          new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)),
          doFnInfo.getOutputMap().keySet()),
      "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
      outputMap.keySet(),
      doFnInfo.getOutputMap());

  // Re-key the consumers from output id to the TupleTag the DoFn emits to.
  ImmutableMultimap.Builder<TupleTag<?>, ThrowingConsumer<WindowedValue<OutputT>>> tagToOutput =
      ImmutableMultimap.builder();
  for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
    tagToOutput.putAll(entry.getValue(), outputMap.get(Long.toString(entry.getKey())));
  }
  @SuppressWarnings({"unchecked", "rawtypes"})
  final Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tagBasedOutputMap =
      (Map) tagToOutput.build().asMap();

  OutputManager outputManager =
      new OutputManager() {
        @Override
        public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
          try {
            Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
                tagBasedOutputMap.get(tag);
            if (consumers == null) {
              /* This is a normal case, e.g., if a DoFn has output but that output is not
               * consumed. Drop the output. */
              return;
            }
            for (ThrowingConsumer<WindowedValue<?>> consumer : consumers) {
              consumer.accept(output);
            }
          } catch (RuntimeException | Error e) {
            // Propagate unchecked throwables unchanged: wrapping an Error (e.g.
            // OutOfMemoryError) in a RuntimeException hides it from handlers that treat Errors
            // specially, and re-wrapping RuntimeExceptions nests once per consumer hop.
            throw e;
          } catch (Throwable t) {
            // Only checked throwables reach this point; surface them as unchecked.
            throw new RuntimeException(t);
          }
        }
      };

  @SuppressWarnings({"unchecked", "rawtypes", "deprecation"})
  DoFnRunner<InputT, OutputT> runner =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(), /* TODO */
          (DoFn) doFnInfo.getDoFn(),
          NullSideInputReader.empty(), /* TODO */
          outputManager,
          (TupleTag) doFnInfo.getOutputMap().get(doFnInfo.getMainOutput()),
          new ArrayList<>(doFnInfo.getOutputMap().values()),
          new FakeStepContext(),
          (WindowingStrategy) doFnInfo.getWindowingStrategy());
  return runner;
}
use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.
the class SplittableProcessElementsEvaluatorFactory method createEvaluator.
/**
 * Builds the evaluator for a {@code ProcessElements} application over the given input bundle.
 *
 * <p>A ParDo evaluator is created through {@code delegateFactory} for the transform's
 * {@code ProcessFn}. The ProcessFn is then given state and timer factories backed by the step
 * context, and a process-element invoker whose outputs are forwarded to the ParDo evaluator's
 * output manager.
 *
 * @param application the applied {@code ProcessElements} transform
 * @param inputBundle the bundle whose key selects the execution context
 * @return the ParDo evaluator wrapped so that {@code fnManager} is handled by
 *     {@code DoFnLifecycleManagerRemovingTransformEvaluator}
 * @throws Exception propagated from the lifecycle manager or the delegate factory
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TransformEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
    createEvaluator(
        AppliedPTransform<
                PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>,
                PCollectionTuple,
                ProcessElements<InputT, OutputT, RestrictionT, TrackerT>>
            application,
        CommittedBundle<InputT> inputBundle)
        throws Exception {
  final ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform =
      application.getTransform();

  // Hand a fresh ProcessFn to the lifecycle manager, then work with the instance it returns.
  DoFnLifecycleManager fnManager =
      DoFnLifecycleManager.of(transform.newProcessFn(transform.getFn()));
  ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
      (ProcessFn<InputT, OutputT, RestrictionT, TrackerT>)
          fnManager
              .<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>get();

  String stepName = evaluationContext.getStepName(application);
  final DirectExecutionContext.DirectStepContext stepContext =
      evaluationContext
          .getExecutionContext(application, inputBundle.getKey())
          .getStepContext(stepName);

  final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
      parDoEvaluator =
          delegateFactory.createParDoEvaluator(
              application,
              inputBundle.getKey(),
              transform.getSideInputs(),
              transform.getMainOutputTag(),
              transform.getAdditionalOutputTags().getAll(),
              stepContext,
              processFn,
              fnManager);

  // Both factories ignore the key and hand back the step context's internals.
  processFn.setStateInternalsFactory(
      new StateInternalsFactory<String>() {
        @Override
        public StateInternals stateInternalsForKey(String key) {
          return (StateInternals) stepContext.stateInternals();
        }
      });
  processFn.setTimerInternalsFactory(
      new TimerInternalsFactory<String>() {
        @Override
        public TimerInternals timerInternalsForKey(String key) {
          return stepContext.timerInternals();
        }
      });

  // Everything the invoker emits goes through the ParDo evaluator's output manager.
  final OutputManager outputManager = parDoEvaluator.getOutputManager();
  OutputWindowedValue<OutputT> outputWindowedValue =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(
            OutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputManager.output(
              transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  processFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<
          InputT, OutputT, RestrictionT, TrackerT>(
          transform.getFn(),
          evaluationContext.getPipelineOptions(),
          outputWindowedValue,
          evaluationContext.createSideInputReader(transform.getSideInputs()),
          // NOTE(review): a new single-thread scheduled executor (daemon thread) is created on
          // every call and is not shut down in this method; a shared executor may be preferable.
          Executors.newSingleThreadScheduledExecutor(
              new ThreadFactoryBuilder()
                  .setThreadFactory(MoreExecutors.platformThreadFactory())
                  .setDaemon(true)
                  .setNameFormat("direct-splittable-process-element-checkpoint-executor")
                  .build()),
          // Presumably the output-count and duration bounds of one invocation - confirm against
          // the invoker's constructor.
          10000,
          Duration.standardSeconds(10)));

  return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.
the class StatefulDoFnRunnerTest method testOutput.
/**
 * Feeds out-of-order elements for one key into a runner built by {@code runnerFactory} and checks
 * the stored state and the emitted outputs, for two windows in turn.
 *
 * <p>When {@code ordered} is true the input watermark is advanced past the allowed lateness
 * before asserting, and the expected outputs follow timestamp order; otherwise they follow
 * arrival order.
 *
 * @param ordered whether the DoFn is created in ordered mode
 * @param runnerFactory builds the runner under test from the DoFn and an output manager
 * @throws Exception propagated from the runner
 */
private void testOutput(
    boolean ordered,
    BiFunction<MyDoFn, OutputManager, DoFnRunner<KV<String, Integer>, Integer>> runnerFactory)
    throws Exception {
  timerInternals.advanceInputWatermark(new Instant(1L));

  MyDoFn fn = MyDoFn.create(ordered);
  StateTag<ValueState<Integer>> stateTag = StateTags.tagForSpec(MyDoFn.STATE_ID, fn.intState());
  List<KV<TupleTag<?>, WindowedValue<?>>> outputs = new ArrayList<>();
  OutputManager collectingManager = asOutputManager(outputs);
  DoFnRunner<KV<String, Integer>, Integer> runner = runnerFactory.apply(fn, collectingManager);

  // ---- first window: two elements, the later-arriving one has the earlier timestamp ----
  Instant firstTime = new Instant(5);
  Instant firstMinusOne = firstTime.minus(Duration.millis(1));

  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), firstTime, WINDOW_1, PaneInfo.NO_FIRING));
  runner.processElement(
      WindowedValue.of(KV.of("hello", 2), firstMinusOne, WINDOW_1, PaneInfo.NO_FIRING));

  if (ordered) {
    // move forward in time so that the input might get flushed
    advanceInputWatermark(
        timerInternals, firstTime.plus(Duration.millis(ALLOWED_LATENESS + 1)), runner);
  }

  assertEquals(3, (int) stateInternals.state(windowNamespace(WINDOW_1), stateTag).read());
  assertEquals(2, outputs.size());
  if (ordered) {
    assertEquals(
        Arrays.asList(
            KV.of(outputTag, WindowedValue.of(2, firstMinusOne, WINDOW_1, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(3, firstTime, WINDOW_1, PaneInfo.NO_FIRING))),
        outputs);
  } else {
    assertEquals(
        Arrays.asList(
            KV.of(outputTag, WindowedValue.of(1, firstTime, WINDOW_1, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(3, firstMinusOne, WINDOW_1, PaneInfo.NO_FIRING))),
        outputs);
  }
  outputs.clear();

  // ---- second window: three elements, again with descending timestamps ----
  Instant secondTime = firstTime.plus(Duration.millis(WINDOW_SIZE));
  Instant secondMinusOne = secondTime.minus(Duration.millis(1));
  Instant secondMinusTwo = secondTime.minus(Duration.millis(2));

  runner.processElement(
      WindowedValue.of(KV.of("hello", 1), secondTime, WINDOW_2, PaneInfo.NO_FIRING));
  runner.processElement(
      WindowedValue.of(KV.of("hello", 2), secondMinusOne, WINDOW_2, PaneInfo.NO_FIRING));
  runner.processElement(
      WindowedValue.of(KV.of("hello", 3), secondMinusTwo, WINDOW_2, PaneInfo.NO_FIRING));

  if (ordered) {
    // move forward in time so that the input might get flushed
    advanceInputWatermark(
        timerInternals, secondTime.plus(Duration.millis(ALLOWED_LATENESS + 1)), runner);
  }

  assertEquals(6, (int) stateInternals.state(windowNamespace(WINDOW_2), stateTag).read());
  assertEquals(3, outputs.size());
  if (ordered) {
    assertEquals(
        Arrays.asList(
            KV.of(outputTag, WindowedValue.of(3, secondMinusTwo, WINDOW_2, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(5, secondMinusOne, WINDOW_2, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(6, secondTime, WINDOW_2, PaneInfo.NO_FIRING))),
        outputs);
  } else {
    assertEquals(
        Arrays.asList(
            KV.of(outputTag, WindowedValue.of(1, secondTime, WINDOW_2, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(3, secondMinusOne, WINDOW_2, PaneInfo.NO_FIRING)),
            KV.of(outputTag, WindowedValue.of(6, secondMinusTwo, WINDOW_2, PaneInfo.NO_FIRING))),
        outputs);
  }
}
use of org.apache.beam.runners.core.DoFnRunners.OutputManager in project beam by apache.
the class SplittableProcessElementsEvaluatorFactory method createEvaluator.
/**
 * Builds the evaluator for a {@code ProcessElements} application over the given input bundle.
 *
 * <p>The evaluator is obtained from {@code delegateFactory}. The {@code ProcessFn} behind its
 * ParDo evaluator is then configured with state and timer factories backed by the step context,
 * a side input reader, and a process-element invoker whose outputs are forwarded to the ParDo
 * evaluator's output manager.
 *
 * @param application the applied {@code ProcessElements} transform
 * @param inputBundle the bundle supplying the input PCollection and key
 * @return the evaluator created by the delegate factory
 * @throws Exception propagated from the delegate factory
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private TransformEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> createEvaluator(
    AppliedPTransform<
            PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>,
            PCollectionTuple,
            ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>>
        application,
    CommittedBundle<InputT> inputBundle)
    throws Exception {
  final ProcessElements<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>
      transform = application.getTransform();

  final DoFnLifecycleManagerRemovingTransformEvaluator<
          KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>
      evaluator =
          delegateFactory.createEvaluator(
              (AppliedPTransform) application,
              (PCollection<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>>)
                  inputBundle.getPCollection(),
              inputBundle.getKey(),
              transform.getSideInputs(),
              transform.getMainOutputTag(),
              transform.getAdditionalOutputTags().getAll(),
              DoFnSchemaInformation.create(),
              transform.getSideInputMapping());

  final ParDoEvaluator<KeyedWorkItem<byte[], KV<InputT, RestrictionT>>> pde =
      evaluator.getParDoEvaluator();

  // The runner inside the ParDo evaluator is cast to ProcessFnRunner to reach its ProcessFn.
  final ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT> processFn =
      (ProcessFn<InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>)
          ProcessFnRunner.class.cast(pde.getFnRunner()).getFn();

  // Both factories ignore the key and hand back the step context's internals.
  final DirectExecutionContext.DirectStepContext stepContext = pde.getStepContext();
  processFn.setStateInternalsFactory(key -> stepContext.stateInternals());
  processFn.setTimerInternalsFactory(key -> stepContext.timerInternals());

  // Everything the invoker emits goes through the ParDo evaluator's output manager.
  final OutputManager outputManager = pde.getOutputManager();
  OutputWindowedValue<OutputT> outputWindowedValue =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(
            OutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputManager.output(
              transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  SideInputReader sideInputReader =
      evaluationContext.createSideInputReader(transform.getSideInputs());
  processFn.setSideInputReader(sideInputReader);
  processFn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          transform.getFn(),
          options,
          outputWindowedValue,
          sideInputReader,
          ses,
          // Presumably the output-count and duration bounds of one invocation, kept small to
          // exercise splittable DoFns - confirm against the invoker's constructor.
          100,
          Duration.standardSeconds(1),
          stepContext::bundleFinalizer));

  return evaluator;
}
Aggregations