Use of org.apache.beam.runners.spark.util.SparkSideInputReader in project beam by apache.
From the class MultiDoFnFunction, the call method:
@Override
public Iterator<Tuple2<TupleTag<?>, WindowedValue<?>>> call(Iterator<WindowedValue<InputT>> iter)
    throws Exception {
  // Invoke the DoFn's @Setup once for this instance, and only if the partition is non-empty.
  if (!wasSetupCalled && iter.hasNext()) {
    DoFnInvokers.tryInvokeSetupFor(doFn, options.get());
    wasSetupCalled = true;
  }

  DoFnOutputManager outputManager = new DoFnOutputManager();

  final InMemoryTimerInternals timerInternals;
  final StepContext context;
  // Stateful ParDo is currently supported only in batch mode.
  Object key = null;

  if (stateful) {
    // Peek at the first element to extract the key, then push it back onto the iterator.
    if (iter.hasNext()) {
      WindowedValue<InputT> currentValue = iter.next();
      key = ((KV) currentValue.getValue()).getKey();
      iter = Iterators.concat(Iterators.singletonIterator(currentValue), iter);
    }
    // Back the StepContext with in-memory state and timers scoped to this key.
    final InMemoryStateInternals<?> stateInternals = InMemoryStateInternals.forKey(key);
    timerInternals = new InMemoryTimerInternals();
    context =
        new StepContext() {
          @Override
          public StateInternals stateInternals() {
            return stateInternals;
          }

          @Override
          public TimerInternals timerInternals() {
            return timerInternals;
          }
        };
  } else {
    timerInternals = null;
    context = new SparkProcessContext.NoOpStepContext();
  }

  // Wrap the broadcast side inputs in a SparkSideInputReader; CachedSideInputReader
  // memoizes side-input lookups so repeated reads stay cheap.
  final DoFnRunner<InputT, OutputT> doFnRunner =
      DoFnRunners.simpleRunner(
          options.get(),
          doFn,
          CachedSideInputReader.of(new SparkSideInputReader(sideInputs)),
          outputManager,
          mainOutputTag,
          additionalOutputTags,
          context,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  // Report Beam metrics through the Spark accumulator while the DoFn runs.
  DoFnRunnerWithMetrics<InputT, OutputT> doFnRunnerWithMetrics =
      new DoFnRunnerWithMetrics<>(stepName, doFnRunner, metricsAccum);

  return new SparkProcessContext<>(
          doFn,
          doFnRunnerWithMetrics,
          outputManager,
          key,
          stateful ? new TimerDataIterator(timerInternals) : Collections.emptyIterator())
      .processPartition(iter)
      .iterator();
}
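For context, SparkSideInputReader adapts the broadcast side inputs to Beam's runner-level SideInputReader contract, which resolves a PCollectionView to its materialized value for a given window; wrapping it in CachedSideInputReader memoizes those lookups so a DoFn that reads the same side input for every element does not pay repeatedly for the resolution. Below is a minimal sketch of that contract, assuming only the standard org.apache.beam.runners.core.SideInputReader interface; the helper class and its names are hypothetical, not part of Beam:

import org.apache.beam.runners.core.SideInputReader;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.PCollectionView;

/** Hypothetical helper; illustrates the SideInputReader contract, not Beam code. */
final class SideInputLookup {
  /**
   * Resolves {@code view} to its materialized value for {@code window}, or null
   * when the view was not wired into the reader. With a reader built as
   * CachedSideInputReader.of(new SparkSideInputReader(sideInputs)), as in the
   * snippet above, repeated lookups for the same view and window hit the cache.
   */
  static <T> T lookup(SideInputReader reader, PCollectionView<T> view, BoundedWindow window) {
    if (reader.isEmpty() || !reader.contains(view)) {
      return null;
    }
    // Runners call get(...) with the window the main-input element maps to.
    return reader.get(view, window);
  }
}

The caching layer matters on Spark because call runs once per partition and side-input values live in broadcast variables; resolving a view typically involves deserialization, so memoizing per view and window keeps per-element reads cheap.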