use of org.apache.beam.runners.core.SideInputHandler in project beam by apache.
the class DoFnOperator method initializeState.
@Override
public void initializeState(StateInitializationContext context) throws Exception {
super.initializeState(context);
ListStateDescriptor<WindowedValue<InputT>> pushedBackStateDescriptor = new ListStateDescriptor<>("pushed-back-elements", new CoderTypeSerializer<>(windowedInputCoder, serializedOptions));
if (keySelector != null) {
pushedBackElementsHandler = KeyedPushedBackElementsHandler.create(keySelector, getKeyedStateBackend(), pushedBackStateDescriptor);
} else {
ListState<WindowedValue<InputT>> listState = getOperatorStateBackend().getListState(pushedBackStateDescriptor);
pushedBackElementsHandler = NonKeyedPushedBackElementsHandler.create(listState);
}
currentInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
currentSideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
currentOutputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis();
sideInputReader = NullSideInputReader.of(sideInputs);
if (!sideInputs.isEmpty()) {
FlinkBroadcastStateInternals sideInputStateInternals = new FlinkBroadcastStateInternals<>(getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend(), serializedOptions);
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
Stream<WindowedValue<InputT>> pushedBack = pushedBackElementsHandler.getElements();
long min = pushedBack.map(v -> v.getTimestamp().getMillis()).reduce(Long.MAX_VALUE, Math::min);
pushedBackWatermark = min;
} else {
pushedBackWatermark = Long.MAX_VALUE;
}
// StatefulPardo or WindowDoFn
if (keyCoder != null) {
keyedStateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(), keyCoder, serializedOptions);
if (timerService == null) {
timerService = getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder, serializedOptions), this);
}
timerInternals = new FlinkTimerInternals();
timeServiceManagerCompat = getTimeServiceManagerCompat();
}
outputManager = outputManagerFactory.create(output, getLockToAcquireForStateAccessDuringBundles(), getOperatorStateBackend());
}
use of org.apache.beam.runners.core.SideInputHandler in project beam by apache.
the class DoFnOp method open.
@Override
@SuppressWarnings("unchecked")
public void open(Config config, Context context, Scheduler<KeyedTimerData<Void>> timerRegistry, OpEmitter<OutT> emitter) {
this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;
final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
final SamzaExecutionContext samzaExecutionContext = (SamzaExecutionContext) context.getApplicationContainerContext();
this.samzaPipelineOptions = samzaExecutionContext.getPipelineOptions();
this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1;
final String stateId = "pardo-" + transformId;
final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory = SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(stateId, context.getTaskContext(), samzaPipelineOptions);
final FutureCollector<OutT> outputFutureCollector = createFutureCollector();
this.bundleManager = new BundleManager<>(createBundleProgressListener(), outputFutureCollector, samzaPipelineOptions.getMaxBundleSize(), samzaPipelineOptions.getMaxBundleTimeMs(), timerRegistry, bundleCheckTimerId);
this.timerInternalsFactory = SamzaTimerInternalsFactory.createTimerInternalFactory(keyCoder, (Scheduler) timerRegistry, getTimerStateId(signature), nonKeyedStateInternalsFactory, windowingStrategy, isBounded, samzaPipelineOptions);
this.sideInputHandler = new SideInputHandler(sideInputs, nonKeyedStateInternalsFactory.stateInternalsForKey(null));
if (isPortable) {
final ExecutableStage executableStage = ExecutableStage.fromPayload(stagePayload);
stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo);
stageBundleFactory = stageContext.getStageBundleFactory(executableStage);
this.fnRunner = SamzaDoFnRunners.createPortable(transformId, bundleStateId, windowedValueCoder, executableStage, sideInputMapping, sideInputHandler, nonKeyedStateInternalsFactory, timerInternalsFactory, samzaPipelineOptions, outputManagerFactory.create(emitter, outputFutureCollector), stageBundleFactory, mainOutputTag, idToTupleTagMap, context, transformFullName);
} else {
this.fnRunner = SamzaDoFnRunners.create(samzaPipelineOptions, doFn, windowingStrategy, transformFullName, stateId, context, mainOutputTag, sideInputHandler, timerInternalsFactory, keyCoder, outputManagerFactory.create(emitter, outputFutureCollector), inputCoder, sideOutputTags, outputCoders, doFnSchemaInformation, (Map<String, PCollectionView<?>>) sideInputMapping);
}
this.pushbackFnRunner = SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
this.pushbackValues = new ArrayList<>();
final Iterator<SamzaDoFnInvokerRegistrar> invokerReg = ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
if (!invokerReg.hasNext()) {
// use the default invoker here
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions);
} else {
doFnInvoker = Iterators.getOnlyElement(invokerReg).invokerSetupFor(doFn, samzaPipelineOptions, context);
}
}
use of org.apache.beam.runners.core.SideInputHandler in project beam by apache.
the class AbstractParDoP method init.
@Override
public void init(@Nonnull Outbox outbox, @Nonnull Context context) {
this.outbox = outbox;
this.metricsContainer = new JetMetricsContainer(stepId, ownerId, context);
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, pipelineOptions.get());
if (ordinalToSideInput.isEmpty()) {
sideInputReader = NullSideInputReader.of(Collections.emptyList());
} else {
bufferedItems = new SimpleInbox();
sideInputHandler = new SideInputHandler(ordinalToSideInput.values(), InMemoryStateInternals.forKey(null));
sideInputReader = sideInputHandler;
}
outputManager = new JetOutputManager(outbox, outputCoders, outputCollToOrdinals);
doFnRunner = getDoFnRunner(pipelineOptions.get(), doFn, sideInputReader, outputManager, mainOutputTag, Lists.newArrayList(outputCollToOrdinals.keySet()), inputValueCoder, outputValueCoders, windowingStrategy, doFnSchemaInformation, sideInputMapping);
}
use of org.apache.beam.runners.core.SideInputHandler in project beam by apache.
the class ApexParDoOperator method setup.
@Override
public void setup(OperatorContext context) {
this.traceTuples = ApexStreamTuple.Logging.isDebugEnabled(pipelineOptions.get(), this);
SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
if (!sideInputs.isEmpty()) {
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
}
for (int i = 0; i < additionalOutputTags.size(); i++) {
@SuppressWarnings("unchecked") DefaultOutputPort<ApexStreamTuple<?>> port = (DefaultOutputPort<ApexStreamTuple<?>>) additionalOutputPorts[i];
additionalOutputPortMapping.put(additionalOutputTags.get(i), port);
}
NoOpStepContext stepContext = new NoOpStepContext() {
@Override
public StateInternals stateInternals() {
return currentKeyStateInternals;
}
@Override
public TimerInternals timerInternals() {
return currentKeyTimerInternals;
}
};
DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(pipelineOptions.get(), doFn, sideInputReader, this, mainOutputTag, additionalOutputTags, stepContext, windowingStrategy);
doFnInvoker = DoFnInvokers.invokerFor(doFn);
doFnInvoker.invokeSetup();
if (this.currentKeyStateInternals != null) {
StatefulDoFnRunner.CleanupTimer cleanupTimer = new StatefulDoFnRunner.TimeInternalsCleanupTimer(stepContext.timerInternals(), windowingStrategy);
@SuppressWarnings({ "rawtypes" }) Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
@SuppressWarnings({ "unchecked" }) StatefulDoFnRunner.StateCleaner<?> stateCleaner = new StatefulDoFnRunner.StateInternalsStateCleaner<>(doFn, stepContext.stateInternals(), windowCoder);
doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(doFn, doFnRunner, windowingStrategy, cleanupTimer, stateCleaner);
}
pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
if (doFn instanceof ProcessFn) {
@SuppressWarnings("unchecked") StateInternalsFactory<String> stateInternalsFactory = (StateInternalsFactory<String>) this.currentKeyStateInternals.getFactory();
@SuppressWarnings({ "rawtypes", "unchecked" }) ProcessFn<InputT, OutputT, Object, RestrictionTracker<Object>> splittableDoFn = (ProcessFn) doFn;
splittableDoFn.setStateInternalsFactory(stateInternalsFactory);
TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {
@Override
public TimerInternals timerInternalsForKey(String key) {
return currentKeyTimerInternals;
}
};
splittableDoFn.setTimerInternalsFactory(timerInternalsFactory);
splittableDoFn.setProcessElementInvoker(new OutputAndTimeBoundedSplittableProcessElementInvoker<>(doFn, pipelineOptions.get(), new OutputWindowedValue<OutputT>() {
@Override
public void outputWindowedValue(OutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
output(mainOutputTag, WindowedValue.of(output, timestamp, windows, pane));
}
@Override
public <AdditionalOutputT> void outputWindowedValue(TupleTag<AdditionalOutputT> tag, AdditionalOutputT output, Instant timestamp, Collection<? extends BoundedWindow> windows, PaneInfo pane) {
output(tag, WindowedValue.of(output, timestamp, windows, pane));
}
}, sideInputReader, Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()), 10000, Duration.standardSeconds(10)));
}
}
use of org.apache.beam.runners.core.SideInputHandler in project beam by apache.
the class DoFnOperator method open.
@Override
public void open() throws Exception {
// WindowDoFnOperator need use state and timer to get DoFn.
// So must wait StateInternals and TimerInternals ready.
// This will be called after initializeState()
this.doFn = getDoFn();
FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
StepContext stepContext = new FlinkStepContext();
doFnRunner = DoFnRunners.simpleRunner(options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation, sideInputMapping);
if (requiresStableInput) {
// put this in front of the root FnRunner before any additional wrappers
doFnRunner = bufferingDoFnRunner = BufferingDoFnRunner.create(doFnRunner, "stable-input-buffer", windowedInputCoder, windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(), getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
}
doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
earlyBindStateIfNeeded();
if (!options.getDisableMetrics()) {
flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
String checkpointMetricNamespace = options.getReportCheckpointDuration();
if (checkpointMetricNamespace != null) {
MetricName checkpointMetric = MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
checkpointStats = new CheckpointStats(() -> flinkMetricContainer.getMetricsContainer(stepName).getDistribution(checkpointMetric));
}
}
elementCount = 0L;
lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();
// Schedule timer to check timeout of finish bundle.
long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
checkFinishBundleTimer = getProcessingTimeService().scheduleAtFixedRate(timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);
if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
pushbackDoFnRunner = new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
} else {
pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
bundleFinalizer = new InMemoryBundleFinalizer();
pendingFinalizations = new LinkedHashMap<>();
}
Aggregations