Search in sources :

Example 1 with InMemoryBagUserStateFactory

Use of org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory in the Apache Beam project.

From the class SparkExecutableStageFunction, method getStateRequestHandler:

/**
 * Assembles the state request handler for one executable stage.
 *
 * <p>The returned handler delegates by {@link StateKey.TypeCase}: the three side-input key types
 * go to a handler backed by the broadcast side inputs held in {@code sideInputs}, and bag user
 * state goes to a handler created from {@code bagUserStateHandlerFactory}, or to
 * {@link StateRequestHandler#unsupported()} when the stage declares no user states.
 *
 * @param executableStage the stage whose side inputs and user states are served
 * @param processBundleDescriptor descriptor handed to the bag user state handler factory
 * @return a handler that dispatches each state request according to its key type
 * @throws RuntimeException if creating the side input handler fails with an {@link IOException}
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
    // Decodes the broadcast bytes of a side input PCollection with the coder stored next to them.
    BatchSideInputHandlerFactory.SideInputGetter broadcastGetter = new BatchSideInputHandlerFactory.SideInputGetter() {

        @Override
        public <T> List<T> getSideInput(String pCollectionId) {
            Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> encodedWithCoder = sideInputs.get(pCollectionId);
            Broadcast<List<byte[]>> encodedElements = encodedWithCoder._1;
            WindowedValueCoder<SideInputT> elementCoder = encodedWithCoder._2;
            // Unchecked cast: the caller chooses T, the decoded elements come from elementCoder.
            return (List<T>) encodedElements.value().stream()
                    .map(encoded -> CoderHelpers.fromByteArray(encoded, elementCoder))
                    .collect(Collectors.toList());
        }
    };
    StateRequestHandlers.SideInputHandlerFactory sideInputFactory = BatchSideInputHandlerFactory.forStage(executableStage, broadcastGetter);

    final StateRequestHandler sideInputHandler;
    try {
        sideInputHandler = StateRequestHandlers.forSideInputHandlerFactory(ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
    } catch (IOException e) {
        throw new RuntimeException("Failed to setup state handler", e);
    }

    // Create the shared user state factory on first use.
    if (bagUserStateHandlerFactory == null) {
        bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
    }

    final StateRequestHandler userStateHandler;
    if (executableStage.getUserStates().isEmpty()) {
        userStateHandler = StateRequestHandler.unsupported();
    } else {
        // State kept for the previous key must be dropped before this handler is used.
        bagUserStateHandlerFactory.resetForNewKey();
        userStateHandler = StateRequestHandlers.forBagUserStateHandlerFactory(processBundleDescriptor, bagUserStateHandlerFactory);
    }

    // Every side-input flavour shares one handler; bag user state gets its own.
    EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
    handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
    return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Broadcast(org.apache.spark.broadcast.Broadcast) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) List(java.util.List) ByteArray(org.apache.beam.runners.spark.util.ByteArray) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) CoderHelpers(org.apache.beam.runners.spark.coders.CoderHelpers) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) 
InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Iterator(java.util.Iterator) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) MetricsContainerStepMapAccumulator(org.apache.beam.runners.spark.metrics.MetricsContainerStepMapAccumulator) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) Collections(java.util.Collections) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) IOException(java.io.IOException) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Broadcast(org.apache.spark.broadcast.Broadcast) 
BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Tuple2(scala.Tuple2) List(java.util.List) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) EnumMap(java.util.EnumMap)

Aggregations

IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 Collections (java.util.Collections)1 EnumMap (java.util.EnumMap)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Locale (java.util.Locale)1 Map (java.util.Map)1 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)1 Collectors (java.util.stream.Collectors)1 ProcessBundleProgressResponse (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse)1 ProcessBundleResponse (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)1 StateKey (org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey)1 TypeCase (org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase)1 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)1 InMemoryTimerInternals (org.apache.beam.runners.core.InMemoryTimerInternals)1 TimerInternals (org.apache.beam.runners.core.TimerInternals)1 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)1 Timer (org.apache.beam.runners.core.construction.Timer)1 ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)1