Search in sources :

Example 1 with SamzaExecutionContext

Use of org.apache.beam.runners.samza.SamzaExecutionContext in the Apache Beam project.

Class DoFnOp, method open.

/**
 * Initializes this operator before any element is processed.
 *
 * <p>Resets the watermark bookkeeping, reads the pipeline options from the Samza execution
 * context, and then builds, in order: the bundle manager, the timer internals factory, the
 * side-input handler, the DoFn runner (portable or Java flavour), the pushback runner, and
 * finally the DoFn invoker.
 *
 * @param config the job configuration (not read by this method)
 * @param context the Samza context supplying the application container and task contexts
 * @param timerRegistry the scheduler handed to the bundle manager and the timer internals factory
 * @param emitter the emitter that the output manager writes to
 */
@Override
@SuppressWarnings("unchecked")
public void open(Config config, Context context, Scheduler<KeyedTimerData<Void>> timerRegistry, OpEmitter<OutT> emitter) {
    // Start with the input watermarks at the minimum and the pushback hold at the maximum.
    this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
    this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
    this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;

    final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
    final SamzaExecutionContext execContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();
    this.samzaPipelineOptions = execContext.getPipelineOptions();
    // A maximum bundle size of one (or less) disables bundling.
    this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1;

    final String pardoStateId = "pardo-" + transformId;
    final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
        SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
            pardoStateId, context.getTaskContext(), samzaPipelineOptions);
    final FutureCollector<OutT> futureCollector = createFutureCollector();

    this.bundleManager =
        new BundleManager<>(
            createBundleProgressListener(),
            futureCollector,
            samzaPipelineOptions.getMaxBundleSize(),
            samzaPipelineOptions.getMaxBundleTimeMs(),
            timerRegistry,
            bundleCheckTimerId);
    this.timerInternalsFactory =
        SamzaTimerInternalsFactory.createTimerInternalFactory(
            keyCoder,
            (Scheduler) timerRegistry,
            getTimerStateId(doFnSignature),
            nonKeyedStateFactory,
            windowingStrategy,
            isBounded,
            samzaPipelineOptions);
    this.sideInputHandler =
        new SideInputHandler(sideInputs, nonKeyedStateFactory.stateInternalsForKey(null));

    if (!isPortable) {
        // Java path: run the DoFn directly.
        this.fnRunner =
            SamzaDoFnRunners.create(
                samzaPipelineOptions,
                doFn,
                windowingStrategy,
                transformFullName,
                pardoStateId,
                context,
                mainOutputTag,
                sideInputHandler,
                timerInternalsFactory,
                keyCoder,
                outputManagerFactory.create(emitter, futureCollector),
                inputCoder,
                sideOutputTags,
                outputCoders,
                doFnSchemaInformation,
                (Map<String, PCollectionView<?>>) sideInputMapping);
    } else {
        // Portable path: rebuild the executable stage from its payload and obtain a bundle factory.
        final ExecutableStage stage = ExecutableStage.fromPayload(stagePayload);
        stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo);
        stageBundleFactory = stageContext.getStageBundleFactory(stage);
        this.fnRunner =
            SamzaDoFnRunners.createPortable(
                transformId,
                bundleStateId,
                windowedValueCoder,
                stage,
                sideInputMapping,
                sideInputHandler,
                nonKeyedStateFactory,
                timerInternalsFactory,
                samzaPipelineOptions,
                outputManagerFactory.create(emitter, futureCollector),
                stageBundleFactory,
                mainOutputTag,
                idToTupleTagMap,
                context,
                transformFullName);
    }

    this.pushbackFnRunner =
        SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
    this.pushbackValues = new ArrayList<>();

    final Iterator<SamzaDoFnInvokerRegistrar> registrars =
        ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
    if (registrars.hasNext()) {
        // A registrar was discovered via ServiceLoader; exactly one is expected.
        doFnInvoker =
            Iterators.getOnlyElement(registrars)
                .invokerSetupFor(doFn, samzaPipelineOptions, context);
    } else {
        // No registrar on the classpath: use the default invoker.
        doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions);
    }
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) Scheduler(org.apache.samza.operators.Scheduler) SideInputHandler(org.apache.beam.runners.core.SideInputHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Map(java.util.Map) HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 2 with SamzaExecutionContext

Use of org.apache.beam.runners.samza.SamzaExecutionContext in the Apache Beam project.

Class SamzaDoFnRunners, method createPortable.

/**
 * Builds the {@link DoFnRunner} used by the portable runner.
 *
 * <p>The runner is an {@code SdkHarnessDoFnRunner} backed by a non-keyed bag state (named by
 * {@code bundleStateId}) and a state request handler. When metrics are enabled in the pipeline
 * options the runner is wrapped with {@code DoFnRunnerWithMetrics}; otherwise it is returned
 * unwrapped.
 */
@SuppressWarnings("unchecked")
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> createPortable(String transformId, String bundleStateId, Coder<WindowedValue<InT>> windowedValueCoder, ExecutableStage executableStage, Map<?, PCollectionView<?>> sideInputMapping, SideInputHandler sideInputHandler, SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory, SamzaTimerInternalsFactory<?> timerInternalsFactory, SamzaPipelineOptions pipelineOptions, DoFnRunners.OutputManager outputManager, StageBundleFactory stageBundleFactory, TupleTag<FnOutT> mainOutputTag, Map<String, TupleTag<?>> idToTupleTagMap, Context context, String transformFullName) {
    // Bag state that stores the events of a bundle.
    final StateInternals nonKeyedState = nonKeyedStateInternalsFactory.stateInternalsForKey(null);
    final BagState<WindowedValue<InT>> bundleBag =
        nonKeyedState.state(
            StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder));

    final StateRequestHandler requestHandler =
        SamzaStateRequestHandlers.of(
            transformId,
            context.getTaskContext(),
            pipelineOptions,
            executableStage,
            stageBundleFactory,
            (Map<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>) sideInputMapping,
            sideInputHandler);

    final SamzaExecutionContext samzaContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();

    final DoFnRunner<InT, FnOutT> harnessRunner =
        new SdkHarnessDoFnRunner<>(
            timerInternalsFactory,
            WindowUtils.getWindowStrategy(
                executableStage.getInputPCollection().getId(), executableStage.getComponents()),
            outputManager,
            stageBundleFactory,
            idToTupleTagMap,
            bundleBag,
            requestHandler);

    if (!pipelineOptions.getEnableMetrics()) {
        return harnessRunner;
    }
    return DoFnRunnerWithMetrics.wrap(
        harnessRunner, samzaContext.getMetricsContainer(), transformFullName);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValue(org.apache.beam.sdk.util.WindowedValue)

Example 3 with SamzaExecutionContext

Use of org.apache.beam.runners.samza.SamzaExecutionContext in the Apache Beam project.

Class GroupByKeyOp, method open.

/**
 * Initializes this group-by-key operator.
 *
 * <p>Reads the pipeline options from the Samza execution context, creates the keyed state and
 * timer factories, builds the group-also-by-window {@link DoFn} and wraps its simple runner with
 * {@code DoFnRunnerWithMetrics}.
 *
 * @param config the job configuration (not read by this method)
 * @param context the Samza context supplying the application container and task contexts
 * @param timerRegistry the scheduler handed to the timer internals factory
 * @param emitter the emitter that the output manager writes to
 */
@Override
public void open(Config config, Context context, Scheduler<KeyedTimerData<K>> timerRegistry, OpEmitter<KV<K, OutputT>> emitter) {
    // Looked up once and reused below for both the pipeline options and the metrics container.
    final SamzaExecutionContext samzaExecutionContext = (SamzaExecutionContext) context.getApplicationContainerContext();
    this.pipelineOptions = samzaExecutionContext.getPipelineOptions();
    final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory = SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(transformId, context.getTaskContext(), pipelineOptions);
    final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);
    // Keyed state factory backed by the Beam store of this task.
    this.stateInternalsFactory = new SamzaStoreStateInternals.Factory<>(transformId, Collections.singletonMap(SamzaStoreStateInternals.BEAM_STORE, SamzaStoreStateInternals.getBeamStore(context.getTaskContext())), keyCoder, pipelineOptions.getStoreBatchGetSize());
    this.timerInternalsFactory = SamzaTimerInternalsFactory.createTimerInternalFactory(keyCoder, timerRegistry, TIMER_STATE_ID, nonKeyedStateInternalsFactory, windowingStrategy, isBounded, pipelineOptions);
    final DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn = GroupAlsoByWindowViaWindowSetNewDoFn.create(windowingStrategy, stateInternalsFactory, timerInternalsFactory, NullSideInputReader.of(Collections.emptyList()), reduceFn, outputManager, mainOutputTag);
    final KeyedInternals<K> keyedInternals = new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
    // Step context that delegates state and timer access to the keyed internals.
    final StepContext stepContext = new StepContext() {

        @Override
        public StateInternals stateInternals() {
            return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
            return keyedInternals.timerInternals();
        }
    };
    // NOTE(review): freshly created default options are passed here rather than this.pipelineOptions
    // - confirm this is intentional.
    final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner = DoFnRunners.simpleRunner(PipelineOptionsFactory.create(), doFn, NullSideInputReader.of(Collections.emptyList()), outputManager, mainOutputTag, Collections.emptyList(), stepContext, null, Collections.emptyMap(), windowingStrategy, DoFnSchemaInformation.create(), Collections.emptyMap());
    // NOTE(review): the metrics wrapper is applied unconditionally (getEnableMetrics() is not
    // consulted in this method) - confirm this is intentional.
    this.fnRunner = DoFnRunnerWithMetrics.wrap(doFnRunner, samzaExecutionContext.getMetricsContainer(), transformFullName);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) DoFnRunners(org.apache.beam.runners.core.DoFnRunners) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem)

Example 4 with SamzaExecutionContext

Use of org.apache.beam.runners.samza.SamzaExecutionContext in the Apache Beam project.

Class SamzaDoFnRunners, method create.

/**
 * Builds the {@link DoFnRunner} used by the Java (non-portable) runner.
 *
 * <p>A simple runner is created around {@code doFn} and, when metrics are enabled in the
 * pipeline options, wrapped with {@code DoFnRunnerWithMetrics}. For a stateful {@code doFn} the
 * result is additionally wrapped in a stateful runner and a {@code DoFnRunnerWithKeyedInternals};
 * for a stateless one, state and timer internals for the {@code null} key are used and the
 * (possibly metrics-wrapped) simple runner is returned directly.
 */
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> create(SamzaPipelineOptions pipelineOptions, DoFn<InT, FnOutT> doFn, WindowingStrategy<?, ?> windowingStrategy, String transformFullName, String transformId, Context context, TupleTag<FnOutT> mainOutputTag, SideInputHandler sideInputHandler, SamzaTimerInternalsFactory<?> timerInternalsFactory, Coder<?> keyCoder, DoFnRunners.OutputManager outputManager, Coder<InT> inputCoder, List<TupleTag<?>> sideOutputTags, Map<TupleTag<?>, Coder<?>> outputCoders, DoFnSchemaInformation doFnSchemaInformation, Map<String, PCollectionView<?>> sideInputMapping) {
    final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
    final SamzaStoreStateInternals.Factory<?> stateFactory =
        SamzaStoreStateInternals.createStateInternalsFactory(
            transformId, keyCoder, context.getTaskContext(), pipelineOptions, doFnSignature);
    final SamzaExecutionContext samzaContext =
        (SamzaExecutionContext) context.getApplicationContainerContext();

    // Stateful DoFns go through KeyedInternals; stateless ones use the internals of the null key.
    final boolean stateful = StateUtils.isStateful(doFn);
    final KeyedInternals keyedInternals =
        stateful ? new KeyedInternals(stateFactory, timerInternalsFactory) : null;
    final StateInternals stateInternals =
        stateful ? keyedInternals.stateInternals() : stateFactory.stateInternalsForKey(null);
    final TimerInternals timerInternals =
        stateful
            ? keyedInternals.timerInternals()
            : timerInternalsFactory.timerInternalsForKey(null);

    final StepContext stepContext = createStepContext(stateInternals, timerInternals);
    final DoFnRunner<InT, FnOutT> simpleRunner =
        DoFnRunners.simpleRunner(
            pipelineOptions,
            doFn,
            sideInputHandler,
            outputManager,
            mainOutputTag,
            sideOutputTags,
            stepContext,
            inputCoder,
            outputCoders,
            windowingStrategy,
            doFnSchemaInformation,
            sideInputMapping);

    // Attach metrics only when the pipeline options ask for them.
    final DoFnRunner<InT, FnOutT> baseRunner;
    if (pipelineOptions.getEnableMetrics()) {
        baseRunner =
            DoFnRunnerWithMetrics.wrap(
                simpleRunner, samzaContext.getMetricsContainer(), transformFullName);
    } else {
        baseRunner = simpleRunner;
    }

    if (keyedInternals == null) {
        return baseRunner;
    }

    final DoFnRunner<InT, FnOutT> statefulRunner =
        DoFnRunners.defaultStatefulDoFnRunner(
            doFn,
            inputCoder,
            baseRunner,
            stepContext,
            windowingStrategy,
            new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, windowingStrategy),
            createStateCleaner(doFn, windowingStrategy, keyedInternals.stateInternals()));
    return new DoFnRunnerWithKeyedInternals<>(statefulRunner, keyedInternals);
}
Also used : SamzaExecutionContext(org.apache.beam.runners.samza.SamzaExecutionContext) StepContext(org.apache.beam.runners.core.StepContext) TimerInternals(org.apache.beam.runners.core.TimerInternals) StateInternals(org.apache.beam.runners.core.StateInternals) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Aggregations

SamzaExecutionContext (org.apache.beam.runners.samza.SamzaExecutionContext): 4, StepContext (org.apache.beam.runners.core.StepContext): 2, DoFnSignature (org.apache.beam.sdk.transforms.reflect.DoFnSignature): 2, HashMap (java.util.HashMap): 1, Map (java.util.Map): 1, RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 1, DoFnRunners (org.apache.beam.runners.core.DoFnRunners): 1, KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem): 1, SideInputHandler (org.apache.beam.runners.core.SideInputHandler): 1, StateInternals (org.apache.beam.runners.core.StateInternals): 1, StatefulDoFnRunner (org.apache.beam.runners.core.StatefulDoFnRunner): 1, TimerInternals (org.apache.beam.runners.core.TimerInternals): 1, ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage): 1, StateRequestHandler (org.apache.beam.runners.fnexecution.state.StateRequestHandler): 1, WindowedValue (org.apache.beam.sdk.util.WindowedValue): 1, KV (org.apache.beam.sdk.values.KV): 1, PCollectionView (org.apache.beam.sdk.values.PCollectionView): 1, Scheduler (org.apache.samza.operators.Scheduler): 1