use of org.apache.beam.runners.samza.SamzaExecutionContext in project beam by apache.
the class DoFnOp method open.
/**
 * Initializes this operator: resets the watermark fields, reads the pipeline options from the
 * Samza execution context, and builds the bundle manager, timer internals factory, side input
 * handler, DoFn runner (portable or non-portable), pushback runner and DoFn invoker.
 *
 * @param config job configuration; not read by this method
 * @param context Samza context supplying the application container context and the task context
 * @param timerRegistry scheduler passed to the bundle manager and the timer internals factory
 * @param emitter emitter handed to the output manager factory
 */
@Override
@SuppressWarnings("unchecked")
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<Void>> timerRegistry,
    OpEmitter<OutT> emitter) {
  // Input and side-input watermarks start at the minimum timestamp; the pushback hold starts at
  // the maximum timestamp.
  this.inputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.sideInputWatermark = BoundedWindow.TIMESTAMP_MIN_VALUE;
  this.pushbackWatermarkHold = BoundedWindow.TIMESTAMP_MAX_VALUE;

  final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaExecutionContext executionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.samzaPipelineOptions = executionContext.getPipelineOptions();
  // A maximum bundle size of one or less switches bundling off.
  this.bundleDisabled = samzaPipelineOptions.getMaxBundleSize() <= 1;

  final String pardoStateId = "pardo-" + transformId;
  final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          pardoStateId, context.getTaskContext(), samzaPipelineOptions);
  final FutureCollector<OutT> futureCollector = createFutureCollector();

  this.bundleManager =
      new BundleManager<>(
          createBundleProgressListener(),
          futureCollector,
          samzaPipelineOptions.getMaxBundleSize(),
          samzaPipelineOptions.getMaxBundleTimeMs(),
          timerRegistry,
          bundleCheckTimerId);
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          (Scheduler) timerRegistry,
          getTimerStateId(doFnSignature),
          nonKeyedStateFactory,
          windowingStrategy,
          isBounded,
          samzaPipelineOptions);
  // Side inputs are backed by the non-keyed state internals (null key).
  this.sideInputHandler =
      new SideInputHandler(sideInputs, nonKeyedStateFactory.stateInternalsForKey(null));

  if (!isPortable) {
    this.fnRunner =
        SamzaDoFnRunners.create(
            samzaPipelineOptions,
            doFn,
            windowingStrategy,
            transformFullName,
            pardoStateId,
            context,
            mainOutputTag,
            sideInputHandler,
            timerInternalsFactory,
            keyCoder,
            outputManagerFactory.create(emitter, futureCollector),
            inputCoder,
            sideOutputTags,
            outputCoders,
            doFnSchemaInformation,
            (Map<String, PCollectionView<?>>) sideInputMapping);
  } else {
    // Portable execution: resolve the executable stage and its bundle factory first.
    final ExecutableStage stage = ExecutableStage.fromPayload(stagePayload);
    stageContext = SamzaExecutableStageContextFactory.getInstance().get(jobInfo);
    stageBundleFactory = stageContext.getStageBundleFactory(stage);
    this.fnRunner =
        SamzaDoFnRunners.createPortable(
            transformId,
            bundleStateId,
            windowedValueCoder,
            stage,
            sideInputMapping,
            sideInputHandler,
            nonKeyedStateFactory,
            timerInternalsFactory,
            samzaPipelineOptions,
            outputManagerFactory.create(emitter, futureCollector),
            stageBundleFactory,
            mainOutputTag,
            idToTupleTagMap,
            context,
            transformFullName);
  }

  this.pushbackFnRunner =
      SimplePushbackSideInputDoFnRunner.create(fnRunner, sideInputs, sideInputHandler);
  this.pushbackValues = new ArrayList<>();

  final Iterator<SamzaDoFnInvokerRegistrar> registrars =
      ServiceLoader.load(SamzaDoFnInvokerRegistrar.class).iterator();
  if (registrars.hasNext()) {
    // A registrar was discovered via ServiceLoader: let it set up the invoker.
    doFnInvoker =
        Iterators.getOnlyElement(registrars).invokerSetupFor(doFn, samzaPipelineOptions, context);
  } else {
    // No registrar available: use the default invoker.
    doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, samzaPipelineOptions);
  }
}
use of org.apache.beam.runners.samza.SamzaExecutionContext in project beam by apache.
the class SamzaDoFnRunners method createPortable.
/**
 * Creates the {@link DoFnRunner} for the portable runner.
 *
 * <p>An {@code SdkHarnessDoFnRunner} is built for the given executable stage. When
 * {@code pipelineOptions.getEnableMetrics()} is true it is wrapped with
 * {@code DoFnRunnerWithMetrics}; otherwise it is returned as is.
 *
 * @param transformId id passed to the state request handler factory
 * @param bundleStateId id of the bag state that stores the events of a bundle
 * @param windowedValueCoder coder used for the elements of that bag state
 * @param executableStage stage whose input PCollection and components select the windowing
 *     strategy
 * @param sideInputMapping side input views; cast here to a map keyed by side input id
 * @param sideInputHandler handler passed to the state request handler factory
 * @param nonKeyedStateInternalsFactory source of the (null-key) state internals holding the bag
 * @param timerInternalsFactory timer internals factory passed to the harness runner
 * @param pipelineOptions options consulted for the metrics switch
 * @param outputManager output manager passed to the harness runner
 * @param stageBundleFactory bundle factory of the stage
 * @param mainOutputTag main output tag; not read by this method
 * @param idToTupleTagMap output id to tuple tag mapping passed to the harness runner
 * @param context Samza context supplying the task context and the execution context
 * @param transformFullName name passed to the metrics wrapper
 * @return the runner, metrics-wrapped when metrics are enabled
 */
@SuppressWarnings("unchecked")
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> createPortable(
    String transformId,
    String bundleStateId,
    Coder<WindowedValue<InT>> windowedValueCoder,
    ExecutableStage executableStage,
    Map<?, PCollectionView<?>> sideInputMapping,
    SideInputHandler sideInputHandler,
    SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory,
    SamzaTimerInternalsFactory<?> timerInternalsFactory,
    SamzaPipelineOptions pipelineOptions,
    DoFnRunners.OutputManager outputManager,
    StageBundleFactory stageBundleFactory,
    TupleTag<FnOutT> mainOutputTag,
    Map<String, TupleTag<?>> idToTupleTagMap,
    Context context,
    String transformFullName) {
  // Events that belong to a bundle are kept in a bag state in the global namespace.
  final BagState<WindowedValue<InT>> bundleBuffer =
      nonKeyedStateInternalsFactory
          .stateInternalsForKey(null)
          .state(StateNamespaces.global(), StateTags.bag(bundleStateId, windowedValueCoder));

  final StateRequestHandler stateHandler =
      SamzaStateRequestHandlers.of(
          transformId,
          context.getTaskContext(),
          pipelineOptions,
          executableStage,
          stageBundleFactory,
          (Map<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>>) sideInputMapping,
          sideInputHandler);

  final SamzaExecutionContext samzaContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();

  final DoFnRunner<InT, FnOutT> harnessRunner =
      new SdkHarnessDoFnRunner<>(
          timerInternalsFactory,
          WindowUtils.getWindowStrategy(
              executableStage.getInputPCollection().getId(), executableStage.getComponents()),
          outputManager,
          stageBundleFactory,
          idToTupleTagMap,
          bundleBuffer,
          stateHandler);

  if (!pipelineOptions.getEnableMetrics()) {
    return harnessRunner;
  }
  return DoFnRunnerWithMetrics.wrap(
      harnessRunner, samzaContext.getMetricsContainer(), transformFullName);
}
use of org.apache.beam.runners.samza.SamzaExecutionContext in project beam by apache.
the class GroupByKeyOp method open.
/**
 * Initializes this group-by-key operator: reads the pipeline options from the Samza execution
 * context, creates the state and timer internals factories, builds the group-also-by-window
 * {@code DoFn} and its runner, and stores the metrics-wrapped runner in {@code fnRunner}.
 *
 * @param config job configuration; not read by this method
 * @param context Samza context supplying the application container context and the task context
 * @param timerRegistry scheduler passed to the timer internals factory
 * @param emitter emitter the output manager is created from
 */
@Override
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<K>> timerRegistry,
    OpEmitter<KV<K, OutputT>> emitter) {
  // Looked up once; used for both the pipeline options and the metrics container below.
  final SamzaExecutionContext samzaExecutionContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();
  this.pipelineOptions = samzaExecutionContext.getPipelineOptions();

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateInternalsFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          transformId, context.getTaskContext(), pipelineOptions);
  final DoFnRunners.OutputManager outputManager = outputManagerFactory.create(emitter);

  // Keyed state lives in the single Beam store of the task.
  this.stateInternalsFactory =
      new SamzaStoreStateInternals.Factory<>(
          transformId,
          Collections.singletonMap(
              SamzaStoreStateInternals.BEAM_STORE,
              SamzaStoreStateInternals.getBeamStore(context.getTaskContext())),
          keyCoder,
          pipelineOptions.getStoreBatchGetSize());
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          keyCoder,
          timerRegistry,
          TIMER_STATE_ID,
          nonKeyedStateInternalsFactory,
          windowingStrategy,
          isBounded,
          pipelineOptions);

  final DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn =
      GroupAlsoByWindowViaWindowSetNewDoFn.create(
          windowingStrategy,
          stateInternalsFactory,
          timerInternalsFactory,
          NullSideInputReader.of(Collections.emptyList()),
          reduceFn,
          outputManager,
          mainOutputTag);

  // The step context delegates to the keyed internals for both state and timers.
  final KeyedInternals<K> keyedInternals =
      new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);
  final StepContext stepContext =
      new StepContext() {
        @Override
        public StateInternals stateInternals() {
          return keyedInternals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
          return keyedInternals.timerInternals();
        }
      };

  // NOTE(review): the runner is given freshly created default options rather than the job's
  // pipelineOptions - confirm this is intentional.
  final DoFnRunner<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFnRunner =
      DoFnRunners.simpleRunner(
          PipelineOptionsFactory.create(),
          doFn,
          NullSideInputReader.of(Collections.emptyList()),
          outputManager,
          mainOutputTag,
          Collections.emptyList(),
          stepContext,
          null,
          Collections.emptyMap(),
          windowingStrategy,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  // Unlike SamzaDoFnRunners, the metrics wrapper is applied unconditionally here.
  this.fnRunner =
      DoFnRunnerWithMetrics.wrap(
          doFnRunner, samzaExecutionContext.getMetricsContainer(), transformFullName);
}
use of org.apache.beam.runners.samza.SamzaExecutionContext in project beam by apache.
the class SamzaDoFnRunners method create.
/**
 * Creates the {@link DoFnRunner} for the Java (non-portable) runner.
 *
 * <p>A simple runner is built around {@code doFn} and, when
 * {@code pipelineOptions.getEnableMetrics()} is true, wrapped with
 * {@code DoFnRunnerWithMetrics}. For a stateful {@code doFn} (per
 * {@code StateUtils.isStateful}) the result is additionally wrapped in a stateful runner and a
 * {@code DoFnRunnerWithKeyedInternals}; otherwise the (possibly metrics-wrapped) simple runner
 * is returned.
 *
 * @param pipelineOptions options passed to the runner and consulted for the metrics switch
 * @param doFn the user function to run
 * @param windowingStrategy windowing strategy passed to the runners and the cleanup helpers
 * @param transformFullName name passed to the metrics wrapper
 * @param transformId id passed to the state internals factory
 * @param context Samza context supplying the task context and the execution context
 * @param mainOutputTag tag of the main output
 * @param sideInputHandler passed to the simple runner as its side input argument
 * @param timerInternalsFactory factory for timer internals
 * @param keyCoder key coder passed to the state internals factory
 * @param outputManager output manager passed to the simple runner
 * @param inputCoder coder of the input elements
 * @param sideOutputTags tags of the additional outputs
 * @param outputCoders coders per output tag
 * @param doFnSchemaInformation schema information passed to the simple runner
 * @param sideInputMapping side input views passed to the simple runner
 * @return the assembled runner
 */
public static <InT, FnOutT> DoFnRunner<InT, FnOutT> create(
    SamzaPipelineOptions pipelineOptions,
    DoFn<InT, FnOutT> doFn,
    WindowingStrategy<?, ?> windowingStrategy,
    String transformFullName,
    String transformId,
    Context context,
    TupleTag<FnOutT> mainOutputTag,
    SideInputHandler sideInputHandler,
    SamzaTimerInternalsFactory<?> timerInternalsFactory,
    Coder<?> keyCoder,
    DoFnRunners.OutputManager outputManager,
    Coder<InT> inputCoder,
    List<TupleTag<?>> sideOutputTags,
    Map<TupleTag<?>, Coder<?>> outputCoders,
    DoFnSchemaInformation doFnSchemaInformation,
    Map<String, PCollectionView<?>> sideInputMapping) {
  final DoFnSignature doFnSignature = DoFnSignatures.getSignature(doFn.getClass());
  final SamzaStoreStateInternals.Factory<?> stateFactory =
      SamzaStoreStateInternals.createStateInternalsFactory(
          transformId, keyCoder, context.getTaskContext(), pipelineOptions, doFnSignature);
  final SamzaExecutionContext samzaContext =
      (SamzaExecutionContext) context.getApplicationContainerContext();

  // Stateful DoFns go through KeyedInternals; stateless ones use the null-key internals.
  final boolean stateful = StateUtils.isStateful(doFn);
  final KeyedInternals keyedInternals;
  final StateInternals stateInternals;
  final TimerInternals timerInternals;
  if (!stateful) {
    keyedInternals = null;
    stateInternals = stateFactory.stateInternalsForKey(null);
    timerInternals = timerInternalsFactory.timerInternalsForKey(null);
  } else {
    keyedInternals = new KeyedInternals(stateFactory, timerInternalsFactory);
    stateInternals = keyedInternals.stateInternals();
    timerInternals = keyedInternals.timerInternals();
  }

  final StepContext stepContext = createStepContext(stateInternals, timerInternals);
  final DoFnRunner<InT, FnOutT> baseRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          doFn,
          sideInputHandler,
          outputManager,
          mainOutputTag,
          sideOutputTags,
          stepContext,
          inputCoder,
          outputCoders,
          windowingStrategy,
          doFnSchemaInformation,
          sideInputMapping);

  // The metrics container is only fetched when metrics are enabled.
  final DoFnRunner<InT, FnOutT> meteredRunner;
  if (pipelineOptions.getEnableMetrics()) {
    meteredRunner =
        DoFnRunnerWithMetrics.wrap(
            baseRunner, samzaContext.getMetricsContainer(), transformFullName);
  } else {
    meteredRunner = baseRunner;
  }

  if (!stateful) {
    return meteredRunner;
  }

  final DoFnRunner<InT, FnOutT> statefulRunner =
      DoFnRunners.defaultStatefulDoFnRunner(
          doFn,
          inputCoder,
          meteredRunner,
          stepContext,
          windowingStrategy,
          new StatefulDoFnRunner.TimeInternalsCleanupTimer(timerInternals, windowingStrategy),
          createStateCleaner(doFn, windowingStrategy, keyedInternals.stateInternals()));
  return new DoFnRunnerWithKeyedInternals<>(statefulRunner, keyedInternals);
}
Aggregations