use of org.apache.beam.runners.core.StepContext in project beam by apache.
the class BatchGroupAlsoByWindowReshuffleDoFnTest method makeRunner.
/**
 * Builds the runner under test: a {@link GroupAlsoByWindowFnRunner} wrapped around the function
 * that {@code fnFactory} produces for {@code windowingStrategy}.
 *
 * <p>A fresh {@link TestStepContext} named {@code STEP_NAME} is created on every call. It backs
 * the state-internals factory given to the function and is also passed to the runner itself.
 *
 * @param fnFactory factory that creates the group-also-by-window function
 * @param windowingStrategy windowing strategy the function is created for
 * @param outputTag tag of the runner's output
 * @param outputManager receiver of the runner's output
 * @return a runner that executes the created function
 */
private static <K, InputT, OutputT, W extends BoundedWindow>
    DoFnRunner<KV<K, Iterable<WindowedValue<InputT>>>, KV<K, OutputT>> makeRunner(
        GroupAlsoByWindowDoFnFactory<K, InputT, OutputT> fnFactory,
        WindowingStrategy<?, W> windowingStrategy,
        TupleTag<KV<K, OutputT>> outputTag,
        DoFnRunners.OutputManager outputManager) {
  final StepContext context = new TestStepContext(STEP_NAME);

  // The key is ignored: every lookup is answered by the test context's state internals.
  StateInternalsFactory<K> perKeyState = ignoredKey -> context.stateInternals();

  BatchGroupAlsoByWindowFn<K, InputT, OutputT> groupFn =
      fnFactory.forStrategy(windowingStrategy, perKeyState);

  return new GroupAlsoByWindowFnRunner<>(
      PipelineOptionsFactory.create(),
      groupFn,
      NullSideInputReader.empty(),
      outputManager,
      outputTag,
      context);
}
use of org.apache.beam.runners.core.StepContext in project beam by apache.
the class SplittableParDoProcessKeyedElementsOp method open.
/**
 * Prepares this operator for processing.
 *
 * <p>In order, this method: restores the pipeline options from the {@code "beamPipelineOptions"}
 * config entry, creates the state and timer internals factories, creates the splittable process
 * function and wires it up, and finally builds the {@code fnRunner} that drives that function.
 *
 * @param config configuration holding the serialized pipeline options
 * @param context context whose task context is used to reach the state stores
 * @param timerRegistry scheduler passed to the timer internals factory
 * @param emitter emitter wrapped by the output manager
 */
@Override
public void open(
    Config config,
    Context context,
    Scheduler<KeyedTimerData<byte[]>> timerRegistry,
    OpEmitter<RawUnionValue> emitter) {
  final SerializablePipelineOptions restoredOptions =
      Base64Serializer.deserializeUnchecked(
          config.get("beamPipelineOptions"), SerializablePipelineOptions.class);
  this.pipelineOptions = restoredOptions.get().as(SamzaPipelineOptions.class);

  final SamzaStoreStateInternals.Factory<?> nonKeyedStateFactory =
      SamzaStoreStateInternals.createNonKeyedStateInternalsFactory(
          transformId, context.getTaskContext(), pipelineOptions);
  final DoFnRunners.OutputManager outputs = outputManagerFactory.create(emitter);

  this.stateInternalsFactory =
      new SamzaStoreStateInternals.Factory<>(
          transformId,
          Collections.singletonMap(
              SamzaStoreStateInternals.BEAM_STORE,
              SamzaStoreStateInternals.getBeamStore(context.getTaskContext())),
          ByteArrayCoder.of(),
          pipelineOptions.getStoreBatchGetSize());
  this.timerInternalsFactory =
      SamzaTimerInternalsFactory.createTimerInternalFactory(
          ByteArrayCoder.of(),
          timerRegistry,
          TIMER_STATE_ID,
          nonKeyedStateFactory,
          windowingStrategy,
          isBounded,
          pipelineOptions);

  final KeyedInternals<byte[]> internals =
      new KeyedInternals<>(stateInternalsFactory, timerInternalsFactory);

  SplittableParDoViaKeyedWorkItems.ProcessFn<
          InputT, OutputT, RestrictionT, PositionT, WatermarkEstimatorStateT>
      fn = processElements.newProcessFn(processElements.getFn());
  DoFnInvokers.tryInvokeSetupFor(fn, pipelineOptions);
  fn.setStateInternalsFactory(stateInternalsFactory);
  fn.setTimerInternalsFactory(timerInternalsFactory);
  fn.setSideInputReader(NullSideInputReader.empty());

  // Routes everything the process function emits to the output manager; the main-output
  // overload simply delegates to the tagged overload with mainOutputTag.
  final OutputWindowedValue<OutputT> windowedOutput =
      new OutputWindowedValue<OutputT>() {
        @Override
        public void outputWindowedValue(
            OutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputWindowedValue(mainOutputTag, output, timestamp, windows, pane);
        }

        @Override
        public <AdditionalOutputT> void outputWindowedValue(
            TupleTag<AdditionalOutputT> tag,
            AdditionalOutputT output,
            Instant timestamp,
            Collection<? extends BoundedWindow> windows,
            PaneInfo pane) {
          outputs.output(tag, WindowedValue.of(output, timestamp, windows, pane));
        }
      };

  // NOTE(review): 10000 and the 10-second duration are presumably the output-count and time
  // bounds of a single invocation -- confirm against the invoker's constructor.
  // NOTE(review): the scheduled executor created here is not shut down anywhere in this
  // method -- verify that it is released when the operator closes.
  fn.setProcessElementInvoker(
      new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
          processElements.getFn(),
          pipelineOptions,
          windowedOutput,
          NullSideInputReader.empty(),
          Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
          10000,
          Duration.standardSeconds(10),
          () -> {
            throw new UnsupportedOperationException("BundleFinalizer unsupported in Samza");
          }));

  // Step context that answers state and timer lookups from the keyed internals.
  final StepContext keyedStepContext =
      new StepContext() {
        @Override
        public StateInternals stateInternals() {
          return internals.stateInternals();
        }

        @Override
        public TimerInternals timerInternals() {
          return internals.timerInternals();
        }
      };

  this.fnRunner =
      DoFnRunners.simpleRunner(
          pipelineOptions,
          fn,
          NullSideInputReader.of(Collections.emptyList()),
          outputs,
          mainOutputTag,
          Collections.emptyList(),
          keyedStepContext,
          null,
          Collections.emptyMap(),
          windowingStrategy,
          DoFnSchemaInformation.create(),
          Collections.emptyMap());
}
use of org.apache.beam.runners.core.StepContext in project beam by apache.
the class DoFnOperatorTest method testWatermarkUpdateAfterWatermarkHoldRelease.
/**
 * Checks the watermark holds reported by a keyed {@code DoFnOperator} as elements are processed
 * and an event-time timer fires.
 *
 * <p>The operator is subclassed so that (a) its wrapping runner adds a watermark hold and sets two
 * event-time timers for every element, and (b) each call to {@code emitWatermarkIfHoldChanged}
 * records the current minimum hold in {@code emittedWatermarkHolds}, which the assertions inspect.
 */
@Test
public void testWatermarkUpdateAfterWatermarkHoldRelease() throws Exception {
Coder<WindowedValue<KV<String, String>>> coder = WindowedValue.getValueOnlyCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()));
TupleTag<KV<String, String>> outputTag = new TupleTag<>("main-output");
// Holds recorded by the overridden emitWatermarkIfHoldChanged, in call order.
List<Long> emittedWatermarkHolds = new ArrayList<>();
KeySelector<WindowedValue<KV<String, String>>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue().getKey(), StringUtf8Coder.of());
DoFnOperator<KV<String, String>, KV<String, String>> doFnOperator = new DoFnOperator<KV<String, String>, KV<String, String>>(new IdentityDoFn<>(), "stepName", coder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), WindowingStrategy.globalDefault(), new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), StringUtf8Coder.of(), keySelector, DoFnSchemaInformation.create(), Collections.emptyMap()) {
// Wraps the real runner so that every element also touches a watermark hold and timers.
@Override
protected DoFnRunner<KV<String, String>, KV<String, String>> createWrappingDoFnRunner(DoFnRunner<KV<String, String>, KV<String, String>> wrappedRunner, StepContext stepContext) {
StateNamespace namespace = StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE);
StateTag<WatermarkHoldState> holdTag = StateTags.watermarkStateInternal("hold", TimestampCombiner.LATEST);
WatermarkHoldState holdState = stepContext.stateInternals().state(namespace, holdTag);
TimerInternals timerInternals = stepContext.timerInternals();
return new DoFnRunner<KV<String, String>, KV<String, String>>() {
@Override
public void startBundle() {
wrappedRunner.startBundle();
}
@Override
public void processElement(WindowedValue<KV<String, String>> elem) {
wrappedRunner.processElement(elem);
// Add a hold at the element timestamp, then set two event-time timers: "timer" at
// timestamp + 1ms and "cleanup" at the global window's max timestamp.
holdState.add(elem.getTimestamp());
timerInternals.setTimer(namespace, "timer", "family", elem.getTimestamp().plus(Duration.millis(1)), elem.getTimestamp().plus(Duration.millis(1)), TimeDomain.EVENT_TIME);
timerInternals.setTimer(namespace, "cleanup", "", GlobalWindow.INSTANCE.maxTimestamp(), GlobalWindow.INSTANCE.maxTimestamp(), TimeDomain.EVENT_TIME);
}
// Timers are handled here and are not forwarded to the wrapped runner: "cleanup" clears
// the hold, any other timer adds its output timestamp to the hold.
@Override
public <KeyT> void onTimer(String timerId, String timerFamilyId, KeyT key, BoundedWindow window, Instant timestamp, Instant outputTimestamp, TimeDomain timeDomain) {
if ("cleanup".equals(timerId)) {
holdState.clear();
} else {
holdState.add(outputTimestamp);
}
}
@Override
public void finishBundle() {
wrappedRunner.finishBundle();
}
@Override
public <KeyT> void onWindowExpiration(BoundedWindow window, Instant timestamp, KeyT key) {
wrappedRunner.onWindowExpiration(window, timestamp, key);
}
@Override
public DoFn<KV<String, String>, KV<String, String>> getFn() {
return doFn;
}
};
}
// Records the current minimum hold on every call; the currentWatermarkHold argument is ignored.
@Override
void emitWatermarkIfHoldChanged(long currentWatermarkHold) {
emittedWatermarkHolds.add(keyedStateInternals.minWatermarkHoldMs());
}
};
OneInputStreamOperatorTestHarness<WindowedValue<KV<String, String>>, WindowedValue<KV<String, String>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup();
Instant now = Instant.now();
testHarness.open();
// process first element, set hold to `now', setup timer for `now + 1'
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now)));
assertThat(emittedWatermarkHolds, is(equalTo(Collections.singletonList(now.getMillis()))));
// advance the watermark to `now + 2' so the `now + 1' timer fires; the recorded hold becomes `now + 1'
testHarness.processWatermark(now.getMillis() + 2);
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1))));
// process second element (timestamp `now + 2'), verify we emitted the changed hold
testHarness.processElement(new StreamRecord<>(WindowedValue.timestampedValueInGlobalWindow(KV.of("Key", "Hello"), now.plus(Duration.millis(2)))));
assertThat(emittedWatermarkHolds, is(equalTo(Arrays.asList(now.getMillis(), now.getMillis() + 1, now.getMillis() + 2))));
// advance the watermark past the global window's max timestamp, then to the maximum timestamp,
// before closing; presumably this fires the "cleanup" timer that clears the hold -- confirm
testHarness.processWatermark(GlobalWindow.INSTANCE.maxTimestamp().plus(Duration.millis(1)).getMillis());
testHarness.processWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis());
testHarness.close();
}
use of org.apache.beam.runners.core.StepContext in project beam by apache.
the class DoFnOperator method open.
/**
 * Opens the operator: obtains the DoFn, builds the {@code DoFnRunner} and its wrappers, sets up
 * metrics unless they are disabled, schedules the periodic finish-bundle check, and creates the
 * pushback runner and the bundle-finalization bookkeeping.
 *
 * <p>The order in which the runner is wrapped matters: the optional stable-input buffer goes
 * directly around the root runner, then {@code createWrappingDoFnRunner}, then the metrics
 * wrapper.
 *
 * @throws Exception declared by the overridden method
 */
@Override
public void open() throws Exception {
// WindowDoFnOperator needs state and timers to obtain its DoFn, so this has to wait until
// StateInternals and TimerInternals are ready.
// This is called after initializeState().
this.doFn = getDoFn();
FlinkPipelineOptions options = serializedOptions.get().as(FlinkPipelineOptions.class);
doFnInvoker = DoFnInvokers.tryInvokeSetupFor(doFn, options);
// The same step context is shared by the root runner and by createWrappingDoFnRunner below.
StepContext stepContext = new FlinkStepContext();
doFnRunner = DoFnRunners.simpleRunner(options, doFn, sideInputReader, outputManager, mainOutputTag, additionalOutputTags, stepContext, getInputCoder(), outputCoders, windowingStrategy, doFnSchemaInformation, sideInputMapping);
if (requiresStableInput) {
// put this in front of the root FnRunner before any additional wrappers
// (the buffering runner is also kept in its own field, bufferingDoFnRunner)
doFnRunner = bufferingDoFnRunner = BufferingDoFnRunner.create(doFnRunner, "stable-input-buffer", windowedInputCoder, windowingStrategy.getWindowFn().windowCoder(), getOperatorStateBackend(), getKeyedStateBackend(), options.getNumConcurrentCheckpoints(), serializedOptions);
}
doFnRunner = createWrappingDoFnRunner(doFnRunner, stepContext);
earlyBindStateIfNeeded();
if (!options.getDisableMetrics()) {
flinkMetricContainer = new FlinkMetricContainer(getRuntimeContext());
doFnRunner = new DoFnRunnerWithMetricsUpdate<>(stepName, doFnRunner, flinkMetricContainer);
// Checkpoint statistics are only created when a namespace for them is configured.
String checkpointMetricNamespace = options.getReportCheckpointDuration();
if (checkpointMetricNamespace != null) {
MetricName checkpointMetric = MetricName.named(checkpointMetricNamespace, "checkpoint_duration");
checkpointStats = new CheckpointStats(() -> flinkMetricContainer.getMetricsContainer(stepName).getDistribution(checkpointMetric));
}
}
// Reset the bundle bookkeeping: no elements seen yet, last finish time is "now".
elementCount = 0L;
lastFinishBundleTime = getProcessingTimeService().getCurrentProcessingTime();
// Schedule timer to check timeout of finish bundle.
// The check runs at half the maximum bundle time, but with a period of at least 1.
long bundleCheckPeriod = Math.max(maxBundleTimeMills / 2, 1);
checkFinishBundleTimer = getProcessingTimeService().scheduleAtFixedRate(timestamp -> checkInvokeFinishBundleByTime(), bundleCheckPeriod, bundleCheckPeriod);
// A splittable ProcessFn gets its own pushback runner; every other DoFn uses the simple
// pushback side-input runner.
if (doFn instanceof SplittableParDoViaKeyedWorkItems.ProcessFn) {
pushbackDoFnRunner = new ProcessFnRunner<>((DoFnRunner) doFnRunner, sideInputs, sideInputHandler);
} else {
pushbackDoFnRunner = SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
bundleFinalizer = new InMemoryBundleFinalizer();
pendingFinalizations = new LinkedHashMap<>();
}
use of org.apache.beam.runners.core.StepContext in project beam by apache.
the class PartialGroupByKeyParDoFns method create.
/**
 * Creates a {@link ParDoFn} from an optional serialized combine function, delegating to the
 * {@code create} overload that takes the already-resolved combine function, side input reader and
 * step context.
 *
 * <p>When {@code cloudUserFn} is present, the combine function is deserialized from its
 * {@code PropertyNames.SERIALIZED_FN} bytes, and the side input reader and step context are
 * obtained from {@code executionContext}. When it is {@code null}, the overload receives a
 * {@code null} combine function, an empty side input reader and a {@code null} step context.
 *
 * @param options pipeline options, passed through
 * @param inputElementCoder coder of the input key/value elements, passed through
 * @param cloudUserFn serialized combine function, or {@code null} when there is none
 * @param sideInputInfos side input descriptions, used only when {@code cloudUserFn} is present
 * @param receivers output receivers; only the first one is used
 * @param executionContext source of the side input reader and the step context
 * @param operationContext operation context handed to {@code executionContext}
 * @return the {@link ParDoFn} built by the delegate overload
 * @throws Exception if building the function fails
 */
public static <K, InputT, AccumT> ParDoFn create(
    PipelineOptions options,
    KvCoder<K, ?> inputElementCoder,
    @Nullable CloudObject cloudUserFn,
    @Nullable List<SideInputInfo> sideInputInfos,
    List<Receiver> receivers,
    DataflowExecutionContext<?> executionContext,
    DataflowOperationContext operationContext)
    throws Exception {
  final AppliedCombineFn<K, InputT, AccumT, ?> combiner;
  final SideInputReader sideInputs;
  final StepContext context;

  if (cloudUserFn != null) {
    final byte[] serializedFn = getBytes(cloudUserFn, PropertyNames.SERIALIZED_FN);
    // The payload carries no static type information, so the cast cannot be checked.
    @SuppressWarnings("unchecked")
    AppliedCombineFn<K, InputT, AccumT, ?> typedFn =
        (AppliedCombineFn<K, InputT, AccumT, ?>)
            SerializableUtils.deserializeFromByteArray(serializedFn, "serialized combine fn");
    combiner = typedFn;
    sideInputs =
        executionContext.getSideInputReader(
            sideInputInfos, combiner.getSideInputViews(), operationContext);
    context = executionContext.getStepContext(operationContext);
  } else {
    // No combine function: nothing to deserialize and no side inputs to read.
    combiner = null;
    sideInputs = NullSideInputReader.empty();
    context = null;
  }

  return create(options, inputElementCoder, combiner, sideInputs, receivers.get(0), context);
}
Aggregations