Use of org.apache.beam.runners.fnexecution.control.RemoteBundle in the Apache Beam project.
Class ExecutableStageDoFnOperatorTest, method testEnsureDeferredStateCleanupTimerFiring.
/**
 * Drives an {@code ExecutableStageDoFnOperator} through a keyed test harness with a mocked
 * {@code RemoteBundle} and checks that cleanup of window state is deferred: the cleanup entry is
 * queued when the cleanup timer fires, and the state is only removed once a bundle finishes with
 * no further timers pending for the window.
 *
 * <p>The assertions walk through these phases: one bag state entry and one user timer are
 * created; the user timer is held back while a bundle is in progress and fires on bundle finish;
 * after the watermark passes the cleanup timer the cleanup queue holds one entry while the state
 * is still present; a timer set afterwards causes the queue to be drained without removing the
 * state; finally, after the remaining timers fire and the bundle finishes, no keyed state
 * entries are left.
 *
 * @param withCheckpointing if {@code true}, the in-flight bundle is finished by taking a
 *     snapshot through the test harness; otherwise {@code operator.invokeFinishBundle()} is
 *     called directly
 * @throws Exception if the harness, the operator, or one of the mocked receivers fails
 */
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
// Every getBundle(...) call on stageBundleFactory is stubbed to hand back the same mocked bundle.
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
KV<String, String> timerInputKey = KV.of("transformId", "timerId");
AtomicBoolean timerInputReceived = new AtomicBoolean();
// The single window [0, 1000) used for the element, the state namespace and the timers.
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
// Record in timerInputReceived whenever the mocked timer receiver is handed a timer.
doAnswer((invocation) -> {
timerInputReceived.set(true);
return null;
}).when(timerReceiver).accept(any());
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
// Take the operator's state backend lock, read reflectively from its private field. The lock is
// not released anywhere in this method.
// NOTE(review): presumably needed for touching keyed state and timers directly below - confirm.
Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
stateBackendLock.lock();
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
keyedStateBackend.setCurrentKey(key);
// Reach through private fields by reflection to obtain the timer internals and the state
// cleaner's cleanup queue, whose size is asserted on throughout the test.
DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
assertThat(testHarness.numKeyedStateEntries(), is(1));
// user timer that fires after the end of the window and after state cleanup
TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
timerInternals.setTimer(userTimer);
// start of bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
verify(receiver).accept(windowedValue);
// move watermark past user timer while bundle is in progress
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
// Output watermark is held back and timers do not yet fire (they can still be changed!)
assertThat(timerInputReceived.get(), is(false));
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// The timer fires on bundle finish
operator.invokeFinishBundle();
// getAndSet(false) both checks the flag and resets it for the next phase.
assertThat(timerInputReceived.getAndSet(false), is(true));
// Move watermark past the cleanup timer
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
operator.invokeFinishBundle();
// Cleanup timer has fired and cleanup queue is prepared for bundle finish
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(testHarness.numKeyedStateEntries(), is(1));
assertThat(cleanupQueue, hasSize(1));
// Cleanup timers are rescheduled if a new timer is created during the bundle
TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
assertThat(testHarness.numEventTimeTimers(), is(1));
if (withCheckpointing) {
// Upon checkpointing, the bundle will be finished.
testHarness.snapshot(0, 0);
} else {
operator.invokeFinishBundle();
}
// Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
// for the window.
assertThat(cleanupQueue, hasSize(0));
verifyNoMoreInteractions(receiver);
assertThat(testHarness.numKeyedStateEntries(), is(2));
assertThat(testHarness.numEventTimeTimers(), is(2));
// No timer has been fired but bundle should be ended
assertThat(timerInputReceived.get(), is(false));
assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
// Allow user timer and cleanup timer to fire by triggering watermark advancement
// NOTE(review): the call below advances the harness processing time by one unit rather than the
// watermark - confirm which mechanism actually causes the timers to fire here.
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(timerInputReceived.getAndSet(false), is(true));
assertThat(cleanupQueue, hasSize(1));
// Cleanup will be executed after the bundle is complete because there are no more pending
// timers for the window
operator.invokeFinishBundle();
assertThat(cleanupQueue, hasSize(0));
assertThat(testHarness.numKeyedStateEntries(), is(0));
testHarness.close();
verifyNoMoreInteractions(receiver);
}
Use of org.apache.beam.runners.fnexecution.control.RemoteBundle in the Apache Beam project.
Class ExecutableStageDoFnOperatorTest, method outputsAreTaggedCorrectly.
/**
 * Checks that values handed to the output receiver factory are routed by output id: the value
 * sent to the main output id must appear in the harness main output, and the values sent to the
 * two additional output ids must appear in the corresponding side outputs.
 */
@Test
public void outputsAreTaggedCorrectly() throws Exception {
  TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
  TupleTag<Integer> additionalOutput1 = new TupleTag<>("output-1");
  TupleTag<Integer> additionalOutput2 = new TupleTag<>("output-2");
  WindowedValue.ValueOnlyWindowedValueCoder<Integer> valueCoder =
      WindowedValue.getValueOnlyCoder(VarIntCoder.of());

  // Only the additional outputs get an OutputTag; each one reuses the id of its TupleTag.
  ImmutableMap<TupleTag<?>, OutputTag<?>> tagsToOutputTags =
      ImmutableMap.<TupleTag<?>, OutputTag<?>>builder()
          .put(additionalOutput1, new OutputTag<WindowedValue<String>>(additionalOutput1.getId()) {})
          .put(additionalOutput2, new OutputTag<WindowedValue<String>>(additionalOutput2.getId()) {})
          .build();
  // The raw (Coder) cast on the first entry is deliberate: the remaining chain relies on it.
  ImmutableMap<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders =
      ImmutableMap.<TupleTag<?>, Coder<WindowedValue<?>>>builder()
          .put(mainOutput, (Coder) valueCoder)
          .put(additionalOutput1, valueCoder)
          .put(additionalOutput2, valueCoder)
          .build();
  ImmutableMap<TupleTag<?>, Integer> tagsToIds =
      ImmutableMap.<TupleTag<?>, Integer>builder()
          .put(mainOutput, 0)
          .put(additionalOutput1, 1)
          .put(additionalOutput2, 2)
          .build();
  DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
      new DoFnOperator.MultiOutputOutputManagerFactory(
          mainOutput,
          tagsToOutputTags,
          tagsToCoders,
          tagsToIds,
          new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));

  // One input element plus one distinct value per output, all in the global window.
  WindowedValue<Integer> inputElement = WindowedValue.valueInGlobalWindow(0);
  WindowedValue<Integer> mainValue = WindowedValue.valueInGlobalWindow(3);
  WindowedValue<Integer> firstSideValue = WindowedValue.valueInGlobalWindow(4);
  WindowedValue<Integer> secondSideValue = WindowedValue.valueInGlobalWindow(5);

  // We use a real StageBundleFactory here in order to exercise the output receiver factory.
  StageBundleFactory emittingBundleFactory =
      new StageBundleFactory() {
        // Shared by every bundle this factory creates so the values are emitted only once.
        private boolean onceEmitted;

        @Override
        public RemoteBundle getBundle(OutputReceiverFactory receiverFactory, TimerReceiverFactory timerReceiverFactory, StateRequestHandler stateRequestHandler, BundleProgressHandler progressHandler, BundleFinalizationHandler finalizationHandler, BundleCheckpointHandler checkpointHandler) {
          return new RemoteBundle() {
            @Override
            public String getId() {
              return "bundle-id";
            }

            @Override
            public Map<String, FnDataReceiver> getInputReceivers() {
              return ImmutableMap.of("input", input -> {
                /* Ignore input*/
              });
            }

            @Override
            public Map<KV<String, String>, FnDataReceiver<Timer>> getTimerReceivers() {
              return Collections.emptyMap();
            }

            @Override
            public void requestProgress() {
              throw new UnsupportedOperationException();
            }

            @Override
            public void split(double fractionOfRemainder) {
              throw new UnsupportedOperationException();
            }

            @Override
            public void close() throws Exception {
              if (!onceEmitted) {
                // Emit all values to the runner when the bundle is closed.
                receiverFactory.create(mainOutput.getId()).accept(mainValue);
                receiverFactory.create(additionalOutput1.getId()).accept(firstSideValue);
                receiverFactory.create(additionalOutput2.getId()).accept(secondSideValue);
                onceEmitted = true;
              }
            }
          };
        }

        @Override
        public ProcessBundleDescriptors.ExecutableProcessBundleDescriptor getProcessBundleDescriptor() {
          return processBundleDescriptor;
        }

        @Override
        public InstructionRequestHandler getInstructionRequestHandler() {
          return null;
        }

        @Override
        public void close() {
        }
      };

  // Wire the stage bundle factory into our context.
  when(stageContext.getStageBundleFactory(any())).thenReturn(emittingBundleFactory);
  ExecutableStageDoFnOperator<Integer, Integer> operator =
      getOperator(mainOutput, ImmutableList.of(additionalOutput1, additionalOutput2), outputManagerFactory);
  OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<Integer>> testHarness =
      new OneInputStreamOperatorTestHarness<>(operator);

  // The starting watermark is read before the harness is opened.
  long firstWatermark = testHarness.getCurrentWatermark() + 1;
  long secondWatermark = firstWatermark + 1;
  testHarness.open();
  testHarness.processElement(new StreamRecord<>(inputElement));
  testHarness.processWatermark(firstWatermark);
  testHarness.processWatermark(secondWatermark);
  assertEquals(secondWatermark, testHarness.getCurrentWatermark());
  // watermark hold until bundle complete
  assertEquals(0, testHarness.getOutput().size());

  // triggers finish bundle
  testHarness.close();
  assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(mainValue));
  assertThat(
      testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput1)),
      contains(new StreamRecord<>(firstSideValue)));
  assertThat(
      testHarness.getSideOutput(tagsToOutputTags.get(additionalOutput2)),
      contains(new StreamRecord<>(secondSideValue)));
}
Use of org.apache.beam.runners.fnexecution.control.RemoteBundle in the Apache Beam project.
Class FlinkExecutableStageFunction, method mapPartition.
/**
 * For non-stateful processing via a simple MapPartitionFunction.
 *
 * <p>All input elements are processed in a single bundle. When SDF timer internals are present,
 * both processing-time clocks are first advanced to the current time, and after the main bundle
 * they are advanced to {@code BoundedWindow.TIMESTAMP_MAX_VALUE}. Pending timers are then
 * drained in follow-up bundles: for each removed timer the value stored in the SDF state under
 * the timer's namespace and id is read and fed back in as an input element.
 *
 * @param iterable the windowed input elements of this partition
 * @param collector receives the outputs produced through the receiver factory
 * @throws Exception if obtaining, feeding or closing a bundle fails
 */
@Override
public void mapPartition(Iterable<WindowedValue<InputT>> iterable, Collector<RawUnionValue> collector) throws Exception {
  ReceiverFactory receiverFactory = new ReceiverFactory(collector, outputMap);
  // Guard on the object that is actually dereferenced here, matching the check used for the
  // timer-draining phase further down.
  if (sdfTimerInternals != null) {
    // Read the clock once so both processing-time clocks start from the same instant.
    Instant now = Instant.now();
    sdfTimerInternals.advanceProcessingTime(now);
    sdfTimerInternals.advanceSynchronizedProcessingTime(now);
  }
  try (RemoteBundle bundle = stageBundleFactory.getBundle(receiverFactory, stateRequestHandler, progressHandler, finalizationHandler, bundleCheckpointHandler)) {
    processElements(iterable, bundle);
  }
  if (sdfTimerInternals != null) {
    // Finally, advance the processing time to infinity to fire any timers.
    sdfTimerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    sdfTimerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
    // Now we fire the SDF timers and process elements generated by timers.
    while (sdfTimerInternals.hasPendingTimers()) {
      try (RemoteBundle bundle = stageBundleFactory.getBundle(receiverFactory, stateRequestHandler, progressHandler, finalizationHandler, bundleCheckpointHandler)) {
        List<WindowedValue<InputT>> residuals = new ArrayList<>();
        TimerInternals.TimerData timer;
        while ((timer = sdfTimerInternals.removeNextProcessingTimer()) != null) {
          // The residual element is looked up by the timer's namespace and id.
          WindowedValue stateValue = sdfStateInternals.state(timer.getNamespace(), StateTags.value(timer.getTimerId(), inputCoder)).read();
          residuals.add(stateValue);
        }
        processElements(residuals, bundle);
      }
    }
  }
}
Use of org.apache.beam.runners.fnexecution.control.RemoteBundle in the Apache Beam project.
Class FlinkExecutableStageFunction, method reduce.
/**
 * For stateful and timer processing via a GroupReduceFunction.
 *
 * <p>Each call creates its own in-memory timer internals. The input elements are processed in
 * one bundle, during which timers reported through the timer receiver factory are set or
 * deleted. Afterwards the input watermark and both processing-time clocks are advanced to
 * {@code BoundedWindow.TIMESTAMP_MAX_VALUE}, and eligible timers are fired in fresh bundles
 * until none are pending.
 *
 * @param iterable the windowed input elements handed to this call
 * @param collector receives the outputs produced through the receiver factory
 * @throws Exception if obtaining, feeding or closing a bundle fails
 */
@Override
public void reduce(Iterable<WindowedValue<InputT>> iterable, Collector<RawUnionValue> collector) throws Exception {
  // Need to discard the old key's state
  if (bagUserStateHandlerFactory != null) {
    bagUserStateHandlerFactory.resetForNewKey();
  }
  ReceiverFactory outputs = new ReceiverFactory(collector, outputMap);

  // Used with Batch, we know that all the data is available for this key. We can't use the
  // timer manager from the context because it doesn't exist. So we create one and advance
  // time to the end after processing all elements.
  final InMemoryTimerInternals keyTimers = new InMemoryTimerInternals();
  keyTimers.advanceProcessingTime(Instant.now());
  keyTimers.advanceSynchronizedProcessingTime(Instant.now());

  TimerReceiverFactory timerOutputs =
      new TimerReceiverFactory(
          stageBundleFactory,
          (Timer<?> incoming, TimerInternals.TimerData data) -> {
            // Remember the key of the most recently reported timer for the firing phase.
            currentTimerKey = incoming.getUserKey();
            if (!incoming.getClearBit()) {
              keyTimers.setTimer(data);
            } else {
              keyTimers.deleteTimer(data);
            }
          },
          windowCoder);

  // First process all elements and make sure no more elements can arrive
  try (RemoteBundle elementBundle = stageBundleFactory.getBundle(outputs, timerOutputs, stateRequestHandler, progressHandler)) {
    processElements(iterable, elementBundle);
  }

  // Finish any pending windows by advancing the input watermark to infinity.
  keyTimers.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
  // Finally, advance the processing time to infinity to fire any timers.
  keyTimers.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
  keyTimers.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);

  // Now we fire the timers and process elements generated by timers (which may be timers itself)
  while (keyTimers.hasPendingTimers()) {
    try (RemoteBundle timerBundle = stageBundleFactory.getBundle(outputs, timerOutputs, stateRequestHandler, progressHandler)) {
      PipelineTranslatorUtils.fireEligibleTimers(keyTimers, timerBundle.getTimerReceivers(), currentTimerKey);
    }
  }
}
Use of org.apache.beam.runners.fnexecution.control.RemoteBundle in the Apache Beam project.
Class FlinkExecutableStageFunctionTest, method sdkErrorsSurfaceOnClose.
/**
 * Checks that an exception thrown when the bundle is closed is propagated out of
 * {@code mapPartition} as the very same exception instance.
 */
@Test
public void sdkErrorsSurfaceOnClose() throws Exception {
  FlinkExecutableStageFunction<Integer> stageFunction = getFunction(Collections.emptyMap());
  stageFunction.open(new Configuration());

  // A mocked bundle with a single input receiver whose close() fails.
  @SuppressWarnings("unchecked") RemoteBundle failingBundle = Mockito.mock(RemoteBundle.class);
  @SuppressWarnings("unchecked") FnDataReceiver<WindowedValue<?>> inputReceiver = Mockito.mock(FnDataReceiver.class);
  Exception closeFailure = new Exception();
  when(failingBundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", inputReceiver));
  doThrow(closeFailure).when(failingBundle).close();

  // Hand the failing bundle out for every matching getBundle(...) call.
  when(stageBundleFactory.getBundle(
          any(),
          any(StateRequestHandler.class),
          any(BundleProgressHandler.class),
          any(BundleFinalizationHandler.class),
          any(BundleCheckpointHandler.class)))
      .thenReturn(failingBundle);

  // The expectation must be registered before the call that is supposed to throw.
  thrown.expect(is(closeFailure));
  stageFunction.mapPartition(Collections.emptyList(), collector);
}
Aggregations