use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.
@Test
public void windowCleanupScheduled() throws Exception {
// To test the factory, first we set up a pipeline and then we use the constructed
// pipeline to create the right parameters to pass to the factory
final String stateId = "my-state-id";
// For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
TupleTag<Integer> mainOutput = new TupleTag<>();
PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {
@StateId(stateId)
private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void process(ProcessContext c) {
}
}).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory(mockEvaluationContext);
AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
// Then there will be a digging down to the step context to get the state internals
when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
// Set up non-empty state. We don't mock + verify calls to clear() but instead
// check that state is actually empty. We musn't care how it is accomplished.
stateInternals.state(firstWindowNamespace, tag).write("first");
stateInternals.state(secondWindowNamespace, tag).write("second");
// A single bundle with some elements in the global window; it should register cleanup for the
// global window state merely by having the evaluator created. The cleanup logic does not
// depend on the window.
CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
// Merely creating the evaluator should suffice to register the cleanup callback
factory.forApplication(producingTransform, inputBundle);
ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
// Should actually clear the state for the first window
argumentCaptor.getValue().run();
assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
// Should actually clear the state for the second window
argumentCaptor.getValue().run();
assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class DoFnOperator method fireTimer.
// allow overriding this in WindowDoFnOperator
public void fireTimer(InternalTimer<?, TimerData> timer) {
TimerInternals.TimerData timerData = timer.getNamespace();
StateNamespace namespace = timerData.getNamespace();
// This is a user timer, so namespace must be WindowNamespace
checkArgument(namespace instanceof WindowNamespace);
BoundedWindow window = ((WindowNamespace) namespace).getWindow();
pushbackDoFnRunner.onTimer(timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain());
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class TriggerStateMachineTester method assertCleared.
/**
* Asserts that the trigger has actually cleared all of its state for the given window. Since the
* trigger under test is the root, this makes the assert for all triggers regardless of their
* position in the trigger tree.
*/
public void assertCleared(W window) {
for (StateNamespace untypedNamespace : stateInternals.getNamespacesInUse()) {
if (untypedNamespace instanceof WindowAndTriggerNamespace) {
@SuppressWarnings("unchecked") WindowAndTriggerNamespace<W> namespace = (WindowAndTriggerNamespace<W>) untypedNamespace;
if (namespace.getWindow().equals(window)) {
Set<?> tagsInUse = stateInternals.getTagsInUse(namespace);
assertTrue("Trigger has not cleared tags: " + tagsInUse, tagsInUse.isEmpty());
}
}
}
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
StringUtf8Coder keyCoder = StringUtf8Coder.of();
WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
@SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
KV<String, String> timerInputKey = KV.of("transformId", "timerId");
AtomicBoolean timerInputReceived = new AtomicBoolean();
IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
doAnswer((invocation) -> {
timerInputReceived.set(true);
return null;
}).when(timerReceiver).accept(any());
when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.open();
Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
stateBackendLock.lock();
KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
keyedStateBackend.setCurrentKey(key);
DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
// create some state which can be cleaned up
assertThat(testHarness.numKeyedStateEntries(), is(0));
StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
// State from the SDK Harness is stored as ByteStrings
BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
assertThat(testHarness.numKeyedStateEntries(), is(1));
// user timer that fires after the end of the window and after state cleanup
TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
timerInternals.setTimer(userTimer);
// start of bundle
testHarness.processElement(new StreamRecord<>(windowedValue));
verify(receiver).accept(windowedValue);
// move watermark past user timer while bundle is in progress
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
// Output watermark is held back and timers do not yet fire (they can still be changed!)
assertThat(timerInputReceived.get(), is(false));
assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
// The timer fires on bundle finish
operator.invokeFinishBundle();
assertThat(timerInputReceived.getAndSet(false), is(true));
// Move watermark past the cleanup timer
testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
operator.invokeFinishBundle();
// Cleanup timer has fired and cleanup queue is prepared for bundle finish
assertThat(testHarness.numEventTimeTimers(), is(0));
assertThat(testHarness.numKeyedStateEntries(), is(1));
assertThat(cleanupQueue, hasSize(1));
// Cleanup timer are rescheduled if a new timer is created during the bundle
TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
assertThat(testHarness.numEventTimeTimers(), is(1));
if (withCheckpointing) {
// Upon checkpointing, the bundle will be finished.
testHarness.snapshot(0, 0);
} else {
operator.invokeFinishBundle();
}
// Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
// for the window.
assertThat(cleanupQueue, hasSize(0));
verifyNoMoreInteractions(receiver);
assertThat(testHarness.numKeyedStateEntries(), is(2));
assertThat(testHarness.numEventTimeTimers(), is(2));
// No timer has been fired but bundle should be ended
assertThat(timerInputReceived.get(), is(false));
assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
// Allow user timer and cleanup timer to fire by triggering watermark advancement
testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
assertThat(timerInputReceived.getAndSet(false), is(true));
assertThat(cleanupQueue, hasSize(1));
// Cleanup will be executed after the bundle is complete because there are no more pending
// timers for the window
operator.invokeFinishBundle();
assertThat(cleanupQueue, hasSize(0));
assertThat(testHarness.numKeyedStateEntries(), is(0));
testHarness.close();
verifyNoMoreInteractions(receiver);
}
use of org.apache.beam.runners.core.StateNamespace in project beam by apache.
the class StreamingGroupAlsoByWindowFnsTest method addTimer.
private void addTimer(WorkItem.Builder workItem, IntervalWindow window, Instant timestamp, Windmill.Timer.Type type) {
StateNamespace namespace = StateNamespaces.window(windowCoder, window);
workItem.getTimersBuilder().addTimersBuilder().setTag(WindmillTimerInternals.timerTag(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX, TimerData.of(namespace, timestamp, timestamp, type == Windmill.Timer.Type.WATERMARK ? TimeDomain.EVENT_TIME : TimeDomain.PROCESSING_TIME))).setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp)).setType(type).setStateFamily(STATE_FAMILY);
}
Aggregations