Search in sources :

Example 1 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class StatefulParDoEvaluatorFactoryTest method windowCleanupScheduled.

@Test
public void windowCleanupScheduled() throws Exception {
    // To test the factory, first we set up a pipeline and then we use the constructed
    // pipeline to create the right parameters to pass to the factory
    final String stateId = "my-state-id";
    // For consistency, window it into FixedWindows. Actually we will fabricate an input bundle.
    PCollection<KV<String, Integer>> input = pipeline.apply(Create.of(KV.of("hello", 1), KV.of("hello", 2))).apply(Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    TupleTag<Integer> mainOutput = new TupleTag<>();
    PCollection<Integer> produced = input.apply(new ParDoMultiOverrideFactory.GbkThenStatefulParDo<>(ParDo.of(new DoFn<KV<String, Integer>, Integer>() {

        @StateId(stateId)
        private final StateSpec<ValueState<String>> spec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void process(ProcessContext c) {
        }
    }).withOutputTags(mainOutput, TupleTagList.empty()))).get(mainOutput).setCoder(VarIntCoder.of());
    StatefulParDoEvaluatorFactory<String, Integer, Integer> factory = new StatefulParDoEvaluatorFactory(mockEvaluationContext);
    AppliedPTransform<PCollection<? extends KeyedWorkItem<String, KV<String, Integer>>>, PCollectionTuple, StatefulParDo<String, Integer, Integer>> producingTransform = (AppliedPTransform) DirectGraphs.getProducer(produced);
    // Then there will be a digging down to the step context to get the state internals
    when(mockEvaluationContext.getExecutionContext(eq(producingTransform), Mockito.<StructuralKey>any())).thenReturn(mockExecutionContext);
    when(mockExecutionContext.getStepContext(anyString())).thenReturn(mockStepContext);
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    StateNamespace firstWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(IntervalWindow.getCoder(), secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(stateId, StateSpecs.value(StringUtf8Coder.of()));
    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We musn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");
    // A single bundle with some elements in the global window; it should register cleanup for the
    // global window state merely by having the evaluator created. The cleanup logic does not
    // depend on the window.
    CommittedBundle<KV<String, Integer>> inputBundle = BUNDLE_FACTORY.createBundle(input).add(WindowedValue.of(KV.of("hello", 1), new Instant(3), firstWindow, PaneInfo.NO_FIRING)).add(WindowedValue.of(KV.of("hello", 2), new Instant(11), secondWindow, PaneInfo.NO_FIRING)).commit(Instant.now());
    // Merely creating the evaluator should suffice to register the cleanup callback
    factory.forApplication(producingTransform, inputBundle);
    ArgumentCaptor<Runnable> argumentCaptor = ArgumentCaptor.forClass(Runnable.class);
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(firstWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the first window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));
    verify(mockEvaluationContext).scheduleAfterWindowExpiration(eq(producingTransform), eq(secondWindow), Mockito.<WindowingStrategy<?, ?>>any(), argumentCaptor.capture());
    // Should actually clear the state for the second window
    argumentCaptor.getValue().run();
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.anyString(org.mockito.Matchers.anyString) StateSpec(org.apache.beam.sdk.state.StateSpec) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) StateNamespace(org.apache.beam.runners.core.StateNamespace) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) StatefulParDo(org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo) Test(org.junit.Test)

Example 2 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class DoFnOperator method fireTimer.

// allow overriding this in WindowDoFnOperator
public void fireTimer(InternalTimer<?, TimerData> timer) {
    TimerInternals.TimerData timerData = timer.getNamespace();
    StateNamespace namespace = timerData.getNamespace();
    // This is a user timer, so namespace must be WindowNamespace
    checkArgument(namespace instanceof WindowNamespace);
    BoundedWindow window = ((WindowNamespace) namespace).getWindow();
    pushbackDoFnRunner.onTimer(timerData.getTimerId(), window, timerData.getTimestamp(), timerData.getDomain());
}
Also used : TimerInternals(org.apache.beam.runners.core.TimerInternals) WindowNamespace(org.apache.beam.runners.core.StateNamespaces.WindowNamespace) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) StateNamespace(org.apache.beam.runners.core.StateNamespace)

Example 3 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class TriggerStateMachineTester method assertCleared.

/**
 * Asserts that the trigger has actually cleared all of its state for the given window. Since the
 * trigger under test is the root, this makes the assert for all triggers regardless of their
 * position in the trigger tree.
 */
public void assertCleared(W window) {
    for (StateNamespace untypedNamespace : stateInternals.getNamespacesInUse()) {
        if (untypedNamespace instanceof WindowAndTriggerNamespace) {
            @SuppressWarnings("unchecked") WindowAndTriggerNamespace<W> namespace = (WindowAndTriggerNamespace<W>) untypedNamespace;
            if (namespace.getWindow().equals(window)) {
                Set<?> tagsInUse = stateInternals.getTagsInUse(namespace);
                assertTrue("Trigger has not cleared tags: " + tagsInUse, tagsInUse.isEmpty());
            }
        }
    }
}
Also used : WindowAndTriggerNamespace(org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace) StateNamespace(org.apache.beam.runners.core.StateNamespace)

Example 4 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.

private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    StringUtf8Coder keyCoder = StringUtf8Coder.of();
    WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
    @SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
    KV<String, String> timerInputKey = KV.of("transformId", "timerId");
    AtomicBoolean timerInputReceived = new AtomicBoolean();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
    IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
    WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
    FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
    FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
    doAnswer((invocation) -> {
        timerInputReceived.set(true);
        return null;
    }).when(timerReceiver).accept(any());
    when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
    when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    testHarness.open();
    Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
    stateBackendLock.lock();
    KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
    ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
    keyedStateBackend.setCurrentKey(key);
    DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
    Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
    Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
    Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
    Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
    // create some state which can be cleaned up
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
    // State from the SDK Harness is stored as ByteStrings
    BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
    state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    // user timer that fires after the end of the window and after state cleanup
    TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    timerInternals.setTimer(userTimer);
    // start of bundle
    testHarness.processElement(new StreamRecord<>(windowedValue));
    verify(receiver).accept(windowedValue);
    // move watermark past user timer while bundle is in progress
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
    // Output watermark is held back and timers do not yet fire (they can still be changed!)
    assertThat(timerInputReceived.get(), is(false));
    assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
    // The timer fires on bundle finish
    operator.invokeFinishBundle();
    assertThat(timerInputReceived.getAndSet(false), is(true));
    // Move watermark past the cleanup timer
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
    operator.invokeFinishBundle();
    // Cleanup timer has fired and cleanup queue is prepared for bundle finish
    assertThat(testHarness.numEventTimeTimers(), is(0));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup timer are rescheduled if a new timer is created during the bundle
    TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
    assertThat(testHarness.numEventTimeTimers(), is(1));
    if (withCheckpointing) {
        // Upon checkpointing, the bundle will be finished.
        testHarness.snapshot(0, 0);
    } else {
        operator.invokeFinishBundle();
    }
    // Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
    // for the window.
    assertThat(cleanupQueue, hasSize(0));
    verifyNoMoreInteractions(receiver);
    assertThat(testHarness.numKeyedStateEntries(), is(2));
    assertThat(testHarness.numEventTimeTimers(), is(2));
    // No timer has been fired but bundle should be ended
    assertThat(timerInputReceived.get(), is(false));
    assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
    // Allow user timer and cleanup timer to fire by triggering watermark advancement
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    assertThat(timerInputReceived.getAndSet(false), is(true));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup will be executed after the bundle is complete because there are no more pending
    // timers for the window
    operator.invokeFinishBundle();
    assertThat(cleanupQueue, hasSize(0));
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    testHarness.close();
    verifyNoMoreInteractions(receiver);
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Timer(org.apache.beam.runners.core.construction.Timer) MutableObject(org.apache.beam.repackaged.core.org.apache.commons.lang3.mutable.MutableObject) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 5 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method addTimer.

private void addTimer(WorkItem.Builder workItem, IntervalWindow window, Instant timestamp, Windmill.Timer.Type type) {
    StateNamespace namespace = StateNamespaces.window(windowCoder, window);
    workItem.getTimersBuilder().addTimersBuilder().setTag(WindmillTimerInternals.timerTag(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX, TimerData.of(namespace, timestamp, timestamp, type == Windmill.Timer.Type.WATERMARK ? TimeDomain.EVENT_TIME : TimeDomain.PROCESSING_TIME))).setTimestamp(WindmillTimeUtils.harnessToWindmillTimestamp(timestamp)).setType(type).setStateFamily(STATE_FAMILY);
}
Also used : StateNamespace(org.apache.beam.runners.core.StateNamespace)

Aggregations

StateNamespace (org.apache.beam.runners.core.StateNamespace)43 Test (org.junit.Test)30 Instant (org.joda.time.Instant)20 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)15 TimerInternals (org.apache.beam.runners.core.TimerInternals)15 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)13 SamzaPipelineOptions (org.apache.beam.runners.samza.SamzaPipelineOptions)11 ByteArray (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray)11 StateValue (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 BagState (org.apache.beam.sdk.state.BagState)10 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)5 ValueState (org.apache.beam.sdk.state.ValueState)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 ByteBuffer (java.nio.ByteBuffer)3 StateNamespaces (org.apache.beam.runners.core.StateNamespaces)3 WindowNamespace (org.apache.beam.runners.core.StateNamespaces.WindowNamespace)3 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)3 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)3 Coder (org.apache.beam.sdk.coders.Coder)3