Search in sources :

Example 36 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureStateCleanupOnFinalWatermark.

/**
 * Verifies that keyed user state written under the global window namespace is gone after the
 * final watermark has been processed, even though no event-time timer was registered for it.
 */
@Test
public void testEnsureStateCleanupOnFinalWatermark() throws Exception {
    // Operator wiring: a single main output and String keys in the global window.
    TupleTag<Integer> mainOutputTag = new TupleTag<>("main-output");
    SerializablePipelineOptions serializedOptions = new SerializablePipelineOptions(FlinkPipelineOptions.defaults());
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutputTag, VoidCoder.of(), serializedOptions);
    StringUtf8Coder keyCoder = StringUtf8Coder.of();
    WindowingStrategy windowingStrategy = WindowingStrategy.globalDefault();
    Coder<BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();
    KvCoder<String, Integer> inputKvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutputTag, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(inputKvCoder, windowCoder));
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> harness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));

    // The stage bundle factory hands out a mocked bundle with one mocked "input" receiver.
    RemoteBundle remoteBundle = Mockito.mock(RemoteBundle.class);
    when(remoteBundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(remoteBundle);

    harness.open();

    // Select the key under which the state will be written.
    KeyedStateBackend<ByteBuffer> stateBackend = operator.getKeyedStateBackend();
    ByteBuffer encodedKey = FlinkKeyUtils.encodeKey("key1", keyCoder);
    stateBackend.setCurrentKey(encodedKey);

    // Nothing is stored before the test writes its own state.
    assertThat(harness.numKeyedStateEntries(), is(0));

    // Create some state which can be cleaned up.
    // State from the SDK Harness is stored as ByteStrings.
    StateNamespace globalWindowNamespace = StateNamespaces.window(windowCoder, GlobalWindow.INSTANCE);
    BagState<ByteString> userState = operator.keyedStateInternals.state(globalWindowNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
    ByteString userStateValue = ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8));
    userState.add(userStateValue);

    // No timers have been set for cleanup ...
    assertThat(harness.numEventTimeTimers(), is(0));
    // ... but the state entry exists.
    assertThat(harness.numKeyedStateEntries(), is(1));

    // Generate the final watermark (one millisecond past TIMESTAMP_MAX_VALUE) to trigger cleanup.
    long finalWatermarkMillis = BoundedWindow.TIMESTAMP_MAX_VALUE.plus(Duration.millis(1)).getMillis();
    harness.processWatermark(new Watermark(finalWatermarkMillis));

    assertThat(harness.numKeyedStateEntries(), is(0));
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Watermark(org.apache.flink.streaming.api.watermark.Watermark) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 37 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class UserParDoFnFactoryTest method testCleanupWorks.

/**
 * Verifies that processing a fired {@code SimpleParDoFn.CLEANUP_TIMER_ID} timer for a window
 * clears the value state stored under that window's namespace and leaves the other window's
 * state untouched until its own cleanup timer fires.
 */
@Test
public void testCleanupWorks() throws Exception {
    PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
    CounterSet counterSet = new CounterSet();
    DoFn<?, ?> statefulFn = new TestStatefulDoFn();
    CloudObject cloudObject = getCloudObject(statefulFn, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));
    StateInternals stateInternals = InMemoryStateInternals.forKey("dummy");

    // The overarching step context that only ParDoFn gets
    DataflowStepContext stepContext = mock(DataflowStepContext.class);
    // The user step context that the DoFnRunner gets a handle on
    DataflowStepContext userStepContext = mock(DataflowStepContext.class);
    when(stepContext.namespacedToUser()).thenReturn(userStepContext);
    when(stepContext.stateInternals()).thenReturn(stateInternals);
    when(userStepContext.stateInternals()).thenReturn((StateInternals) stateInternals);

    DataflowExecutionContext<DataflowStepContext> executionContext = mock(DataflowExecutionContext.class);
    TestOperationContext operationContext = TestOperationContext.create(counterSet);
    when(executionContext.getStepContext(operationContext)).thenReturn(stepContext);
    when(executionContext.getSideInputReader(any(), any(), any())).thenReturn(NullSideInputReader.empty());

    ParDoFn parDoFn = factory.create(pipelineOptions, cloudObject, Collections.emptyList(), MAIN_OUTPUT, ImmutableMap.of(MAIN_OUTPUT, 0), executionContext, operationContext);
    Receiver outputReceiver = new OutputReceiver();
    parDoFn.startBundle(outputReceiver);

    // Two adjacent 10 ms windows, each with its own state namespace.
    IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(9));
    IntervalWindow secondWindow = new IntervalWindow(new Instant(10), new Instant(19));
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    StateNamespace firstWindowNamespace = StateNamespaces.window(windowCoder, firstWindow);
    StateNamespace secondWindowNamespace = StateNamespaces.window(windowCoder, secondWindow);
    StateTag<ValueState<String>> tag = StateTags.tagForSpec(TestStatefulDoFn.STATE_ID, StateSpecs.value(StringUtf8Coder.of()));

    // Set up non-empty state. We don't mock + verify calls to clear() but instead
    // check that state is actually empty. We mustn't care how it is accomplished.
    stateInternals.state(firstWindowNamespace, tag).write("first");
    stateInternals.state(secondWindowNamespace, tag).write("second");

    // The user context never reports a fired timer; the step context reports the first
    // window's cleanup timer once and then nothing.
    Instant firstCleanupTime = firstWindow.maxTimestamp().plus(Duration.millis(1L));
    TimerData firstCleanupTimer = TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, firstWindowNamespace, firstCleanupTime, firstCleanupTime, TimeDomain.EVENT_TIME);
    when(userStepContext.getNextFiredTimer(windowCoder)).thenReturn(null);
    when(stepContext.getNextFiredTimer(windowCoder)).thenReturn(firstCleanupTimer).thenReturn(null);

    // This should fire the timer to clean up the first window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), equalTo("second"));

    // Re-stub the step context so that the second window's cleanup timer is reported next.
    Instant secondCleanupTime = secondWindow.maxTimestamp().plus(Duration.millis(1L));
    TimerData secondCleanupTimer = TimerData.of(SimpleParDoFn.CLEANUP_TIMER_ID, secondWindowNamespace, secondCleanupTime, secondCleanupTime, TimeDomain.EVENT_TIME);
    when(stepContext.getNextFiredTimer((Coder) windowCoder)).thenReturn(secondCleanupTimer).thenReturn(null);

    // And this should clean up the second window
    parDoFn.processTimers();
    assertThat(stateInternals.state(firstWindowNamespace, tag).read(), nullValue());
    assertThat(stateInternals.state(secondWindowNamespace, tag).read(), nullValue());
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Instant(org.joda.time.Instant) Receiver(org.apache.beam.runners.dataflow.worker.util.common.worker.Receiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext) StateNamespace(org.apache.beam.runners.core.StateNamespace) ValueState(org.apache.beam.sdk.state.ValueState) CounterSet(org.apache.beam.runners.dataflow.worker.counters.CounterSet) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) StateInternals(org.apache.beam.runners.core.StateInternals) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 38 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class WindmillTimerInternalsTest method testTimerDataToFromTimer.

/**
 * Converts timers to Windmill timers and back for every combination of state family,
 * namespace/coder pair, time domain, namespace prefix and timestamp from the test fixtures, and
 * expects each conversion to reproduce the original {@code TimerData}.
 *
 * <p>Both timers without an explicit id and timers built from each test timer id (with and
 * without the "family" timer family) are covered, each once with an output timestamp equal to the
 * firing timestamp and once with an output timestamp one millisecond earlier.
 */
@Test
public void testTimerDataToFromTimer() {
    for (String stateFamily : TEST_STATE_FAMILIES) {
        for (KV<Coder<? extends BoundedWindow>, StateNamespace> namespaceWithCoder : TEST_NAMESPACES_WITH_CODERS) {
            @Nullable Coder<? extends BoundedWindow> windowCoder = namespaceWithCoder.getKey();
            StateNamespace namespace = namespaceWithCoder.getValue();
            for (TimeDomain domain : TimeDomain.values()) {
                for (WindmillNamespacePrefix prefix : WindmillNamespacePrefix.values()) {
                    for (Instant fireTime : TEST_TIMESTAMPS) {
                        Instant earlierOutputTime = fireTime.minus(Duration.millis(1));

                        // Timers created without an explicit timer id.
                        List<TimerData> anonymousTimers = ImmutableList.of(
                            TimerData.of(namespace, fireTime, fireTime, domain),
                            TimerData.of(namespace, fireTime, earlierOutputTime, domain));
                        for (TimerData anonymousTimer : anonymousTimers) {
                            assertRoundTripsThroughWindmill(stateFamily, prefix, anonymousTimer, windowCoder);
                        }

                        // Timers created with an id, with and without a timer family.
                        for (String timerId : TEST_TIMER_IDS) {
                            List<TimerData> namedTimers = ImmutableList.of(
                                TimerData.of(timerId, namespace, fireTime, fireTime, domain),
                                TimerData.of(timerId, "family", namespace, fireTime, fireTime, domain),
                                TimerData.of(timerId, namespace, fireTime, earlierOutputTime, domain),
                                TimerData.of(timerId, "family", namespace, fireTime, earlierOutputTime, domain));
                            for (TimerData namedTimer : namedTimers) {
                                assertRoundTripsThroughWindmill(stateFamily, prefix, namedTimer, windowCoder);
                            }
                        }
                    }
                }
            }
        }
    }
}

/** Asserts that converting {@code timer} to a Windmill timer and back yields an equal timer. */
private static void assertRoundTripsThroughWindmill(String stateFamily, WindmillNamespacePrefix prefix, TimerData timer, @Nullable Coder<? extends BoundedWindow> coder) {
    assertThat(WindmillTimerInternals.windmillTimerToTimerData(prefix, WindmillTimerInternals.timerDataToWindmillTimer(stateFamily, prefix, timer), coder), equalTo(timer));
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) Instant(org.joda.time.Instant) TimerData(org.apache.beam.runners.core.TimerInternals.TimerData) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) StateNamespace(org.apache.beam.runners.core.StateNamespace) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Test(org.junit.Test)

Example 39 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class PipelineTranslatorUtils method fireTimer.

/**
 * Converts a fired {@link TimerInternals.TimerData} into a {@link Timer} and passes it to the
 * {@link FnDataReceiver} registered for the timer's decoded (transform, timer id) pair.
 *
 * @param timer the fired timer; its namespace must be a {@link StateNamespaces.WindowNamespace}
 * @param timerReceivers receivers keyed by the pair decoded from the timer's family id
 * @param currentTimerKey the user key the timer is emitted for
 * @throws IllegalArgumentException if the timer's namespace is not a window namespace
 * @throws NullPointerException if no receiver is registered for the decoded pair
 * @throws RuntimeException if the receiver fails to accept the timer; the original failure is
 *     attached as the cause
 */
private static void fireTimer(TimerInternals.TimerData timer, Map<KV<String, String>, FnDataReceiver<Timer>> timerReceivers, Object currentTimerKey) {
    StateNamespace namespace = timer.getNamespace();
    // The window is read from the namespace below, so only window namespaces are supported.
    Preconditions.checkArgument(namespace instanceof StateNamespaces.WindowNamespace, "Expected a WindowNamespace but got %s", namespace);
    BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
    Instant timestamp = timer.getTimestamp();
    Instant outputTimestamp = timer.getOutputTimestamp();
    Timer<?> timerValue = Timer.of(currentTimerKey, timer.getTimerId(), Collections.singletonList(window), timestamp, outputTimestamp, PaneInfo.NO_FIRING);
    KV<String, String> transformAndTimerId = TimerReceiverFactory.decodeTimerDataTimerId(timer.getTimerFamilyId());
    FnDataReceiver<Timer> fnTimerReceiver = timerReceivers.get(transformAndTimerId);
    Preconditions.checkNotNull(fnTimerReceiver, "No FnDataReceiver found for %s", transformAndTimerId);
    try {
        fnTimerReceiver.accept(timerValue);
    } catch (Exception e) {
        // Chain the original exception so its type and stack trace are not lost.
        throw new RuntimeException(String.format(Locale.ENGLISH, "Failed to process timer: %s", timerValue), e);
    }
}
Also used : Instant(org.joda.time.Instant) StateNamespace(org.apache.beam.runners.core.StateNamespace) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow)

Example 40 with StateNamespace

use of org.apache.beam.runners.core.StateNamespace in project beam by apache.

the class SamzaTimerInternalsFactoryTest method testNewTimersAreInsertedInOrder.

/**
 * Verifies that event-time timers set after some timers have already fired are still returned in
 * non-decreasing timestamp order, with the event timer buffer size limited to 5.
 *
 * <p>The store is closed in a {@code finally} block so it is released even when an assertion
 * fails.
 */
@Test
public void testNewTimersAreInsertedInOrder() {
    final SamzaPipelineOptions pipelineOptions = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
    pipelineOptions.setEventTimerBufferSize(5);
    final KeyValueStore<ByteArray, StateValue<?>> store = createStore();
    try {
        final SamzaTimerInternalsFactory<String> timerInternalsFactory = createTimerInternalsFactory(null, "timer", pipelineOptions, store);
        final StateNamespace nameSpace = StateNamespaces.global();
        final TimerInternals timerInternals = timerInternalsFactory.timerInternalsForKey("testKey");
        // timers in store now are timestamped from 0 - 9.
        for (int i = 0; i < 10; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        // fire the first 2 timers.
        // timers in memory now are timestamped from 2 - 4;
        // timers in store now are timestamped from 2 - 9.
        Collection<KeyedTimerData<String>> readyTimers;
        timerInternalsFactory.setInputWatermark(new Instant(1));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        long lastTimestamp = assertFiredInTimestampOrder(readyTimers, 0);
        assertEquals(2, readyTimers.size());
        // prefixed with timer, timestamp is in order;
        for (int i = 0; i < 3; i++) {
            timerInternals.setTimer(nameSpace, "timer" + i, "", new Instant(i), new Instant(i), TimeDomain.EVENT_TIME);
        }
        // there are 11 timers in state now.
        // watermark 5 comes, so 6 timers will be evicted because their timestamp is less than 5.
        // memory will be reloaded once to have 5 to 8 left (reload to have 4 to 8, but 4 is evicted), 5
        // to 9 left in store.
        // all of them are in order for firing.
        timerInternalsFactory.setInputWatermark(new Instant(5));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        lastTimestamp = assertFiredInTimestampOrder(readyTimers, 0);
        assertEquals(6, readyTimers.size());
        assertEquals(4, timerInternalsFactory.getEventTimeBuffer().size());
        // watermark 10 comes, so all timers will be evicted in order.
        // The lower bound deliberately carries over from the previous batch: the remaining timers
        // must not be older than the last one already fired.
        timerInternalsFactory.setInputWatermark(new Instant(10));
        readyTimers = timerInternalsFactory.removeReadyTimers();
        assertFiredInTimestampOrder(readyTimers, lastTimestamp);
        assertEquals(4, readyTimers.size());
        assertEquals(0, timerInternalsFactory.getEventTimeBuffer().size());
    } finally {
        store.close();
    }
}

/**
 * Asserts that the timers' timestamps never decrease in iteration order, starting from
 * {@code lowerBound}.
 *
 * @param readyTimers timers to check, in the order they were returned
 * @param lowerBound timestamp in milliseconds that the first timer must not precede
 * @return the timestamp in milliseconds of the last timer, or {@code lowerBound} if there are none
 */
private static long assertFiredInTimestampOrder(Collection<KeyedTimerData<String>> readyTimers, long lowerBound) {
    long lastTimestamp = lowerBound;
    for (KeyedTimerData<String> keyedTimerData : readyTimers) {
        final long currentTimeStamp = keyedTimerData.getTimerData().getTimestamp().getMillis();
        assertTrue(lastTimestamp <= currentTimeStamp);
        lastTimestamp = currentTimeStamp;
    }
    return lastTimestamp;
}
Also used : Instant(org.joda.time.Instant) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) TimerInternals(org.apache.beam.runners.core.TimerInternals) ByteArray(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray) StateValue(org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) Test(org.junit.Test)

Aggregations

StateNamespace (org.apache.beam.runners.core.StateNamespace)43 Test (org.junit.Test)30 Instant (org.joda.time.Instant)20 StateNamespaceForTest (org.apache.beam.runners.core.StateNamespaceForTest)15 TimerInternals (org.apache.beam.runners.core.TimerInternals)15 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)13 SamzaPipelineOptions (org.apache.beam.runners.samza.SamzaPipelineOptions)11 ByteArray (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.ByteArray)11 StateValue (org.apache.beam.runners.samza.runtime.SamzaStoreStateInternals.StateValue)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 BagState (org.apache.beam.sdk.state.BagState)10 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)5 ValueState (org.apache.beam.sdk.state.ValueState)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 ByteBuffer (java.nio.ByteBuffer)3 StateNamespaces (org.apache.beam.runners.core.StateNamespaces)3 WindowNamespace (org.apache.beam.runners.core.StateNamespaces.WindowNamespace)3 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)3 StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue)3 Coder (org.apache.beam.sdk.coders.Coder)3