Search in sources :

Example 51 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testWatermarkHandling.

@Test
public void testWatermarkHandling() throws Exception {
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    ExecutableStageDoFnOperator<KV<String, Integer>, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, WindowingStrategy.of(FixedWindows.of(Duration.millis(10))), StringUtf8Coder.of(), WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), IntervalWindow.getCoder()));
    KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(operator, val -> val.getValue().getKey(), new CoderTypeInformation<>(StringUtf8Coder.of(), FlinkPipelineOptions.defaults()));
    RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
    when(bundle.getInputReceivers()).thenReturn(ImmutableMap.<String, FnDataReceiver<WindowedValue>>builder().put("input", Mockito.mock(FnDataReceiver.class)).build());
    when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.<KV<String, String>, FnDataReceiver<WindowedValue>>builder().put(KV.of("transform", "timer"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer2"), Mockito.mock(FnDataReceiver.class)).put(KV.of("transform", "timer3"), Mockito.mock(FnDataReceiver.class)).build());
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
    testHarness.open();
    assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
    // No bundle has been started, watermark can be freely advanced
    testHarness.processWatermark(0);
    assertThat(operator.getCurrentOutputWatermark(), is(0L));
    // Trigger a new bundle
    IntervalWindow intervalWindow = new IntervalWindow(new Instant(0), new Instant(9));
    WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), Instant.now(), intervalWindow, PaneInfo.NO_FIRING);
    testHarness.processElement(new StreamRecord<>(windowedValue));
    // The output watermark should be held back during the bundle
    testHarness.processWatermark(1);
    assertThat(operator.getEffectiveInputWatermark(), is(1L));
    assertThat(operator.getCurrentOutputWatermark(), is(0L));
    // After the bundle has been finished, the watermark should be advanced
    operator.invokeFinishBundle();
    assertThat(operator.getCurrentOutputWatermark(), is(1L));
    // Bundle finished, watermark can be freely advanced
    testHarness.processWatermark(2);
    assertThat(operator.getEffectiveInputWatermark(), is(2L));
    assertThat(operator.getCurrentOutputWatermark(), is(2L));
    // Trigger a new bundle
    testHarness.processElement(new StreamRecord<>(windowedValue));
    // cleanup timer
    assertThat(testHarness.numEventTimeTimers(), is(1));
    // Set at timer
    Instant timerTarget = new Instant(5);
    Instant timerTarget2 = new Instant(6);
    operator.getLockToAcquireForStateAccessDuringBundles().lock();
    BiConsumer<String, Instant> timerConsumer = (timerId, timestamp) -> operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), timestamp, timestamp, PaneInfo.NO_FIRING), TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId("transform", timerId), StateNamespaces.window(IntervalWindow.getCoder(), intervalWindow), timestamp, timestamp, TimeDomain.EVENT_TIME));
    timerConsumer.accept("timer", timerTarget);
    timerConsumer.accept("timer2", timerTarget2);
    assertThat(testHarness.numEventTimeTimers(), is(3));
    // Advance input watermark past the timer
    // Check the output watermark is held back
    long targetWatermark = timerTarget.getMillis() + 100;
    testHarness.processWatermark(targetWatermark);
    // Do not yet advance the output watermark because we are still processing a bundle
    assertThat(testHarness.numEventTimeTimers(), is(3));
    assertThat(operator.getCurrentOutputWatermark(), is(2L));
    // Check that the timers are fired but the output watermark is advanced no further than
    // the minimum timer timestamp of the previous bundle because we are still processing a
    // bundle which might contain more timers.
    // Timers can create loops if they keep rescheduling themselves when firing
    // Thus, we advance the watermark asynchronously to allow for checkpointing to run
    operator.invokeFinishBundle();
    assertThat(testHarness.numEventTimeTimers(), is(3));
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    assertThat(testHarness.numEventTimeTimers(), is(0));
    assertThat(operator.getCurrentOutputWatermark(), is(5L));
    // Output watermark is advanced synchronously when the bundle finishes,
    // no more timers are scheduled
    operator.invokeFinishBundle();
    assertThat(operator.getCurrentOutputWatermark(), is(targetWatermark));
    assertThat(testHarness.numEventTimeTimers(), is(0));
    // Watermark is advanced in a blocking fashion on close, not via a timers
    // Create a bundle with a pending timer to simulate that
    testHarness.processElement(new StreamRecord<>(windowedValue));
    timerConsumer.accept("timer3", new Instant(targetWatermark));
    assertThat(testHarness.numEventTimeTimers(), is(1));
    // This should be blocking until the watermark reaches Long.MAX_VALUE.
    testHarness.close();
    assertThat(testHarness.numEventTimeTimers(), is(0));
    assertThat(operator.getCurrentOutputWatermark(), is(Long.MAX_VALUE));
}
Also used : Arrays(java.util.Arrays) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) InMemoryStateInternals(org.apache.beam.runners.core.InMemoryStateInternals) FlinkExecutableStageContextFactory(org.apache.beam.runners.flink.translation.functions.FlinkExecutableStageContextFactory) CoderUtils(org.apache.beam.sdk.util.CoderUtils) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) TimerInternals(org.apache.beam.runners.core.TimerInternals) OneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness) Mockito.doThrow(org.mockito.Mockito.doThrow) MockitoAnnotations(org.mockito.MockitoAnnotations) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Map(java.util.Map) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) PAR_DO_TRANSFORM_URN(org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) SerializationUtils(org.apache.beam.repackaged.core.org.apache.commons.lang3.SerializationUtils) Struct(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Struct) OutputTag(org.apache.flink.util.OutputTag) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) StandardCharsets(java.nio.charset.StandardCharsets) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) Matchers.any(org.mockito.Matchers.any) Matchers.is(org.hamcrest.Matchers.is) StatefulDoFnRunner(org.apache.beam.runners.core.StatefulDoFnRunner) KV(org.apache.beam.sdk.values.KV) Mock(org.mockito.Mock) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) StateTags(org.apache.beam.runners.core.StateTags) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) TupleTag(org.apache.beam.sdk.values.TupleTag) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) Before(org.junit.Before) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) Test(org.junit.Test) BundleCheckpointHandler(org.apache.beam.runners.fnexecution.control.BundleCheckpointHandler) Assert.assertNotEquals(org.junit.Assert.assertNotEquals) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) VarIntCoder(org.apache.beam.sdk.coders.VarIntCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) StateNamespace(org.apache.beam.runners.core.StateNamespace) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) IsIterableContainingInOrder.contains(org.hamcrest.collection.IsIterableContainingInOrder.contains) FlinkPipelineOptions(org.apache.beam.runners.flink.FlinkPipelineOptions) ByteBuffer(java.nio.ByteBuffer) Mockito.verifyNoMoreInteractions(org.mockito.Mockito.verifyNoMoreInteractions) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) BundleFinalizationHandler(org.apache.beam.runners.fnexecution.control.BundleFinalizationHandler) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) DistributedCache(org.apache.flink.api.common.cache.DistributedCache) List(java.util.List) InstructionRequestHandler(org.apache.beam.runners.fnexecution.control.InstructionRequestHandler) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) Whitebox(org.powermock.reflect.Whitebox) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Coder(org.apache.beam.sdk.coders.Coder) Watermark(org.apache.flink.streaming.api.watermark.Watermark) HashMap(java.util.HashMap) MutableObject(org.apache.beam.repackaged.core.org.apache.commons.lang3.mutable.MutableObject) StateNamespaces(org.apache.beam.runners.core.StateNamespaces) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) StreamRecord(org.apache.flink.streaming.runtime.streamrecord.StreamRecord) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ExpectedException(org.junit.rules.ExpectedException) Nullable(org.checkerframework.checker.nullness.qual.Nullable) DoFnRunnerWithMetricsUpdate(org.apache.beam.runners.flink.metrics.DoFnRunnerWithMetricsUpdate) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) Assert.assertNotNull(org.junit.Assert.assertNotNull) Charsets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) KeyedStateBackend(org.apache.flink.runtime.state.KeyedStateBackend) Mockito.verify(org.mockito.Mockito.verify) Mockito(org.mockito.Mockito) BagState(org.apache.beam.sdk.state.BagState) Rule(org.junit.Rule) Instant(org.joda.time.Instant) Collections(java.util.Collections) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) KV(org.apache.beam.sdk.values.KV) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test) FlinkStateInternalsTest(org.apache.beam.runners.flink.streaming.FlinkStateInternalsTest)

Example 52 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.

private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    StringUtf8Coder keyCoder = StringUtf8Coder.of();
    WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
    @SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
    KV<String, String> timerInputKey = KV.of("transformId", "timerId");
    AtomicBoolean timerInputReceived = new AtomicBoolean();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
    IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
    WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
    FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
    FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
    doAnswer((invocation) -> {
        timerInputReceived.set(true);
        return null;
    }).when(timerReceiver).accept(any());
    when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
    when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    testHarness.open();
    Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
    stateBackendLock.lock();
    KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
    ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
    keyedStateBackend.setCurrentKey(key);
    DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
    Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
    Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
    Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
    Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
    // create some state which can be cleaned up
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
    // State from the SDK Harness is stored as ByteStrings
    BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
    state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    // user timer that fires after the end of the window and after state cleanup
    TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    timerInternals.setTimer(userTimer);
    // start of bundle
    testHarness.processElement(new StreamRecord<>(windowedValue));
    verify(receiver).accept(windowedValue);
    // move watermark past user timer while bundle is in progress
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
    // Output watermark is held back and timers do not yet fire (they can still be changed!)
    assertThat(timerInputReceived.get(), is(false));
    assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
    // The timer fires on bundle finish
    operator.invokeFinishBundle();
    assertThat(timerInputReceived.getAndSet(false), is(true));
    // Move watermark past the cleanup timer
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
    operator.invokeFinishBundle();
    // Cleanup timer has fired and cleanup queue is prepared for bundle finish
    assertThat(testHarness.numEventTimeTimers(), is(0));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup timer are rescheduled if a new timer is created during the bundle
    TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
    assertThat(testHarness.numEventTimeTimers(), is(1));
    if (withCheckpointing) {
        // Upon checkpointing, the bundle will be finished.
        testHarness.snapshot(0, 0);
    } else {
        operator.invokeFinishBundle();
    }
    // Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
    // for the window.
    assertThat(cleanupQueue, hasSize(0));
    verifyNoMoreInteractions(receiver);
    assertThat(testHarness.numKeyedStateEntries(), is(2));
    assertThat(testHarness.numEventTimeTimers(), is(2));
    // No timer has been fired but bundle should be ended
    assertThat(timerInputReceived.get(), is(false));
    assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
    // Allow user timer and cleanup timer to fire by triggering watermark advancement
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    assertThat(timerInputReceived.getAndSet(false), is(true));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup will be executed after the bundle is complete because there are no more pending
    // timers for the window
    operator.invokeFinishBundle();
    assertThat(cleanupQueue, hasSize(0));
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    testHarness.close();
    verifyNoMoreInteractions(receiver);
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Timer(org.apache.beam.runners.core.construction.Timer) MutableObject(org.apache.beam.repackaged.core.org.apache.commons.lang3.mutable.MutableObject) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 53 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class WindowDoFnOperatorTest method testRestore.

@Test
public void testRestore() throws Exception {
    // test harness
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KeyedWorkItem<Long, Long>>, WindowedValue<KV<Long, Long>>> testHarness = createTestHarness(getWindowDoFnOperator());
    testHarness.open();
    // process elements
    IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10_000));
    testHarness.processWatermark(0L);
    testHarness.processElement(Item.builder().key(1L).timestamp(1L).value(100L).window(window).build().toStreamRecord());
    testHarness.processElement(Item.builder().key(1L).timestamp(2L).value(20L).window(window).build().toStreamRecord());
    testHarness.processElement(Item.builder().key(2L).timestamp(3L).value(77L).window(window).build().toStreamRecord());
    // create snapshot
    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    testHarness.close();
    // restore from the snapshot
    testHarness = createTestHarness(getWindowDoFnOperator());
    testHarness.initializeState(snapshot);
    testHarness.open();
    // close window
    testHarness.processWatermark(10_000L);
    Iterable<WindowedValue<KV<Long, Long>>> output = stripStreamRecordFromWindowedValue(testHarness.getOutput());
    assertEquals(2, Iterables.size(output));
    assertThat(output, containsInAnyOrder(WindowedValue.of(KV.of(1L, 120L), new Instant(9_999), window, PaneInfo.createPane(true, true, ON_TIME)), WindowedValue.of(KV.of(2L, 77L), new Instant(9_999), window, PaneInfo.createPane(true, true, ON_TIME))));
    // cleanup
    testHarness.close();
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) Instant(org.joda.time.Instant) ByteBuffer(java.nio.ByteBuffer) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) OperatorSubtaskState(org.apache.flink.runtime.checkpoint.OperatorSubtaskState) Test(org.junit.Test)

Example 54 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class WindowDoFnOperatorTest method testTimerCleanupOfPendingTimerList.

@Test
public void testTimerCleanupOfPendingTimerList() throws Exception {
    // test harness
    WindowDoFnOperator<Long, Long, Long> windowDoFnOperator = getWindowDoFnOperator();
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KeyedWorkItem<Long, Long>>, WindowedValue<KV<Long, Long>>> testHarness = createTestHarness(windowDoFnOperator);
    testHarness.open();
    DoFnOperator<KeyedWorkItem<Long, Long>, KV<Long, Long>>.FlinkTimerInternals timerInternals = windowDoFnOperator.timerInternals;
    // process elements
    IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(100));
    IntervalWindow window2 = new IntervalWindow(new Instant(100), Duration.millis(100));
    testHarness.processWatermark(0L);
    // Use two different keys to check for correct watermark hold calculation
    testHarness.processElement(Item.builder().key(1L).timestamp(1L).value(100L).window(window).build().toStreamRecord());
    testHarness.processElement(Item.builder().key(2L).timestamp(150L).value(150L).window(window2).build().toStreamRecord());
    testHarness.processWatermark(1);
    // Note that the following is 1 because the state is key-partitioned
    assertThat(Iterables.size(timerInternals.pendingTimersById.keys()), is(1));
    assertThat(testHarness.numKeyedStateEntries(), is(6));
    assertThat(windowDoFnOperator.getCurrentOutputWatermark(), is(1L));
    // close window
    testHarness.processWatermark(100L);
    // Note that the following is zero because we only the first key is active
    assertThat(Iterables.size(timerInternals.pendingTimersById.keys()), is(0));
    assertThat(testHarness.numKeyedStateEntries(), is(3));
    assertThat(windowDoFnOperator.getCurrentOutputWatermark(), is(100L));
    testHarness.processWatermark(200L);
    // All the state has been cleaned up
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), containsInAnyOrder(WindowedValue.of(KV.of(1L, 100L), new Instant(99), window, PaneInfo.createPane(true, true, ON_TIME)), WindowedValue.of(KV.of(2L, 150L), new Instant(199), window2, PaneInfo.createPane(true, true, ON_TIME))));
    // cleanup
    testHarness.close();
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) Instant(org.joda.time.Instant) ByteBuffer(java.nio.ByteBuffer) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 55 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testSideInputNotReady.

@Test
public void testSideInputNotReady() throws Exception {
    PCollectionView<String> view = createView();
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))).thenReturn(false);
    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> views = Arrays.asList(view);
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner = createRunner(outputManager, views, sideInputFetcher);
    runner.startBundle();
    runner.processElement(createDatum("e", 0));
    runner.finishBundle();
    assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
    // Verify that we added the element to an appropriate tag list, and that we buffered the element
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(StateNamespaces.global(), StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    assertEquals(blockedMapState.read(), Collections.singletonMap(window, Collections.singleton(Windmill.GlobalDataRequest.newBuilder().setDataId(Windmill.GlobalDataId.newBuilder().setTag(view.getTagInternal().getId()).setVersion(ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window))).build()).setExistenceWatermarkDeadline(9000).build())));
    assertThat(sideInputFetcher.elementBag(createWindow(0)).read(), Matchers.contains(createDatum("e", 0)));
    assertEquals(sideInputFetcher.watermarkHold(createWindow(0)).read(), new Instant(0));
}
Also used : Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)238 Test (org.junit.Test)214 Instant (org.joda.time.Instant)213 WindowedValue (org.apache.beam.sdk.util.WindowedValue)67 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)56 KV (org.apache.beam.sdk.values.KV)56 Duration (org.joda.time.Duration)33 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)32 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)20 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)20 ArrayList (java.util.ArrayList)16 TupleTag (org.apache.beam.sdk.values.TupleTag)16 HashMap (java.util.HashMap)14 PCollectionView (org.apache.beam.sdk.values.PCollectionView)14 Category (org.junit.experimental.categories.Category)13 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)12 FixedWindows (org.apache.beam.sdk.transforms.windowing.FixedWindows)12 ByteBuffer (java.nio.ByteBuffer)11 Map (java.util.Map)11 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11