Search in sources :

Example 11 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

the class ExecutableStageDoFnOperatorTest method testEnsureDeferredStateCleanupTimerFiring.

/**
 * Drives an {@code ExecutableStageDoFnOperator} through a keyed Flink test harness and checks
 * how window state cleanup interacts with user timers and bundle boundaries.
 *
 * <p>The scenario, in order: one piece of user state and one event-time user timer are created
 * for a single fixed window; an element starts a bundle; the watermark is moved past the user
 * timer and then past the cleanup timer; a second user timer is set while cleanup is queued; the
 * bundle is finished; and finally the remaining timers fire and the state is removed. After
 * each step the test asserts the number of keyed state entries, the number of event-time
 * timers, and the size of the operator's internal cleanup queue.
 *
 * <p>Relies on members declared outside this method ({@code getOperator},
 * {@code stageBundleFactory}, {@code stateId}) and reads operator internals by field name via
 * {@code Whitebox.getInternalState}.
 *
 * @param withCheckpointing if {@code true}, the bundle that follows the second user timer is
 *     finished by taking a harness snapshot; otherwise by calling
 *     {@code operator.invokeFinishBundle()} directly
 * @throws Exception propagated from the test harness or the operator
 */
private void testEnsureDeferredStateCleanupTimerFiring(boolean withCheckpointing) throws Exception {
    TupleTag<Integer> mainOutput = new TupleTag<>("main-output");
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory(mainOutput, VoidCoder.of(), new SerializablePipelineOptions(FlinkPipelineOptions.defaults()));
    StringUtf8Coder keyCoder = StringUtf8Coder.of();
    // 1000 ms fixed windows; the IntervalWindow [0, 1000) built below is one such window.
    WindowingStrategy windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1000)));
    KvCoder<String, Integer> kvCoder = KvCoder.of(keyCoder, VarIntCoder.of());
    ExecutableStageDoFnOperator<Integer, Integer> operator = getOperator(mainOutput, Collections.emptyList(), outputManagerFactory, windowingStrategy, keyCoder, WindowedValue.getFullCoder(kvCoder, windowingStrategy.getWindowFn().windowCoder()));
    // Every bundle requested from the stage bundle factory is the same mocked RemoteBundle.
    @SuppressWarnings("unchecked") RemoteBundle bundle = Mockito.mock(RemoteBundle.class);
    when(stageBundleFactory.getBundle(any(), any(), any(), any(), any(), any())).thenReturn(bundle);
    KV<String, String> timerInputKey = KV.of("transformId", "timerId");
    // Set to true by the mocked timer receiver whenever a timer is delivered to it.
    AtomicBoolean timerInputReceived = new AtomicBoolean();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(1000));
    IntervalWindow.IntervalWindowCoder windowCoder = IntervalWindow.IntervalWindowCoder.of();
    WindowedValue<KV<String, Integer>> windowedValue = WindowedValue.of(KV.of("one", 1), window.maxTimestamp(), ImmutableList.of(window), PaneInfo.NO_FIRING);
    FnDataReceiver receiver = Mockito.mock(FnDataReceiver.class);
    FnDataReceiver<Timer> timerReceiver = Mockito.mock(FnDataReceiver.class);
    doAnswer((invocation) -> {
        timerInputReceived.set(true);
        return null;
    }).when(timerReceiver).accept(any());
    when(bundle.getInputReceivers()).thenReturn(ImmutableMap.of("input", receiver));
    when(bundle.getTimerReceivers()).thenReturn(ImmutableMap.of(timerInputKey, timerReceiver));
    KeyedOneInputStreamOperatorTestHarness<ByteBuffer, WindowedValue<KV<String, Integer>>, WindowedValue<Integer>> testHarness = new KeyedOneInputStreamOperatorTestHarness(operator, operator.keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
    testHarness.open();
    // NOTE(review): the lock is acquired here and there is no matching unlock() in this
    // method — confirm this is intentional.
    Lock stateBackendLock = Whitebox.getInternalState(operator, "stateBackendLock");
    stateBackendLock.lock();
    // Select the element's key on the keyed state backend before touching state and timers
    // directly below.
    KeyedStateBackend<ByteBuffer> keyedStateBackend = operator.getKeyedStateBackend();
    ByteBuffer key = FlinkKeyUtils.encodeKey(windowedValue.getValue().getKey(), keyCoder);
    keyedStateBackend.setCurrentKey(key);
    // Walk the operator's private fields (doFnRunner -> delegate -> stateCleaner) to reach the
    // cleanup queue whose size is asserted throughout the test.
    DoFnOperator.FlinkTimerInternals timerInternals = Whitebox.getInternalState(operator, "timerInternals");
    Object doFnRunner = Whitebox.getInternalState(operator, "doFnRunner");
    Object delegate = Whitebox.getInternalState(doFnRunner, "delegate");
    Object stateCleaner = Whitebox.getInternalState(delegate, "stateCleaner");
    Collection<?> cleanupQueue = Whitebox.getInternalState(stateCleaner, "cleanupQueue");
    // create some state which can be cleaned up
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    StateNamespace stateNamespace = StateNamespaces.window(windowCoder, window);
    // State from the SDK Harness is stored as ByteStrings
    BagState<ByteString> state = operator.keyedStateInternals.state(stateNamespace, StateTags.bag(stateId, ByteStringCoder.of()));
    state.add(ByteString.copyFrom("userstate".getBytes(Charsets.UTF_8)));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    // user timer that fires after the end of the window and after state cleanup
    TimerInternals.TimerData userTimer = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    timerInternals.setTimer(userTimer);
    // start of bundle
    testHarness.processElement(new StreamRecord<>(windowedValue));
    verify(receiver).accept(windowedValue);
    // move watermark past user timer while bundle is in progress
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(1)).getMillis()));
    // Output watermark is held back and timers do not yet fire (they can still be changed!)
    assertThat(timerInputReceived.get(), is(false));
    assertThat(operator.getCurrentOutputWatermark(), is(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis()));
    // The timer fires on bundle finish
    operator.invokeFinishBundle();
    // getAndSet(false) asserts the flag and resets it so later deliveries can be detected.
    assertThat(timerInputReceived.getAndSet(false), is(true));
    // Move watermark past the cleanup timer
    testHarness.processWatermark(new Watermark(window.maxTimestamp().plus(Duration.millis(2)).getMillis()));
    operator.invokeFinishBundle();
    // Cleanup timer has fired and cleanup queue is prepared for bundle finish
    assertThat(testHarness.numEventTimeTimers(), is(0));
    assertThat(testHarness.numKeyedStateEntries(), is(1));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup timers are rescheduled if a new timer is created during the bundle
    TimerInternals.TimerData userTimer2 = TimerInternals.TimerData.of("", TimerReceiverFactory.encodeToTimerDataTimerId(timerInputKey.getKey(), timerInputKey.getValue()), stateNamespace, window.maxTimestamp(), window.maxTimestamp(), TimeDomain.EVENT_TIME);
    operator.setTimer(Timer.of(windowedValue.getValue().getKey(), "", windowedValue.getWindows(), window.maxTimestamp(), window.maxTimestamp(), PaneInfo.NO_FIRING), userTimer2);
    assertThat(testHarness.numEventTimeTimers(), is(1));
    if (withCheckpointing) {
        // Upon checkpointing, the bundle will be finished.
        testHarness.snapshot(0, 0);
    } else {
        operator.invokeFinishBundle();
    }
    // Cleanup queue has been processed and cleanup timer has been re-added due to pending timers
    // for the window.
    assertThat(cleanupQueue, hasSize(0));
    verifyNoMoreInteractions(receiver);
    assertThat(testHarness.numKeyedStateEntries(), is(2));
    assertThat(testHarness.numEventTimeTimers(), is(2));
    // No timer has been fired but bundle should be ended
    assertThat(timerInputReceived.get(), is(false));
    assertThat(Whitebox.getInternalState(operator, "bundleStarted"), is(false));
    // Allow user timer and cleanup timer to fire by triggering watermark advancement
    // NOTE(review): the call below advances the harness processing time by 1 ms rather than
    // sending a watermark; presumably the operator re-evaluates its held-back watermark from a
    // processing-time callback — confirm against the operator implementation.
    testHarness.setProcessingTime(testHarness.getProcessingTime() + 1);
    assertThat(timerInputReceived.getAndSet(false), is(true));
    assertThat(cleanupQueue, hasSize(1));
    // Cleanup will be executed after the bundle is complete because there are no more pending
    // timers for the window
    operator.invokeFinishBundle();
    assertThat(cleanupQueue, hasSize(0));
    assertThat(testHarness.numKeyedStateEntries(), is(0));
    testHarness.close();
    // The input receiver saw only the single element accepted at the start of the test.
    verifyNoMoreInteractions(receiver);
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) TupleTag(org.apache.beam.sdk.values.TupleTag) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KeyedOneInputStreamOperatorTestHarness(org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness) WindowedValue(org.apache.beam.sdk.util.WindowedValue) StreamRecordStripper.stripStreamRecordFromWindowedValue(org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) ByteBuffer(java.nio.ByteBuffer) StateNamespace(org.apache.beam.runners.core.StateNamespace) NoopLock(org.apache.beam.sdk.util.NoopLock) Lock(java.util.concurrent.locks.Lock) TimerInternals(org.apache.beam.runners.core.TimerInternals) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Timer(org.apache.beam.runners.core.construction.Timer) MutableObject(org.apache.beam.repackaged.core.org.apache.commons.lang3.mutable.MutableObject) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) Watermark(org.apache.flink.streaming.api.watermark.Watermark)

Example 12 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

the class GroupAlsoByWindowParDoFnFactoryTest method testJavaWindowingStrategyDeserialization.

/**
 * Checks that a {@link WindowingStrategy} built from a {@link WindowFn} reports an equal
 * {@link WindowFn} from {@code getWindowFn()}.
 */
@Test
public void testJavaWindowingStrategyDeserialization() throws Exception {
    // 17 ms fixed windows serve as the window function under test.
    WindowFn expectedFn = FixedWindows.of(Duration.millis(17));
    WindowingStrategy strategy = WindowingStrategy.of(expectedFn);

    WindowFn actualFn = strategy.getWindowFn();
    assertThat(actualFn, equalTo(expectedFn));
}
Also used : WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) Test(org.junit.Test)

Example 13 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

the class RehydratedComponentsTest method testWindowingStrategy.

/**
 * Registers a windowing strategy with {@link SdkComponents}, rehydrates it through
 * {@link RehydratedComponents}, and checks that (a) the rehydrated strategy equals the original
 * once the environment id is attached and defaults are fixed, and (b) a second lookup of the
 * same id returns the very same instance.
 */
@Test
public void testWindowingStrategy() throws Exception {
    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));

    // Strategy under test: 1 ms fixed windows with 4 s of allowed lateness.
    WindowingStrategy fixedWindowStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(1)));
    WindowingStrategy original = fixedWindowStrategy.withAllowedLateness(Duration.standardSeconds(4));
    String strategyId = components.registerWindowingStrategy(original);

    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components.toComponents());
    WindowingStrategy<?, ?> firstLookup = rehydrated.getWindowingStrategy(strategyId);

    // The comparison target is the original with the single registered environment id attached
    // and defaults fixed.
    WindowingStrategy expected = original.withEnvironmentId(components.getOnlyEnvironmentId()).fixDefaults();
    assertThat(firstLookup, equalTo(expected));

    // Looking the id up again must hand back the identical object.
    WindowingStrategy<?, ?> secondLookup = rehydrated.getWindowingStrategy(strategyId);
    assertThat(secondLookup, theInstance((WindowingStrategy) firstLookup));
}
Also used : WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) Test(org.junit.Test)

Example 14 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

the class WindowUtils method getWindowStrategy.

/**
 * Resolves the {@link WindowingStrategy} attached to a PCollection.
 *
 * <p>The PCollection named by {@code collectionId} is looked up in {@code components}, its
 * windowing strategy id is used to fetch the strategy proto, and the proto is translated back
 * into a {@link WindowingStrategy}.
 *
 * @param collectionId id of the PCollection within {@code components}
 * @param components pipeline components holding the PCollection and its windowing strategy
 * @return the translated strategy, cast (unchecked) to a {@link BoundedWindow} window type
 * @throws IllegalStateException if translating the strategy proto fails; the original
 *     exception is kept as the cause
 */
public static WindowingStrategy<?, BoundedWindow> getWindowStrategy(String collectionId, RunnerApi.Components components) {
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    String strategyId = components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId();
    RunnerApi.WindowingStrategy strategyProto = components.getWindowingStrategiesOrThrow(strategyId);
    try {
        // The cast only narrows the window type parameter; it is unchecked at runtime.
        @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> hydrated = (WindowingStrategy<?, BoundedWindow>) WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
        return hydrated;
    } catch (Exception e) {
        String message = String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto);
        throw new IllegalStateException(message, e);
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) IOException(java.io.IOException) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 15 with WindowingStrategy

use of org.apache.beam.sdk.values.WindowingStrategy in project beam by apache.

the class NodesTest method testFetchReadySideInputsAndFilterBlockedStreamingSideInputsNode.

/**
 * Checks that {@code FetchAndFilterStreamingSideInputsNode.create} keeps the exact instances it
 * was given: the windowing strategy, the view-to-window-mapping-fn map, and the name context
 * are each returned by reference from the corresponding getter.
 */
@Test
public void testFetchReadySideInputsAndFilterBlockedStreamingSideInputsNode() {
    WindowingStrategy strategy = WindowingStrategy.globalDefault();
    PCollectionView<?> view = mock(PCollectionView.class);
    RunnerApi.FunctionSpec mappingFnSpec = FunctionSpec.newBuilder().setUrn("beam:test:urn:1.0").build();
    Map<PCollectionView<?>, RunnerApi.FunctionSpec> mappingFns = ImmutableMap.of(view, mappingFnSpec);
    NameContext context = NameContextsForTests.nameContextForTest();

    // A fresh node is created for every getter that is checked.
    FetchAndFilterStreamingSideInputsNode strategyNode = FetchAndFilterStreamingSideInputsNode.create(strategy, mappingFns, context);
    assertSame(strategyNode.getWindowingStrategy(), strategy);

    FetchAndFilterStreamingSideInputsNode mappingNode = FetchAndFilterStreamingSideInputsNode.create(strategy, mappingFns, context);
    assertSame(mappingNode.getPCollectionViewsToWindowMappingFns(), mappingFns);

    FetchAndFilterStreamingSideInputsNode contextNode = FetchAndFilterStreamingSideInputsNode.create(strategy, mappingFns, context);
    assertSame(contextNode.getNameContext(), context);
}
Also used : DataflowPortabilityPCollectionView(org.apache.beam.runners.dataflow.worker.DataflowPortabilityPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) Test(org.junit.Test)

Aggregations

WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)36 WindowedValue (org.apache.beam.sdk.util.WindowedValue)25 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)21 KV (org.apache.beam.sdk.values.KV)19 KvCoder (org.apache.beam.sdk.coders.KvCoder)17 Coder (org.apache.beam.sdk.coders.Coder)16 List (java.util.List)15 TupleTag (org.apache.beam.sdk.values.TupleTag)14 Instant (org.joda.time.Instant)13 Test (org.junit.Test)13 PCollection (org.apache.beam.sdk.values.PCollection)11 ArrayList (java.util.ArrayList)10 HashMap (java.util.HashMap)9 Map (java.util.Map)9 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)9 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)9 Duration (org.joda.time.Duration)9 IOException (java.io.IOException)8 Collectors (java.util.stream.Collectors)8 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)8