Search in sources :

Example 56 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testMultipleWindowsNotReady.

/**
 * Verifies that an element assigned to two windows is buffered rather than emitted when the side
 * input fetch reports "not ready" for every window.
 *
 * <p>After the bundle the test expects: no main output, a blocked request recorded for each
 * main-input window, the element buffered once per window, and a watermark hold at the element
 * timestamp for each window.
 */
@Test
public void testMultipleWindowsNotReady() throws Exception {
    PCollectionView<String> view = createView();
    // No side-input notifications arrive, and every fetch attempt reports the side input as not ready.
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))).thenReturn(false);
    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> views = Arrays.asList(view);
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner = createRunner(SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)), outputManager, views, sideInputFetcher);
    IntervalWindow window1 = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow window2 = new IntervalWindow(new Instant(-5), new Instant(5));
    long timestamp = 1L;
    WindowedValue<String> elem = WindowedValue.of("e", new Instant(timestamp), Arrays.asList(window1, window2), PaneInfo.NO_FIRING);
    runner.startBundle();
    runner.processElement(elem);
    runner.finishBundle();
    assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
    // Verify that we added the element to an appropriate tag list, and that we buffered the element
    // in both windows separately
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(StateNamespaces.global(), StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    Map<IntervalWindow, Set<GlobalDataRequest>> blockedMap = blockedMapState.read();
    // NOTE(review): the same request (version = encoded window1, deadline 9000) is expected for both
    // main-input windows; presumably both map to the same side-input window - confirm against the
    // windowing used by createView().
    Set<GlobalDataRequest> expectedRequests = Collections.singleton(Windmill.GlobalDataRequest.newBuilder().setDataId(Windmill.GlobalDataId.newBuilder().setTag(view.getTagInternal().getId()).setVersion(ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window1))).build()).setExistenceWatermarkDeadline(9000).build());
    assertThat(blockedMap.get(window1), equalTo(expectedRequests));
    assertThat(blockedMap.get(window2), equalTo(expectedRequests));
    assertThat(sideInputFetcher.elementBag(window1).read(), contains(Iterables.get(elem.explodeWindows(), 0)));
    assertThat(sideInputFetcher.elementBag(window2).read(), contains(Iterables.get(elem.explodeWindows(), 1)));
    // assertEquals takes (expected, actual); keeping that order makes failure messages accurate.
    assertEquals(new Instant(timestamp), sideInputFetcher.watermarkHold(window1).read());
    assertEquals(new Instant(timestamp), sideInputFetcher.watermarkHold(window2).read());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 57 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingDataflowWorkerTest method buildSessionInput.

/**
 * Builds a {@code GetWorkResponse} holding one computation with one work item.
 *
 * <p>Each value in {@code inputs} becomes a message whose timestamp is the value itself and whose
 * metadata carries an {@link IntervalWindow} starting at that value and lasting 10 milliseconds.
 * Message bundles and timers are attached only when the corresponding list is non-empty.
 *
 * @param workToken work token set on the work item
 * @param inputWatermark input data watermark; multiplied by 1000 before being set (presumably a
 *     millisecond-to-microsecond conversion - confirm against the Windmill proto)
 * @param outputWatermark output data watermark; multiplied by 1000 before being set
 * @param inputs values turned into input messages
 * @param timers timers attached to the work item
 * @return the assembled response
 */
private Windmill.GetWorkResponse buildSessionInput(int workToken, long inputWatermark, long outputWatermark, List<Long> inputs, List<Timer> timers) throws Exception {
    Windmill.WorkItem.Builder workItem = Windmill.WorkItem.newBuilder()
            .setKey(DEFAULT_KEY_BYTES)
            .setShardingKey(DEFAULT_SHARDING_KEY)
            .setCacheToken(1)
            .setWorkToken(workToken)
            .setOutputDataWatermark(outputWatermark * 1000);
    if (!inputs.isEmpty()) {
        InputMessageBundle.Builder bundle = Windmill.InputMessageBundle.newBuilder();
        bundle.setSourceComputationId(DEFAULT_SOURCE_COMPUTATION_ID);
        for (Long input : inputs) {
            Instant windowStart = new Instant(input);
            IntervalWindow window = new IntervalWindow(windowStart, windowStart.plus(Duration.millis(10)));
            Windmill.Message.Builder message = Windmill.Message.newBuilder()
                    .setTimestamp(input)
                    .setData(ByteString.copyFromUtf8(dataStringForIndex(input)))
                    .setMetadata(addPaneTag(PaneInfo.NO_FIRING, intervalWindowBytes(window)));
            bundle.addMessages(message);
        }
        workItem.addMessageBundles(bundle);
    }
    if (!timers.isEmpty()) {
        Windmill.TimerBundle.Builder timerBundle = Windmill.TimerBundle.newBuilder().addAllTimers(timers);
        workItem.setTimers(timerBundle);
    }
    Windmill.ComputationWorkItems.Builder computationWork = Windmill.ComputationWorkItems.newBuilder()
            .setComputationId(DEFAULT_COMPUTATION_ID)
            .setInputDataWatermark(inputWatermark * 1000)
            .addWork(workItem);
    return Windmill.GetWorkResponse.newBuilder().addWork(computationWork).build();
}
Also used : InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Instant(org.joda.time.Instant) AtomicLong(java.util.concurrent.atomic.AtomicLong) DataflowCounterUpdateExtractor.splitIntToLong(org.apache.beam.runners.dataflow.worker.counters.DataflowCounterUpdateExtractor.splitIntToLong) UnsignedLong(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.primitives.UnsignedLong) WorkItem(com.google.api.services.dataflow.model.WorkItem) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow)

Example 58 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindowsAndLateData.

/**
 * Runs a group-also-by-window function over sliding windows (size 20ms, period 10ms, earliest
 * timestamp combiner) with the input watermark stubbed at 15 for the element bundle.
 *
 * <p>The first bundle delivers three elements across windows [-10, 10), [0, 20) and [10, 30); the
 * second bundle fires a watermark timer for each window with the watermark stubbed at 30. The
 * test expects three output panes - an empty one for [-10, 10) - and a dropped-due-to-lateness
 * counter of 2.
 */
@Test
public void testSlidingWindowsAndLateData() throws Exception {
    MetricsContainerImpl metrics = new MetricsContainerImpl("step");
    MetricsEnvironment.setCurrentContainer(metrics);
    TupleTag<KV<String, Iterable<String>>> outputTag = new TupleTag<>();
    ListOutputManager outputs = new ListOutputManager();
    Coder<String> valueCoder = StringUtf8Coder.of();
    IntervalWindow firstWindow = window(-10, 10);
    IntervalWindow middleWindow = window(0, 20);
    IntervalWindow lastWindow = window(10, 30);

    WindowingStrategy<? super String, IntervalWindow> strategy = WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10))).withTimestampCombiner(TimestampCombiner.EARLIEST);
    GroupAlsoByWindowFn<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> groupFn = StreamingGroupAlsoByWindowsDoFns.createForIterable(strategy, new StepContextStateInternalsFactory<String>(stepContext), StringUtf8Coder.of());
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> runner = makeRunner(outputTag, outputs, strategy, groupFn);

    // Bundle 1: the elements, processed while the input watermark reads 15.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15));
    runner.startBundle();
    WorkItem.Builder elementsItem = WorkItem.newBuilder()
            .setKey(ByteString.copyFromUtf8(KEY))
            .setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder messages = elementsItem.addMessageBundlesBuilder();
    messages.setSourceComputationId(SOURCE_COMPUTATION_ID);
    addElement(messages, Arrays.asList(firstWindow, middleWindow), new Instant(5), valueCoder, "v1");
    addElement(messages, Arrays.asList(firstWindow, middleWindow), new Instant(2), valueCoder, "v0");
    addElement(messages, Arrays.asList(middleWindow, lastWindow), new Instant(15), valueCoder, "v2");
    runner.processElement(createValue(elementsItem, valueCoder));
    runner.finishBundle();

    // Bundle 2: one watermark timer per window, processed while the input watermark reads 30.
    runner.startBundle();
    WorkItem.Builder timersItem = WorkItem.newBuilder()
            .setKey(ByteString.copyFromUtf8(KEY))
            .setWorkToken(WORK_TOKEN);
    addTimer(timersItem, firstWindow, new Instant(9), Timer.Type.WATERMARK);
    addTimer(timersItem, middleWindow, new Instant(19), Timer.Type.WATERMARK);
    addTimer(timersItem, lastWindow, new Instant(29), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
    runner.processElement(createValue(timersItem, valueCoder));
    runner.finishBundle();

    List<WindowedValue<KV<String, Iterable<String>>>> panes = outputs.getOutput(outputTag);
    assertThat(panes.size(), equalTo(3));
    assertThat(
            panes,
            containsInAnyOrder(
                    WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), emptyIterable()), equalTo(firstWindow.maxTimestamp()), equalTo(firstWindow)),
                    WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")), equalTo(new Instant(2)), equalTo(middleWindow)),
                    WindowMatchers.isSingleWindowedValue(isKv(equalTo(KEY), containsInAnyOrder("v2")), equalTo(new Instant(15)), equalTo(lastWindow))));

    MetricName droppedCounterName = MetricName.named(LateDataDroppingDoFnRunner.class, LateDataDroppingDoFnRunner.DROPPED_DUE_TO_LATENESS);
    long droppedValues = metrics.getCounter(droppedCounterName).getCumulative().longValue();
    assertThat(droppedValues, equalTo(2L));
}
Also used : LateDataDroppingDoFnRunner(org.apache.beam.runners.core.LateDataDroppingDoFnRunner) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 59 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingKeyedWorkItemSideInputDoFnRunnerTest method testStartBundle.

/**
 * Checks that {@code startBundle()} processes previously buffered elements and timers for a
 * window the side-input fetcher reports as ready.
 *
 * <p>The fetcher mock returns window [10, 20) as ready, with two buffered data and one watermark
 * timer; the test expects a single output for key "a" with value 31.
 */
@Test
public void testStartBundle() throws Exception {
    ListOutputManager outputs = new ListOutputManager();
    StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> runner = createRunner(outputs);
    runner.keyValue().write("a");

    IntervalWindow readyWindow = window(10, 20);
    Set<IntervalWindow> readyWindows = ImmutableSet.of(readyWindow);
    // Stub the fetcher: the window is ready and has buffered elements and a timer waiting.
    when(sideInputFetcher.getReadyWindows()).thenReturn(readyWindows);
    when(sideInputFetcher.prefetchElements(readyWindows)).thenReturn(ImmutableList.of(elemsBag));
    when(sideInputFetcher.prefetchTimers(readyWindows)).thenReturn(ImmutableList.of(timersBag));
    when(elemsBag.read()).thenReturn(ImmutableList.of(createDatum(13, 13L), createDatum(18, 18L)));
    when(timersBag.read()).thenReturn(ImmutableList.of(timerData(readyWindow, new Instant(19), Timer.Type.WATERMARK)));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));

    runner.startBundle();

    List<WindowedValue<KV<String, Integer>>> emitted = outputs.getOutput(mainOutputTag);
    assertEquals(1, emitted.size());
    KV<String, Integer> onlyResult = emitted.get(0).getValue();
    assertEquals("a", onlyResult.getKey());
    assertEquals(31, onlyResult.getValue().intValue());
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 60 with IntervalWindow

use of org.apache.beam.sdk.transforms.windowing.IntervalWindow in project beam by apache.

the class StreamingKeyedWorkItemSideInputDoFnRunnerTest method testInvokeProcessElement.

/**
 * Checks element and timer processing when one of three elements is reported as blocked on its
 * side input.
 *
 * <p>{@code storeIfBlocked} is stubbed to answer false, true, false for the three data, so only
 * the second one is treated as blocked. After a watermark timer for window [10, 20) is delivered,
 * the test expects a single output for key "a" with value 31 and the stored key to read "a".
 */
@Test
public void testInvokeProcessElement() throws Exception {
    // A fourth storeIfBlocked call would be unexpected and fails the test.
    when(sideInputFetcher.storeIfBlocked(Matchers.<WindowedValue<Integer>>any()))
            .thenReturn(false, true, false)
            .thenThrow(new RuntimeException("Does not expect more calls"));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15L));
    ListOutputManager outputs = new ListOutputManager();
    StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow> runner = createRunner(outputs);

    KeyedWorkItem<String, Integer> elementsItem = KeyedWorkItems.elementsWorkItem(
            "a",
            ImmutableList.of(
                    createDatum(13, 13L),
                    // side inputs non-ready element
                    createDatum(16, 16L),
                    createDatum(18, 18L)));
    runner.processElement(new ValueInEmptyWindows<>(elementsItem));

    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
    KeyedWorkItem<String, Integer> timersItem = KeyedWorkItems.<String, Integer>timersWorkItem(
            "a",
            ImmutableList.of(timerData(window(10, 20), new Instant(19), Timer.Type.WATERMARK)));
    runner.processElement(new ValueInEmptyWindows<>(timersItem));

    List<WindowedValue<KV<String, Integer>>> emitted = outputs.getOutput(mainOutputTag);
    assertEquals(1, emitted.size());
    KV<String, Integer> onlyResult = emitted.get(0).getValue();
    assertEquals("a", onlyResult.getKey());
    // 31 is consistent with the first and third data (13 and 18) being combined while 16 is held back.
    assertEquals(31, onlyResult.getValue().intValue());
    assertEquals("a", runner.keyValue().read());
}
Also used : Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)238 Test (org.junit.Test)214 Instant (org.joda.time.Instant)213 WindowedValue (org.apache.beam.sdk.util.WindowedValue)67 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)56 KV (org.apache.beam.sdk.values.KV)56 Duration (org.joda.time.Duration)33 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)32 WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue)20 WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue)20 ArrayList (java.util.ArrayList)16 TupleTag (org.apache.beam.sdk.values.TupleTag)16 HashMap (java.util.HashMap)14 PCollectionView (org.apache.beam.sdk.values.PCollectionView)14 Category (org.junit.experimental.categories.Category)13 MetricsContainerImpl (org.apache.beam.runners.core.metrics.MetricsContainerImpl)12 FixedWindows (org.apache.beam.sdk.transforms.windowing.FixedWindows)12 ByteBuffer (java.nio.ByteBuffer)11 Map (java.util.Map)11 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11