Search in sources :

Example 6 with ListOutputManager

Use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in the Apache Beam project.

In class StreamingGroupAlsoByWindowFnsTest, method testFixedWindows.

/**
 * Drives a runner configured with 10 ms fixed windows through two bundles.
 *
 * <p>The first bundle delivers four elements ("v1", "v2" and "v0" tagged with window(0, 10),
 * "v3" tagged with window(10, 20)) while the mocked input watermark is 0. The second bundle
 * delivers one WATERMARK timer per window while the mocked input watermark is 20. Exactly two
 * outputs are expected, one per window, each stamped with that window's maxTimestamp().
 */
@Test
public void testFixedWindows() throws Exception {
    Coder<String> stringCoder = StringUtf8Coder.of();
    TupleTag<KV<String, Iterable<String>>> groupedTag = new TupleTag<>();
    ListOutputManager collected = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> groupRunner =
        makeRunner(
            groupedTag,
            collected,
            WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

    // Bundle 1: data only, with the input watermark reported as 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    groupRunner.startBundle();

    WorkItem.Builder dataWorkItem = WorkItem.newBuilder();
    dataWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    dataWorkItem.setWorkToken(WORK_TOKEN);

    InputMessageBundle.Builder elements = dataWorkItem.addMessageBundlesBuilder();
    elements.setSourceComputationId(SOURCE_COMPUTATION_ID);

    // Elements for window(0, 10) are added out of timestamp order on purpose (1, 2, 0).
    addElement(elements, Arrays.asList(window(0, 10)), new Instant(1), stringCoder, "v1");
    addElement(elements, Arrays.asList(window(0, 10)), new Instant(2), stringCoder, "v2");
    addElement(elements, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v0");
    addElement(elements, Arrays.asList(window(10, 20)), new Instant(13), stringCoder, "v3");

    groupRunner.processElement(createValue(dataWorkItem, stringCoder));
    groupRunner.finishBundle();

    // Bundle 2: timers only, with the input watermark reported as 20.
    groupRunner.startBundle();

    WorkItem.Builder timerWorkItem = WorkItem.newBuilder();
    timerWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    timerWorkItem.setWorkToken(WORK_TOKEN);
    addTimer(timerWorkItem, window(0, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(10, 20), new Instant(19), Timer.Type.WATERMARK);

    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
    groupRunner.processElement(createValue(timerWorkItem, stringCoder));
    groupRunner.finishBundle();

    // One grouped output per window is expected.
    List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(groupedTag);
    assertThat(emitted.size(), equalTo(2));
    assertThat(
        emitted,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                equalTo(window(0, 10).maxTimestamp()),
                equalTo(window(0, 10))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v3")),
                equalTo(window(10, 20).maxTimestamp()),
                equalTo(window(10, 20)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 7 with ListOutputManager

Use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in the Apache Beam project.

In class StreamingGroupAlsoByWindowFnsTest, method testSessions.

/**
 * Drives a runner configured with session windows (10 ms gap, EARLIEST timestamp combiner)
 * through two bundles.
 *
 * <p>The first bundle delivers four elements tagged with window(0, 10), window(5, 15),
 * window(15, 25) and window(3, 13) while the mocked input watermark is 0. The second bundle
 * delivers WATERMARK timers for window(0, 15) and window(15, 25) while the mocked input
 * watermark is 25. Two outputs are expected: "v0", "v1", "v2" in window(0, 15) at Instant(0),
 * and "v3" in window(15, 25) at Instant(15).
 */
@Test
public void testSessions() throws Exception {
    Coder<String> stringCoder = StringUtf8Coder.of();
    TupleTag<KV<String, Iterable<String>>> groupedTag = new TupleTag<>();
    ListOutputManager collected = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> sessionRunner =
        makeRunner(
            groupedTag,
            collected,
            WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10)))
                .withTimestampCombiner(TimestampCombiner.EARLIEST));

    // Bundle 1: data only, with the input watermark reported as 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    sessionRunner.startBundle();

    WorkItem.Builder dataWorkItem = WorkItem.newBuilder();
    dataWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    dataWorkItem.setWorkToken(WORK_TOKEN);

    InputMessageBundle.Builder elements = dataWorkItem.addMessageBundlesBuilder();
    elements.setSourceComputationId(SOURCE_COMPUTATION_ID);

    addElement(elements, Arrays.asList(window(0, 10)), new Instant(0), stringCoder, "v1");
    addElement(elements, Arrays.asList(window(5, 15)), new Instant(5), stringCoder, "v2");
    addElement(elements, Arrays.asList(window(15, 25)), new Instant(15), stringCoder, "v3");
    addElement(elements, Arrays.asList(window(3, 13)), new Instant(3), stringCoder, "v0");

    sessionRunner.processElement(createValue(dataWorkItem, stringCoder));
    sessionRunner.finishBundle();

    // Bundle 2: timers only, with the input watermark reported as 25.
    sessionRunner.startBundle();

    WorkItem.Builder timerWorkItem = WorkItem.newBuilder();
    timerWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    timerWorkItem.setWorkToken(WORK_TOKEN);
    // No timer is supplied for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timerWorkItem, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(15, 25), new Instant(24), Timer.Type.WATERMARK);

    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    sessionRunner.processElement(createValue(timerWorkItem, stringCoder));
    sessionRunner.finishBundle();

    // One grouped output per expected session window.
    List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(groupedTag);
    assertThat(emitted.size(), equalTo(2));
    assertThat(
        emitted,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                equalTo(new Instant(0)),
                equalTo(window(0, 15))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), containsInAnyOrder("v3")),
                equalTo(new Instant(15)),
                equalTo(window(15, 25)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 8 with ListOutputManager

Use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in the Apache Beam project.

In class StreamingGroupAlsoByWindowFnsTest, method testSessionsCombine.

/**
 * Drives a combining runner (SumLongs over session windows with a 10 ms gap) through two
 * bundles.
 *
 * <p>The first bundle delivers the values 1, 2, 3 and 4 tagged with window(0, 10),
 * window(5, 15), window(15, 25) and window(3, 13) while the mocked input watermark is 0. The
 * second bundle delivers WATERMARK timers for window(0, 15) and window(15, 25) while the mocked
 * input watermark is 25. Two outputs are expected: 7 in window(0, 15) and 3 in window(15, 25),
 * each stamped with its window's maxTimestamp().
 */
@Test
public void testSessionsCombine() throws Exception {
    TupleTag<KV<String, Long>> summedTag = new TupleTag<>();
    CombineFn<Long, ?, Long> combineFn = new SumLongs();
    AppliedCombineFn<String, Long, ?, Long> appliedCombineFn =
        AppliedCombineFn.withInputCoder(
            combineFn,
            CoderRegistry.createDefault(),
            KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()));
    ListOutputManager collected = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, Long>, KV<String, Long>> combineRunner =
        makeRunner(
            summedTag,
            collected,
            WindowingStrategy.of(Sessions.withGapDuration(Duration.millis(10))),
            appliedCombineFn);

    // Bundle 1: data only, with the input watermark reported as 0.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(0));
    combineRunner.startBundle();

    WorkItem.Builder dataWorkItem = WorkItem.newBuilder();
    dataWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    dataWorkItem.setWorkToken(WORK_TOKEN);

    InputMessageBundle.Builder elements = dataWorkItem.addMessageBundlesBuilder();
    elements.setSourceComputationId(SOURCE_COMPUTATION_ID);

    Coder<Long> longCoder = BigEndianLongCoder.of();
    addElement(elements, Arrays.asList(window(0, 10)), new Instant(0), longCoder, 1L);
    addElement(elements, Arrays.asList(window(5, 15)), new Instant(5), longCoder, 2L);
    addElement(elements, Arrays.asList(window(15, 25)), new Instant(15), longCoder, 3L);
    addElement(elements, Arrays.asList(window(3, 13)), new Instant(3), longCoder, 4L);

    combineRunner.processElement(createValue(dataWorkItem, longCoder));
    combineRunner.finishBundle();

    // Bundle 2: timers only, with the input watermark reported as 25.
    combineRunner.startBundle();

    WorkItem.Builder timerWorkItem = WorkItem.newBuilder();
    timerWorkItem.setKey(ByteString.copyFromUtf8(KEY));
    timerWorkItem.setWorkToken(WORK_TOKEN);
    // No timer is supplied for Instant(9): ReduceFnRunner will already have deleted that
    // WATERMARK timer when window(0, 10) was merged away.
    addTimer(timerWorkItem, window(0, 15), new Instant(14), Timer.Type.WATERMARK);
    addTimer(timerWorkItem, window(15, 25), new Instant(24), Timer.Type.WATERMARK);

    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(25));
    combineRunner.processElement(createValue(timerWorkItem, longCoder));
    combineRunner.finishBundle();

    // 1 + 2 + 4 = 7 for the merged session; 3 stands alone in window(15, 25).
    List<WindowedValue<KV<String, Long>>> emitted = collected.getOutput(summedTag);
    assertThat(emitted.size(), equalTo(2));
    assertThat(
        emitted,
        containsInAnyOrder(
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), equalTo(7L)),
                equalTo(window(0, 15).maxTimestamp()),
                equalTo(window(0, 15))),
            WindowMatchers.isSingleWindowedValue(
                isKv(equalTo(KEY), equalTo(3L)),
                equalTo(window(15, 25).maxTimestamp()),
                equalTo(window(15, 25)))));
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) CoderRegistry(org.apache.beam.sdk.coders.CoderRegistry) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 9 with ListOutputManager

Use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in the Apache Beam project.

In class StreamingKeyedWorkItemSideInputDoFnRunnerTest, method testStartBundle.

/**
 * Checks what startBundle() emits when the side-input fetcher reports window(10, 20) as ready.
 *
 * <p>The key state is pre-written as "a". The mocked fetcher hands back one element bag (data
 * created from 13 and 18) and one timer bag (a WATERMARK timer at Instant(19) for
 * window(10, 20)), and the mocked input watermark is 20. A single output with key "a" and
 * value 31 is expected on the main output tag.
 */
@Test
public void testStartBundle() throws Exception {
    ListOutputManager collected = new ListOutputManager();
    StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>
        sideInputRunner = createRunner(collected);

    sideInputRunner.keyValue().write("a");

    // The fetcher reports one ready window and returns the stored elements and timers for it.
    Set<IntervalWindow> unblocked = ImmutableSet.of(window(10, 20));
    when(sideInputFetcher.getReadyWindows()).thenReturn(unblocked);
    when(sideInputFetcher.prefetchElements(unblocked)).thenReturn(ImmutableList.of(elemsBag));
    when(sideInputFetcher.prefetchTimers(unblocked)).thenReturn(ImmutableList.of(timersBag));
    when(elemsBag.read())
        .thenReturn(ImmutableList.of(createDatum(13, 13L), createDatum(18, 18L)));
    when(timersBag.read())
        .thenReturn(
            ImmutableList.of(
                timerData(window(10, 20), new Instant(19), Timer.Type.WATERMARK)));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));

    sideInputRunner.startBundle();

    List<WindowedValue<KV<String, Integer>>> emitted = collected.getOutput(mainOutputTag);
    assertEquals(1, emitted.size());

    // 13 + 18 = 31 for key "a".
    WindowedValue<KV<String, Integer>> only = emitted.get(0);
    KV<String, Integer> keyedSum = only.getValue();
    assertEquals("a", keyedSum.getKey());
    assertEquals(31, keyedSum.getValue().intValue());
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 10 with ListOutputManager

Use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in the Apache Beam project.

In class StreamingKeyedWorkItemSideInputDoFnRunnerTest, method testInvokeProcessElement.

/**
 * Checks processElement() when one of three incoming elements is reported as blocked.
 *
 * <p>storeIfBlocked is stubbed to answer false, true, false and to throw on any further call.
 * An elements work item for key "a" (data created from 13, 16 and 18) is processed with the
 * mocked input watermark at 15, followed by a timers work item carrying a WATERMARK timer at
 * Instant(19) for window(10, 20) with the watermark at 20. A single output with key "a" and
 * value 31 is expected, and the runner's key state should read back "a".
 */
@Test
public void testInvokeProcessElement() throws Exception {
    when(sideInputFetcher.storeIfBlocked(Matchers.<WindowedValue<Integer>>any()))
        .thenReturn(false, true, false)
        .thenThrow(new RuntimeException("Does not expect more calls"));
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(15L));

    ListOutputManager collected = new ListOutputManager();
    StreamingKeyedWorkItemSideInputDoFnRunner<String, Integer, KV<String, Integer>, IntervalWindow>
        sideInputRunner = createRunner(collected);

    KeyedWorkItem<String, Integer> dataWorkItem =
        KeyedWorkItems.elementsWorkItem(
            "a",
            ImmutableList.of(
                createDatum(13, 13L),
                // Side inputs non-ready element (presumably the one answered with "true").
                createDatum(16, 16L),
                createDatum(18, 18L)));
    sideInputRunner.processElement(new ValueInEmptyWindows<>(dataWorkItem));

    // Advance the mocked watermark before delivering the timer.
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(20));
    KeyedWorkItem<String, Integer> timerWorkItem =
        KeyedWorkItems.<String, Integer>timersWorkItem(
            "a",
            ImmutableList.of(
                timerData(window(10, 20), new Instant(19), Timer.Type.WATERMARK)));
    sideInputRunner.processElement(new ValueInEmptyWindows<>(timerWorkItem));

    List<WindowedValue<KV<String, Integer>>> emitted = collected.getOutput(mainOutputTag);
    assertEquals(1, emitted.size());

    // 13 + 18 = 31; the element created from 16 does not contribute.
    WindowedValue<KV<String, Integer>> only = emitted.get(0);
    KV<String, Integer> keyedSum = only.getValue();
    assertEquals("a", keyedSum.getKey());
    assertEquals(31, keyedSum.getValue().intValue());
    assertEquals("a", sideInputRunner.keyValue().read());
}
Also used : Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager): 15; Test (org.junit.Test): 15; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 13; Instant (org.joda.time.Instant): 12; KV (org.apache.beam.sdk.values.KV): 10; KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem): 8; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 8; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 8; TupleTag (org.apache.beam.sdk.values.TupleTag): 8; InputMessageBundle (org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle): 6; WorkItem (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem): 6; Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill): 5; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 5; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 5; HashMap (java.util.HashMap): 4; Map (java.util.Map): 4; GlobalDataRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest): 4; Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable): 4; HashSet (java.util.HashSet): 3; Set (java.util.Set): 3