Search in sources :

Example 11 with ListOutputManager

use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testEmpty.

/**
 * Opens and closes a bundle without processing any element and verifies that nothing was
 * collected for the output tag. The runner is built with 10 ms fixed windows.
 */
@Test
public void testEmpty() throws Exception {
    TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
    ListOutputManager collected = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
            makeRunner(
                    mainTag,
                    collected,
                    WindowingStrategy.of(FixedWindows.of(Duration.millis(10))));

    // An empty bundle: started and finished with no processElement call in between.
    doFnRunner.startBundle();
    doFnRunner.finishBundle();

    List<?> emitted = collected.getOutput(mainTag);
    assertThat(emitted.size(), equalTo(0));
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) Test(org.junit.Test)

Example 12 with ListOutputManager

use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in project beam by apache.

the class StreamingGroupAlsoByWindowFnsTest method testSlidingWindows.

/**
 * Exercises the runner with 20 ms windows sliding every 10 ms and the EARLIEST timestamp
 * combiner.
 *
 * <p>The first bundle delivers three elements ("v1" at 5, "v0" at 2, "v2" at 15), each tagged
 * with the two windows it belongs to. The second bundle delivers one watermark timer per window.
 * The test then expects exactly three outputs, one per window, each carrying the values of that
 * window and the expected output timestamp.
 */
@Test
public void testSlidingWindows() throws Exception {
    TupleTag<KV<String, Iterable<String>>> mainTag = new TupleTag<>();
    ListOutputManager collected = new ListOutputManager();
    DoFnRunner<KeyedWorkItem<String, String>, KV<String, Iterable<String>>> doFnRunner =
            makeRunner(
                    mainTag,
                    collected,
                    WindowingStrategy.of(SlidingWindows.of(Duration.millis(20)).every(Duration.millis(10)))
                            .withTimestampCombiner(TimestampCombiner.EARLIEST));
    Coder<String> stringCoder = StringUtf8Coder.of();

    // --- Bundle 1: elements arrive while the mocked input watermark reads 5. ---
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(5));
    doFnRunner.startBundle();
    WorkItem.Builder elementsItem =
            WorkItem.newBuilder()
                    .setKey(ByteString.copyFromUtf8(KEY))
                    .setWorkToken(WORK_TOKEN);
    InputMessageBundle.Builder bundle =
            elementsItem.addMessageBundlesBuilder().setSourceComputationId(SOURCE_COMPUTATION_ID);
    addElement(
            bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(5), stringCoder, "v1");
    addElement(
            bundle, Arrays.asList(window(-10, 10), window(0, 20)), new Instant(2), stringCoder, "v0");
    addElement(
            bundle, Arrays.asList(window(0, 20), window(10, 30)), new Instant(15), stringCoder, "v2");
    doFnRunner.processElement(createValue(elementsItem, stringCoder));
    doFnRunner.finishBundle();

    // --- Bundle 2: one watermark timer per window, with the mocked watermark moved to 30. ---
    doFnRunner.startBundle();
    WorkItem.Builder timersItem =
            WorkItem.newBuilder()
                    .setKey(ByteString.copyFromUtf8(KEY))
                    .setWorkToken(WORK_TOKEN);
    addTimer(timersItem, window(-10, 10), new Instant(9), Timer.Type.WATERMARK);
    addTimer(timersItem, window(0, 20), new Instant(19), Timer.Type.WATERMARK);
    addTimer(timersItem, window(10, 30), new Instant(29), Timer.Type.WATERMARK);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(new Instant(30));
    doFnRunner.processElement(createValue(timersItem, stringCoder));
    doFnRunner.finishBundle();

    // --- Verification: one grouped output per window. ---
    List<WindowedValue<KV<String, Iterable<String>>>> emitted = collected.getOutput(mainTag);
    assertThat(emitted.size(), equalTo(3));
    assertThat(
            emitted,
            containsInAnyOrder(
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v0", "v1")),
                            equalTo(new Instant(2)),
                            equalTo(window(-10, 10))),
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v0", "v1", "v2")),
                            equalTo(new Instant(2)),
                            equalTo(window(0, 20))),
                    WindowMatchers.isSingleWindowedValue(
                            isKv(equalTo(KEY), containsInAnyOrder("v2")),
                            equalTo(new Instant(15)),
                            equalTo(window(10, 30)))));
}
Also used : Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) KV(org.apache.beam.sdk.values.KV) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) WorkItem(org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem) WindowedValue(org.apache.beam.sdk.util.WindowedValue) InputMessageBundle(org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle) Test(org.junit.Test)

Example 13 with ListOutputManager

use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testMultipleSideInputs.

/**
 * Runs the side-input runner with two views whose mocked reader serves "data1" and "data2".
 *
 * <p>Before the bundle, state is seeded with a blocked-map entry for window [0, 10), a watermark
 * hold and one buffered element "e1". During the bundle a second element "e2" is processed. The
 * test expects both elements to be emitted with both side-input values appended, and the
 * blocked map, watermark hold and element bag to be cleared afterwards.
 */
@Test
public void testMultipleSideInputs() throws Exception {
    PCollectionView<String> firstView = createView();
    PCollectionView<String> secondView = createView();
    IntervalWindow blockedWindow = new IntervalWindow(new Instant(0), new Instant(10));

    // Identify the side input by the first view's tag and the encoded window.
    ByteString encodedWindow =
            ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), blockedWindow));
    Windmill.GlobalDataId dataId =
            Windmill.GlobalDataId.newBuilder()
                    .setTag(firstView.getTagInternal().getId())
                    .setVersion(encodedWindow)
                    .build();

    // Record the window as blocked on that side input in persisted state.
    Set<Windmill.GlobalDataRequest> pendingRequests = new HashSet<>();
    pendingRequests.add(Windmill.GlobalDataRequest.newBuilder().setDataId(dataId).build());
    Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> blockedWindows = new HashMap<>();
    blockedWindows.put(blockedWindow, pendingRequests);
    ValueState<Map<IntervalWindow, Set<Windmill.GlobalDataRequest>>> blockedState =
            state.state(
                    StateNamespaces.global(),
                    StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    blockedState.write(blockedWindows);

    // Stub the step context: a notification for the side input, and every fetch reports true.
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(dataId));
    when(stepContext.issueSideInputFetch(
                    any(PCollectionView.class), any(BoundedWindow.class), any(SideInputState.class)))
            .thenReturn(true);

    // Stub the side-input reader so both views are present and return fixed values.
    when(execContext.getSideInputReaderForViews(
                    Mockito.<Iterable<? extends PCollectionView<?>>>any()))
            .thenReturn(mockSideInputReader);
    when(mockSideInputReader.contains(eq(firstView))).thenReturn(true);
    when(mockSideInputReader.contains(eq(secondView))).thenReturn(true);
    when(mockSideInputReader.get(eq(firstView), any(BoundedWindow.class))).thenReturn("data1");
    when(mockSideInputReader.get(eq(secondView), any(BoundedWindow.class))).thenReturn("data2");

    ListOutputManager collected = new ListOutputManager();
    List<PCollectionView<String>> sideInputViews = Arrays.asList(firstView, secondView);
    StreamingSideInputFetcher<String, IntervalWindow> fetcher = createFetcher(sideInputViews);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> doFnRunner =
            createRunner(collected, sideInputViews, fetcher);

    // Buffer "e1" (with a watermark hold) as if it had been blocked earlier.
    fetcher.watermarkHold(createWindow(0)).add(new Instant(0));
    fetcher.elementBag(createWindow(0)).add(createDatum("e1", 0));

    doFnRunner.startBundle();
    doFnRunner.processElement(createDatum("e2", 2));
    doFnRunner.finishBundle();

    assertThat(
            collected.getOutput(mainOutputTag),
            contains(createDatum("e1:data1:data2", 0), createDatum("e2:data1:data2", 2)));
    // All bookkeeping state is expected to be gone once the bundle has finished.
    assertThat(blockedState.read(), Matchers.nullValue());
    assertThat(fetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
    assertThat(fetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Instant(org.joda.time.Instant) SideInputState(org.apache.beam.runners.dataflow.worker.StateFetcher.SideInputState) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) Test(org.junit.Test)

Example 14 with ListOutputManager

use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testSideInputReady.

/**
 * Processes a single element when the side input is available: there are no notifications, the
 * fetch stubbed for {@code SideInputState.UNKNOWN} reports true, and the mocked reader returns
 * "data". The element is expected to be emitted as "e:data".
 */
@Test
public void testSideInputReady() throws Exception {
    PCollectionView<String> sideInputView = createView();

    // Step context: no pending notifications; the UNKNOWN-state fetch is stubbed to return true.
    when(stepContext.getSideInputNotifications())
            .thenReturn(Arrays.<Windmill.GlobalDataId>asList());
    when(stepContext.issueSideInputFetch(
                    eq(sideInputView), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
            .thenReturn(true);

    // Side-input reader: the view is present and yields "data" for any window.
    when(execContext.getSideInputReaderForViews(
                    Mockito.<Iterable<? extends PCollectionView<?>>>any()))
            .thenReturn(mockSideInputReader);
    when(mockSideInputReader.contains(eq(sideInputView))).thenReturn(true);
    when(mockSideInputReader.get(eq(sideInputView), any(BoundedWindow.class)))
            .thenReturn("data");

    ListOutputManager collected = new ListOutputManager();
    List<PCollectionView<String>> sideInputViews = Arrays.asList(sideInputView);
    StreamingSideInputFetcher<String, IntervalWindow> fetcher = createFetcher(sideInputViews);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> doFnRunner =
            createRunner(collected, sideInputViews, fetcher);

    doFnRunner.startBundle();
    doFnRunner.processElement(createDatum("e", 0));
    doFnRunner.finishBundle();

    assertThat(collected.getOutput(mainOutputTag), contains(createDatum("e:data", 0)));
}
Also used : PCollectionView(org.apache.beam.sdk.values.PCollectionView) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 15 with ListOutputManager

use of org.apache.beam.runners.dataflow.worker.util.ListOutputManager in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testSideInputNotification.

/**
 * Verifies that a buffered element is emitted when a side-input notification arrives.
 *
 * <p>State is seeded with a blocked-map entry for window [0, 10), a watermark hold and one
 * buffered element "e". The step context then reports a notification for that side input; the
 * fetch is stubbed to return false for {@code SideInputState.UNKNOWN} and true for
 * {@code SideInputState.KNOWN_READY}. No element is processed in the bundle itself. The test
 * expects "e:data" to be emitted and the blocked map, watermark hold and element bag to be
 * cleared.
 */
@Test
public void testSideInputNotification() throws Exception {
    PCollectionView<String> sideInputView = createView();
    IntervalWindow blockedWindow = new IntervalWindow(new Instant(0), new Instant(10));

    // Identify the side input by the view's tag and the encoded window.
    ByteString encodedWindow =
            ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), blockedWindow));
    Windmill.GlobalDataId dataId =
            Windmill.GlobalDataId.newBuilder()
                    .setTag(sideInputView.getTagInternal().getId())
                    .setVersion(encodedWindow)
                    .build();

    // Record the window as blocked on that side input in persisted state.
    Set<Windmill.GlobalDataRequest> pendingRequests = new HashSet<>();
    pendingRequests.add(Windmill.GlobalDataRequest.newBuilder().setDataId(dataId).build());
    Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> blockedWindows = new HashMap<>();
    blockedWindows.put(blockedWindow, pendingRequests);
    ValueState<Map<IntervalWindow, Set<Windmill.GlobalDataRequest>>> blockedState =
            state.state(
                    StateNamespaces.global(),
                    StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    blockedState.write(blockedWindows);

    ListOutputManager collected = new ListOutputManager();
    List<PCollectionView<String>> sideInputViews = Arrays.asList(sideInputView);
    StreamingSideInputFetcher<String, IntervalWindow> fetcher = createFetcher(sideInputViews);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> doFnRunner =
            createRunner(collected, sideInputViews, fetcher);

    // Buffer "e" (with a watermark hold) as if it had been blocked earlier.
    fetcher.watermarkHold(createWindow(0)).add(new Instant(0));
    fetcher.elementBag(createWindow(0)).add(createDatum("e", 0));

    // Step context: the notification is delivered; only the KNOWN_READY fetch reports true.
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(dataId));
    when(stepContext.issueSideInputFetch(
                    eq(sideInputView), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
            .thenReturn(false);
    when(stepContext.issueSideInputFetch(
                    eq(sideInputView), any(BoundedWindow.class), eq(SideInputState.KNOWN_READY)))
            .thenReturn(true);

    // Side-input reader: the view is present and yields "data" for any window.
    when(execContext.getSideInputReaderForViews(
                    Mockito.<Iterable<? extends PCollectionView<?>>>any()))
            .thenReturn(mockSideInputReader);
    when(mockSideInputReader.contains(eq(sideInputView))).thenReturn(true);
    when(mockSideInputReader.get(eq(sideInputView), any(BoundedWindow.class)))
            .thenReturn("data");

    // The bundle carries no new element; output can only come from the buffered one.
    doFnRunner.startBundle();
    doFnRunner.finishBundle();

    assertThat(collected.getOutput(mainOutputTag), contains(createDatum("e:data", 0)));
    // All bookkeeping state is expected to be gone once the bundle has finished.
    assertThat(blockedState.read(), Matchers.nullValue());
    assertThat(fetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
    assertThat(fetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) Test(org.junit.Test)

Aggregations

ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager)15 Test (org.junit.Test)15 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)13 Instant (org.joda.time.Instant)12 KV (org.apache.beam.sdk.values.KV)10 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)8 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)8 TupleTag (org.apache.beam.sdk.values.TupleTag)8 InputMessageBundle (org.apache.beam.runners.dataflow.worker.windmill.Windmill.InputMessageBundle)6 WorkItem (org.apache.beam.runners.dataflow.worker.windmill.Windmill.WorkItem)6 Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill)5 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)5 PCollectionView (org.apache.beam.sdk.values.PCollectionView)5 HashMap (java.util.HashMap)4 Map (java.util.Map)4 GlobalDataRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest)4 Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable)4 HashSet (java.util.HashSet)3 Set (java.util.Set)3