Search in sources :

Example 1 with GlobalDataRequest

use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testSideInputNotReady.

/**
 * Processes one element while the side-input fetch reports "not ready" and checks that nothing is
 * emitted: the element is buffered, its window is recorded in the blocked map together with the
 * expected global data request, and a watermark hold is set.
 */
@Test
public void testSideInputNotReady() throws Exception {
    PCollectionView<String> view = createView();
    // No notifications arrive, and the fetch for this view answers "not ready".
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))).thenReturn(false);
    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> views = Arrays.asList(view);
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner = createRunner(outputManager, views, sideInputFetcher);
    runner.startBundle();
    runner.processElement(createDatum("e", 0));
    runner.finishBundle();
    assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
    // Verify that we added the element to an appropriate tag list, and that we buffered the element
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(StateNamespaces.global(), StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    Windmill.GlobalDataId expectedId = Windmill.GlobalDataId.newBuilder()
            .setTag(view.getTagInternal().getId())
            .setVersion(ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window)))
            .build();
    Windmill.GlobalDataRequest expectedRequest = Windmill.GlobalDataRequest.newBuilder()
            .setDataId(expectedId)
            .setExistenceWatermarkDeadline(9000)
            .build();
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages
    // label the two values correctly.
    assertEquals(Collections.singletonMap(window, Collections.singleton(expectedRequest)), blockedMapState.read());
    assertThat(sideInputFetcher.elementBag(createWindow(0)).read(), Matchers.contains(createDatum("e", 0)));
    assertEquals(new Instant(0), sideInputFetcher.watermarkHold(createWindow(0)).read());
}
Also used : Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 2 with GlobalDataRequest

use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testMultipleWindowsNotReady.

/**
 * Processes one element that belongs to two windows while the side-input fetch reports "not
 * ready", and checks that the element is buffered separately per window, that each window is
 * recorded in the blocked map, and that a watermark hold is set for each window.
 */
@Test
public void testMultipleWindowsNotReady() throws Exception {
    PCollectionView<String> view = createView();
    // No notifications arrive, and the fetch for this view answers "not ready".
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN))).thenReturn(false);
    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> views = Arrays.asList(view);
    StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner = createRunner(SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)), outputManager, views, sideInputFetcher);
    IntervalWindow window1 = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow window2 = new IntervalWindow(new Instant(-5), new Instant(5));
    long timestamp = 1L;
    WindowedValue<String> elem = WindowedValue.of("e", new Instant(timestamp), Arrays.asList(window1, window2), PaneInfo.NO_FIRING);
    runner.startBundle();
    runner.processElement(elem);
    runner.finishBundle();
    assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
    // Verify that we added the element to an appropriate tag list, and that we buffered the element
    // in both windows separately
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(StateNamespaces.global(), StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    Map<IntervalWindow, Set<GlobalDataRequest>> blockedMap = blockedMapState.read();
    // Both main-input windows are checked against a request versioned with window1's encoding.
    // NOTE(review): presumably both map to the same side-input window - confirm against the
    // window mapping of the view before changing this to window2.
    Set<GlobalDataRequest> expectedRequests = Collections.singleton(Windmill.GlobalDataRequest.newBuilder()
            .setDataId(Windmill.GlobalDataId.newBuilder()
                    .setTag(view.getTagInternal().getId())
                    .setVersion(ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window1)))
                    .build())
            .setExistenceWatermarkDeadline(9000)
            .build());
    assertThat(blockedMap.get(window1), equalTo(expectedRequests));
    assertThat(blockedMap.get(window2), equalTo(expectedRequests));
    assertThat(sideInputFetcher.elementBag(window1).read(), contains(Iterables.get(elem.explodeWindows(), 0)));
    assertThat(sideInputFetcher.elementBag(window2).read(), contains(Iterables.get(elem.explodeWindows(), 1)));
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages
    // label the two values correctly.
    assertEquals(new Instant(timestamp), sideInputFetcher.watermarkHold(window1).read());
    assertEquals(new Instant(timestamp), sideInputFetcher.watermarkHold(window2).read());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 3 with GlobalDataRequest

use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testMultipleSideInputs.

/**
 * Seeds the blocked map, element bag and watermark hold for one window, then runs a bundle in
 * which a notification for that window arrives and both side inputs can be read. Expects the
 * buffered element and the newly processed element to be emitted with both side-input values,
 * and the blocked map, watermark hold and element bag to be cleared.
 */
@Test
public void testMultipleSideInputs() throws Exception {
    PCollectionView<String> view1 = createView();
    PCollectionView<String> view2 = createView();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));

    // Global data id for view1, versioned by the encoded window.
    ByteString encodedWindow = ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window));
    Windmill.GlobalDataId notifiedId = Windmill.GlobalDataId.newBuilder()
            .setTag(view1.getTagInternal().getId())
            .setVersion(encodedWindow)
            .build();

    // Pre-populate the blocked map so that the window is waiting on that id.
    Windmill.GlobalDataRequest pendingRequest = Windmill.GlobalDataRequest.newBuilder().setDataId(notifiedId).build();
    Set<Windmill.GlobalDataRequest> pendingRequests = new HashSet<>();
    pendingRequests.add(pendingRequest);
    Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> seededBlockedMap = new HashMap<>();
    seededBlockedMap.put(window, pendingRequests);
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(
            StateNamespaces.global(),
            StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    blockedMapState.write(seededBlockedMap);

    // The step context delivers the notification and reports every fetch as successful.
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(notifiedId));
    when(stepContext.issueSideInputFetch(any(PCollectionView.class), any(BoundedWindow.class), any(SideInputState.class)))
            .thenReturn(true);
    when(execContext.getSideInputReaderForViews(Mockito.<Iterable<? extends PCollectionView<?>>>any()))
            .thenReturn(mockSideInputReader);

    // The mocked reader holds a value for each view.
    when(mockSideInputReader.contains(eq(view1))).thenReturn(true);
    when(mockSideInputReader.contains(eq(view2))).thenReturn(true);
    when(mockSideInputReader.get(eq(view1), any(BoundedWindow.class))).thenReturn("data1");
    when(mockSideInputReader.get(eq(view2), any(BoundedWindow.class))).thenReturn("data2");

    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> sideInputViews = Arrays.asList(view1, view2);
    StreamingSideInputFetcher<String, IntervalWindow> fetcher = createFetcher(sideInputViews);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
            createRunner(outputManager, sideInputViews, fetcher);

    // One element is already buffered, with a watermark hold, from an earlier bundle.
    fetcher.watermarkHold(createWindow(0)).add(new Instant(0));
    fetcher.elementBag(createWindow(0)).add(createDatum("e1", 0));

    runner.startBundle();
    runner.processElement(createDatum("e2", 2));
    runner.finishBundle();

    assertThat(
            outputManager.getOutput(mainOutputTag),
            contains(createDatum("e1:data1:data2", 0), createDatum("e2:data1:data2", 2)));
    assertThat(blockedMapState.read(), Matchers.nullValue());
    assertThat(fetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
    assertThat(fetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Instant(org.joda.time.Instant) SideInputState(org.apache.beam.runners.dataflow.worker.StateFetcher.SideInputState) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with GlobalDataRequest

use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.

the class StreamingSideInputDoFnRunnerTest method testSideInputNotification.

/**
 * Seeds the blocked map, element bag and watermark hold for one window, then runs an empty
 * bundle in which a notification for that window arrives. Expects the buffered element to be
 * emitted with the side-input value, and the blocked map, watermark hold and element bag to be
 * cleared.
 */
@Test
public void testSideInputNotification() throws Exception {
    PCollectionView<String> view = createView();
    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));

    // Global data id for the view, versioned by the encoded window.
    ByteString encodedWindow = ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window));
    Windmill.GlobalDataId notifiedId = Windmill.GlobalDataId.newBuilder()
            .setTag(view.getTagInternal().getId())
            .setVersion(encodedWindow)
            .build();

    // Pre-populate the blocked map so that the window is waiting on that id.
    Windmill.GlobalDataRequest pendingRequest = Windmill.GlobalDataRequest.newBuilder().setDataId(notifiedId).build();
    Set<Windmill.GlobalDataRequest> pendingRequests = new HashSet<>();
    pendingRequests.add(pendingRequest);
    Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> seededBlockedMap = new HashMap<>();
    seededBlockedMap.put(window, pendingRequests);
    ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState = state.state(
            StateNamespaces.global(),
            StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
    blockedMapState.write(seededBlockedMap);

    ListOutputManager outputManager = new ListOutputManager();
    List<PCollectionView<String>> sideInputViews = Arrays.asList(view);
    StreamingSideInputFetcher<String, IntervalWindow> fetcher = createFetcher(sideInputViews);
    StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
            createRunner(outputManager, sideInputViews, fetcher);

    // One element is already buffered, with a watermark hold, from an earlier bundle.
    fetcher.watermarkHold(createWindow(0)).add(new Instant(0));
    fetcher.elementBag(createWindow(0)).add(createDatum("e", 0));

    // The notification arrives; a fetch in state UNKNOWN fails, one in state KNOWN_READY succeeds.
    when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(notifiedId));
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
            .thenReturn(false);
    when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.KNOWN_READY)))
            .thenReturn(true);
    when(execContext.getSideInputReaderForViews(Mockito.<Iterable<? extends PCollectionView<?>>>any()))
            .thenReturn(mockSideInputReader);
    when(mockSideInputReader.contains(eq(view))).thenReturn(true);
    when(mockSideInputReader.get(eq(view), any(BoundedWindow.class))).thenReturn("data");

    // No new elements are processed: the bundle only reacts to the notification.
    runner.startBundle();
    runner.finishBundle();

    assertThat(outputManager.getOutput(mainOutputTag), contains(createDatum("e:data", 0)));
    assertThat(blockedMapState.read(), Matchers.nullValue());
    assertThat(fetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
    assertThat(fetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) HashMap(java.util.HashMap) Instant(org.joda.time.Instant) ListOutputManager(org.apache.beam.runners.dataflow.worker.util.ListOutputManager) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionView(org.apache.beam.sdk.values.PCollectionView) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) Windmill(org.apache.beam.runners.dataflow.worker.windmill.Windmill) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Map(java.util.Map) HashMap(java.util.HashMap) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with GlobalDataRequest

use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.

the class GrpcWindmillServerTest method testStreamingGetData.

/**
 * Registers a fake service whose GetData stream mirrors each request back as a response (sent
 * either batched or split into 10-byte chunks), then issues 100 keyed and 100 global data
 * requests concurrently and checks every response against the expected value.
 */
@Test
@SuppressWarnings("FutureReturnValueIgnored")
public void testStreamingGetData() throws Exception {
    // This server responds to GetDataRequests with responses that mirror the requests.
    serviceRegistry.addService(new CloudWindmillServiceV1Alpha1ImplBase() {

        @Override
        public StreamObserver<StreamingGetDataRequest> getDataStream(StreamObserver<StreamingGetDataResponse> responseObserver) {
            return new StreamObserver<StreamingGetDataRequest>() {

                // The first message on a stream is expected to be the job header.
                boolean sawHeader = false;

                // Request ids seen so far; each id must appear only once.
                HashSet<Long> seenIds = new HashSet<>();

                ResponseErrorInjector injector = new ResponseErrorInjector(responseObserver);

                // Accumulates responses that are sent together as one batch.
                StreamingGetDataResponse.Builder responseBuilder = StreamingGetDataResponse.newBuilder();

                @Override
                public void onNext(StreamingGetDataRequest chunk) {
                    maybeInjectError(responseObserver);
                    try {
                        if (!sawHeader) {
                            LOG.info("Received header");
                            errorCollector.checkThat(chunk.getHeader(), Matchers.equalTo(JobHeader.newBuilder().setJobId("job").setProjectId("project").setWorkerId("worker").build()));
                            sawHeader = true;
                        } else {
                            LOG.info("Received get data of {} global data, {} data requests", chunk.getGlobalDataRequestCount(), chunk.getStateRequestCount());
                            errorCollector.checkThat(chunk.getSerializedSize(), Matchers.lessThanOrEqualTo(STREAM_CHUNK_SIZE));
                            // Request ids are consumed in order: global data requests first,
                            // then state requests.
                            int i = 0;
                            for (GlobalDataRequest request : chunk.getGlobalDataRequestList()) {
                                long requestId = chunk.getRequestId(i++);
                                errorCollector.checkThat(seenIds.add(requestId), Matchers.is(true));
                                sendResponse(requestId, processGlobalDataRequest(request));
                            }
                            for (ComputationGetDataRequest request : chunk.getStateRequestList()) {
                                long requestId = chunk.getRequestId(i++);
                                errorCollector.checkThat(seenIds.add(requestId), Matchers.is(true));
                                sendResponse(requestId, processStateRequest(request));
                            }
                            flushResponse();
                        }
                    } catch (Exception e) {
                        errorCollector.addError(e);
                    }
                }

                @Override
                public void onError(Throwable throwable) {
                }

                @Override
                public void onCompleted() {
                    injector.cancel();
                    responseObserver.onCompleted();
                }

                // Builds the mirrored response for a global data request: same data id, with
                // the id's tag as the payload.
                private ByteString processGlobalDataRequest(GlobalDataRequest request) {
                    errorCollector.checkThat(request.getStateFamily(), Matchers.is("family"));
                    return GlobalData.newBuilder().setDataId(request.getDataId()).setStateFamily("family").setData(ByteString.copyFromUtf8(request.getDataId().getTag())).build().toByteString();
                }

                // Builds the mirrored response for a state request from the tag of its first
                // value to fetch.
                private ByteString processStateRequest(ComputationGetDataRequest compRequest) {
                    errorCollector.checkThat(compRequest.getRequestsCount(), Matchers.is(1));
                    errorCollector.checkThat(compRequest.getComputationId(), Matchers.is("computation"));
                    KeyedGetDataRequest request = compRequest.getRequests(0);
                    KeyedGetDataResponse response = makeGetDataResponse(request.getValuesToFetch(0).getTag().toStringUtf8());
                    return response.toByteString();
                }

                // Sends roughly one response in four as separate chunks; the rest are batched
                // and flushed once more than 10 have accumulated.
                private void sendResponse(long id, ByteString serializedResponse) {
                    if (ThreadLocalRandom.current().nextInt(4) == 0) {
                        sendChunkedResponse(id, serializedResponse);
                    } else {
                        responseBuilder.addRequestId(id).addSerializedResponse(serializedResponse);
                        if (responseBuilder.getRequestIdCount() > 10) {
                            flushResponse();
                        }
                    }
                }

                // Sends one response as a sequence of messages of at most 10 bytes each.
                private void sendChunkedResponse(long id, ByteString serializedResponse) {
                    // Ceiling division, so the logged count equals the number of loop iterations
                    // even when the size is an exact multiple of 10.
                    LOG.info("Sending response with {} chunks", (serializedResponse.size() + 9) / 10);
                    for (int i = 0; i < serializedResponse.size(); i += 10) {
                        int end = Math.min(serializedResponse.size(), i + 10);
                        try {
                            responseObserver.onNext(StreamingGetDataResponse.newBuilder().addRequestId(id).addSerializedResponse(serializedResponse.substring(i, end)).setRemainingBytesForResponse(serializedResponse.size() - end).build());
                        } catch (IllegalStateException e) {
                        // Stream is already closed.
                        }
                    }
                }

                // Sends the accumulated batch, if any, and resets the builder.
                private void flushResponse() {
                    if (responseBuilder.getRequestIdCount() > 0) {
                        LOG.info("Sending batched response of {} ids", responseBuilder.getRequestIdCount());
                        try {
                            responseObserver.onNext(responseBuilder.build());
                        } catch (IllegalStateException e) {
                        // Stream is already closed.
                        }
                        responseBuilder.clear();
                    }
                }
            };
        }
    });
    GetDataStream stream = client.getDataStream();
    // Make requests of varying sizes to test chunking, and verify the responses.
    ExecutorService executor = Executors.newFixedThreadPool(50);
    try {
        final CountDownLatch done = new CountDownLatch(200);
        for (int i = 0; i < 100; ++i) {
            final String key = "key" + i;
            final String s = i % 5 == 0 ? largeString(i) : "tag";
            executor.execute(() -> {
                try {
                    errorCollector.checkThat(stream.requestKeyedData("computation", makeGetDataRequest(key, s)), Matchers.equalTo(makeGetDataResponse(s)));
                } catch (RuntimeException e) {
                    // Record the failure rather than losing it on the worker thread.
                    errorCollector.addError(e);
                } finally {
                    // Always count down, so a failed request cannot leave done.await() blocked.
                    done.countDown();
                }
            });
            executor.execute(() -> {
                try {
                    errorCollector.checkThat(stream.requestGlobalData(makeGlobalDataRequest(key)), Matchers.equalTo(makeGlobalDataResponse(key)));
                } catch (RuntimeException e) {
                    // Record the failure rather than losing it on the worker thread.
                    errorCollector.addError(e);
                } finally {
                    // Always count down, so a failed request cannot leave done.await() blocked.
                    done.countDown();
                }
            });
        }
        done.await();
        stream.close();
        assertTrue(stream.awaitTermination(60, TimeUnit.SECONDS));
    } finally {
        // Release the pool threads even when an assertion above fails.
        executor.shutdown();
    }
}
Also used : StreamingGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.StreamingGetDataResponse) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) GetDataStream(org.apache.beam.runners.dataflow.worker.windmill.WindmillServerStub.GetDataStream) HashSet(java.util.HashSet) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) CloudWindmillServiceV1Alpha1ImplBase(org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1ImplBase) StreamingGetDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.StreamingGetDataRequest) KeyedGetDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataRequest) CountDownLatch(java.util.concurrent.CountDownLatch) StatusRuntimeException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusRuntimeException) KeyedGetDataResponse(org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse) GlobalDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest) ExecutorService(java.util.concurrent.ExecutorService) ComputationGetDataRequest(org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationGetDataRequest) Test(org.junit.Test)

Aggregations

GlobalDataRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest)5 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)5 Test (org.junit.Test)5 HashMap (java.util.HashMap)4 HashSet (java.util.HashSet)4 Map (java.util.Map)4 ListOutputManager (org.apache.beam.runners.dataflow.worker.util.ListOutputManager)4 Windmill (org.apache.beam.runners.dataflow.worker.windmill.Windmill)4 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)4 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)4 PCollectionView (org.apache.beam.sdk.values.PCollectionView)4 Instant (org.joda.time.Instant)4 Set (java.util.Set)3 CountDownLatch (java.util.concurrent.CountDownLatch)1 ExecutorService (java.util.concurrent.ExecutorService)1 SideInputState (org.apache.beam.runners.dataflow.worker.StateFetcher.SideInputState)1 CloudWindmillServiceV1Alpha1ImplBase (org.apache.beam.runners.dataflow.worker.windmill.CloudWindmillServiceV1Alpha1Grpc.CloudWindmillServiceV1Alpha1ImplBase)1 ComputationGetDataRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.ComputationGetDataRequest)1 KeyedGetDataRequest (org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataRequest)1 KeyedGetDataResponse (org.apache.beam.runners.dataflow.worker.windmill.Windmill.KeyedGetDataResponse)1