Use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.
From the class StreamingSideInputDoFnRunnerTest, method testSideInputNotReady:
@Test
public void testSideInputNotReady() throws Exception {
  PCollectionView<String> view = createView();
  when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
  when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
      .thenReturn(false);
  ListOutputManager outputManager = new ListOutputManager();
  List<PCollectionView<String>> views = Arrays.asList(view);
  StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
  StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
      createRunner(outputManager, views, sideInputFetcher);
  runner.startBundle();
  runner.processElement(createDatum("e", 0));
  runner.finishBundle();
  assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
  IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
  // Verify that we added the element to an appropriate tag list, and that we buffered the element
  ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState =
      state.state(
          StateNamespaces.global(),
          StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
  assertEquals(
      blockedMapState.read(),
      Collections.singletonMap(
          window,
          Collections.singleton(
              Windmill.GlobalDataRequest.newBuilder()
                  .setDataId(
                      Windmill.GlobalDataId.newBuilder()
                          .setTag(view.getTagInternal().getId())
                          .setVersion(ByteString.copyFrom(
                              CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window)))
                          .build())
                  .setExistenceWatermarkDeadline(9000)
                  .build())));
  assertThat(sideInputFetcher.elementBag(createWindow(0)).read(), Matchers.contains(createDatum("e", 0)));
  assertEquals(sideInputFetcher.watermarkHold(createWindow(0)).read(), new Instant(0));
}
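For reference, the GlobalDataRequest expected in the blocked map above is assembled with the generated Windmill protobuf builders. A minimal sketch of that construction, using an illustrative tag string in place of the view's internal tag; the enclosing method would need to declare CoderException (the test simply declares throws Exception):

    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
    Windmill.GlobalDataId dataId =
        Windmill.GlobalDataId.newBuilder()
            .setTag("someSideInputTag") // illustrative; the test uses view.getTagInternal().getId()
            .setVersion(ByteString.copyFrom(
                CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window)))
            .build();
    Windmill.GlobalDataRequest request =
        Windmill.GlobalDataRequest.newBuilder()
            .setDataId(dataId)
            .setExistenceWatermarkDeadline(9000) // mirrors the test's expected deadline
            .build();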
Use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.
From the class StreamingSideInputDoFnRunnerTest, method testMultipleWindowsNotReady:
@Test
public void testMultipleWindowsNotReady() throws Exception {
  PCollectionView<String> view = createView();
  when(stepContext.getSideInputNotifications()).thenReturn(Arrays.<Windmill.GlobalDataId>asList());
  when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
      .thenReturn(false);
  ListOutputManager outputManager = new ListOutputManager();
  List<PCollectionView<String>> views = Arrays.asList(view);
  StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
  StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
      createRunner(
          SlidingWindows.of(Duration.millis(10)).every(Duration.millis(10)),
          outputManager,
          views,
          sideInputFetcher);
  IntervalWindow window1 = new IntervalWindow(new Instant(0), new Instant(10));
  IntervalWindow window2 = new IntervalWindow(new Instant(-5), new Instant(5));
  long timestamp = 1L;
  WindowedValue<String> elem =
      WindowedValue.of("e", new Instant(timestamp), Arrays.asList(window1, window2), PaneInfo.NO_FIRING);
  runner.startBundle();
  runner.processElement(elem);
  runner.finishBundle();
  assertTrue(outputManager.getOutput(mainOutputTag).isEmpty());
  // Verify that we added the element to an appropriate tag list, and that we buffered the element
  // in both windows separately
  ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState =
      state.state(
          StateNamespaces.global(),
          StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
  Map<IntervalWindow, Set<GlobalDataRequest>> blockedMap = blockedMapState.read();
  // Both main-input windows map onto the same side-input window (which coincides with window1),
  // so both expected requests encode window1 in the GlobalDataId version.
  assertThat(
      blockedMap.get(window1),
      equalTo(Collections.singleton(
          Windmill.GlobalDataRequest.newBuilder()
              .setDataId(
                  Windmill.GlobalDataId.newBuilder()
                      .setTag(view.getTagInternal().getId())
                      .setVersion(ByteString.copyFrom(
                          CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window1)))
                      .build())
              .setExistenceWatermarkDeadline(9000)
              .build())));
  assertThat(
      blockedMap.get(window2),
      equalTo(Collections.singleton(
          Windmill.GlobalDataRequest.newBuilder()
              .setDataId(
                  Windmill.GlobalDataId.newBuilder()
                      .setTag(view.getTagInternal().getId())
                      .setVersion(ByteString.copyFrom(
                          CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window1)))
                      .build())
              .setExistenceWatermarkDeadline(9000)
              .build())));
  assertThat(sideInputFetcher.elementBag(window1).read(), contains(Iterables.get(elem.explodeWindows(), 0)));
  assertThat(sideInputFetcher.elementBag(window2).read(), contains(Iterables.get(elem.explodeWindows(), 1)));
  assertEquals(sideInputFetcher.watermarkHold(window1).read(), new Instant(timestamp));
  assertEquals(sideInputFetcher.watermarkHold(window2).read(), new Instant(timestamp));
}
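The element bag assertions above rely on WindowedValue.explodeWindows(), which splits a value assigned to several windows into one single-window value per window. A small sketch of that behavior, with names chosen only for illustration:

    IntervalWindow window1 = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow window2 = new IntervalWindow(new Instant(-5), new Instant(5));
    WindowedValue<String> multi =
        WindowedValue.of("e", new Instant(1), Arrays.asList(window1, window2), PaneInfo.NO_FIRING);
    // Each exploded value carries the same element and timestamp but exactly one window,
    // which is how the assertions above pair index 0 with window1 and index 1 with window2.
    Iterable<WindowedValue<String>> exploded = multi.explodeWindows();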
Use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.
From the class StreamingSideInputDoFnRunnerTest, method testMultipleSideInputs:
@Test
public void testMultipleSideInputs() throws Exception {
  PCollectionView<String> view1 = createView();
  PCollectionView<String> view2 = createView();
  IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
  Windmill.GlobalDataId id =
      Windmill.GlobalDataId.newBuilder()
          .setTag(view1.getTagInternal().getId())
          .setVersion(ByteString.copyFrom(
              CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window)))
          .build();
  Set<Windmill.GlobalDataRequest> requestSet = new HashSet<>();
  requestSet.add(Windmill.GlobalDataRequest.newBuilder().setDataId(id).build());
  Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> blockedMap = new HashMap<>();
  blockedMap.put(window, requestSet);
  // Pre-populate the blocked map as if an earlier element were already waiting on view1.
  ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState =
      state.state(
          StateNamespaces.global(),
          StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
  blockedMapState.write(blockedMap);
  when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(id));
  when(stepContext.issueSideInputFetch(
          any(PCollectionView.class), any(BoundedWindow.class), any(SideInputState.class)))
      .thenReturn(true);
  when(execContext.getSideInputReaderForViews(Mockito.<Iterable<? extends PCollectionView<?>>>any()))
      .thenReturn(mockSideInputReader);
  when(mockSideInputReader.contains(eq(view1))).thenReturn(true);
  when(mockSideInputReader.contains(eq(view2))).thenReturn(true);
  when(mockSideInputReader.get(eq(view1), any(BoundedWindow.class))).thenReturn("data1");
  when(mockSideInputReader.get(eq(view2), any(BoundedWindow.class))).thenReturn("data2");
  ListOutputManager outputManager = new ListOutputManager();
  List<PCollectionView<String>> views = Arrays.asList(view1, view2);
  StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
  StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
      createRunner(outputManager, views, sideInputFetcher);
  // Buffer a previously blocked element; it should be released once both side inputs are ready.
  sideInputFetcher.watermarkHold(createWindow(0)).add(new Instant(0));
  sideInputFetcher.elementBag(createWindow(0)).add(createDatum("e1", 0));
  runner.startBundle();
  runner.processElement(createDatum("e2", 2));
  runner.finishBundle();
  assertThat(
      outputManager.getOutput(mainOutputTag),
      contains(createDatum("e1:data1:data2", 0), createDatum("e2:data1:data2", 2)));
  assertThat(blockedMapState.read(), Matchers.nullValue());
  assertThat(sideInputFetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
  assertThat(sideInputFetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
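The GlobalDataId built at the top of this test uses the side-input window serialized with its coder as the version bytes; the same bytes can be decoded back to recover the window. A sketch of that round trip, not taken from the test itself (the enclosing method would need to declare CoderException):

    IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
    // Encode the window into the version bytes, as the test does when building the id...
    ByteString version =
        ByteString.copyFrom(CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window));
    // ...and decode the bytes to recover the window the notification refers to.
    IntervalWindow decoded =
        CoderUtils.decodeFromByteArray(IntervalWindow.getCoder(), version.toByteArray());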
Use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.
From the class StreamingSideInputDoFnRunnerTest, method testSideInputNotification:
@Test
public void testSideInputNotification() throws Exception {
  PCollectionView<String> view = createView();
  IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(10));
  Windmill.GlobalDataId id =
      Windmill.GlobalDataId.newBuilder()
          .setTag(view.getTagInternal().getId())
          .setVersion(ByteString.copyFrom(
              CoderUtils.encodeToByteArray(IntervalWindow.getCoder(), window)))
          .build();
  Set<Windmill.GlobalDataRequest> requestSet = new HashSet<>();
  requestSet.add(Windmill.GlobalDataRequest.newBuilder().setDataId(id).build());
  Map<IntervalWindow, Set<Windmill.GlobalDataRequest>> blockedMap = new HashMap<>();
  blockedMap.put(window, requestSet);
  ValueState<Map<IntervalWindow, Set<GlobalDataRequest>>> blockedMapState =
      state.state(
          StateNamespaces.global(),
          StreamingSideInputFetcher.blockedMapAddr(WINDOW_FN.windowCoder()));
  blockedMapState.write(blockedMap);
  ListOutputManager outputManager = new ListOutputManager();
  List<PCollectionView<String>> views = Arrays.asList(view);
  StreamingSideInputFetcher<String, IntervalWindow> sideInputFetcher = createFetcher(views);
  StreamingSideInputDoFnRunner<String, String, IntervalWindow> runner =
      createRunner(outputManager, views, sideInputFetcher);
  // Buffer an element and hold the watermark, as if it had been blocked before the notification.
  sideInputFetcher.watermarkHold(createWindow(0)).add(new Instant(0));
  sideInputFetcher.elementBag(createWindow(0)).add(createDatum("e", 0));
  when(stepContext.getSideInputNotifications()).thenReturn(Arrays.asList(id));
  when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.UNKNOWN)))
      .thenReturn(false);
  when(stepContext.issueSideInputFetch(eq(view), any(BoundedWindow.class), eq(SideInputState.KNOWN_READY)))
      .thenReturn(true);
  when(execContext.getSideInputReaderForViews(Mockito.<Iterable<? extends PCollectionView<?>>>any()))
      .thenReturn(mockSideInputReader);
  when(mockSideInputReader.contains(eq(view))).thenReturn(true);
  when(mockSideInputReader.get(eq(view), any(BoundedWindow.class))).thenReturn("data");
  // No new elements are processed; the notification alone should release the buffered element.
  runner.startBundle();
  runner.finishBundle();
  assertThat(outputManager.getOutput(mainOutputTag), contains(createDatum("e:data", 0)));
  assertThat(blockedMapState.read(), Matchers.nullValue());
  assertThat(sideInputFetcher.watermarkHold(createWindow(0)).read(), Matchers.nullValue());
  assertThat(sideInputFetcher.elementBag(createWindow(0)).read(), Matchers.emptyIterable());
}
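The requests tracked above are eventually answered with Windmill.GlobalData messages, which the gRPC test below constructs explicitly. As a quick reference, a GlobalData that answers a given id might be built like this; the tag, state family, and payload strings here are illustrative only:

    Windmill.GlobalDataId id =
        Windmill.GlobalDataId.newBuilder().setTag("someSideInputTag").build(); // illustrative id
    Windmill.GlobalData data =
        Windmill.GlobalData.newBuilder()
            .setDataId(id)
            .setStateFamily("family")
            .setData(ByteString.copyFromUtf8("side input payload")) // illustrative payload
            .build();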
Use of org.apache.beam.runners.dataflow.worker.windmill.Windmill.GlobalDataRequest in project beam by apache.
From the class GrpcWindmillServerTest, method testStreamingGetData:
@Test
@SuppressWarnings("FutureReturnValueIgnored")
public void testStreamingGetData() throws Exception {
  // This server responds to GetDataRequests with responses that mirror the requests.
  serviceRegistry.addService(
      new CloudWindmillServiceV1Alpha1ImplBase() {
        @Override
        public StreamObserver<StreamingGetDataRequest> getDataStream(
            StreamObserver<StreamingGetDataResponse> responseObserver) {
          return new StreamObserver<StreamingGetDataRequest>() {
            boolean sawHeader = false;
            HashSet<Long> seenIds = new HashSet<>();
            ResponseErrorInjector injector = new ResponseErrorInjector(responseObserver);
            StreamingGetDataResponse.Builder responseBuilder = StreamingGetDataResponse.newBuilder();

            @Override
            public void onNext(StreamingGetDataRequest chunk) {
              maybeInjectError(responseObserver);
              try {
                if (!sawHeader) {
                  LOG.info("Received header");
                  errorCollector.checkThat(
                      chunk.getHeader(),
                      Matchers.equalTo(
                          JobHeader.newBuilder()
                              .setJobId("job")
                              .setProjectId("project")
                              .setWorkerId("worker")
                              .build()));
                  sawHeader = true;
                } else {
                  LOG.info(
                      "Received get data of {} global data, {} data requests",
                      chunk.getGlobalDataRequestCount(),
                      chunk.getStateRequestCount());
                  errorCollector.checkThat(
                      chunk.getSerializedSize(), Matchers.lessThanOrEqualTo(STREAM_CHUNK_SIZE));
                  int i = 0;
                  for (GlobalDataRequest request : chunk.getGlobalDataRequestList()) {
                    long requestId = chunk.getRequestId(i++);
                    errorCollector.checkThat(seenIds.add(requestId), Matchers.is(true));
                    sendResponse(requestId, processGlobalDataRequest(request));
                  }
                  for (ComputationGetDataRequest request : chunk.getStateRequestList()) {
                    long requestId = chunk.getRequestId(i++);
                    errorCollector.checkThat(seenIds.add(requestId), Matchers.is(true));
                    sendResponse(requestId, processStateRequest(request));
                  }
                  flushResponse();
                }
              } catch (Exception e) {
                errorCollector.addError(e);
              }
            }

            @Override
            public void onError(Throwable throwable) {}

            @Override
            public void onCompleted() {
              injector.cancel();
              responseObserver.onCompleted();
            }

            private ByteString processGlobalDataRequest(GlobalDataRequest request) {
              errorCollector.checkThat(request.getStateFamily(), Matchers.is("family"));
              return GlobalData.newBuilder()
                  .setDataId(request.getDataId())
                  .setStateFamily("family")
                  .setData(ByteString.copyFromUtf8(request.getDataId().getTag()))
                  .build()
                  .toByteString();
            }

            private ByteString processStateRequest(ComputationGetDataRequest compRequest) {
              errorCollector.checkThat(compRequest.getRequestsCount(), Matchers.is(1));
              errorCollector.checkThat(compRequest.getComputationId(), Matchers.is("computation"));
              KeyedGetDataRequest request = compRequest.getRequests(0);
              KeyedGetDataResponse response =
                  makeGetDataResponse(request.getValuesToFetch(0).getTag().toStringUtf8());
              return response.toByteString();
            }

            private void sendResponse(long id, ByteString serializedResponse) {
              if (ThreadLocalRandom.current().nextInt(4) == 0) {
                sendChunkedResponse(id, serializedResponse);
              } else {
                responseBuilder.addRequestId(id).addSerializedResponse(serializedResponse);
                if (responseBuilder.getRequestIdCount() > 10) {
                  flushResponse();
                }
              }
            }

            private void sendChunkedResponse(long id, ByteString serializedResponse) {
              LOG.info("Sending response with {} chunks", (serializedResponse.size() / 10) + 1);
              for (int i = 0; i < serializedResponse.size(); i += 10) {
                int end = Math.min(serializedResponse.size(), i + 10);
                try {
                  responseObserver.onNext(
                      StreamingGetDataResponse.newBuilder()
                          .addRequestId(id)
                          .addSerializedResponse(serializedResponse.substring(i, end))
                          .setRemainingBytesForResponse(serializedResponse.size() - end)
                          .build());
                } catch (IllegalStateException e) {
                  // Stream is already closed.
                }
              }
            }

            private void flushResponse() {
              if (responseBuilder.getRequestIdCount() > 0) {
                LOG.info("Sending batched response of {} ids", responseBuilder.getRequestIdCount());
                try {
                  responseObserver.onNext(responseBuilder.build());
                } catch (IllegalStateException e) {
                  // Stream is already closed.
                }
                responseBuilder.clear();
              }
            }
          };
        }
      });

  GetDataStream stream = client.getDataStream();

  // Make requests of varying sizes to test chunking, and verify the responses.
  ExecutorService executor = Executors.newFixedThreadPool(50);
  final CountDownLatch done = new CountDownLatch(200);
  for (int i = 0; i < 100; ++i) {
    final String key = "key" + i;
    final String s = i % 5 == 0 ? largeString(i) : "tag";
    executor.submit(
        () -> {
          errorCollector.checkThat(
              stream.requestKeyedData("computation", makeGetDataRequest(key, s)),
              Matchers.equalTo(makeGetDataResponse(s)));
          done.countDown();
        });
    executor.execute(
        () -> {
          errorCollector.checkThat(
              stream.requestGlobalData(makeGlobalDataRequest(key)),
              Matchers.equalTo(makeGlobalDataResponse(key)));
          done.countDown();
        });
  }
  done.await();
  stream.close();
  assertTrue(stream.awaitTermination(60, TimeUnit.SECONDS));
  executor.shutdown();
}
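The sendChunkedResponse path above splits a serialized response into 10-byte pieces and reports the bytes still outstanding via setRemainingBytesForResponse; the GetDataStream client reassembles the pieces before the per-key and global-data calls return, which is what the equality checks in the test exercise. A self-contained sketch of that chunk-and-reassemble idea, not the actual client code; the tag, state family, and payload values are illustrative, and parseFrom throws a checked protobuf exception:

    // Serialize a GlobalData message, split it into 10-byte pieces the way sendChunkedResponse does,
    // then concatenate and parse them the way a receiver would.
    Windmill.GlobalData original =
        Windmill.GlobalData.newBuilder()
            .setDataId(Windmill.GlobalDataId.newBuilder().setTag("tag").build())
            .setStateFamily("family")
            .setData(ByteString.copyFromUtf8("payload"))
            .build();
    ByteString serialized = original.toByteString();
    ByteString assembled = ByteString.EMPTY;
    for (int i = 0; i < serialized.size(); i += 10) {
      int end = Math.min(serialized.size(), i + 10);
      assembled = assembled.concat(serialized.substring(i, end));
    }
    Windmill.GlobalData reassembled = Windmill.GlobalData.parseFrom(assembled);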