use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class SparkExecutableStageFunction method getStateRequestHandler.
/**
 * Assembles the {@link StateRequestHandler} for the given executable stage.
 *
 * <p>Iterable, multimap and multimap-keys side input requests are all routed to one handler that
 * decodes the broadcast values held in {@code sideInputs}. Bag user state requests are routed to
 * a handler created from {@code bagUserStateHandlerFactory} when the stage declares user states,
 * and to {@link StateRequestHandler#unsupported()} otherwise.
 *
 * @param executableStage stage whose side inputs and user states determine the handlers
 * @param processBundleDescriptor descriptor passed on to the bag user state handler
 * @return a handler that delegates based upon the state key type
 * @throws RuntimeException wrapping the {@link IOException} raised while creating the side input
 *     handler
 */
private StateRequestHandler getStateRequestHandler(
    ExecutableStage executableStage,
    ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
  // Looks up the broadcast for a PCollection id and decodes each byte[] with the paired coder.
  BatchSideInputHandlerFactory.SideInputGetter broadcastGetter =
      new BatchSideInputHandlerFactory.SideInputGetter() {
        @Override
        public <T> List<T> getSideInput(String pCollectionId) {
          Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> entry =
              sideInputs.get(pCollectionId);
          Broadcast<List<byte[]>> encodedValues = entry._1;
          WindowedValueCoder<SideInputT> valueCoder = entry._2;
          return (List<T>)
              encodedValues.value().stream()
                  .map(encoded -> CoderHelpers.fromByteArray(encoded, valueCoder))
                  .collect(Collectors.toList());
        }
      };
  StateRequestHandlers.SideInputHandlerFactory sideInputFactory =
      BatchSideInputHandlerFactory.forStage(executableStage, broadcastGetter);

  final StateRequestHandler sideInputHandler;
  try {
    sideInputHandler =
        StateRequestHandlers.forSideInputHandlerFactory(
            ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
  } catch (IOException ioe) {
    throw new RuntimeException("Failed to setup state handler", ioe);
  }

  // Lazily create the user state factory the first time a handler is requested.
  if (bagUserStateHandlerFactory == null) {
    bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
  }

  final StateRequestHandler userStateHandler;
  if (executableStage.getUserStates().isEmpty()) {
    userStateHandler = StateRequestHandler.unsupported();
  } else {
    // Need to discard the old key's state
    bagUserStateHandlerFactory.resetForNewKey();
    userStateHandler =
        StateRequestHandlers.forBagUserStateHandlerFactory(
            processBundleDescriptor, bagUserStateHandlerFactory);
  }

  EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
  handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
  handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
  return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataReadRunner method trySplit.
/**
 * Attempts to split the input of this transform according to the {@code DesiredSplit} that the
 * request holds for {@code pTransformId}.
 *
 * <p>The method returns without touching {@code response} when the request has no desired split
 * for this transform, when {@code index} already equals {@code stopIndex}, when the request's
 * instruction id differs from the one supplied by {@code processBundleInstructionIdSupplier}, or
 * when no new stop index strictly between {@code index} and {@code stopIndex} can be chosen.
 *
 * <p>Otherwise one of two things happens while holding {@code splittingLock}: either the current
 * element is split through the downstream consumer (only when it implements
 * {@code HandlesSplits}), or {@code stopIndex} is lowered to an element boundary. In both cases
 * the outcome is recorded as a channel split on {@code response}.
 *
 * @param request the split request; its desired-splits map and instruction id are read
 * @param response builder that receives the primary/residual roots and channel split, if any
 */
public void trySplit(ProcessBundleSplitRequest request, ProcessBundleSplitResponse.Builder response) {
// No desired split registered for this transform means there is nothing to do.
DesiredSplit desiredSplit = request.getDesiredSplitsMap().get(pTransformId);
if (desiredSplit == null) {
return;
}
long totalBufferSize = desiredSplit.getEstimatedInputElements();
// Mutable copy: the list may be sorted in place further down.
List<Long> allowedSplitPoints = new ArrayList<>(desiredSplit.getAllowedSplitPointsList());
// Only a consumer implementing HandlesSplits can report progress or split an element.
HandlesSplits splittingConsumer = null;
if (consumer instanceof HandlesSplits) {
splittingConsumer = ((HandlesSplits) consumer);
}
synchronized (splittingLock) {
// The current index has already reached the stop index, so there is nothing left to split.
if (index == stopIndex) {
return;
}
// Ignore requests whose instruction id does not match the one currently supplied by
// processBundleInstructionIdSupplier.
if (!request.getInstructionId().equals(processBundleInstructionIdSupplier.get())) {
return;
}
// Clamp the estimated input size to [index + 1, stopIndex]: if the estimate is smaller than
// what has already been reached use "index + 1", and if it exceeds the current stop index
// use the stop index as the upper bound.
if (totalBufferSize < index + 1) {
totalBufferSize = index + 1;
} else if (totalBufferSize > stopIndex) {
totalBufferSize = stopIndex;
}
// In the case where we have yet to process an element, set the current element progress to 1.
double currentElementProgress = 1;
// When index >= 0, ask the splitting consumer for its progress, defaulting to 0.5 if there
// is no consumer that can report progress.
if (index >= 0) {
if (splittingConsumer != null) {
currentElementProgress = splittingConsumer.getProgress();
} else {
currentElementProgress = 0.5;
}
}
// Now figure out where to split.
//
// The units here (except for keepOfElementRemainder) are all in terms of number or
// (possibly fractional) elements.
// Compute the amount of "remaining" work that we know of.
double remainder = totalBufferSize - index - currentElementProgress;
// Compute the number of elements (including fractional elements) that we should "keep".
double keep = remainder * desiredSplit.getFractionOfRemainder();
// If the reported progress is less than 1, the current element may still be splittable.
if (currentElementProgress < 1) {
// See if the amount we need to keep falls within the current element's remainder and if
// so, attempt to split it.
double keepOfElementRemainder = keep / (1 - currentElementProgress);
// If both index and index + 1 are allowed split points, try to split the current element.
if (keepOfElementRemainder < 1 && isValidSplitPoint(allowedSplitPoints, index) && isValidSplitPoint(allowedSplitPoints, index + 1)) {
SplitResult splitResult = splittingConsumer != null ? splittingConsumer.trySplit(keepOfElementRemainder) : null;
if (splitResult != null) {
// The consumer's split result supplies the primary and residual roots. The channel split is
// recorded with last primary element index - 1 and first residual element index + 1, which
// is also the new stopIndex.
stopIndex = index + 1;
response.addAllPrimaryRoots(splitResult.getPrimaryRoots()).addAllResidualRoots(splitResult.getResidualRoots()).addChannelSplitsBuilder().setLastPrimaryElement(index - 1).setFirstResidualElement(stopIndex);
return;
}
}
}
// Otherwise, split at the closest allowed element boundary.
// The candidate is always at least one element past index.
long newStopIndex = index + Math.max(1, Math.round(currentElementProgress + keep));
if (!isValidSplitPoint(allowedSplitPoints, newStopIndex)) {
// Choose the closest allowed split point.
Collections.sort(allowedSplitPoints);
// -(binarySearch + 1) is the insertion point when the key is absent from the sorted list.
// NOTE(review): presumably newStopIndex is never in the list on this path and the list is
// non-empty — confirm against isValidSplitPoint.
int closestSplitPointIndex = -(Collections.binarySearch(allowedSplitPoints, newStopIndex) + 1);
if (closestSplitPointIndex == 0) {
// Candidate is below every allowed point: take the smallest one.
newStopIndex = allowedSplitPoints.get(0);
} else if (closestSplitPointIndex == allowedSplitPoints.size()) {
// Candidate is above every allowed point: take the largest one.
newStopIndex = allowedSplitPoints.get(closestSplitPointIndex - 1);
} else {
long prevPoint = allowedSplitPoints.get(closestSplitPointIndex - 1);
long nextPoint = allowedSplitPoints.get(closestSplitPointIndex);
// Prefer the lower neighbour only when it lies past index and is strictly closer to the
// candidate; otherwise (including ties) use the upper neighbour.
if (index < prevPoint && newStopIndex - prevPoint < nextPoint - newStopIndex) {
newStopIndex = prevPoint;
} else {
newStopIndex = nextPoint;
}
}
}
// Only record a split that lowers stopIndex and lies strictly past index; otherwise the
// response is left unchanged.
if (newStopIndex < stopIndex && newStopIndex > index) {
stopIndex = newStopIndex;
response.addChannelSplitsBuilder().setLastPrimaryElement(stopIndex - 1).setFirstResidualElement(stopIndex);
return;
}
}
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataOutboundAggregatorTest method testConfiguredBufferLimit.
/**
 * Checks that the aggregator honours the {@code data_buffer_size_limit=100} experiment: nothing
 * is emitted below the limit, buffered data is flushed once the limit is crossed, and finishing
 * the outbound streams sends the remaining data together with the stream terminator and resets
 * the receiver statistics.
 */
@Test
public void testConfiguredBufferLimit() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options
      .as(ExperimentalOptions.class)
      .setExperiments(Arrays.asList("data_buffer_size_limit=100"));
  List<BeamFnApi.Elements> emitted = new ArrayList<>();
  AtomicBoolean completed = new AtomicBoolean();
  BeamFnDataOutboundAggregator aggregator =
      new BeamFnDataOutboundAggregator(
          options,
          endpoint::getInstructionId,
          TestStreams.<Elements>withOnNext(emitted::add)
              .withOnCompleted(() -> completed.set(true))
              .build(),
          false);

  // Below the configured limit nothing may be emitted.
  FnDataReceiver<byte[]> dataReceiver = registerOutputLocation(aggregator, endpoint, CODER);
  aggregator.start();
  dataReceiver.accept(new byte[51]);
  assertThat(emitted, empty());

  // Crossing the limit flushes both buffered elements in a single message.
  dataReceiver.accept(new byte[49]);
  assertEquals(messageWithData(new byte[51], new byte[49]), emitted.get(0));

  Receiver<?> receiver;
  if (endpoint.isTimer()) {
    receiver = Iterables.getOnlyElement(aggregator.outputTimersReceivers.values());
  } else {
    receiver = Iterables.getOnlyElement(aggregator.outputDataReceivers.values());
  }
  // The output buffer is empty after the flush while the counters keep what was written.
  assertEquals(0L, receiver.getOutput().size());
  assertEquals(102L, receiver.getByteCount());
  assertEquals(2L, receiver.getElementCount());

  // Finishing the streams sends the pending value and the stream terminator in the same
  // message.
  dataReceiver.accept(new byte[1]);
  aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();

  // Receiver stats must have been reset by sendOrCollectBufferedDataAndFinishOutboundStreams.
  assertEquals(0L, receiver.getOutput().size());
  assertEquals(0L, receiver.getByteCount());
  assertEquals(0L, receiver.getElementCount());

  BeamFnApi.Elements.Builder expectedFinal = messageWithDataBuilder(new byte[1]);
  if (endpoint.isTimer()) {
    expectedFinal.addTimers(
        BeamFnApi.Elements.Timers.newBuilder()
            .setInstructionId(endpoint.getInstructionId())
            .setTransformId(endpoint.getTransformId())
            .setTimerFamilyId(endpoint.getTimerFamilyId())
            .setIsLast(true));
  } else {
    expectedFinal.addData(
        BeamFnApi.Elements.Data.newBuilder()
            .setInstructionId(endpoint.getInstructionId())
            .setTransformId(endpoint.getTransformId())
            .setIsLast(true));
  }
  assertEquals(expectedFinal.build(), emitted.get(1));
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataOutboundAggregatorTest method testConfiguredBufferLimitMultipleEndpoints.
/**
 * Checks that the {@code data_buffer_size_limit=100} experiment applies across two registered
 * endpoints: data written to both is flushed together once the combined size crosses the limit,
 * and finishing the outbound streams emits the remaining data plus a terminator for each
 * endpoint.
 */
@Test
public void testConfiguredBufferLimitMultipleEndpoints() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options
      .as(ExperimentalOptions.class)
      .setExperiments(Arrays.asList("data_buffer_size_limit=100"));
  List<BeamFnApi.Elements> emitted = new ArrayList<>();
  AtomicBoolean completed = new AtomicBoolean();
  BeamFnDataOutboundAggregator aggregator =
      new BeamFnDataOutboundAggregator(
          options,
          endpoint::getInstructionId,
          TestStreams.<Elements>withOnNext(emitted::add)
              .withOnCompleted(() -> completed.set(true))
              .build(),
          false);

  // Register the primary endpoint and a second data endpoint on the same instruction.
  LogicalEndpoint additionalEndpoint =
      LogicalEndpoint.data(
          endpoint.getInstructionId(), "additional:" + endpoint.getTransformId());
  FnDataReceiver<byte[]> dataReceiver = registerOutputLocation(aggregator, endpoint, CODER);
  FnDataReceiver<byte[]> additionalDataReceiver =
      registerOutputLocation(aggregator, additionalEndpoint, CODER);
  aggregator.start();

  // Below the configured limit nothing may be emitted.
  dataReceiver.accept(new byte[51]);
  assertThat(emitted, empty());

  // Writing to the second endpoint crosses the limit and flushes both buffers.
  additionalDataReceiver.accept(new byte[49]);
  checkEqualInAnyOrder(
      messageWithDataBuilder(new byte[51])
          .mergeFrom(messageWithDataBuilder(additionalEndpoint, new byte[49]).build())
          .build(),
      emitted.get(0));

  // Finishing the streams sends the pending value and the stream terminators in the same
  // message.
  dataReceiver.accept(new byte[1]);
  aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();

  BeamFnApi.Elements.Builder expectedFinal = messageWithDataBuilder(new byte[1]);
  if (endpoint.isTimer()) {
    expectedFinal.addTimers(
        BeamFnApi.Elements.Timers.newBuilder()
            .setInstructionId(endpoint.getInstructionId())
            .setTransformId(endpoint.getTransformId())
            .setTimerFamilyId(endpoint.getTimerFamilyId())
            .setIsLast(true));
  } else {
    expectedFinal.addData(
        BeamFnApi.Elements.Data.newBuilder()
            .setInstructionId(endpoint.getInstructionId())
            .setTransformId(endpoint.getTransformId())
            .setIsLast(true));
  }
  expectedFinal.addData(
      BeamFnApi.Elements.Data.newBuilder()
          .setInstructionId(additionalEndpoint.getInstructionId())
          .setTransformId(additionalEndpoint.getTransformId())
          .setIsLast(true));
  checkEqualInAnyOrder(expectedFinal.build(), emitted.get(1));
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataGrpcServiceTest method testMessageReceivedBySingleClientWhenThereAreMultipleClients.
/**
 * Connects several clients to the data service, sends three logical streams, and verifies that
 * the elements for instruction ids "0", "1" and "2" each show up exactly once across all clients.
 *
 * <p>The client tasks block on a latch until the assertions have run. The latch is released and
 * the executor is shut down in a {@code finally} block so that a failed assertion or an exception
 * while sending does not leave client threads blocked forever or leak the thread pool.
 */
@Test
public void testMessageReceivedBySingleClientWhenThereAreMultipleClients() throws Exception {
  BlockingQueue<Elements> clientInboundElements = new LinkedBlockingQueue<>();
  ExecutorService executorService = Executors.newCachedThreadPool();
  CountDownLatch waitForInboundElements = new CountDownLatch(1);
  int numberOfClients = 3;
  try {
    for (int client = 0; client < numberOfClients; ++client) {
      executorService.submit(
          () -> {
            ManagedChannel channel =
                ManagedChannelFactory.createDefault()
                    .withInterceptors(Arrays.asList(AddHarnessIdInterceptor.create(WORKER_ID)))
                    .forDescriptor(service.getApiServiceDescriptor());
            StreamObserver<BeamFnApi.Elements> outboundObserver =
                BeamFnDataGrpc.newStub(channel)
                    .data(TestStreams.withOnNext(clientInboundElements::add).build());
            // Keep the stream open until the test has finished inspecting the received elements.
            waitForInboundElements.await();
            outboundObserver.onCompleted();
            return null;
          });
    }
    for (int i = 0; i < 3; ++i) {
      CloseableFnDataReceiver<WindowedValue<String>> consumer =
          service
              .getDataService(WORKER_ID)
              .send(LogicalEndpoint.data(Integer.toString(i), TRANSFORM_ID), CODER);
      consumer.accept(valueInGlobalWindow("A" + i));
      consumer.accept(valueInGlobalWindow("B" + i));
      consumer.accept(valueInGlobalWindow("C" + i));
      consumer.close();
    }
    // Specifically copy the elements to a new list so we perform blocking calls on the queue
    // to ensure the elements arrive.
    List<Elements> copy = new ArrayList<>();
    for (int i = 0; i < numberOfClients; ++i) {
      copy.add(clientInboundElements.take());
    }
    assertThat(
        copy,
        containsInAnyOrder(elementsWithData("0"), elementsWithData("1"), elementsWithData("2")));
  } finally {
    // Always release the clients and stop the pool, even when the test body fails.
    waitForInboundElements.countDown();
    executorService.shutdown();
  }
}
Aggregations