Search in sources :

Example 11 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class SparkExecutableStageFunction method getStateRequestHandler.

/**
 * Assembles the state request handler for a stage by routing each state key type either to a
 * side input handler or to a bag user state handler.
 *
 * @param executableStage stage whose side inputs and user states decide which handlers are built
 * @param processBundleDescriptor descriptor handed to the bag user state handler factory
 * @return a handler that delegates based upon the type case of the state key
 * @throws RuntimeException wrapping the {@link IOException} raised while creating the side input
 *     handler
 */
private StateRequestHandler getStateRequestHandler(ExecutableStage executableStage, ProcessBundleDescriptors.ExecutableProcessBundleDescriptor processBundleDescriptor) {
    // Side inputs are looked up by PCollection id and decoded from the broadcast byte arrays
    // with the coder stored alongside them.
    StateRequestHandlers.SideInputHandlerFactory sideInputFactory = BatchSideInputHandlerFactory.forStage(executableStage, new BatchSideInputHandlerFactory.SideInputGetter() {

        @Override
        public <T> List<T> getSideInput(String pCollectionId) {
            Tuple2<Broadcast<List<byte[]>>, WindowedValueCoder<SideInputT>> broadcastAndCoder = sideInputs.get(pCollectionId);
            WindowedValueCoder<SideInputT> elementCoder = broadcastAndCoder._2;
            List<byte[]> encodedElements = broadcastAndCoder._1.value();
            return (List<T>) encodedElements.stream().map(encoded -> CoderHelpers.fromByteArray(encoded, elementCoder)).collect(Collectors.toList());
        }
    });
    final StateRequestHandler sideInputHandler;
    try {
        sideInputHandler = StateRequestHandlers.forSideInputHandlerFactory(ProcessBundleDescriptors.getSideInputs(executableStage), sideInputFactory);
    } catch (IOException e) {
        throw new RuntimeException("Failed to setup state handler", e);
    }
    // Create the user state factory on first use.
    if (bagUserStateHandlerFactory == null) {
        bagUserStateHandlerFactory = new InMemoryBagUserStateFactory();
    }
    final StateRequestHandler userStateHandler;
    if (executableStage.getUserStates().isEmpty()) {
        // No user state declared by the stage: such requests are unsupported.
        userStateHandler = StateRequestHandler.unsupported();
    } else {
        // The state of the previous key must be dropped before serving a new one.
        bagUserStateHandlerFactory.resetForNewKey();
        userStateHandler = StateRequestHandlers.forBagUserStateHandlerFactory(processBundleDescriptor, bagUserStateHandlerFactory);
    }
    // All side input flavours share one handler; bag user state gets its own.
    EnumMap<TypeCase, StateRequestHandler> handlersByType = new EnumMap<>(StateKey.TypeCase.class);
    handlersByType.put(StateKey.TypeCase.ITERABLE_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.MULTIMAP_KEYS_SIDE_INPUT, sideInputHandler);
    handlersByType.put(StateKey.TypeCase.BAG_USER_STATE, userStateHandler);
    return StateRequestHandlers.delegateBasedUponType(handlersByType);
}
Also used : WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) TimerInternals(org.apache.beam.runners.core.TimerInternals) BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Locale(java.util.Locale) JobBundleFactory(org.apache.beam.runners.fnexecution.control.JobBundleFactory) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TimerReceiverFactory(org.apache.beam.runners.fnexecution.control.TimerReceiverFactory) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) Broadcast(org.apache.spark.broadcast.Broadcast) StageBundleFactory(org.apache.beam.runners.fnexecution.control.StageBundleFactory) EnumMap(java.util.EnumMap) FnDataReceiver(org.apache.beam.sdk.fn.data.FnDataReceiver) BundleProgressHandler(org.apache.beam.runners.fnexecution.control.BundleProgressHandler) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) Serializable(java.io.Serializable) List(java.util.List) ByteArray(org.apache.beam.runners.spark.util.ByteArray) SparkPipelineOptions(org.apache.beam.runners.spark.SparkPipelineOptions) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse) Coder(org.apache.beam.sdk.coders.Coder) CoderHelpers(org.apache.beam.runners.spark.coders.CoderHelpers) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RemoteBundle(org.apache.beam.runners.fnexecution.control.RemoteBundle) 
InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) ProcessBundleProgressResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Iterator(java.util.Iterator) OutputReceiverFactory(org.apache.beam.runners.fnexecution.control.OutputReceiverFactory) ProcessBundleDescriptors(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors) MetricsContainerImpl(org.apache.beam.runners.core.metrics.MetricsContainerImpl) PipelineTranslatorUtils(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) IOException(java.io.IOException) MetricsContainerStepMapAccumulator(org.apache.beam.runners.spark.metrics.MetricsContainerStepMapAccumulator) InMemoryTimerInternals(org.apache.beam.runners.core.InMemoryTimerInternals) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) FileSystems(org.apache.beam.sdk.io.FileSystems) Collections(java.util.Collections) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) ExecutableStageContext(org.apache.beam.runners.fnexecution.control.ExecutableStageContext) StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) TypeCase(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey.TypeCase) IOException(java.io.IOException) InMemoryBagUserStateFactory(org.apache.beam.runners.fnexecution.state.InMemoryBagUserStateFactory) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) Broadcast(org.apache.spark.broadcast.Broadcast) 
BatchSideInputHandlerFactory(org.apache.beam.runners.fnexecution.translation.BatchSideInputHandlerFactory) Tuple2(scala.Tuple2) List(java.util.List) StateRequestHandlers(org.apache.beam.runners.fnexecution.state.StateRequestHandlers) EnumMap(java.util.EnumMap)

Example 12 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataReadRunner method trySplit.

/**
 * Attempts to split the input of this read in response to a split request, recording any
 * resulting split in {@code response}.
 *
 * <p>The method returns without touching {@code response} when the request carries no desired
 * split for {@code pTransformId}, when {@code index} already equals {@code stopIndex}, when the
 * request's instruction id does not match the one supplied by
 * {@code processBundleInstructionIdSupplier}, or when no stop index strictly between
 * {@code index} and {@code stopIndex} can be chosen. On success {@code stopIndex} is lowered.
 *
 * <p>{@code pTransformId}, {@code consumer}, {@code index}, {@code stopIndex} and
 * {@code splittingLock} are members declared outside this method.
 *
 * @param request the split request; only the desired split keyed by {@code pTransformId} and the
 *     instruction id are read
 * @param response builder that receives primary/residual roots and the channel split
 */
public void trySplit(ProcessBundleSplitRequest request, ProcessBundleSplitResponse.Builder response) {
    DesiredSplit desiredSplit = request.getDesiredSplitsMap().get(pTransformId);
    if (desiredSplit == null) {
        return;
    }
    long totalBufferSize = desiredSplit.getEstimatedInputElements();
    // Mutable copy: the list may be sorted in place further down.
    List<Long> allowedSplitPoints = new ArrayList<>(desiredSplit.getAllowedSplitPointsList());
    // Only a consumer implementing HandlesSplits can report progress or split within an element.
    HandlesSplits splittingConsumer = null;
    if (consumer instanceof HandlesSplits) {
        splittingConsumer = ((HandlesSplits) consumer);
    }
    synchronized (splittingLock) {
        // Nothing is left to split once the current index has reached the stop index.
        if (index == stopIndex) {
            return;
        }
        // Ignore requests whose instruction id differs from the one currently supplied.
        // NOTE(review): presumably guards against a stale request for another bundle - confirm.
        if (!request.getInstructionId().equals(processBundleInstructionIdSupplier.get())) {
            return;
        }
        // Clamp the estimated element count to [index + 1, stopIndex]: if the split request is
        // bounded incorrectly, use index + 1 as the lower and the stop index as the upper bound.
        if (totalBufferSize < index + 1) {
            totalBufferSize = index + 1;
        } else if (totalBufferSize > stopIndex) {
            totalBufferSize = stopIndex;
        }
        // In the case where we have yet to process an element, set the current element progress to 1.
        double currentElementProgress = 1;
        // Once index is non-negative, ask the splitting consumer for the current element's
        // progress, defaulting to 0.5 if there is no splitting consumer to fetch it from.
        if (index >= 0) {
            if (splittingConsumer != null) {
                currentElementProgress = splittingConsumer.getProgress();
            } else {
                currentElementProgress = 0.5;
            }
        }
        // Now figure out where to split.
        //
        // The units here (except for keepOfElementRemainder) are all in terms of number or
        // (possibly fractional) elements.
        // Compute the amount of "remaining" work that we know of.
        double remainder = totalBufferSize - index - currentElementProgress;
        // Compute the number of elements (including fractional elements) that we should "keep".
        double keep = remainder * desiredSplit.getFractionOfRemainder();
        // A split inside the current element is only considered while that element is not
        // reported as fully processed.
        if (currentElementProgress < 1) {
            // See if the amount we need to keep falls within the current element's remainder and if
            // so, attempt to split it.
            double keepOfElementRemainder = keep / (1 - currentElementProgress);
            // If both index and index + 1 are allowed split points, we can split at index.
            if (keepOfElementRemainder < 1 && isValidSplitPoint(allowedSplitPoints, index) && isValidSplitPoint(allowedSplitPoints, index + 1)) {
                SplitResult splitResult = splittingConsumer != null ? splittingConsumer.trySplit(keepOfElementRemainder) : null;
                if (splitResult != null) {
                    stopIndex = index + 1;
                    // The element at index is covered by the primary/residual roots, so the channel
                    // split skips it: primary ends at index - 1, residual starts at index + 1.
                    response.addAllPrimaryRoots(splitResult.getPrimaryRoots()).addAllResidualRoots(splitResult.getResidualRoots()).addChannelSplitsBuilder().setLastPrimaryElement(index - 1).setFirstResidualElement(stopIndex);
                    return;
                }
            }
        }
        // Otherwise, split at the closest allowed element boundary, at least one element ahead.
        long newStopIndex = index + Math.max(1, Math.round(currentElementProgress + keep));
        if (!isValidSplitPoint(allowedSplitPoints, newStopIndex)) {
            // Choose the closest allowed split point.
            Collections.sort(allowedSplitPoints);
            // binarySearch returns -(insertionPoint) - 1 for an absent key, so this recovers the
            // insertion point (assumes newStopIndex is absent from the list, as the failed
            // isValidSplitPoint check suggests - TODO confirm).
            int closestSplitPointIndex = -(Collections.binarySearch(allowedSplitPoints, newStopIndex) + 1);
            if (closestSplitPointIndex == 0) {
                // Every allowed point is larger: take the smallest.
                newStopIndex = allowedSplitPoints.get(0);
            } else if (closestSplitPointIndex == allowedSplitPoints.size()) {
                // Every allowed point is smaller: take the largest.
                newStopIndex = allowedSplitPoints.get(closestSplitPointIndex - 1);
            } else {
                long prevPoint = allowedSplitPoints.get(closestSplitPointIndex - 1);
                long nextPoint = allowedSplitPoints.get(closestSplitPointIndex);
                // Take the previous point only if it lies beyond index and is strictly closer than
                // the next point; otherwise take the next point.
                if (index < prevPoint && newStopIndex - prevPoint < nextPoint - newStopIndex) {
                    newStopIndex = prevPoint;
                } else {
                    newStopIndex = nextPoint;
                }
            }
        }
        // Apply the split only if it lowers the stop index and lies beyond the current index.
        if (newStopIndex < stopIndex && newStopIndex > index) {
            stopIndex = newStopIndex;
            response.addChannelSplitsBuilder().setLastPrimaryElement(stopIndex - 1).setFirstResidualElement(stopIndex);
            return;
        }
    }
}
Also used : SplitResult(org.apache.beam.fn.harness.HandlesSplits.SplitResult) ArrayList(java.util.ArrayList) DesiredSplit(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitRequest.DesiredSplit)

Example 13 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataOutboundAggregatorTest method testConfiguredBufferLimit.

/**
 * Checks buffering with the experiment {@code data_buffer_size_limit=100}: nothing is emitted
 * before the limit is crossed, the receiver statistics reflect the flush, and finishing the
 * outbound streams sends the remaining data together with the stream terminator.
 */
@Test
public void testConfiguredBufferLimit() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    ExperimentalOptions experimentalOptions = options.as(ExperimentalOptions.class);
    experimentalOptions.setExperiments(Arrays.asList("data_buffer_size_limit=100"));
    List<BeamFnApi.Elements> emitted = new ArrayList<>();
    AtomicBoolean completed = new AtomicBoolean();
    BeamFnDataOutboundAggregator aggregator = new BeamFnDataOutboundAggregator(options, endpoint::getInstructionId, TestStreams.<Elements>withOnNext(emitted::add).withOnCompleted(() -> completed.set(true)).build(), false);
    FnDataReceiver<byte[]> dataReceiver = registerOutputLocation(aggregator, endpoint, CODER);
    aggregator.start();

    // Below the configured limit: nothing may have been emitted yet.
    dataReceiver.accept(new byte[51]);
    assertThat(emitted, empty());

    // Crossing the limit flushes both payloads in a single message.
    dataReceiver.accept(new byte[49]);
    assertEquals(messageWithData(new byte[51], new byte[49]), emitted.get(0));

    // Pick the single registered receiver matching the endpoint kind.
    Receiver<?> receiver;
    if (!endpoint.isTimer()) {
        receiver = Iterables.getOnlyElement(aggregator.outputDataReceivers.values());
    } else {
        receiver = Iterables.getOnlyElement(aggregator.outputTimersReceivers.values());
    }
    // After the flush the buffer is empty while the counters keep what was sent.
    assertEquals(0L, receiver.getOutput().size());
    assertEquals(102L, receiver.getByteCount());
    assertEquals(2L, receiver.getElementCount());

    // Finishing the streams must send the pending byte plus the terminator in one message.
    dataReceiver.accept(new byte[1]);
    aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();

    // The receiver statistics are reset by sendOrCollectBufferedDataAndFinishOutboundStreams.
    assertEquals(0L, receiver.getOutput().size());
    assertEquals(0L, receiver.getByteCount());
    assertEquals(0L, receiver.getElementCount());

    BeamFnApi.Elements.Builder expectedFinal = messageWithDataBuilder(new byte[1]);
    if (!endpoint.isTimer()) {
        expectedFinal.addData(BeamFnApi.Elements.Data.newBuilder().setInstructionId(endpoint.getInstructionId()).setTransformId(endpoint.getTransformId()).setIsLast(true));
    } else {
        expectedFinal.addTimers(BeamFnApi.Elements.Timers.newBuilder().setInstructionId(endpoint.getInstructionId()).setTransformId(endpoint.getTransformId()).setTimerFamilyId(endpoint.getTimerFamilyId()).setIsLast(true));
    }
    assertEquals(expectedFinal.build(), emitted.get(1));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ArrayList(java.util.ArrayList) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) Test(org.junit.Test)

Example 14 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataOutboundAggregatorTest method testConfiguredBufferLimitMultipleEndpoints.

/**
 * Checks buffering with the experiment {@code data_buffer_size_limit=100} when two endpoints
 * feed the same aggregator: data from both counts towards the limit, and finishing the outbound
 * streams emits the remaining data plus a terminator for each endpoint.
 */
@Test
public void testConfiguredBufferLimitMultipleEndpoints() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    ExperimentalOptions experimentalOptions = options.as(ExperimentalOptions.class);
    experimentalOptions.setExperiments(Arrays.asList("data_buffer_size_limit=100"));
    List<BeamFnApi.Elements> emitted = new ArrayList<>();
    AtomicBoolean completed = new AtomicBoolean();
    BeamFnDataOutboundAggregator aggregator = new BeamFnDataOutboundAggregator(options, endpoint::getInstructionId, TestStreams.<Elements>withOnNext(emitted::add).withOnCompleted(() -> completed.set(true)).build(), false);

    // Second data endpoint on the same instruction, with a distinct transform id.
    LogicalEndpoint extraEndpoint = LogicalEndpoint.data(endpoint.getInstructionId(), "additional:" + endpoint.getTransformId());
    FnDataReceiver<byte[]> primaryReceiver = registerOutputLocation(aggregator, endpoint, CODER);
    FnDataReceiver<byte[]> extraReceiver = registerOutputLocation(aggregator, extraEndpoint, CODER);
    aggregator.start();

    // Below the configured limit: nothing may have been emitted yet.
    primaryReceiver.accept(new byte[51]);
    assertThat(emitted, empty());

    // Data on the other endpoint pushes the total across the limit and triggers a flush.
    extraReceiver.accept(new byte[49]);
    BeamFnApi.Elements expectedFlush = messageWithDataBuilder(new byte[51]).mergeFrom(messageWithDataBuilder(extraEndpoint, new byte[49]).build()).build();
    checkEqualInAnyOrder(expectedFlush, emitted.get(0));

    // Finishing the streams sends the pending byte and both terminators in one message.
    primaryReceiver.accept(new byte[1]);
    aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();
    BeamFnApi.Elements.Builder expectedFinal = messageWithDataBuilder(new byte[1]);
    if (!endpoint.isTimer()) {
        expectedFinal.addData(BeamFnApi.Elements.Data.newBuilder().setInstructionId(endpoint.getInstructionId()).setTransformId(endpoint.getTransformId()).setIsLast(true));
    } else {
        expectedFinal.addTimers(BeamFnApi.Elements.Timers.newBuilder().setInstructionId(endpoint.getInstructionId()).setTransformId(endpoint.getTransformId()).setTimerFamilyId(endpoint.getTimerFamilyId()).setIsLast(true));
    }
    expectedFinal.addData(BeamFnApi.Elements.Data.newBuilder().setInstructionId(extraEndpoint.getInstructionId()).setTransformId(extraEndpoint.getTransformId()).setIsLast(true));
    checkEqualInAnyOrder(expectedFinal.build(), emitted.get(1));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ArrayList(java.util.ArrayList) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) Test(org.junit.Test)

Example 15 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataGrpcServiceTest method testMessageReceivedBySingleClientWhenThereAreMultipleClients.

/**
 * Connects several clients to the data service, sends one message per instruction id, and
 * verifies that across all clients exactly the messages "0", "1" and "2" are received.
 *
 * <p>The client threads are always released and the executor always shut down, even when an
 * assertion fails or the test thread is interrupted, so no client stays parked on the latch.
 */
@Test
public void testMessageReceivedBySingleClientWhenThereAreMultipleClients() throws Exception {
    BlockingQueue<Elements> clientInboundElements = new LinkedBlockingQueue<>();
    ExecutorService executorService = Executors.newCachedThreadPool();
    CountDownLatch waitForInboundElements = new CountDownLatch(1);
    int numberOfClients = 3;
    // Number of messages sent below; must match the elements expected in the final assertion.
    int numberOfMessages = 3;
    try {
        for (int client = 0; client < numberOfClients; ++client) {
            executorService.submit(() -> {
                ManagedChannel channel = ManagedChannelFactory.createDefault().withInterceptors(Arrays.asList(AddHarnessIdInterceptor.create(WORKER_ID))).forDescriptor(service.getApiServiceDescriptor());
                StreamObserver<BeamFnApi.Elements> outboundObserver = BeamFnDataGrpc.newStub(channel).data(TestStreams.withOnNext(clientInboundElements::add).build());
                // Keep the stream open until the main thread has finished collecting elements.
                waitForInboundElements.await();
                outboundObserver.onCompleted();
                return null;
            });
        }
        for (int i = 0; i < numberOfMessages; ++i) {
            CloseableFnDataReceiver<WindowedValue<String>> consumer = service.getDataService(WORKER_ID).send(LogicalEndpoint.data(Integer.toString(i), TRANSFORM_ID), CODER);
            consumer.accept(valueInGlobalWindow("A" + i));
            consumer.accept(valueInGlobalWindow("B" + i));
            consumer.accept(valueInGlobalWindow("C" + i));
            consumer.close();
        }
        // Specifically copy the elements to a new list so we perform blocking calls on the queue
        // to ensure the elements arrive.
        List<Elements> copy = new ArrayList<>();
        for (int i = 0; i < numberOfMessages; ++i) {
            copy.add(clientInboundElements.take());
        }
        assertThat(copy, containsInAnyOrder(elementsWithData("0"), elementsWithData("1"), elementsWithData("2")));
    } finally {
        // Release the clients first so their tasks can complete, then let the pool wind down.
        waitForInboundElements.countDown();
        executorService.shutdown();
    }
}
Also used : ArrayList(java.util.ArrayList) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue) CountDownLatch(java.util.concurrent.CountDownLatch) LogicalEndpoint(org.apache.beam.sdk.fn.data.LogicalEndpoint) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ExecutorService(java.util.concurrent.ExecutorService) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)27 Test (org.junit.Test)24 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)15 Elements (org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)13 List (java.util.List)9 ExecutorService (java.util.concurrent.ExecutorService)9 WindowedValue (org.apache.beam.sdk.util.WindowedValue)9 IOException (java.io.IOException)8 Collection (java.util.Collection)8 CountDownLatch (java.util.concurrent.CountDownLatch)8 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)8 Coder (org.apache.beam.sdk.coders.Coder)8 KvCoder (org.apache.beam.sdk.coders.KvCoder)8 Map (java.util.Map)7 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)7 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)7 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)7 Iterables (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables)7 Collections (java.util.Collections)6 CompletableFuture (java.util.concurrent.CompletableFuture)6