Search in sources:

Example 11 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataWriteRunnerTest method createRecordingAggregator.

/**
 * Creates a {@link BeamFnDataOutboundAggregator} whose outbound stream records what it is sent.
 *
 * <p>Each {@code Data} entry of every {@code Elements} message passed to the observer is decoded
 * with {@code WIRE_CODER} and appended to the list stored in {@code output} under the id that
 * {@code bundleId} returns at that moment. The aggregator is configured with the experiment
 * {@code data_buffer_size_limit=0} (presumably so that elements are not held back in a buffer;
 * confirm against the aggregator implementation).
 *
 * @param output map from bundle id to the list receiving decoded values; it must already contain
 *     a list for every id that {@code bundleId} can return, otherwise recording fails with a
 *     {@link NullPointerException}
 * @param bundleId supplies the current bundle id, both to the aggregator and to the recorder
 * @return the aggregator wired to the recording observer
 */
private BeamFnDataOutboundAggregator createRecordingAggregator(Map<String, List<WindowedValue<String>>> output, Supplier<String> bundleId) {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_size_limit=0"));
    return new BeamFnDataOutboundAggregator(options, bundleId, new StreamObserver<Elements>() {

        @Override
        public void onNext(Elements elements) {
            for (Data data : elements.getDataList()) {
                try {
                    output.get(bundleId.get()).add(WIRE_CODER.decode(data.getData().newInput()));
                } catch (IOException e) {
                    // Keep the original IOException as the cause so a decoding failure can be
                    // diagnosed from the test output.
                    throw new RuntimeException("Failed to decode output.", e);
                }
            }
        }

        @Override
        public void onError(Throwable throwable) {
            // Intentionally a no-op: this recorder only captures data passed to onNext.
        }

        @Override
        public void onCompleted() {
            // Intentionally a no-op: this recorder only captures data passed to onNext.
        }
    }, false);
}
Also used : BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Data(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data) IOException(java.io.IOException) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)

Example 12 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataGrpcClientTest method testForInboundConsumerThatThrows.

/**
 * Verifies that when the registered inbound consumer throws, the failure surfaces from
 * {@code awaitCompletion()}, the consumer is invoked exactly once, and nothing is sent back to
 * the server.
 */
@Test
public void testForInboundConsumerThatThrows() throws Exception {
    AtomicInteger consumerCalls = new AtomicInteger();
    CountDownLatch clientConnected = new CountDownLatch(1);
    AtomicReference<StreamObserver<BeamFnApi.Elements>> serverToClient = new AtomicReference<>();
    // Everything the client sends to the server ends up in this collection.
    Collection<BeamFnApi.Elements> receivedByServer = new ConcurrentLinkedQueue<>();
    CallStreamObserver<BeamFnApi.Elements> clientToServer = TestStreams.withOnNext(receivedByServer::add).build();

    Endpoints.ApiServiceDescriptor endpoint = Endpoints.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID()).build();
    BeamFnDataGrpc.BeamFnDataImplBase dataService = new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            // Publish the server-to-client stream, then signal that the client has connected.
            serverToClient.set(outboundObserver);
            clientConnected.countDown();
            return clientToServer;
        }
    };
    Server server = InProcessServerBuilder.forName(endpoint.getUrl()).addService(dataService).build();
    server.start();

    RuntimeException expectedFailure = new RuntimeException("TestFailure");
    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(endpoint.getUrl()).build();
        BeamFnDataGrpcClient client = new BeamFnDataGrpcClient(PipelineOptionsFactory.create(), (Endpoints.ApiServiceDescriptor unused) -> channel, OutboundObserverFactory.trivial());
        BeamFnDataInboundObserver2 inboundObserver = BeamFnDataInboundObserver2.forConsumers(
            Arrays.asList(DataEndpoint.create(TRANSFORM_ID_A, CODER, element -> {
                consumerCalls.incrementAndGet();
                throw expectedFailure;
            })),
            Collections.emptyList());
        client.registerReceiver(INSTRUCTION_ID_A, Arrays.asList(endpoint), inboundObserver);
        clientConnected.await();

        // The first message makes the consumer fail; the second is expected to be dropped.
        StreamObserver<BeamFnApi.Elements> toClient = serverToClient.get();
        toClient.onNext(ELEMENTS_A_1);
        toClient.onNext(ELEMENTS_A_2);

        try {
            inboundObserver.awaitCompletion();
            fail("Expected channel to fail");
        } catch (Exception thrown) {
            assertEquals(expectedFailure, thrown);
        }

        // Nothing should have travelled from the client to the server.
        assertThat(receivedByServer, empty());
        // Only the first message should have reached the consumer.
        assertEquals(1, consumerCalls.get());
    } finally {
        server.shutdownNow();
    }
}
Also used : CallStreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) BeamFnDataInboundObserver2(org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 13 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataGrpcClientTest method testForOutboundConsumer.

/**
 * Verifies that three values written through an outbound aggregator configured with
 * {@code data_buffer_size_limit=20} reach the server as exactly {@code ELEMENTS_A_1} followed by
 * {@code ELEMENTS_A_2}.
 */
@Test
public void testForOutboundConsumer() throws Exception {
    // Two messages are expected on the server side; the latch opens once both have arrived.
    CountDownLatch bothMessagesReceived = new CountDownLatch(2);
    Collection<BeamFnApi.Elements> receivedByServer = new ConcurrentLinkedQueue<>();
    CallStreamObserver<BeamFnApi.Elements> clientToServer = TestStreams.withOnNext((BeamFnApi.Elements message) -> {
        receivedByServer.add(message);
        bothMessagesReceived.countDown();
    }).build();

    Endpoints.ApiServiceDescriptor endpoint = Endpoints.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID()).build();
    BeamFnDataGrpc.BeamFnDataImplBase dataService = new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            return clientToServer;
        }
    };
    Server server = InProcessServerBuilder.forName(endpoint.getUrl()).addService(dataService).build();
    server.start();

    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(endpoint.getUrl()).build();
        PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=data_buffer_size_limit=20").create();
        BeamFnDataGrpcClient client = new BeamFnDataGrpcClient(options, (Endpoints.ApiServiceDescriptor unused) -> channel, OutboundObserverFactory.trivial());
        BeamFnDataOutboundAggregator outbound = client.createOutboundAggregator(endpoint, () -> INSTRUCTION_ID_A, false);
        FnDataReceiver<WindowedValue<String>> receiver = outbound.registerOutputDataLocation(TRANSFORM_ID_A, CODER);

        for (String payload : new String[] { "ABC", "DEF", "GHI" }) {
            receiver.accept(valueInGlobalWindow(payload));
        }
        outbound.sendOrCollectBufferedDataAndFinishOutboundStreams();

        bothMessagesReceived.await();
        assertThat(receivedByServer, contains(ELEMENTS_A_1, ELEMENTS_A_2));
    } finally {
        server.shutdownNow();
    }
}
Also used : CallStreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) CountDownLatch(java.util.concurrent.CountDownLatch) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 14 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class ProcessBundleHandler method processBundle.

/**
 * Processes a bundle, running the start(), process(), and finish() functions. This function is
 * required to be reentrant.
 *
 * <p>The steps, in order: obtain a {@code BundleProcessor} from the cache (creating one through
 * {@code createBundleProcessor} when the cache asks for it), run every start function, feed the
 * input (either the elements embedded in the request or the data received through the data
 * client), run every finish function in reverse registry order, and finally populate the response
 * with embedded outbound elements, residual roots, monitoring data, and the finalization flag.
 *
 * <p>On success the bundle processor is released back to the cache; if an {@link Exception}
 * escapes the processing section the bundle processor is discarded and the exception rethrown.
 *
 * @param request the instruction request; its {@code ProcessBundle} payload and instruction id
 *     are read
 * @return a new instruction response builder with the process bundle response set
 * @throws Exception any exception raised while processing; an {@link IOException} from creating
 *     the bundle processor is wrapped in a {@link RuntimeException}
 */
public BeamFnApi.InstructionResponse.Builder processBundle(BeamFnApi.InstructionRequest request) throws Exception {
    BeamFnApi.ProcessBundleResponse.Builder response = BeamFnApi.ProcessBundleResponse.newBuilder();
    // NOTE(review): this lookup happens before the try block below, so a failure here does not
    // reach the discard() in the catch clause — presumably nothing was handed out yet; confirm.
    BundleProcessor bundleProcessor = bundleProcessorCache.get(request, () -> {
        try {
            return createBundleProcessor(request.getProcessBundle().getProcessBundleDescriptorId(), request.getProcessBundle());
        } catch (IOException e) {
            // Rethrown unchecked, with the IOException kept as the cause.
            throw new RuntimeException(e);
        }
    });
    try {
        PTransformFunctionRegistry startFunctionRegistry = bundleProcessor.getStartFunctionRegistry();
        PTransformFunctionRegistry finishFunctionRegistry = bundleProcessor.getFinishFunctionRegistry();
        ExecutionStateTracker stateTracker = bundleProcessor.getStateTracker();
        // Both resources are closed automatically: the tracker activation when the inner block
        // ends, the state client after the response has been populated.
        try (HandleStateCallsForBundle beamFnStateClient = bundleProcessor.getBeamFnStateClient()) {
            try (Closeable closeTracker = stateTracker.activate()) {
                // Already in reverse topological order so we don't need to do anything.
                for (ThrowingRunnable startFunction : startFunctionRegistry.getFunctions()) {
                    LOG.debug("Starting function {}", startFunction);
                    startFunction.run();
                }
                if (request.getProcessBundle().hasElements()) {
                    // Input is embedded in the request itself: push it through the inbound
                    // observer and require that every input was terminated.
                    boolean inputFinished = bundleProcessor.getInboundObserver().multiplexElements(request.getProcessBundle().getElements());
                    if (!inputFinished) {
                        throw new RuntimeException("Elements embedded in ProcessBundleRequest do not contain stream terminators for " + "all data and timer inputs. Unterminated endpoints: " + bundleProcessor.getInboundObserver().getUnfinishedEndpoints());
                    }
                } else if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().isEmpty()) {
                    // Input arrives through the data client: register the observer for this
                    // instruction and block until it reports completion.
                    BeamFnDataInboundObserver2 observer = bundleProcessor.getInboundObserver();
                    beamFnDataClient.registerReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors(), observer);
                    observer.awaitCompletion();
                    // NOTE(review): not reached when awaitCompletion() throws, so the receiver
                    // stays registered on failure — confirm this is intended.
                    beamFnDataClient.unregisterReceiver(request.getInstructionId(), bundleProcessor.getInboundEndpointApiServiceDescriptors());
                }
                // Need to reverse this since we want to call finish in topological order.
                for (ThrowingRunnable finishFunction : Lists.reverse(finishFunctionRegistry.getFunctions())) {
                    LOG.debug("Finishing function {}", finishFunction);
                    finishFunction.run();
                }
            }
            // If bundleProcessor has not flushed any elements, embed them in response.
            embedOutboundElementsIfApplicable(response, bundleProcessor);
            // Add all checkpointed residuals to the response.
            response.addAllResidualRoots(bundleProcessor.getSplitListener().getResidualRoots());
            // Add all metrics to the response.
            Map<String, ByteString> monitoringData = monitoringData(bundleProcessor);
            if (runnerAcceptsShortIds) {
                // Send only the payloads, keyed by the same ids monitoringData() produced.
                response.putAllMonitoringData(monitoringData);
            } else {
                // Otherwise send one monitoring info per entry: the info looked up from shortIds
                // by the entry key, with the entry value set as its payload.
                for (Map.Entry<String, ByteString> metric : monitoringData.entrySet()) {
                    response.addMonitoringInfos(shortIds.get(metric.getKey()).toBuilder().setPayload(metric.getValue()));
                }
            }
            if (!bundleProcessor.getBundleFinalizationCallbackRegistrations().isEmpty()) {
                // Hand a copy of the registered callbacks to the finalize handler and flag the
                // response as requiring finalization.
                finalizeBundleHandler.registerCallbacks(bundleProcessor.getInstructionId(), ImmutableList.copyOf(bundleProcessor.getBundleFinalizationCallbackRegistrations()));
                response.setRequiresFinalization(true);
            }
        }
        // Mark the bundle processor as re-usable.
        bundleProcessorCache.release(request.getProcessBundle().getProcessBundleDescriptorId(), bundleProcessor);
        return BeamFnApi.InstructionResponse.newBuilder().setProcessBundle(response);
    } catch (Exception e) {
        // Make sure we clean-up from the active set of bundle processors.
        bundleProcessorCache.discard(bundleProcessor);
        throw e;
    }
}
Also used : PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Closeable(java.io.Closeable) IOException(java.io.IOException) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) BeamFnDataInboundObserver2(org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2) IOException(java.io.IOException) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap) ProcessBundleResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse)

Example 15 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class RegisterAndProcessBundleOperation method handleBagUserState.

/**
 * Serves a bag user state request against the bag state kept for the request's state key.
 *
 * <p>The bag state is looked up in {@code userStateData} and created on first use from the user
 * step context registered for the request's PTransform id. GET answers with the concatenation of
 * the bag's current contents, APPEND adds the request's data to the bag, and CLEAR empties it.
 *
 * @param stateRequest the state request; its state key must be a bag user state key
 * @return an already-completed stage holding the response builder for the request
 * @throws IllegalStateException if no user step context is known for the PTransform id
 * @throws IllegalArgumentException if the request is not a GET, APPEND, or CLEAR
 */
private CompletionStage<BeamFnApi.StateResponse.Builder> handleBagUserState(StateRequest stateRequest) {
    StateKey.BagUserState bagKey = stateRequest.getStateKey().getBagUserState();
    String transformId = bagKey.getTransformId();
    DataflowStepContext stepContext = ptransformIdToUserStepContext.get(transformId);
    checkState(stepContext != null, String.format("Unknown PTransform id '%s'", transformId));

    // TODO: We should not be required to hold onto a pointer to the bag states for the
    // user. InMemoryStateInternals assumes that the Java garbage collector does the clean-up work
    // but instead StateInternals should hold its own references and write out any data and
    // clear references when the MapTask within Dataflow completes like how WindmillStateInternals
    // works.
    BagState<ByteString> bag = userStateData.computeIfAbsent(
        stateRequest.getStateKey(),
        ignoredKey -> stepContext.stateInternals().state(
            StateNamespaces.window(GlobalWindow.Coder.INSTANCE, GlobalWindow.INSTANCE),
            StateTags.bag(bagKey.getUserStateId(), ByteStringCoder.of())));

    BeamFnApi.StateResponse.Builder reply;
    switch(stateRequest.getRequestCase()) {
        case GET:
            reply = StateResponse.newBuilder().setGet(StateGetResponse.newBuilder().setData(concat(bag.read())));
            break;
        case APPEND:
            bag.add(stateRequest.getAppend().getData());
            reply = StateResponse.newBuilder().setAppend(StateAppendResponse.getDefaultInstance());
            break;
        case CLEAR:
            bag.clear();
            reply = StateResponse.newBuilder().setClear(StateClearResponse.getDefaultInstance());
            break;
        default:
            throw new IllegalArgumentException(String.format("Unknown request type %s", stateRequest.getRequestCase()));
    }
    return CompletableFuture.completedFuture(reply);
}
Also used : StateKey(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateKey) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) DataflowStepContext(org.apache.beam.runners.dataflow.worker.DataflowExecutionContext.DataflowStepContext)

Aggregations

Elements (org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)11 Test (org.junit.Test)11 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)10 CountDownLatch (java.util.concurrent.CountDownLatch)8 ManagedChannel (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 ArrayList (java.util.ArrayList)6 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)6 StreamObserver (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver)6 ExecutorService (java.util.concurrent.ExecutorService)5 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)5 LogicalEndpoint (org.apache.beam.sdk.fn.data.LogicalEndpoint)5 Map (java.util.Map)3 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)3 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)3 Endpoints (org.apache.beam.model.pipeline.v1.Endpoints)3 ShortIdMap (org.apache.beam.runners.core.metrics.ShortIdMap)3 BeamFnDataInboundObserver2 (org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2)3 Server (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server)3 CallStreamObserver (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver)3