Search in sources :

Example 1 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class ProcessBundleDescriptors method fromExecutableStageInternal.

private static ExecutableProcessBundleDescriptor fromExecutableStageInternal(String id, ExecutableStage stage, ApiServiceDescriptor dataEndpoint, @Nullable ApiServiceDescriptor stateEndpoint) throws IOException {
    // Create with all of the processing transforms, and all of the components.
    // TODO: Remove the unreachable subcomponents if the size of the descriptor matters.
    Map<String, PTransform> stageTransforms = stage.getTransforms().stream().collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
    Components.Builder components = stage.getComponents().toBuilder().clearTransforms().putAllTransforms(stageTransforms);
    ImmutableList.Builder<RemoteInputDestination> inputDestinationsBuilder = ImmutableList.builder();
    ImmutableMap.Builder<String, Coder> remoteOutputCodersBuilder = ImmutableMap.builder();
    WireCoderSetting wireCoderSetting = stage.getWireCoderSettings().stream().filter(ws -> ws.getInputOrOutputId().equals(stage.getInputPCollection().getId())).findAny().orElse(WireCoderSetting.getDefaultInstance());
    // The order of these does not matter.
    inputDestinationsBuilder.add(addStageInput(dataEndpoint, stage.getInputPCollection(), components, wireCoderSetting));
    remoteOutputCodersBuilder.putAll(addStageOutputs(dataEndpoint, stage.getOutputPCollections(), components, stage.getWireCoderSettings()));
    Map<String, Map<String, SideInputSpec>> sideInputSpecs = addSideInputs(stage, components);
    Map<String, Map<String, BagUserStateSpec>> bagUserStateSpecs = forBagUserStates(stage, components.build());
    Map<String, Map<String, TimerSpec>> timerSpecs = forTimerSpecs(stage, components);
    lengthPrefixAnyInputCoder(stage.getInputPCollection().getId(), components);
    // Copy data from components to ProcessBundleDescriptor.
    ProcessBundleDescriptor.Builder bundleDescriptorBuilder = ProcessBundleDescriptor.newBuilder().setId(id);
    if (stateEndpoint != null) {
        bundleDescriptorBuilder.setStateApiServiceDescriptor(stateEndpoint);
    }
    if (timerSpecs.size() > 0) {
        // By default use the data endpoint for timers, in the future considering enabling specifying
        // a different ApiServiceDescriptor for timers.
        bundleDescriptorBuilder.setTimerApiServiceDescriptor(dataEndpoint);
    }
    bundleDescriptorBuilder.putAllCoders(components.getCodersMap()).putAllEnvironments(components.getEnvironmentsMap()).putAllPcollections(components.getPcollectionsMap()).putAllWindowingStrategies(components.getWindowingStrategiesMap()).putAllTransforms(components.getTransformsMap());
    return ExecutableProcessBundleDescriptor.of(bundleDescriptorBuilder.build(), inputDestinationsBuilder.build(), remoteOutputCodersBuilder.build(), sideInputSpecs, bagUserStateSpecs, timerSpecs);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteInputDestination(org.apache.beam.runners.fnexecution.data.RemoteInputDestination) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 2 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class ProcessBundleHandlerTest method testInstructionIsUnregisteredFromBeamFnDataClientOnSuccess.

@Test
public void testInstructionIsUnregisteredFromBeamFnDataClientOnSuccess() throws Exception {
    BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = BeamFnApi.ProcessBundleDescriptor.newBuilder().putTransforms("2L", RunnerApi.PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).build()).build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
    Mockito.doAnswer((invocation) -> {
        String instructionId = invocation.getArgument(0, String.class);
        CloseableFnDataReceiver<BeamFnApi.Elements> data = invocation.getArgument(2, CloseableFnDataReceiver.class);
        data.accept(BeamFnApi.Elements.newBuilder().addData(BeamFnApi.Elements.Data.newBuilder().setInstructionId(instructionId).setTransformId("2L").setIsLast(true)).build());
        return null;
    }).when(beamFnDataClient).registerReceiver(any(), any(), any());
    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), Collections.emptySet(), fnApiRegistry::get, beamFnDataClient, null, /* beamFnStateGrpcClientCache */
    null, /* finalizeBundleHandler */
    new ShortIdMap(), ImmutableMap.of(DATA_INPUT_URN, (PTransformRunnerFactory<Object>) (context) -> {
        context.addIncomingDataEndpoint(ApiServiceDescriptor.getDefaultInstance(), StringUtf8Coder.of(), (input) -> {
        });
        return null;
    }), Caches.noop(), new BundleProcessorCache());
    handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("instructionId").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
    // Ensure that we unregister during successful processing
    verify(beamFnDataClient).registerReceiver(eq("instructionId"), any(), any());
    verify(beamFnDataClient).unregisterReceiver(eq("instructionId"), any());
    verifyNoMoreInteractions(beamFnDataClient);
}
Also used : ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) Test(org.junit.Test)

Example 3 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataGrpcClientTest method testForInboundConsumer.

@Test
public void testForInboundConsumer() throws Exception {
    CountDownLatch waitForClientToConnect = new CountDownLatch(1);
    Collection<WindowedValue<String>> inboundValuesA = new ConcurrentLinkedQueue<>();
    Collection<WindowedValue<String>> inboundValuesB = new ConcurrentLinkedQueue<>();
    Collection<BeamFnApi.Elements> inboundServerValues = new ConcurrentLinkedQueue<>();
    AtomicReference<StreamObserver<BeamFnApi.Elements>> outboundServerObserver = new AtomicReference<>();
    CallStreamObserver<BeamFnApi.Elements> inboundServerObserver = TestStreams.withOnNext(inboundServerValues::add).build();
    Endpoints.ApiServiceDescriptor apiServiceDescriptor = Endpoints.ApiServiceDescriptor.newBuilder().setUrl(this.getClass().getName() + "-" + UUID.randomUUID()).build();
    Server server = InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()).addService(new BeamFnDataGrpc.BeamFnDataImplBase() {

        @Override
        public StreamObserver<BeamFnApi.Elements> data(StreamObserver<BeamFnApi.Elements> outboundObserver) {
            outboundServerObserver.set(outboundObserver);
            waitForClientToConnect.countDown();
            return inboundServerObserver;
        }
    }).build();
    server.start();
    try {
        ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
        BeamFnDataGrpcClient clientFactory = new BeamFnDataGrpcClient(PipelineOptionsFactory.create(), (Endpoints.ApiServiceDescriptor descriptor) -> channel, OutboundObserverFactory.trivial());
        BeamFnDataInboundObserver2 observerA = BeamFnDataInboundObserver2.forConsumers(Arrays.asList(DataEndpoint.create(TRANSFORM_ID_A, CODER, inboundValuesA::add)), Collections.emptyList());
        BeamFnDataInboundObserver2 observerB = BeamFnDataInboundObserver2.forConsumers(Arrays.asList(DataEndpoint.create(TRANSFORM_ID_B, CODER, inboundValuesB::add)), Collections.emptyList());
        clientFactory.registerReceiver(INSTRUCTION_ID_A, Arrays.asList(apiServiceDescriptor), observerA);
        waitForClientToConnect.await();
        outboundServerObserver.get().onNext(ELEMENTS_A_1);
        // Purposefully transmit some data before the consumer for B is bound showing that
        // data is not lost
        outboundServerObserver.get().onNext(ELEMENTS_B_1);
        Thread.sleep(100);
        clientFactory.registerReceiver(INSTRUCTION_ID_B, Arrays.asList(apiServiceDescriptor), observerB);
        // Show that out of order stream completion can occur.
        observerB.awaitCompletion();
        assertThat(inboundValuesB, contains(valueInGlobalWindow("JKL"), valueInGlobalWindow("MNO")));
        outboundServerObserver.get().onNext(ELEMENTS_A_2);
        observerA.awaitCompletion();
        assertThat(inboundValuesA, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF"), valueInGlobalWindow("GHI")));
    } finally {
        server.shutdownNow();
    }
}
Also used : CallStreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) AtomicReference(java.util.concurrent.atomic.AtomicReference) CountDownLatch(java.util.concurrent.CountDownLatch) BeamFnDataInboundObserver2(org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) WindowedValue(org.apache.beam.sdk.util.WindowedValue) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Test(org.junit.Test)

Example 4 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataOutboundAggregator method sendOrCollectBufferedDataAndFinishOutboundStreams.

/**
 * Closes the streams for all registered outbound endpoints. Should be called at the end of each
 * bundle. Returns the buffered Elements if the BeamFnDataOutboundAggregator started with
 * collectElementsIfNoFlushes=true, and there was no previous flush in this bundle, otherwise
 * returns null.
 */
public Elements sendOrCollectBufferedDataAndFinishOutboundStreams() {
    if (outputTimersReceivers.isEmpty() && outputDataReceivers.isEmpty()) {
        return null;
    }
    Elements.Builder bufferedElements;
    if (timeLimit > 0) {
        synchronized (flushLock) {
            bufferedElements = convertBufferForTransmission();
        }
    } else {
        bufferedElements = convertBufferForTransmission();
    }
    LOG.debug("Closing streams for instruction {} and outbound data {} and timers {}.", processBundleRequestIdSupplier.get(), outputDataReceivers, outputTimersReceivers);
    for (Map.Entry<String, Receiver<?>> entry : outputDataReceivers.entrySet()) {
        String pTransformId = entry.getKey();
        bufferedElements.addDataBuilder().setInstructionId(processBundleRequestIdSupplier.get()).setTransformId(pTransformId).setIsLast(true);
        entry.getValue().resetStats();
    }
    for (Map.Entry<TimerEndpoint, Receiver<?>> entry : outputTimersReceivers.entrySet()) {
        TimerEndpoint timerKey = entry.getKey();
        bufferedElements.addTimersBuilder().setInstructionId(processBundleRequestIdSupplier.get()).setTransformId(timerKey.pTransformId).setTimerFamilyId(timerKey.timerFamilyId).setIsLast(true);
        entry.getValue().resetStats();
    }
    if (collectElementsIfNoFlushes && !hasFlushedForBundle) {
        return bufferedElements.build();
    }
    outboundObserver.onNext(bufferedElements.build());
    // This is now at the end of a bundle, so we reset hasFlushedForBundle to prepare for new
    // bundles.
    hasFlushedForBundle = false;
    return null;
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with Data

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in project beam by apache.

the class BeamFnDataInboundObserver2 method multiplexElements.

/**
 * Dispatches the data and timers from the elements to corresponding receivers. Returns true if
 * all the endpoints are done after elements dispatching.
 */
public boolean multiplexElements(Elements elements) throws Exception {
    for (BeamFnApi.Elements.Data data : elements.getDataList()) {
        EndpointStatus<DataEndpoint<?>> endpoint = transformIdToDataEndpoint.get(data.getTransformId());
        if (endpoint == null) {
            throw new IllegalStateException(String.format("Unable to find inbound data receiver for instruction %s and transform %s.", data.getInstructionId(), data.getTransformId()));
        } else if (endpoint.isDone) {
            throw new IllegalStateException(String.format("Received data after inbound data receiver is done for instruction %s and transform %s.", data.getInstructionId(), data.getTransformId()));
        }
        InputStream inputStream = data.getData().newInput();
        Coder<Object> coder = (Coder<Object>) endpoint.endpoint.getCoder();
        FnDataReceiver<Object> receiver = (FnDataReceiver<Object>) endpoint.endpoint.getReceiver();
        while (inputStream.available() > 0) {
            receiver.accept(coder.decode(inputStream));
        }
        if (data.getIsLast()) {
            endpoint.isDone = true;
            numEndpointsThatAreIncomplete -= 1;
        }
    }
    for (BeamFnApi.Elements.Timers timers : elements.getTimersList()) {
        Map<String, EndpointStatus<TimerEndpoint<?>>> timerFamilyIdToEndpoints = transformIdToTimerFamilyIdToTimerEndpoint.get(timers.getTransformId());
        if (timerFamilyIdToEndpoints == null) {
            throw new IllegalStateException(String.format("Unable to find inbound timer receiver for instruction %s, transform %s, and timer family %s.", timers.getInstructionId(), timers.getTransformId(), timers.getTimerFamilyId()));
        }
        EndpointStatus<TimerEndpoint<?>> endpoint = timerFamilyIdToEndpoints.get(timers.getTimerFamilyId());
        if (endpoint == null) {
            throw new IllegalStateException(String.format("Unable to find inbound timer receiver for instruction %s, transform %s, and timer family %s.", timers.getInstructionId(), timers.getTransformId(), timers.getTimerFamilyId()));
        } else if (endpoint.isDone) {
            throw new IllegalStateException(String.format("Received timer after inbound timer receiver is done for instruction %s, transform %s, and timer family %s.", timers.getInstructionId(), timers.getTransformId(), timers.getTimerFamilyId()));
        }
        InputStream inputStream = timers.getTimers().newInput();
        Coder<Object> coder = (Coder<Object>) endpoint.endpoint.getCoder();
        FnDataReceiver<Object> receiver = (FnDataReceiver<Object>) endpoint.endpoint.getReceiver();
        while (inputStream.available() > 0) {
            receiver.accept(coder.decode(inputStream));
        }
        if (timers.getIsLast()) {
            endpoint.isDone = true;
            numEndpointsThatAreIncomplete -= 1;
        }
    }
    return numEndpointsThatAreIncomplete == 0;
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) InputStream(java.io.InputStream) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)

Aggregations

Elements (org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)11 Test (org.junit.Test)11 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)10 CountDownLatch (java.util.concurrent.CountDownLatch)8 ManagedChannel (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 ArrayList (java.util.ArrayList)6 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)6 StreamObserver (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver)6 ExecutorService (java.util.concurrent.ExecutorService)5 LinkedBlockingQueue (java.util.concurrent.LinkedBlockingQueue)5 LogicalEndpoint (org.apache.beam.sdk.fn.data.LogicalEndpoint)5 Map (java.util.Map)3 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)3 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)3 Endpoints (org.apache.beam.model.pipeline.v1.Endpoints)3 ShortIdMap (org.apache.beam.runners.core.metrics.ShortIdMap)3 BeamFnDataInboundObserver2 (org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver2)3 Server (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server)3 CallStreamObserver (org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.CallStreamObserver)3