Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in the Apache Beam project.
Class ProcessBundleDescriptors, method fromExecutableStageInternal.
/**
 * Builds an {@link ExecutableProcessBundleDescriptor} for a single executable stage.
 *
 * <p>The descriptor starts from the stage's components with the transform map replaced by the
 * stage's own transforms. The same mutable component builder is then handed, in order, to the
 * helpers that handle the stage input, the stage outputs, side inputs, timers and the input
 * coder, so the order of those calls must be preserved. Finally the coders, environments,
 * PCollections, windowing strategies and transforms held by the builder are copied into the
 * ProcessBundleDescriptor.
 *
 * @param id identifier set on the resulting ProcessBundleDescriptor
 * @param stage the stage to translate
 * @param dataEndpoint endpoint passed to the stage input/output helpers; also installed as the
 *     timer endpoint when the stage has at least one timer spec
 * @param stateEndpoint state API endpoint; left unset on the descriptor when {@code null}
 * @return the descriptor together with its input destinations, output coders, side input specs,
 *     bag user state specs and timer specs
 * @throws IOException declared by this method; presumably propagated from the helper calls
 *     (TODO confirm which ones)
 */
private static ExecutableProcessBundleDescriptor fromExecutableStageInternal(
    String id,
    ExecutableStage stage,
    ApiServiceDescriptor dataEndpoint,
    @Nullable ApiServiceDescriptor stateEndpoint)
    throws IOException {
  // Keep every component of the stage, but only the transforms that belong to it.
  // TODO: Remove the unreachable subcomponents if the size of the descriptor matters.
  Map<String, PTransform> transformsById =
      stage.getTransforms().stream()
          .collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
  Components.Builder componentsBuilder =
      stage.getComponents().toBuilder().clearTransforms().putAllTransforms(transformsById);

  ImmutableList.Builder<RemoteInputDestination> inputDestinations = ImmutableList.builder();
  ImmutableMap.Builder<String, Coder> outputCoders = ImmutableMap.builder();

  // Use the wire coder setting declared for the input PCollection, or the default instance
  // when the stage declares none for it.
  WireCoderSetting inputWireCoderSetting =
      stage.getWireCoderSettings().stream()
          .filter(
              setting ->
                  setting.getInputOrOutputId().equals(stage.getInputPCollection().getId()))
          .findAny()
          .orElse(WireCoderSetting.getDefaultInstance());

  // The relative order of the input and the outputs does not matter.
  inputDestinations.add(
      addStageInput(
          dataEndpoint, stage.getInputPCollection(), componentsBuilder, inputWireCoderSetting));
  outputCoders.putAll(
      addStageOutputs(
          dataEndpoint,
          stage.getOutputPCollections(),
          componentsBuilder,
          stage.getWireCoderSettings()));

  Map<String, Map<String, SideInputSpec>> sideInputSpecs =
      addSideInputs(stage, componentsBuilder);
  // Bag user state is resolved against a snapshot of the components built so far.
  Map<String, Map<String, BagUserStateSpec>> bagUserStateSpecs =
      forBagUserStates(stage, componentsBuilder.build());
  Map<String, Map<String, TimerSpec>> timerSpecs = forTimerSpecs(stage, componentsBuilder);

  lengthPrefixAnyInputCoder(stage.getInputPCollection().getId(), componentsBuilder);

  // Copy data from the components into the ProcessBundleDescriptor.
  ProcessBundleDescriptor.Builder descriptor = ProcessBundleDescriptor.newBuilder().setId(id);
  if (stateEndpoint != null) {
    descriptor.setStateApiServiceDescriptor(stateEndpoint);
  }
  if (!timerSpecs.isEmpty()) {
    // By default use the data endpoint for timers; in the future consider allowing a
    // different ApiServiceDescriptor to be specified for timers.
    descriptor.setTimerApiServiceDescriptor(dataEndpoint);
  }
  descriptor.putAllCoders(componentsBuilder.getCodersMap());
  descriptor.putAllEnvironments(componentsBuilder.getEnvironmentsMap());
  descriptor.putAllPcollections(componentsBuilder.getPcollectionsMap());
  descriptor.putAllWindowingStrategies(componentsBuilder.getWindowingStrategiesMap());
  descriptor.putAllTransforms(componentsBuilder.getTransformsMap());

  return ExecutableProcessBundleDescriptor.of(
      descriptor.build(),
      inputDestinations.build(),
      outputCoders.build(),
      sideInputSpecs,
      bagUserStateSpecs,
      timerSpecs);
}
Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in the Apache Beam project.
Class ProcessBundleHandlerTest, method testInstructionIsUnregisteredFromBeamFnDataClientOnSuccess.
/**
 * Checks that processing a bundle successfully registers exactly one receiver with the data
 * client for the instruction and unregisters it again, with no other interaction on the client.
 *
 * <p>The mocked {@code registerReceiver} immediately feeds the receiver a single "is last" data
 * element for transform "2L", so the bundle has nothing left to wait for.
 */
@Test
public void testInstructionIsUnregisteredFromBeamFnDataClientOnSuccess() throws Exception {
  // A descriptor with a single data-input transform, registered under id "1L".
  RunnerApi.PTransform dataInputTransform =
      RunnerApi.PTransform.newBuilder()
          .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build())
          .build();
  BeamFnApi.ProcessBundleDescriptor descriptor =
      BeamFnApi.ProcessBundleDescriptor.newBuilder()
          .putTransforms("2L", dataInputTransform)
          .build();
  Map<String, BeamFnApi.ProcessBundleDescriptor> descriptorsById =
      ImmutableMap.of("1L", descriptor);

  // On registration, immediately deliver the terminal data element to the registered receiver.
  Mockito.doAnswer(
          invocation -> {
            String instructionId = invocation.getArgument(0, String.class);
            CloseableFnDataReceiver<BeamFnApi.Elements> receiver =
                invocation.getArgument(2, CloseableFnDataReceiver.class);
            BeamFnApi.Elements.Data.Builder lastData =
                BeamFnApi.Elements.Data.newBuilder()
                    .setInstructionId(instructionId)
                    .setTransformId("2L")
                    .setIsLast(true);
            receiver.accept(BeamFnApi.Elements.newBuilder().addData(lastData).build());
            return null;
          })
      .when(beamFnDataClient)
      .registerReceiver(any(), any(), any());

  // Runner factory that only declares an incoming data endpoint with a no-op consumer.
  PTransformRunnerFactory<Object> dataInputRunnerFactory =
      context -> {
        context.addIncomingDataEndpoint(
            ApiServiceDescriptor.getDefaultInstance(), StringUtf8Coder.of(), input -> {});
        return null;
      };

  ProcessBundleHandler handler =
      new ProcessBundleHandler(
          PipelineOptionsFactory.create(),
          Collections.emptySet(),
          descriptorsById::get,
          beamFnDataClient,
          null /* beamFnStateGrpcClientCache */,
          null /* finalizeBundleHandler */,
          new ShortIdMap(),
          ImmutableMap.of(DATA_INPUT_URN, dataInputRunnerFactory),
          Caches.noop(),
          new BundleProcessorCache());

  BeamFnApi.InstructionRequest request =
      BeamFnApi.InstructionRequest.newBuilder()
          .setInstructionId("instructionId")
          .setProcessBundle(
              BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L"))
          .build();
  handler.processBundle(request);

  // Ensure that we unregister during successful processing.
  verify(beamFnDataClient).registerReceiver(eq("instructionId"), any(), any());
  verify(beamFnDataClient).unregisterReceiver(eq("instructionId"), any());
  verifyNoMoreInteractions(beamFnDataClient);
}
Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in the Apache Beam project.
Class BeamFnDataGrpcClientTest, method testForInboundConsumer.
/**
 * Exercises inbound delivery through {@code BeamFnDataGrpcClient} against an in-process server.
 *
 * <p>The test sends elements for instruction B before B's receiver is registered, to show that
 * such data is not lost, and lets B complete before A, to show that streams may complete out of
 * order.
 */
@Test
public void testForInboundConsumer() throws Exception {
  CountDownLatch clientConnected = new CountDownLatch(1);
  Collection<WindowedValue<String>> receivedByA = new ConcurrentLinkedQueue<>();
  Collection<WindowedValue<String>> receivedByB = new ConcurrentLinkedQueue<>();
  Collection<BeamFnApi.Elements> receivedByServer = new ConcurrentLinkedQueue<>();
  AtomicReference<StreamObserver<BeamFnApi.Elements>> serverToClient = new AtomicReference<>();
  CallStreamObserver<BeamFnApi.Elements> clientToServer =
      TestStreams.withOnNext(receivedByServer::add).build();

  // A unique in-process server name per test run.
  Endpoints.ApiServiceDescriptor apiServiceDescriptor =
      Endpoints.ApiServiceDescriptor.newBuilder()
          .setUrl(getClass().getName() + "-" + UUID.randomUUID())
          .build();

  BeamFnDataGrpc.BeamFnDataImplBase dataService =
      new BeamFnDataGrpc.BeamFnDataImplBase() {
        @Override
        public StreamObserver<BeamFnApi.Elements> data(
            StreamObserver<BeamFnApi.Elements> outboundObserver) {
          // Capture the server->client stream and signal that the client has connected.
          serverToClient.set(outboundObserver);
          clientConnected.countDown();
          return clientToServer;
        }
      };
  Server server =
      InProcessServerBuilder.forName(apiServiceDescriptor.getUrl())
          .addService(dataService)
          .build();
  server.start();

  try {
    ManagedChannel channel =
        InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build();
    BeamFnDataGrpcClient client =
        new BeamFnDataGrpcClient(
            PipelineOptionsFactory.create(),
            (Endpoints.ApiServiceDescriptor descriptor) -> channel,
            OutboundObserverFactory.trivial());

    BeamFnDataInboundObserver2 observerA =
        BeamFnDataInboundObserver2.forConsumers(
            Arrays.asList(DataEndpoint.create(TRANSFORM_ID_A, CODER, receivedByA::add)),
            Collections.emptyList());
    BeamFnDataInboundObserver2 observerB =
        BeamFnDataInboundObserver2.forConsumers(
            Arrays.asList(DataEndpoint.create(TRANSFORM_ID_B, CODER, receivedByB::add)),
            Collections.emptyList());

    client.registerReceiver(INSTRUCTION_ID_A, Arrays.asList(apiServiceDescriptor), observerA);
    clientConnected.await();

    StreamObserver<BeamFnApi.Elements> toClient = serverToClient.get();
    toClient.onNext(ELEMENTS_A_1);
    // Purposefully transmit some data before the consumer for B is bound, showing that
    // data is not lost.
    toClient.onNext(ELEMENTS_B_1);
    Thread.sleep(100);
    client.registerReceiver(INSTRUCTION_ID_B, Arrays.asList(apiServiceDescriptor), observerB);

    // Show that out of order stream completion can occur.
    observerB.awaitCompletion();
    assertThat(receivedByB, contains(valueInGlobalWindow("JKL"), valueInGlobalWindow("MNO")));

    serverToClient.get().onNext(ELEMENTS_A_2);
    observerA.awaitCompletion();
    assertThat(
        receivedByA,
        contains(
            valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF"), valueInGlobalWindow("GHI")));
  } finally {
    server.shutdownNow();
  }
}
Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in the Apache Beam project.
Class BeamFnDataOutboundAggregator, method sendOrCollectBufferedDataAndFinishOutboundStreams.
/**
 * Finishes the outbound streams of every registered data and timer endpoint. Intended to be
 * called at the end of each bundle.
 *
 * <p>The currently buffered output is converted for transmission, an "is last" marker is
 * appended for each registered data endpoint and each registered timer endpoint, and the stats
 * of every receiver are reset.
 *
 * @return the resulting Elements when this aggregator was started with
 *     collectElementsIfNoFlushes=true and nothing has been flushed during this bundle;
 *     otherwise {@code null}, in which case the Elements were handed to the outbound observer.
 *     Also {@code null} when no data or timer receivers are registered.
 */
public Elements sendOrCollectBufferedDataAndFinishOutboundStreams() {
  if (outputTimersReceivers.isEmpty() && outputDataReceivers.isEmpty()) {
    // Nothing was registered, so there is no stream to finish.
    return null;
  }

  Elements.Builder outgoing;
  if (timeLimit <= 0) {
    outgoing = convertBufferForTransmission();
  } else {
    // With a positive time limit the buffer conversion is guarded by the flush lock.
    synchronized (flushLock) {
      outgoing = convertBufferForTransmission();
    }
  }

  LOG.debug(
      "Closing streams for instruction {} and outbound data {} and timers {}.",
      processBundleRequestIdSupplier.get(),
      outputDataReceivers,
      outputTimersReceivers);

  // Mark every data stream as finished.
  for (Map.Entry<String, Receiver<?>> dataReceiver : outputDataReceivers.entrySet()) {
    outgoing
        .addDataBuilder()
        .setInstructionId(processBundleRequestIdSupplier.get())
        .setTransformId(dataReceiver.getKey())
        .setIsLast(true);
    dataReceiver.getValue().resetStats();
  }

  // Mark every timer stream as finished.
  for (Map.Entry<TimerEndpoint, Receiver<?>> timerReceiver : outputTimersReceivers.entrySet()) {
    TimerEndpoint timerEndpoint = timerReceiver.getKey();
    outgoing
        .addTimersBuilder()
        .setInstructionId(processBundleRequestIdSupplier.get())
        .setTransformId(timerEndpoint.pTransformId)
        .setTimerFamilyId(timerEndpoint.timerFamilyId)
        .setIsLast(true);
    timerReceiver.getValue().resetStats();
  }

  if (!collectElementsIfNoFlushes || hasFlushedForBundle) {
    outboundObserver.onNext(outgoing.build());
    // This is now the end of a bundle, so reset hasFlushedForBundle to prepare for new
    // bundles.
    hasFlushedForBundle = false;
    return null;
  }
  return outgoing.build();
}
Use of org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data in the Apache Beam project.
Class BeamFnDataInboundObserver2, method multiplexElements.
/**
 * Routes every data and timer payload contained in {@code elements} to the receiver registered
 * for its transform (and, for timers, its timer family), decoding values with the endpoint's
 * coder until the payload is exhausted.
 *
 * <p>A payload flagged as last marks its endpoint as done and decrements the count of
 * incomplete endpoints.
 *
 * @param elements the batch of data and timers to dispatch
 * @return {@code true} if no endpoint remains incomplete after dispatching this batch
 * @throws IllegalStateException if a payload targets an endpoint that is not registered, or
 *     one that has already been marked done
 * @throws Exception declared by this method; presumably propagated from decoding or from the
 *     receivers (TODO confirm)
 */
public boolean multiplexElements(Elements elements) throws Exception {
  for (BeamFnApi.Elements.Data dataChunk : elements.getDataList()) {
    EndpointStatus<DataEndpoint<?>> status =
        transformIdToDataEndpoint.get(dataChunk.getTransformId());
    if (status == null) {
      throw new IllegalStateException(
          String.format(
              "Unable to find inbound data receiver for instruction %s and transform %s.",
              dataChunk.getInstructionId(), dataChunk.getTransformId()));
    }
    if (status.isDone) {
      throw new IllegalStateException(
          String.format(
              "Received data after inbound data receiver is done for instruction %s and transform %s.",
              dataChunk.getInstructionId(), dataChunk.getTransformId()));
    }

    InputStream payload = dataChunk.getData().newInput();
    Coder<Object> coder = (Coder<Object>) status.endpoint.getCoder();
    FnDataReceiver<Object> receiver = (FnDataReceiver<Object>) status.endpoint.getReceiver();
    // Decode and deliver values until the payload has no bytes left.
    while (payload.available() > 0) {
      receiver.accept(coder.decode(payload));
    }

    if (dataChunk.getIsLast()) {
      status.isDone = true;
      numEndpointsThatAreIncomplete--;
    }
  }

  for (BeamFnApi.Elements.Timers timerChunk : elements.getTimersList()) {
    Map<String, EndpointStatus<TimerEndpoint<?>>> endpointsByFamily =
        transformIdToTimerFamilyIdToTimerEndpoint.get(timerChunk.getTransformId());
    // An unknown transform and an unknown timer family are reported with the same message.
    EndpointStatus<TimerEndpoint<?>> status =
        endpointsByFamily == null
            ? null
            : endpointsByFamily.get(timerChunk.getTimerFamilyId());
    if (status == null) {
      throw new IllegalStateException(
          String.format(
              "Unable to find inbound timer receiver for instruction %s, transform %s, and timer family %s.",
              timerChunk.getInstructionId(),
              timerChunk.getTransformId(),
              timerChunk.getTimerFamilyId()));
    }
    if (status.isDone) {
      throw new IllegalStateException(
          String.format(
              "Received timer after inbound timer receiver is done for instruction %s, transform %s, and timer family %s.",
              timerChunk.getInstructionId(),
              timerChunk.getTransformId(),
              timerChunk.getTimerFamilyId()));
    }

    InputStream payload = timerChunk.getTimers().newInput();
    Coder<Object> coder = (Coder<Object>) status.endpoint.getCoder();
    FnDataReceiver<Object> receiver = (FnDataReceiver<Object>) status.endpoint.getReceiver();
    // Decode and deliver timers until the payload has no bytes left.
    while (payload.available() > 0) {
      receiver.accept(coder.decode(payload));
    }

    if (timerChunk.getIsLast()) {
      status.isDone = true;
      numEndpointsThatAreIncomplete--;
    }
  }

  return numEndpointsThatAreIncomplete == 0;
}
Aggregations