use of org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator in project beam by apache.
the class BeamFnDataWriteRunnerTest method testReuseForMultipleBundles.
/**
 * Verifies that one write runner can be reused across bundles: elements accepted while the
 * bundle id supplier returns "0" are recorded under bundle "0", and elements accepted after the
 * id switches to "1" are recorded under bundle "1", without touching the mocked data client.
 */
@Test
public void testReuseForMultipleBundles() throws Exception {
  String inputPCollectionId = "inputPC";
  AtomicReference<String> currentBundleId = new AtomicReference<>("0");
  List<WindowedValue<String>> firstBundleOutput = new ArrayList<>();
  List<WindowedValue<String>> secondBundleOutput = new ArrayList<>();

  // A single recording aggregator serves both bundles; it picks the target list by bundle id.
  Map<ApiServiceDescriptor, BeamFnDataOutboundAggregator> aggregatorsByEndpoint = new HashMap<>();
  aggregatorsByEndpoint.put(
      PORT_SPEC.getApiServiceDescriptor(),
      createRecordingAggregator(
          ImmutableMap.of("0", firstBundleOutput, "1", secondBundleOutput),
          currentBundleId::get));

  RunnerApi.PTransform writeTransform =
      RemoteGrpcPortWrite.writeToPort(inputPCollectionId, PORT_SPEC).toPTransform();
  RunnerApi.PCollection inputPCollection =
      RunnerApi.PCollection.newBuilder().setCoderId(ELEM_CODER_ID).build();
  PTransformRunnerFactoryTestContext context =
      PTransformRunnerFactoryTestContext.builder(TRANSFORM_ID, writeTransform)
          .beamFnDataClient(mockBeamFnDataClient)
          .processBundleInstructionIdSupplier(currentBundleId::get)
          .outboundAggregators(aggregatorsByEndpoint)
          .pCollections(ImmutableMap.of(inputPCollectionId, inputPCollection))
          .coders(COMPONENTS.getCodersMap())
          .windowingStrategies(COMPONENTS.getWindowingStrategiesMap())
          .build();

  new BeamFnDataWriteRunner.Factory<String>().createRunnerForPTransform(context);

  // The runner must have registered exactly one consumer, keyed by the input PCollection id.
  assertThat(context.getPCollectionConsumers().keySet(), containsInAnyOrder(inputPCollectionId));
  FnDataReceiver<Object> consumer = context.getPCollectionConsumer(inputPCollectionId);

  // Bundle "0".
  consumer.accept(valueInGlobalWindow("ABC"));
  consumer.accept(valueInGlobalWindow("DEF"));
  assertThat(
      firstBundleOutput, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
  firstBundleOutput.clear();

  // Bundle "1": same consumer, only the supplied bundle id changes.
  currentBundleId.set("1");
  consumer.accept(valueInGlobalWindow("GHI"));
  consumer.accept(valueInGlobalWindow("JKL"));
  assertThat(
      secondBundleOutput, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));

  verifyNoMoreInteractions(mockBeamFnDataClient);
}
use of org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator in project beam by apache.
the class PTransformRunnerFactoryTestContext method addOutgoingTimersEndpoint.
/**
 * Registers an outgoing timers endpoint for the given timer family.
 *
 * <p>Looks up the outbound aggregator stored under the timer API service descriptor, registers
 * an output timers location on it for this PTransform and timer family, and records the
 * resulting endpoint in the list of outgoing timers endpoints.
 *
 * @param timerFamilyId id of the timer family the endpoint is registered for
 * @param coder coder handed to the aggregator and stored on the recorded endpoint
 * @return the receiver returned by the aggregator for this timer family
 */
@Override
public <T> FnDataReceiver<Timer<T>> addOutgoingTimersEndpoint(String timerFamilyId, Coder<Timer<T>> coder) {
  FnDataReceiver<Timer<T>> timerReceiver =
      getOutboundAggregators()
          .get(getTimerApiServiceDescriptor())
          .registerOutputTimersLocation(getPTransformId(), timerFamilyId, coder);
  getOutgoingTimersEndpoints()
      .add(TimerEndpoint.create(getPTransformId(), timerFamilyId, coder, timerReceiver));
  return timerReceiver;
}
use of org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator in project beam by apache.
the class ProcessBundleHandler method embedOutboundElementsIfApplicable.
/**
 * Finishes the outbound streams of every aggregator of the bundle processor and, when possible,
 * embeds the collected elements into the response.
 *
 * <p>Each aggregator is asked to send or collect its buffered data. A {@code null} result is
 * treated as "this aggregator flushed". If no aggregator flushed, all collected elements are
 * merged and set on the response. Otherwise nothing is embedded and every aggregator that did
 * return elements is asked to send them itself.
 *
 * @param response builder of the response that may receive the embedded elements
 * @param bundleProcessor processor whose outbound aggregators are drained
 */
private void embedOutboundElementsIfApplicable(ProcessBundleResponse.Builder response, BundleProcessor bundleProcessor) {
  int aggregatorCount = bundleProcessor.getOutboundAggregators().size();
  if (aggregatorCount == 0) {
    return;
  }

  // Drain all aggregators before deciding how to deliver, keeping one slot per aggregator
  // (null included) so results can be matched back to aggregators by position.
  List<Elements> drained = new ArrayList<>(aggregatorCount);
  boolean anyFlushed = false;
  for (BeamFnDataOutboundAggregator aggregator :
      bundleProcessor.getOutboundAggregators().values()) {
    Elements buffered = aggregator.sendOrCollectBufferedDataAndFinishOutboundStreams();
    anyFlushed |= buffered == null;
    drained.add(buffered);
  }

  if (anyFlushed) {
    // Since there was at least one flushed aggregator, we have to use the aggregators that were
    // able to successfully collect their elements to emit them and can not send them as part of
    // the ProcessBundleResponse.
    int position = 0;
    for (BeamFnDataOutboundAggregator aggregator :
        bundleProcessor.getOutboundAggregators().values()) {
      Elements pending = drained.get(position);
      position++;
      if (pending == null) {
        continue;
      }
      aggregator.sendElements(pending);
    }
    return;
  }

  // Nothing was flushed: merge everything into a single message on the response.
  Elements.Builder merged = Elements.newBuilder();
  for (Elements part : drained) {
    merged.mergeFrom(part);
  }
  response.setElements(merged.build());
}
use of org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator in project beam by apache.
the class ProcessBundleHandlerTest method setupProcessBundleHandlerForSimpleRecordingDoFn.
/**
 * Builds a {@link ProcessBundleHandler} around a small descriptor (registered as "1L") made of a
 * data-input transform "2L" feeding a ParDo "3L" that runs {@code SimpleDoFn}.
 *
 * <p>The data-input transform is replaced by a factory that records the value of every incoming
 * KV element into {@code dataOutput}. The mocked data client hands out outbound aggregators
 * whose stream observer records every {@code Timers} message into {@code timerOutput}.
 *
 * @param dataOutput receives the value of each element delivered to the data-input endpoint
 * @param timerOutput receives each {@code Timers} entry passed to the outbound observer, once
 * @param enableOutputEmbedding when true, the handler is constructed with the
 *     CONTROL_RESPONSE_ELEMENTS_EMBEDDING runner-protocol URN; otherwise with an empty set
 * @return the configured handler
 * @throws Exception declared by the original signature; propagated from setup calls
 */
private ProcessBundleHandler setupProcessBundleHandlerForSimpleRecordingDoFn(List<String> dataOutput, List<Timers> timerOutput, boolean enableOutputEmbedding) throws Exception {
  DoFnWithExecutionInformation doFnWithExecutionInformation =
      DoFnWithExecutionInformation.of(
          new SimpleDoFn(),
          SimpleDoFn.MAIN_OUTPUT_TAG,
          Collections.emptyMap(),
          DoFnSchemaInformation.create());
  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
          .setPayload(
              ByteString.copyFrom(
                  SerializableUtils.serializeToByteArray(doFnWithExecutionInformation)))
          .build();
  RunnerApi.ParDoPayload parDoPayload =
      ParDoPayload.newBuilder()
          .setDoFn(functionSpec)
          .putTimerFamilySpecs(
              "tfs-" + SimpleDoFn.TIMER_FAMILY_ID,
              TimerFamilySpec.newBuilder()
                  .setTimeDomain(RunnerApi.TimeDomain.Enum.EVENT_TIME)
                  .setTimerFamilyCoderId("timer-coder")
                  .build())
          .build();
  BeamFnApi.ProcessBundleDescriptor processBundleDescriptor =
      ProcessBundleDescriptor.newBuilder()
          .putTransforms(
              "2L",
              PTransform.newBuilder()
                  .setSpec(FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build())
                  .putOutputs("2L-output", "2L-output-pc")
                  .build())
          .putTransforms(
              "3L",
              PTransform.newBuilder()
                  .setSpec(
                      FunctionSpec.newBuilder()
                          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                          .setPayload(parDoPayload.toByteString()))
                  .putInputs("3L-input", "2L-output-pc")
                  .build())
          .putPcollections(
              "2L-output-pc",
              PCollection.newBuilder()
                  .setWindowingStrategyId("window-strategy")
                  .setCoderId("2L-output-coder")
                  .setIsBounded(IsBounded.Enum.BOUNDED)
                  .build())
          .putWindowingStrategies(
              "window-strategy",
              WindowingStrategy.newBuilder()
                  .setWindowCoderId("window-strategy-coder")
                  .setWindowFn(
                      FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1"))
                  .setOutputTime(OutputTime.Enum.END_OF_WINDOW)
                  .setAccumulationMode(AccumulationMode.Enum.ACCUMULATING)
                  .setTrigger(Trigger.newBuilder().setAlways(Always.getDefaultInstance()))
                  .setClosingBehavior(ClosingBehavior.Enum.EMIT_ALWAYS)
                  .setOnTimeBehavior(OnTimeBehavior.Enum.FIRE_ALWAYS)
                  .build())
          .setTimerApiServiceDescriptor(ApiServiceDescriptor.newBuilder().setUrl("url").build())
          .putCoders("string_coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder())
          .putCoders(
              "2L-output-coder",
              Coder.newBuilder()
                  .setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.KV_CODER_URN).build())
                  .addComponentCoderIds("string_coder")
                  .addComponentCoderIds("string_coder")
                  .build())
          .putCoders(
              "window-strategy-coder",
              Coder.newBuilder()
                  .setSpec(
                      FunctionSpec.newBuilder()
                          .setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN)
                          .build())
                  .build())
          .putCoders(
              "timer-coder",
              Coder.newBuilder()
                  .setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.TIMER_CODER_URN))
                  .addComponentCoderIds("string_coder")
                  .addComponentCoderIds("window-strategy-coder")
                  .build())
          .build();
  Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry =
      ImmutableMap.of("1L", processBundleDescriptor);

  // Start from the registered factories and override only the data-input URN with a recorder.
  Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap =
      Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
  urnToPTransformRunnerFactoryMap.put(
      DATA_INPUT_URN,
      (PTransformRunnerFactory<Object>)
          (context) -> {
            context.addIncomingDataEndpoint(
                ApiServiceDescriptor.getDefaultInstance(),
                KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()),
                (input) -> {
                  dataOutput.add(input.getValue());
                });
            return null;
          });

  Mockito.doAnswer(
          (invocation) ->
              new BeamFnDataOutboundAggregator(
                  PipelineOptionsFactory.create(),
                  invocation.getArgument(1),
                  new StreamObserver<Elements>() {
                    @Override
                    public void onNext(Elements elements) {
                      // Record each timers entry exactly once. Adding the full list inside a
                      // per-timer loop would duplicate entries whenever a message carries
                      // more than one timer.
                      timerOutput.addAll(elements.getTimersList());
                    }

                    @Override
                    public void onError(Throwable throwable) {
                      // Intentionally ignored by this recording observer.
                    }

                    @Override
                    public void onCompleted() {
                      // Nothing to release.
                    }
                  },
                  invocation.getArgument(2)))
      .when(beamFnDataClient)
      .createOutboundAggregator(any(), any(), anyBoolean());

  return new ProcessBundleHandler(
      PipelineOptionsFactory.create(),
      enableOutputEmbedding
          ? Collections.singleton(
              BeamUrns.getUrn(StandardRunnerProtocols.Enum.CONTROL_RESPONSE_ELEMENTS_EMBEDDING))
          : Collections.emptySet(),
      fnApiRegistry::get,
      beamFnDataClient,
      null /* beamFnStateClient */,
      null /* finalizeBundleHandler */,
      new ShortIdMap(),
      urnToPTransformRunnerFactoryMap,
      Caches.noop(),
      new BundleProcessorCache());
}
use of org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator in project beam by apache.
the class BeamFnDataWriteRunnerTest method createRecordingAggregator.
/**
 * Creates an outbound aggregator whose stream observer decodes every data entry it receives
 * with {@code WIRE_CODER} and appends the decoded value to the list registered for the current
 * bundle id.
 *
 * <p>The options carry the experiment {@code data_buffer_size_limit=0}.
 *
 * @param output decoded values per bundle id; must contain a list for every id the supplier
 *     returns while data is being sent
 * @param bundleId supplies the id of the bundle currently being processed
 * @return the recording aggregator
 * @throws RuntimeException from the observer if a data entry cannot be decoded; the underlying
 *     {@link IOException} is attached as the cause
 */
private BeamFnDataOutboundAggregator createRecordingAggregator(Map<String, List<WindowedValue<String>>> output, Supplier<String> bundleId) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_size_limit=0"));
  return new BeamFnDataOutboundAggregator(
      options,
      bundleId,
      new StreamObserver<Elements>() {
        @Override
        public void onNext(Elements elements) {
          for (Data data : elements.getDataList()) {
            try {
              output.get(bundleId.get()).add(WIRE_CODER.decode(data.getData().newInput()));
            } catch (IOException e) {
              // Keep the original failure as the cause so the decode error stays diagnosable.
              throw new RuntimeException("Failed to decode output.", e);
            }
          }
        }

        @Override
        public void onError(Throwable throwable) {
          // Intentionally ignored by this recording observer.
        }

        @Override
        public void onCompleted() {
          // Nothing to release.
        }
      },
      false);
}
Aggregations