use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class ProcessBundleHandlerTest method testInstructionEmbeddedElementsAreProcessed.
/**
 * Verifies that data and timers embedded directly in the {@code ProcessBundleRequest} are
 * delivered to the bundle: the recorded data output must contain {@code "data"} and the first
 * recorded timer must carry the dynamic timer tag {@code "output_timer"}.
 */
@Test
public void testInstructionEmbeddedElementsAreProcessed() throws Exception {
  final String instructionId = "998L";
  final String dataTransformId = "2L";
  final String timerTransformId = "3L";

  List<String> dataOutput = new ArrayList<>();
  List<Timers> timerOutput = new ArrayList<>();
  ProcessBundleHandler handler =
      setupProcessBundleHandlerForSimpleRecordingDoFn(dataOutput, timerOutput, false);

  // Encode the single input element.
  ByteString.Output encodedData = ByteString.newOutput();
  KvCoder<String, String> elementCoder = KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());
  elementCoder.encode(KV.of("", "data"), encodedData);

  // Encode the single input timer.
  ByteString.Output encodedTimer = ByteString.newOutput();
  Timer<String> inputTimer =
      Timer.of(
          "",
          "timer_id",
          Collections.singletonList(GlobalWindow.INSTANCE),
          Instant.ofEpochMilli(1L),
          Instant.ofEpochMilli(1L),
          PaneInfo.ON_TIME_AND_ONLY_FIRING);
  Timer.Coder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE)
      .encode(inputTimer, encodedTimer);

  // Each stream consists of one payload message followed by an "is last" marker message.
  String timerFamilyId = TimerFamilyDeclaration.PREFIX + SimpleDoFn.TIMER_FAMILY_ID;
  Data dataPayload =
      Data.newBuilder()
          .setInstructionId(instructionId)
          .setTransformId(dataTransformId)
          .setData(encodedData.toByteString())
          .build();
  Data dataEnd =
      Data.newBuilder()
          .setInstructionId(instructionId)
          .setTransformId(dataTransformId)
          .setIsLast(true)
          .build();
  Timers timerPayload =
      Timers.newBuilder()
          .setInstructionId(instructionId)
          .setTransformId(timerTransformId)
          .setTimerFamilyId(timerFamilyId)
          .setTimers(encodedTimer.toByteString())
          .build();
  Timers timerEnd =
      Timers.newBuilder()
          .setInstructionId(instructionId)
          .setTransformId(timerTransformId)
          .setTimerFamilyId(timerFamilyId)
          .setIsLast(true)
          .build();
  Elements elements =
      Elements.newBuilder()
          .addData(dataPayload)
          .addData(dataEnd)
          .addTimers(timerPayload)
          .addTimers(timerEnd)
          .build();

  ProcessBundleRequest.Builder bundleRequest =
      ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L").setElements(elements);
  handler.processBundle(
      InstructionRequest.newBuilder()
          .setInstructionId(instructionId)
          .setProcessBundle(bundleRequest)
          .build());
  handler.shutdown();

  assertThat(dataOutput, contains("data"));
  Timer<String> firstOutputTimer =
      Timer.Coder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE)
          .decode(timerOutput.get(0).getTimers().newInput());
  assertEquals("output_timer", firstOutputTimer.getDynamicTimerTag());
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataWriteRunnerTest method createRecordingAggregator.
/**
 * Creates a {@link BeamFnDataOutboundAggregator} whose outbound observer records everything it
 * is asked to send.
 *
 * <p>Each {@code Data} message received by the observer is decoded with {@code WIRE_CODER} and
 * appended to the list stored in {@code output} under the id that {@code bundleId} returns at
 * that moment. The map must already contain a list for that id; a missing entry results in a
 * {@link NullPointerException}.
 *
 * @param output map from bundle id to the list that collects decoded values
 * @param bundleId supplier of the current bundle id; also handed to the aggregator
 * @return an aggregator configured with the {@code data_buffer_size_limit=0} experiment
 * @throws RuntimeException (from the observer) if a payload cannot be decoded; the underlying
 *     {@link IOException} is attached as the cause
 */
private BeamFnDataOutboundAggregator createRecordingAggregator(Map<String, List<WindowedValue<String>>> output, Supplier<String> bundleId) {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_size_limit=0"));
  return new BeamFnDataOutboundAggregator(
      options,
      bundleId,
      new StreamObserver<Elements>() {
        @Override
        public void onNext(Elements elements) {
          for (Data data : elements.getDataList()) {
            try {
              output.get(bundleId.get()).add(WIRE_CODER.decode(data.getData().newInput()));
            } catch (IOException e) {
              // Keep the original exception as the cause so the real decode failure is visible.
              throw new RuntimeException("Failed to decode output.", e);
            }
          }
        }

        @Override
        public void onError(Throwable throwable) {
          // Intentionally ignored: this recording observer only captures successful output.
        }

        @Override
        public void onCompleted() {
          // Nothing to do on completion.
        }
      },
      false);
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class BeamFnDataOutboundAggregatorTest method testConfiguredTimeLimit.
/**
 * Verifies that, with the {@code data_buffer_time_limit_ms=1} experiment set, a single accepted
 * element is emitted to the outbound observer without an explicit flush by the test, and that
 * the first emitted message equals {@code messageWithData(new byte[1])}.
 */
@Test
public void testConfiguredTimeLimit() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_time_limit_ms=1"));

  List<Elements> flushedMessages = new ArrayList<>();
  final CountDownLatch firstFlushSeen = new CountDownLatch(1);
  // Record every outbound message and signal the test thread once one has arrived.
  Consumer<Elements> recordAndSignal =
      message -> {
        flushedMessages.add(message);
        firstFlushSeen.countDown();
      };

  BeamFnDataOutboundAggregator aggregator =
      new BeamFnDataOutboundAggregator(
          options,
          endpoint::getInstructionId,
          TestStreams.withOnNext(recordAndSignal).build(),
          false);

  // Test that it emits when time passed the time limit
  FnDataReceiver<byte[]> dataReceiver = registerOutputLocation(aggregator, endpoint, CODER);
  aggregator.start();
  dataReceiver.accept(new byte[1]);

  // Block until the observer has received the first message.
  firstFlushSeen.await();
  assertEquals(messageWithData(new byte[1]), flushedMessages.get(0));
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class FnApiDoFnRunner method constructSplitResult.
/**
 * Assembles a {@link HandlesSplits.SplitResult} from a window-level split and/or a downstream
 * element split.
 *
 * <p>Up to three kinds of roots are produced:
 *
 * <ul>
 *   <li>a primary root for the fully processed windows, if {@code windowedSplitResult} has one;
 *   <li>a residual root for the unprocessed windows, if {@code windowedSplitResult} has one,
 *       carrying {@code initialWatermark} as the output watermark for every output and no
 *       requested time delay;
 *   <li>a primary/residual pair for the element split, taken either from
 *       {@code windowedSplitResult} (residual carries the watermark from
 *       {@code watermarkAndState} and {@code resumeDelay}) or, failing that, from
 *       {@code downstreamElementSplit}.
 * </ul>
 *
 * <p>Output watermarks are omitted when the relevant watermark equals
 * {@code GlobalWindow.TIMESTAMP_MIN_VALUE}.
 *
 * @param windowedSplitResult the window-level split, may be {@code null}
 * @param downstreamElementSplit an element split performed downstream, may be {@code null}; must
 *     be {@code null} when {@code windowedSplitResult} contains a residual split root
 * @param fullInputCoder coder used to encode every root element
 * @param initialWatermark watermark reported for the residual of the unprocessed windows
 * @param watermarkAndState its key is the watermark reported for the element split residual
 * @param pTransformId transform id set on every created application
 * @param mainInputId input id set on every created application
 * @param outputIds outputs for which watermarks are reported
 * @param resumeDelay delay requested for the element split residual; must be non-null when
 *     {@code windowedSplitResult} contains a residual split root
 * @return the combined primary and residual roots
 * @throws IllegalArgumentException if both inputs carry an element split
 * @throws RuntimeException wrapping an {@link IOException} raised while encoding a root
 */
@VisibleForTesting
static <WatermarkEstimatorStateT> HandlesSplits.SplitResult constructSplitResult(WindowedSplitResult windowedSplitResult, HandlesSplits.SplitResult downstreamElementSplit, Coder fullInputCoder, Instant initialWatermark, KV<Instant, WatermarkEstimatorStateT> watermarkAndState, String pTransformId, String mainInputId, Collection<String> outputIds, Duration resumeDelay) {
  // The element split cannot come from both windowedSplitResult and downstreamElementSplit.
  checkArgument((windowedSplitResult == null || windowedSplitResult.getResidualSplitRoot() == null) || downstreamElementSplit == null);
  List<BundleApplication> primaryRoots = new ArrayList<>();
  List<DelayedBundleApplication> residualRoots = new ArrayList<>();

  // Encode window splits.
  if (windowedSplitResult != null && windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot() != null) {
    ByteString.Output primaryInOtherWindowsBytes = ByteString.newOutput();
    try {
      fullInputCoder.encode(windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot(), primaryInOtherWindowsBytes);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    BundleApplication.Builder primaryApplicationInOtherWindows = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryInOtherWindowsBytes.toByteString());
    primaryRoots.add(primaryApplicationInOtherWindows.build());
  }
  if (windowedSplitResult != null && windowedSplitResult.getResidualInUnprocessedWindowsRoot() != null) {
    ByteString.Output bytesOut = ByteString.newOutput();
    try {
      fullInputCoder.encode(windowedSplitResult.getResidualInUnprocessedWindowsRoot(), bytesOut);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    BundleApplication.Builder residualInUnprocessedWindowsRoot = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(bytesOut.toByteString());
    // We don't want to change the output watermarks or set the checkpoint resume time since
    // that applies to the current window.
    residualInUnprocessedWindowsRoot.putAllOutputWatermarks(encodeOutputWatermarks(initialWatermark, outputIds));
    residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualInUnprocessedWindowsRoot).build());
  }

  // Encode the element split from windowedSplitResult or from the downstream element split.
  // It is possible that there is no element split.
  if (windowedSplitResult != null && windowedSplitResult.getResidualSplitRoot() != null) {
    // When there is element split in windowedSplitResult, the resumeDelay should not be null.
    checkNotNull(resumeDelay);
    ByteString.Output primaryBytes = ByteString.newOutput();
    ByteString.Output residualBytes = ByteString.newOutput();
    try {
      fullInputCoder.encode(windowedSplitResult.getPrimarySplitRoot(), primaryBytes);
      fullInputCoder.encode(windowedSplitResult.getResidualSplitRoot(), residualBytes);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    primaryRoots.add(BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryBytes.toByteString()).build());
    BundleApplication.Builder residualApplication = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(residualBytes.toByteString());
    residualApplication.putAllOutputWatermarks(encodeOutputWatermarks(watermarkAndState.getKey(), outputIds));
    residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualApplication).setRequestedTimeDelay(Durations.fromMillis(resumeDelay.getMillis())).build());
  } else if (downstreamElementSplit != null) {
    primaryRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getPrimaryRoots()));
    residualRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getResidualRoots()));
  }
  return HandlesSplits.SplitResult.of(primaryRoots, residualRoots);
}

/**
 * Maps every output id to {@code watermark} expressed as a protobuf {@code Timestamp}, or
 * returns an empty map when the watermark equals {@code GlobalWindow.TIMESTAMP_MIN_VALUE}.
 *
 * <p>Floor division/modulo is used so that a watermark before the epoch yields a non-negative
 * {@code nanos} field, as the protobuf {@code Timestamp} contract requires. For non-negative
 * millisecond values the result is identical to truncating division.
 */
private static Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> encodeOutputWatermarks(Instant watermark, Collection<String> outputIds) {
  Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> watermarksByOutput = new HashMap<>();
  if (watermark.equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
    return watermarksByOutput;
  }
  long millis = watermark.getMillis();
  org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark =
      org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder()
          .setSeconds(Math.floorDiv(millis, 1000L))
          .setNanos(((int) Math.floorMod(millis, 1000L)) * 1000000)
          .build();
  for (String outputId : outputIds) {
    watermarksByOutput.put(outputId, outputWatermark);
  }
  return watermarksByOutput;
}
use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.
the class ProcessBundleHandler method createBundleProcessor.
/**
 * Builds a {@link BundleProcessor} for the descriptor that {@code fnApiRegistry} returns for
 * {@code bundleId}.
 *
 * <p>The method creates the registries and callback lists for the bundle, selects a state
 * handler based on whether the descriptor declares a state {@code ApiServiceDescriptor},
 * creates the processor, and then wires runners and consumers starting from every transform
 * whose URN is one of the data-input, data-output, Java-source or read URNs. Finally
 * {@code finish()} is invoked on the processor before it is returned.
 *
 * @param bundleId id used to look up the descriptor; also used in the error message raised when
 *     a timer endpoint is registered without a timer {@code ApiServiceDescriptor}
 * @param processBundleRequest request handed to the failing state handler when the descriptor
 *     has no state {@code ApiServiceDescriptor}
 * @return the wired bundle processor
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
  BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);
  SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
  MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
  ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
  PCollectionConsumerRegistry pCollectionConsumerRegistry =
      new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
  HashSet<String> processedPTransformIds = new HashSet<>();
  PTransformFunctionRegistry startFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
  PTransformFunctionRegistry finishFunctionRegistry =
      new PTransformFunctionRegistry(
          metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
  List<ThrowingRunnable> resetFunctions = new ArrayList<>();
  List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
  List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();

  // Index each PCollection id by the ids of the PTransforms that take it as an input.
  bundleDescriptor
      .getTransformsMap()
      .forEach(
          (transformId, transform) ->
              transform
                  .getInputsMap()
                  .values()
                  .forEach(
                      pCollectionId ->
                          pCollectionIdsToConsumingPTransforms.put(pCollectionId, transformId)));

  // Use a real state client only when the descriptor names a State ApiServiceDescriptor;
  // otherwise install a handler built from the request that fails state calls.
  HandleStateCallsForBundle beamFnStateClient =
      bundleDescriptor.hasStateApiServiceDescriptor()
          ? new BlockTillStateCallsFinish(
              beamFnStateGrpcClientCache.forApiServiceDescriptor(
                  bundleDescriptor.getStateApiServiceDescriptor()))
          : new FailAllStateCallsForBundle(processBundleRequest);

  BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
  Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
  // Finalization requests are only collected here; the list is handed to the processor below.
  BundleFinalizer bundleFinalizer =
      new BundleFinalizer() {
        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
          bundleFinalizationCallbackRegistrations.add(
              CallbackRegistration.create(callbackExpiry, callback));
        }
      };

  BundleProcessor bundleProcessor =
      BundleProcessor.create(
          processWideCache,
          bundleDescriptor,
          startFunctionRegistry,
          finishFunctionRegistry,
          resetFunctions,
          tearDownFunctions,
          progressRequestCallbacks,
          splitListener,
          pCollectionConsumerRegistry,
          metricsContainerRegistry,
          stateTracker,
          beamFnStateClient,
          bundleFinalizationCallbackRegistrations,
          runnerCapabilities);

  // Wire runners and consumers, starting from the root transforms of the descriptor.
  for (Map.Entry<String, RunnerApi.PTransform> root : bundleDescriptor.getTransformsMap().entrySet()) {
    String urn = root.getValue().getSpec().getUrn();
    // TODO: Remove source as a root and have it be triggered by the Runner.
    boolean isRoot =
        DATA_INPUT_URN.equals(urn)
            || DATA_OUTPUT_URN.equals(urn)
            || JAVA_SOURCE_URN.equals(urn)
            || PTransformTranslation.READ_TRANSFORM_URN.equals(urn);
    if (!isRoot) {
      continue;
    }
    createRunnerAndConsumersForPTransformRecursively(
        beamFnStateClient,
        beamFnDataClient,
        root.getKey(),
        root.getValue(),
        bundleProcessor::getInstructionId,
        bundleProcessor::getCacheTokens,
        bundleProcessor::getBundleCache,
        bundleDescriptor,
        pCollectionIdsToConsumingPTransforms,
        pCollectionConsumerRegistry,
        processedPTransformIds,
        startFunctionRegistry,
        finishFunctionRegistry,
        resetFunctions::add,
        tearDownFunctions::add,
        (apiServiceDescriptor, dataEndpoint) -> {
          // Record each inbound ApiServiceDescriptor once, but every data endpoint.
          if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
            bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
          }
          bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
        },
        (timerEndpoint) -> {
          if (!bundleDescriptor.hasTimerApiServiceDescriptor()) {
            throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
          }
          bundleProcessor.getTimerEndpoints().add(timerEndpoint);
        },
        progressRequestCallbacks::add,
        splitListener,
        bundleFinalizer,
        bundleProcessor.getChannelRoots(),
        bundleProcessor.getOutboundAggregators(),
        bundleProcessor.getRunnerCapabilities());
  }
  bundleProcessor.finish();
  return bundleProcessor;
}
Aggregations