Search in sources :

Example 21 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class ProcessBundleHandlerTest method testInstructionEmbeddedElementsAreProcessed.

/**
 * Sends one encoded KV element and one encoded timer embedded directly in the
 * {@code ProcessBundleRequest} (each followed by an "is last" marker), then checks that
 * {@code "data"} was recorded as data output and that the first recorded timer carries the
 * dynamic timer tag {@code "output_timer"}.
 */
@Test
public void testInstructionEmbeddedElementsAreProcessed() throws Exception {
    final String instructionId = "998L";
    final String dataTransformId = "2L";
    final String timerTransformId = "3L";

    List<String> recordedData = new ArrayList<>();
    List<Timers> recordedTimers = new ArrayList<>();
    ProcessBundleHandler handler = setupProcessBundleHandlerForSimpleRecordingDoFn(recordedData, recordedTimers, false);

    // Serialize the element payload and the timer payload that will be embedded in the request.
    ByteString.Output dataBytes = ByteString.newOutput();
    KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()).encode(KV.of("", "data"), dataBytes);
    ByteString.Output timerBytes = ByteString.newOutput();
    Timer.Coder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE).encode(Timer.of("", "timer_id", Collections.singletonList(GlobalWindow.INSTANCE), Instant.ofEpochMilli(1L), Instant.ofEpochMilli(1L), PaneInfo.ON_TIME_AND_ONLY_FIRING), timerBytes);

    // Each stream consists of one payload message followed by a terminating "is last" message.
    Data dataPayload = Data.newBuilder().setInstructionId(instructionId).setTransformId(dataTransformId).setData(dataBytes.toByteString()).build();
    Data dataEnd = Data.newBuilder().setInstructionId(instructionId).setTransformId(dataTransformId).setIsLast(true).build();
    Timers timerPayload = Timers.newBuilder().setInstructionId(instructionId).setTransformId(timerTransformId).setTimerFamilyId(TimerFamilyDeclaration.PREFIX + SimpleDoFn.TIMER_FAMILY_ID).setTimers(timerBytes.toByteString()).build();
    Timers timerEnd = Timers.newBuilder().setInstructionId(instructionId).setTransformId(timerTransformId).setTimerFamilyId(TimerFamilyDeclaration.PREFIX + SimpleDoFn.TIMER_FAMILY_ID).setIsLast(true).build();
    Elements elements = Elements.newBuilder().addData(dataPayload).addData(dataEnd).addTimers(timerPayload).addTimers(timerEnd).build();

    handler.processBundle(InstructionRequest.newBuilder().setInstructionId(instructionId).setProcessBundle(ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L").setElements(elements)).build());
    handler.shutdown();

    assertThat(recordedData, contains("data"));
    // Decode the first recorded timer message to inspect its tag.
    Timer<String> decodedTimer = Timer.Coder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE).decode(recordedTimers.get(0).getTimers().newInput());
    assertEquals("output_timer", decodedTimer.getDynamicTimerTag());
}
Also used : ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) Timers(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Timers) Test(org.junit.Test)

Example 22 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataWriteRunnerTest method createRecordingAggregator.

/**
 * Builds a {@link BeamFnDataOutboundAggregator} whose outbound observer decodes every data
 * payload it receives with {@code WIRE_CODER} and appends the decoded value to the list stored
 * in {@code output} under the id currently returned by {@code bundleId}.
 *
 * <p>The aggregator is created with the experiment {@code data_buffer_size_limit=0}.
 * NOTE(review): presumably this makes every element flush immediately — confirm against the
 * aggregator's option handling.
 *
 * @param output map from bundle id to the list that collects decoded values; it must already
 *     contain a list for every bundle id that is observed
 * @param bundleId supplies the bundle id, both for the aggregator and for the recording lookup
 * @return the configured aggregator
 */
private BeamFnDataOutboundAggregator createRecordingAggregator(Map<String, List<WindowedValue<String>>> output, Supplier<String> bundleId) {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_size_limit=0"));
    return new BeamFnDataOutboundAggregator(options, bundleId, new StreamObserver<Elements>() {

        @Override
        public void onNext(Elements elements) {
            for (Data data : elements.getDataList()) {
                try {
                    output.get(bundleId.get()).add(WIRE_CODER.decode(data.getData().newInput()));
                } catch (IOException e) {
                    // Keep the original IOException as the cause so a decode failure stays diagnosable.
                    throw new RuntimeException("Failed to decode output.", e);
                }
            }
        }

        @Override
        public void onError(Throwable throwable) {
            // No-op: this recording observer does not track stream errors.
        }

        @Override
        public void onCompleted() {
            // No-op: this recording observer does not track stream completion.
        }
    }, false);
}
Also used : BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Data(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data) IOException(java.io.IOException) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)

Example 23 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class BeamFnDataOutboundAggregatorTest method testConfiguredTimeLimit.

/**
 * Configures the aggregator with the experiment {@code data_buffer_time_limit_ms=1}, writes a
 * single one-byte array, and checks that the first batch delivered to the outbound observer
 * equals {@code messageWithData(new byte[1])}.
 */
@Test
public void testConfiguredTimeLimit() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.as(ExperimentalOptions.class).setExperiments(Arrays.asList("data_buffer_time_limit_ms=1"));

    List<Elements> flushedBatches = new ArrayList<>();
    final CountDownLatch firstFlush = new CountDownLatch(1);
    // Record every batch handed to the observer and signal the test thread.
    Consumer<Elements> recordAndSignal = batch -> {
        flushedBatches.add(batch);
        firstFlush.countDown();
    };
    BeamFnDataOutboundAggregator aggregator = new BeamFnDataOutboundAggregator(options, endpoint::getInstructionId, TestStreams.withOnNext(recordAndSignal).build(), false);

    // The buffered element must be emitted once the configured time limit has elapsed.
    FnDataReceiver<byte[]> receiver = registerOutputLocation(aggregator, endpoint, CODER);
    aggregator.start();
    receiver.accept(new byte[1]);

    // Block until the observer has received the flushed buffer.
    firstFlush.await();
    assertEquals(messageWithData(new byte[1]), flushedBatches.get(0));
}
Also used : TestStreams(org.apache.beam.sdk.fn.test.TestStreams) Arrays(java.util.Arrays) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Coder(org.apache.beam.sdk.coders.Coder) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) ArrayList(java.util.ArrayList) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Assert.fail(org.junit.Assert.fail) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) LengthPrefixCoder(org.apache.beam.sdk.coders.LengthPrefixCoder) Parameterized(org.junit.runners.Parameterized) Matchers.empty(org.hamcrest.Matchers.empty) Collection(java.util.Collection) Matchers(org.hamcrest.Matchers) IOException(java.io.IOException) Test(org.junit.Test) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) Consumer(java.util.function.Consumer) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Receiver(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator.Receiver) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) Assert.assertEquals(org.junit.Assert.assertEquals) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ArrayList(java.util.ArrayList) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) CountDownLatch(java.util.concurrent.CountDownLatch) Test(org.junit.Test)

Example 24 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class FnApiDoFnRunner method constructSplitResult.

/**
 * Combines a window-level split and at most one element-level split into a single
 * {@link HandlesSplits.SplitResult}.
 *
 * <p>Primary roots are gathered from the fully processed windows and from the primary side of the
 * element split; residual roots are gathered from the unprocessed windows and from the residual
 * side of the element split. When {@code windowedSplitResult} carries no element split, the
 * single primary and residual root of {@code downstreamElementSplit} (if given) are used instead.
 *
 * @param windowedSplitResult split produced for this transform; may be {@code null}
 * @param downstreamElementSplit element split produced downstream; may be {@code null} and must
 *     be {@code null} when {@code windowedSplitResult} has a residual split root
 * @param fullInputCoder coder used to encode every root that comes from
 *     {@code windowedSplitResult}
 * @param initialWatermark output watermark reported for the residual covering unprocessed windows
 * @param watermarkAndState its key is the output watermark reported for the element-split residual
 * @param pTransformId transform id set on every created bundle application
 * @param mainInputId input id set on every created bundle application
 * @param outputIds output ids that receive an output watermark entry
 * @param resumeDelay requested delay for the element-split residual; must be non-null when
 *     {@code windowedSplitResult} has a residual split root
 * @return the split result holding the collected primary and residual roots
 * @throws RuntimeException wrapping the {@link IOException} if encoding a root fails
 */
@VisibleForTesting
static <WatermarkEstimatorStateT> HandlesSplits.SplitResult constructSplitResult(WindowedSplitResult windowedSplitResult, HandlesSplits.SplitResult downstreamElementSplit, Coder fullInputCoder, Instant initialWatermark, KV<Instant, WatermarkEstimatorStateT> watermarkAndState, String pTransformId, String mainInputId, Collection<String> outputIds, Duration resumeDelay) {
    // An element split may come from windowedSplitResult or from downstreamElementSplit, never both.
    checkArgument((windowedSplitResult == null || windowedSplitResult.getResidualSplitRoot() == null) || downstreamElementSplit == null);
    List<BundleApplication> primaryRoots = new ArrayList<>();
    List<DelayedBundleApplication> residualRoots = new ArrayList<>();
    // Encode window splits.
    if (windowedSplitResult != null && windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot() != null) {
        ByteString.Output primaryInOtherWindowsBytes = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getPrimaryInFullyProcessedWindowsRoot(), primaryInOtherWindowsBytes);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        BundleApplication.Builder primaryApplicationInOtherWindows = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryInOtherWindowsBytes.toByteString());
        primaryRoots.add(primaryApplicationInOtherWindows.build());
    }
    if (windowedSplitResult != null && windowedSplitResult.getResidualInUnprocessedWindowsRoot() != null) {
        ByteString.Output bytesOut = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getResidualInUnprocessedWindowsRoot(), bytesOut);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        BundleApplication.Builder residualInUnprocessedWindowsRoot = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(bytesOut.toByteString());
        // We don't want to change the output watermarks or set the checkpoint resume time since
        // that applies to the current window, so the initial watermark is reported here.
        residualInUnprocessedWindowsRoot.putAllOutputWatermarks(buildOutputWatermarkMap(initialWatermark, outputIds));
        residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualInUnprocessedWindowsRoot).build());
    }
    // Encode the element split, taken from windowedSplitResult or from the downstream split.
    // It is possible that there is no element split at all.
    if (windowedSplitResult != null && windowedSplitResult.getResidualSplitRoot() != null) {
        // When there is an element split in windowedSplitResult, the resumeDelay must not be null.
        checkNotNull(resumeDelay);
        ByteString.Output primaryBytes = ByteString.newOutput();
        ByteString.Output residualBytes = ByteString.newOutput();
        try {
            fullInputCoder.encode(windowedSplitResult.getPrimarySplitRoot(), primaryBytes);
            fullInputCoder.encode(windowedSplitResult.getResidualSplitRoot(), residualBytes);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        primaryRoots.add(BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(primaryBytes.toByteString()).build());
        BundleApplication.Builder residualApplication = BundleApplication.newBuilder().setTransformId(pTransformId).setInputId(mainInputId).setElement(residualBytes.toByteString());
        residualApplication.putAllOutputWatermarks(buildOutputWatermarkMap(watermarkAndState.getKey(), outputIds));
        residualRoots.add(DelayedBundleApplication.newBuilder().setApplication(residualApplication).setRequestedTimeDelay(Durations.fromMillis(resumeDelay.getMillis())).build());
    } else if (downstreamElementSplit != null) {
        primaryRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getPrimaryRoots()));
        residualRoots.add(Iterables.getOnlyElement(downstreamElementSplit.getResidualRoots()));
    }
    return HandlesSplits.SplitResult.of(primaryRoots, residualRoots);
}

/**
 * Maps every output id to {@code watermark} as a protobuf timestamp, or returns an empty map when
 * {@code watermark} equals {@code GlobalWindow.TIMESTAMP_MIN_VALUE}.
 */
private static Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> buildOutputWatermarkMap(Instant watermark, Collection<String> outputIds) {
    Map<String, org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp> outputWatermarks = new HashMap<>();
    if (!watermark.equals(GlobalWindow.TIMESTAMP_MIN_VALUE)) {
        org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp outputWatermark = instantToProtoTimestamp(watermark);
        for (String outputId : outputIds) {
            outputWatermarks.put(outputId, outputWatermark);
        }
    }
    return outputWatermarks;
}

/**
 * Converts an instant to a protobuf timestamp with a normalized nanos field.
 *
 * <p>Floor division is used so that instants before the epoch produce nanos in
 * {@code [0, 999_999_999]}, as the protobuf {@code Timestamp} contract requires; truncating
 * division would produce negative nanos for such instants. The resulting seconds and nanos still
 * denote the same instant.
 */
private static org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp instantToProtoTimestamp(Instant instant) {
    long millis = instant.getMillis();
    return org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.Timestamp.newBuilder().setSeconds(Math.floorDiv(millis, 1000L)).setNanos((int) Math.floorMod(millis, 1000L) * 1000000).build();
}
Also used : DelayedBundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication) BundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.BundleApplication) HashMap(java.util.HashMap) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) DelayedBundleApplication(org.apache.beam.model.fnexecution.v1.BeamFnApi.DelayedBundleApplication) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 25 with List

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry.List in project beam by apache.

the class ProcessBundleHandler method createBundleProcessor.

/**
 * Creates the {@code BundleProcessor} for the bundle descriptor that {@code fnApiRegistry}
 * returns for {@code bundleId}.
 *
 * <p>The method builds the registries and callback lists handed to
 * {@code BundleProcessor.create}, picks a state-call handler depending on whether the descriptor
 * has a state ApiServiceDescriptor, and then calls
 * {@code createRunnerAndConsumersForPTransformRecursively} for every transform whose URN is one
 * of the data input, data output, Java source or read URNs.
 *
 * @param bundleId id used to look up the descriptor; also included in the timer error message
 * @param processBundleRequest request passed to the failing state handler when the descriptor
 *     has no state ApiServiceDescriptor
 * @return the bundle processor, after {@code finish()} has been called on it
 * @throws IOException declared by this method; NOTE(review): presumably raised while creating
 *     runners — confirm against the callee
 */
private BundleProcessor createBundleProcessor(String bundleId, BeamFnApi.ProcessBundleRequest processBundleRequest) throws IOException {
    BeamFnApi.ProcessBundleDescriptor bundleDescriptor = fnApiRegistry.apply(bundleId);

    SetMultimap<String, String> pCollectionIdsToConsumingPTransforms = HashMultimap.create();
    MetricsContainerStepMap metricsContainerRegistry = new MetricsContainerStepMap();
    ExecutionStateTracker stateTracker = new ExecutionStateTracker(ExecutionStateSampler.instance());
    PCollectionConsumerRegistry pCollectionConsumerRegistry = new PCollectionConsumerRegistry(metricsContainerRegistry, stateTracker);
    HashSet<String> processedPTransformIds = new HashSet<>();
    PTransformFunctionRegistry startFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.START_STATE_NAME);
    PTransformFunctionRegistry finishFunctionRegistry = new PTransformFunctionRegistry(metricsContainerRegistry, stateTracker, ExecutionStateTracker.FINISH_STATE_NAME);
    List<ThrowingRunnable> resetFunctions = new ArrayList<>();
    List<ThrowingRunnable> tearDownFunctions = new ArrayList<>();
    List<ProgressRequestCallback> progressRequestCallbacks = new ArrayList<>();

    // Index every PCollection id by the ids of the PTransforms that consume it.
    bundleDescriptor.getTransformsMap().forEach((transformId, transform) -> transform.getInputsMap().values().forEach(pCollectionId -> pCollectionIdsToConsumingPTransforms.put(pCollectionId, transformId)));

    // Use a real State API handler only when the descriptor names a State ApiServiceDescriptor;
    // otherwise install a handler built from the request that fails state calls.
    HandleStateCallsForBundle beamFnStateClient = bundleDescriptor.hasStateApiServiceDescriptor()
        ? new BlockTillStateCallsFinish(beamFnStateGrpcClientCache.forApiServiceDescriptor(bundleDescriptor.getStateApiServiceDescriptor()))
        : new FailAllStateCallsForBundle(processBundleRequest);

    BundleSplitListener.InMemory splitListener = BundleSplitListener.InMemory.create();
    Collection<CallbackRegistration> bundleFinalizationCallbackRegistrations = new ArrayList<>();
    // Finalization callbacks are collected as registrations on the list above.
    BundleFinalizer bundleFinalizer = new BundleFinalizer() {

        @Override
        public void afterBundleCommit(Instant callbackExpiry, Callback callback) {
            bundleFinalizationCallbackRegistrations.add(CallbackRegistration.create(callbackExpiry, callback));
        }
    };

    BundleProcessor bundleProcessor = BundleProcessor.create(processWideCache, bundleDescriptor, startFunctionRegistry, finishFunctionRegistry, resetFunctions, tearDownFunctions, progressRequestCallbacks, splitListener, pCollectionConsumerRegistry, metricsContainerRegistry, stateTracker, beamFnStateClient, bundleFinalizationCallbackRegistrations, runnerCapabilities);

    // Create runners and consumers, starting from each root transform.
    for (Map.Entry<String, RunnerApi.PTransform> entry : bundleDescriptor.getTransformsMap().entrySet()) {
        String urn = entry.getValue().getSpec().getUrn();
        // TODO: Remove source as a root and have it be triggered by the Runner.
        boolean isRoot = DATA_INPUT_URN.equals(urn) || DATA_OUTPUT_URN.equals(urn) || JAVA_SOURCE_URN.equals(urn) || PTransformTranslation.READ_TRANSFORM_URN.equals(urn);
        if (isRoot) {
            createRunnerAndConsumersForPTransformRecursively(beamFnStateClient, beamFnDataClient, entry.getKey(), entry.getValue(), bundleProcessor::getInstructionId, bundleProcessor::getCacheTokens, bundleProcessor::getBundleCache, bundleDescriptor, pCollectionIdsToConsumingPTransforms, pCollectionConsumerRegistry, processedPTransformIds, startFunctionRegistry, finishFunctionRegistry, resetFunctions::add, tearDownFunctions::add, (apiServiceDescriptor, dataEndpoint) -> {
                // Track each inbound ApiServiceDescriptor once, but record every data endpoint.
                if (!bundleProcessor.getInboundEndpointApiServiceDescriptors().contains(apiServiceDescriptor)) {
                    bundleProcessor.getInboundEndpointApiServiceDescriptors().add(apiServiceDescriptor);
                }
                bundleProcessor.getInboundDataEndpoints().add(dataEndpoint);
            }, (timerEndpoint) -> {
                // Timer endpoints are only accepted when the descriptor names a timer service.
                if (bundleDescriptor.hasTimerApiServiceDescriptor()) {
                    bundleProcessor.getTimerEndpoints().add(timerEndpoint);
                } else {
                    throw new IllegalStateException(String.format("Timers are unsupported because the " + "ProcessBundleRequest %s does not provide a timer ApiServiceDescriptor.", bundleId));
                }
            }, progressRequestCallbacks::add, splitListener, bundleFinalizer, bundleProcessor.getChannelRoots(), bundleProcessor.getOutboundAggregators(), bundleProcessor.getRunnerCapabilities());
        }
    }
    bundleProcessor.finish();
    return bundleProcessor;
}
Also used : MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) HashSet(java.util.HashSet) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) Instant(org.joda.time.Instant) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Map(java.util.Map) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) LinkedHashMap(java.util.LinkedHashMap) WeakHashMap(java.util.WeakHashMap)

Aggregations

ArrayList (java.util.ArrayList)27 Test (org.junit.Test)24 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)15 Elements (org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)13 List (java.util.List)9 ExecutorService (java.util.concurrent.ExecutorService)9 WindowedValue (org.apache.beam.sdk.util.WindowedValue)9 IOException (java.io.IOException)8 Collection (java.util.Collection)8 CountDownLatch (java.util.concurrent.CountDownLatch)8 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)8 Coder (org.apache.beam.sdk.coders.Coder)8 KvCoder (org.apache.beam.sdk.coders.KvCoder)8 Map (java.util.Map)7 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)7 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)7 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)7 Iterables (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables)7 Collections (java.util.Collections)6 CompletableFuture (java.util.concurrent.CompletableFuture)6