Search in sources :

Example 21 with ProcessBundleDescriptor

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor in project beam by apache.

the class ProcessBundleHandlerTest method setupProcessBundleHandlerForSimpleRecordingDoFn.

/**
 * Builds a {@link ProcessBundleHandler} wired to a two-transform bundle descriptor (a data input
 * "2L" feeding a ParDo "3L" running {@code SimpleDoFn}) whose outputs are recorded into the
 * supplied lists.
 *
 * @param dataOutput receives the value of every KV element delivered to the data input endpoint
 * @param timerOutput receives every {@link Timers} message the outbound aggregator emits
 * @param enableOutputEmbedding when true, the handler is created with the
 *     CONTROL_RESPONSE_ELEMENTS_EMBEDDING runner capability; otherwise with no capabilities
 * @return the configured handler; the descriptor is registered under id "1L"
 * @throws Exception if building the descriptor or handler fails
 */
private ProcessBundleHandler setupProcessBundleHandlerForSimpleRecordingDoFn(List<String> dataOutput, List<Timers> timerOutput, boolean enableOutputEmbedding) throws Exception {
    DoFnWithExecutionInformation doFnWithExecutionInformation = DoFnWithExecutionInformation.of(new SimpleDoFn(), SimpleDoFn.MAIN_OUTPUT_TAG, Collections.emptyMap(), DoFnSchemaInformation.create());
    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
        .setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
        .setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnWithExecutionInformation)))
        .build();
    RunnerApi.ParDoPayload parDoPayload = ParDoPayload.newBuilder()
        .setDoFn(functionSpec)
        .putTimerFamilySpecs("tfs-" + SimpleDoFn.TIMER_FAMILY_ID, TimerFamilySpec.newBuilder().setTimeDomain(RunnerApi.TimeDomain.Enum.EVENT_TIME).setTimerFamilyCoderId("timer-coder").build())
        .build();
    BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = ProcessBundleDescriptor.newBuilder()
        // Source transform: emits into PCollection "2L-output-pc".
        .putTransforms("2L", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).putOutputs("2L-output", "2L-output-pc").build())
        // ParDo under test: consumes "2L-output-pc".
        .putTransforms("3L", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString())).putInputs("3L-input", "2L-output-pc").build())
        .putPcollections("2L-output-pc", PCollection.newBuilder().setWindowingStrategyId("window-strategy").setCoderId("2L-output-coder").setIsBounded(IsBounded.Enum.BOUNDED).build())
        .putWindowingStrategies("window-strategy", WindowingStrategy.newBuilder().setWindowCoderId("window-strategy-coder").setWindowFn(FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1")).setOutputTime(OutputTime.Enum.END_OF_WINDOW).setAccumulationMode(AccumulationMode.Enum.ACCUMULATING).setTrigger(Trigger.newBuilder().setAlways(Always.getDefaultInstance())).setClosingBehavior(ClosingBehavior.Enum.EMIT_ALWAYS).setOnTimeBehavior(OnTimeBehavior.Enum.FIRE_ALWAYS).build())
        .setTimerApiServiceDescriptor(ApiServiceDescriptor.newBuilder().setUrl("url").build())
        .putCoders("string_coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder())
        .putCoders("2L-output-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.KV_CODER_URN).build()).addComponentCoderIds("string_coder").addComponentCoderIds("string_coder").build())
        .putCoders("window-strategy-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN).build()).build())
        .putCoders("timer-coder", Coder.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(ModelCoders.TIMER_CODER_URN)).addComponentCoderIds("string_coder").addComponentCoderIds("window-strategy-coder").build())
        .build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
    // Start from the registered factories and replace only the data-input one with a recorder.
    Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap = Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
    urnToPTransformRunnerFactoryMap.put(DATA_INPUT_URN, (PTransformRunnerFactory<Object>) (context) -> {
        context.addIncomingDataEndpoint(ApiServiceDescriptor.getDefaultInstance(), KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), (input) -> {
            dataOutput.add(input.getValue());
        });
        return null;
    });
    Mockito.doAnswer((invocation) -> new BeamFnDataOutboundAggregator(PipelineOptionsFactory.create(), invocation.getArgument(1), new StreamObserver<Elements>() {

        @Override
        public void onNext(Elements elements) {
            // Record each timer exactly once. Calling addAll inside a per-timer loop would
            // append the whole list once per element, duplicating entries.
            timerOutput.addAll(elements.getTimersList());
        }

        @Override
        public void onError(Throwable throwable) {
            // Intentionally ignored: this recording observer only captures timers.
        }

        @Override
        public void onCompleted() {
            // Nothing to release.
        }
    }, invocation.getArgument(2))).when(beamFnDataClient).createOutboundAggregator(any(), any(), anyBoolean());
    return new ProcessBundleHandler(
        PipelineOptionsFactory.create(),
        enableOutputEmbedding ? Collections.singleton(BeamUrns.getUrn(StandardRunnerProtocols.Enum.CONTROL_RESPONSE_ELEMENTS_EMBEDDING)) : Collections.emptySet(),
        fnApiRegistry::get,
        beamFnDataClient,
        null /* beamFnStateClient */,
        null /* finalizeBundleHandler */,
        new ShortIdMap(),
        urnToPTransformRunnerFactoryMap,
        Caches.noop(),
        new BundleProcessorCache());
}
Also used : BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) Assert.assertNotSame(org.junit.Assert.assertNotSame) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) MockitoAnnotations(org.mockito.MockitoAnnotations) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) MetricsContainerStepMap(org.apache.beam.runners.core.metrics.MetricsContainerStepMap) Arrays.asList(java.util.Arrays.asList) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Map(java.util.Map) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Uninterruptibles(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.Uninterruptibles) ApiServiceDescriptor(org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) KvCoder(org.apache.beam.sdk.coders.KvCoder) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) TimerEndpoint(org.apache.beam.sdk.fn.data.TimerEndpoint) Set(java.util.Set) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) Data(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Data) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) StandardRunnerProtocols(org.apache.beam.model.pipeline.v1.RunnerApi.StandardRunnerProtocols) Matchers.contains(org.hamcrest.Matchers.contains) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Matchers.is(org.hamcrest.Matchers.is) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) ClosingBehavior(org.apache.beam.model.pipeline.v1.RunnerApi.ClosingBehavior) KV(org.apache.beam.sdk.values.KV) TimerMap(org.apache.beam.sdk.state.TimerMap) ExecutionStateTracker(org.apache.beam.runners.core.metrics.ExecutionStateTracker) 
Mock(org.mockito.Mock) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) RunWith(org.junit.runner.RunWith) TimerFamilySpec(org.apache.beam.model.pipeline.v1.RunnerApi.TimerFamilySpec) ArgumentMatchers.anyBoolean(org.mockito.ArgumentMatchers.anyBoolean) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Assert.assertSame(org.junit.Assert.assertSame) Timers(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Timers) PCollectionConsumerRegistry(org.apache.beam.fn.harness.data.PCollectionConsumerRegistry) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) TimerSpec(org.apache.beam.sdk.state.TimerSpec) TupleTag(org.apache.beam.sdk.values.TupleTag) Cache(org.apache.beam.fn.harness.Cache) BeamFnDataClient(org.apache.beam.fn.harness.data.BeamFnDataClient) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Before(org.junit.Before) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) PTransformFunctionRegistry(org.apache.beam.fn.harness.data.PTransformFunctionRegistry) CloseableFnDataReceiver(org.apache.beam.sdk.fn.data.CloseableFnDataReceiver) ProcessBundleRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest) TimerFamilyDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerFamilyDeclaration) Assert.assertTrue(org.junit.Assert.assertTrue) Mockito.times(org.mockito.Mockito.times) IOException(java.io.IOException) Test(org.junit.Test) OnTimeBehavior(org.apache.beam.model.pipeline.v1.RunnerApi.OnTimeBehavior) ProgressRequestCallback(org.apache.beam.fn.harness.PTransformRunnerFactory.ProgressRequestCallback) BeamUrns(org.apache.beam.runners.core.construction.BeamUrns) DataEndpoint(org.apache.beam.sdk.fn.data.DataEndpoint) Assert.assertNull(org.junit.Assert.assertNull) AccumulationMode(org.apache.beam.model.pipeline.v1.RunnerApi.AccumulationMode) 
OutputTime(org.apache.beam.model.pipeline.v1.RunnerApi.OutputTime) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) Timer(org.apache.beam.runners.core.construction.Timer) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimeDomain(org.apache.beam.sdk.state.TimeDomain) Assert.assertEquals(org.junit.Assert.assertEquals) StateResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateResponse) Coder(org.apache.beam.model.pipeline.v1.RunnerApi.Coder) BeamFnStateClient(org.apache.beam.fn.harness.state.BeamFnStateClient) CoderTranslation(org.apache.beam.runners.core.construction.CoderTranslation) Mockito.argThat(org.mockito.Mockito.argThat) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) IsBounded(org.apache.beam.model.pipeline.v1.RunnerApi.IsBounded) Mockito.verifyNoMoreInteractions(org.mockito.Mockito.verifyNoMoreInteractions) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) BundleProcessor(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor) REGISTERED_RUNNER_FACTORIES(org.apache.beam.fn.harness.control.ProcessBundleHandler.REGISTERED_RUNNER_FACTORIES) PaneInfo(org.apache.beam.sdk.transforms.windowing.PaneInfo) Collection(java.util.Collection) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) List(java.util.List) InstructionRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest) StateRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.StateRequest) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Trigger(org.apache.beam.model.pipeline.v1.RunnerApi.Trigger) 
Matchers.equalTo(org.hamcrest.Matchers.equalTo) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) WindowingStrategy(org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy) ThrowingRunnable(org.apache.beam.sdk.function.ThrowingRunnable) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Always(org.apache.beam.model.pipeline.v1.RunnerApi.Trigger.Always) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Assert.assertThrows(org.junit.Assert.assertThrows) IsEmptyCollection.empty(org.hamcrest.collection.IsEmptyCollection.empty) CompletableFuture(java.util.concurrent.CompletableFuture) BeamFnStateGrpcClientCache(org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) BeamFnDataReadRunner(org.apache.beam.fn.harness.BeamFnDataReadRunner) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) HashSet(java.util.HashSet) CacheToken(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest.CacheToken) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InstructionResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ParDoTranslation(org.apache.beam.runners.core.construction.ParDoTranslation) Assert.assertNotNull(org.junit.Assert.assertNotNull) Mockito.when(org.mockito.Mockito.when) JUnit4(org.junit.runners.JUnit4) Mockito.verify(org.mockito.Mockito.verify) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Matchers.emptyIterable(org.hamcrest.Matchers.emptyIterable) Instant(org.joda.time.Instant) Caches(org.apache.beam.fn.harness.Caches) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) Collections(java.util.Collections) BeamFnDataOutboundAggregator(org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator) 
StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Elements(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Timers(org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements.Timers)

Example 22 with ProcessBundleDescriptor

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor in project beam by apache.

the class ProcessBundleHandlerTest method testBundleFinalizationIsPropagated.

/**
 * Verifies that a bundle-finalization callback registered during start-bundle is reported in the
 * response and forwarded to the {@code FinalizeBundleHandler} under the instruction id.
 */
@Test
public void testBundleFinalizationIsPropagated() throws Exception {
    final Instant expectedExpiry = Instant.ofEpochMilli(42L);

    // Descriptor with a single data-input transform, registered under id "1L".
    RunnerApi.PTransform dataInputTransform = RunnerApi.PTransform.newBuilder()
        .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build())
        .build();
    BeamFnApi.ProcessBundleDescriptor descriptor = BeamFnApi.ProcessBundleDescriptor.newBuilder()
        .putTransforms("2L", dataInputTransform)
        .build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> descriptorsById = ImmutableMap.of("1L", descriptor);

    FinalizeBundleHandler finalizeHandlerMock = mock(FinalizeBundleHandler.class);
    BundleFinalizer.Callback callbackMock = mock(BundleFinalizer.Callback.class);

    // Runner factory that registers the callback from its start-bundle function.
    PTransformRunnerFactory<Object> registeringFactory = (context) -> {
        BundleFinalizer finalizer = context.getBundleFinalizer();
        context.addStartBundleFunction(() -> finalizer.afterBundleCommit(expectedExpiry, callbackMock));
        return null;
    };

    ProcessBundleHandler handler = new ProcessBundleHandler(
        PipelineOptionsFactory.create(),
        Collections.emptySet(),
        descriptorsById::get,
        beamFnDataClient,
        null /* beamFnStateGrpcClientCache */,
        finalizeHandlerMock,
        new ShortIdMap(),
        ImmutableMap.of(DATA_INPUT_URN, registeringFactory),
        Caches.noop(),
        new BundleProcessorCache());

    BeamFnApi.InstructionRequest request = BeamFnApi.InstructionRequest.newBuilder()
        .setInstructionId("2L")
        .setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L"))
        .build();
    BeamFnApi.InstructionResponse.Builder response = handler.processBundle(request);

    assertTrue(response.getProcessBundle().getRequiresFinalization());
    verify(finalizeHandlerMock).registerCallbacks(eq("2L"), argThat((Collection<CallbackRegistration> registrations) -> {
        // Exactly one registration is expected, carrying the expiry and callback given above.
        CallbackRegistration only = Iterables.getOnlyElement(registrations);
        assertEquals(expectedExpiry, only.getExpiryTime());
        assertSame(callbackMock, only.getCallback());
        return true;
    }));
}
Also used : ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) InstructionResponse(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) BundleFinalizer(org.apache.beam.sdk.transforms.DoFn.BundleFinalizer) CallbackRegistration(org.apache.beam.fn.harness.control.FinalizeBundleHandler.CallbackRegistration) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Test(org.junit.Test)

Example 23 with ProcessBundleDescriptor

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor in project beam by apache.

the class FnHarness method main.

/**
 * Run a FnHarness with the given id and options that attaches to the specified logging and
 * control API service descriptors using the given channel factory and outbound observer factory.
 *
 * <p>Registers one handler per instruction request case, starts the execution state sampler, and
 * then blocks until the control client terminates.
 *
 * @param id Harness ID; attached to every channel through {@code AddHarnessIdInterceptor}
 * @param options The options for this pipeline
 * @param runnerCapabilites passed through to the {@code ProcessBundleHandler} (presumably the
 *     capability URNs advertised by the runner; verify against the caller)
 * @param loggingApiServiceDescriptor endpoint used to create the {@code BeamFnLoggingClient}
 * @param controlApiServiceDescriptor endpoint used to open the control channel
 * @param statusApiServiceDescriptor endpoint for the status client; may be {@code null}, in which
 *     case no status client is created
 * @param channelFactory factory used to open channels for the given service descriptors
 * @param outboundObserverFactory passed to the data client, the state client cache and the
 *     control client
 * @param processWideCache cache handed to the {@code ProcessBundleHandler} and status client; a
 *     sub-cache of it stores fetched process bundle descriptors
 * @throws Exception if setting up or running the harness fails
 */
public static void main(String id, PipelineOptions options, Set<String> runnerCapabilites, Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor, Endpoints.ApiServiceDescriptor controlApiServiceDescriptor, Endpoints.ApiServiceDescriptor statusApiServiceDescriptor, ManagedChannelFactory channelFactory, OutboundObserverFactory outboundObserverFactory, Cache<Object, Object> processWideCache) throws Exception {
    // Wrap the factory so every channel it creates carries the harness id interceptor.
    channelFactory = channelFactory.withInterceptors(ImmutableList.of(AddHarnessIdInterceptor.create(id)));
    IdGenerator idGenerator = IdGenerators.decrementingLongs();
    ShortIdMap metricsShortIds = new ShortIdMap();
    ExecutorService executorService = options.as(GcsOptions.class).getExecutorService();
    // intercepts logging and sends it to the logging service.
    try (BeamFnLoggingClient logging = new BeamFnLoggingClient(options, loggingApiServiceDescriptor, channelFactory::forDescriptor)) {
        LOG.info("Fn Harness started");
        // Register standard file systems.
        FileSystems.setDefaultPipelineOptions(options);
        // Dispatch table: one handler per instruction request case, populated below.
        EnumMap<BeamFnApi.InstructionRequest.RequestCase, ThrowingFunction<InstructionRequest, BeamFnApi.InstructionResponse.Builder>> handlers = new EnumMap<>(BeamFnApi.InstructionRequest.RequestCase.class);
        // A single control channel backs both the async stub and the blocking stub.
        ManagedChannel channel = channelFactory.forDescriptor(controlApiServiceDescriptor);
        BeamFnControlGrpc.BeamFnControlStub controlStub = BeamFnControlGrpc.newStub(channel);
        BeamFnControlGrpc.BeamFnControlBlockingStub blockingControlStub = BeamFnControlGrpc.newBlockingStub(channel);
        BeamFnDataGrpcClient beamFnDataMultiplexer = new BeamFnDataGrpcClient(options, channelFactory::forDescriptor, outboundObserverFactory);
        BeamFnStateGrpcClientCache beamFnStateGrpcClientCache = new BeamFnStateGrpcClientCache(idGenerator, channelFactory, outboundObserverFactory);
        // NOTE(review): fetches the executor service from options again instead of reusing
        // executorService above; presumably the same instance - confirm.
        FinalizeBundleHandler finalizeBundleHandler = new FinalizeBundleHandler(options.as(GcsOptions.class).getExecutorService());
        // Descriptor lookup: served from a sub-cache of processWideCache, falling back to a
        // blocking fetch over the control channel on a miss.
        Function<String, BeamFnApi.ProcessBundleDescriptor> getProcessBundleDescriptor = new Function<String, ProcessBundleDescriptor>() {

            private static final String PROCESS_BUNDLE_DESCRIPTORS = "ProcessBundleDescriptors";

            private final Cache<String, BeamFnApi.ProcessBundleDescriptor> cache = Caches.subCache(processWideCache, PROCESS_BUNDLE_DESCRIPTORS);

            @Override
            public BeamFnApi.ProcessBundleDescriptor apply(String id) {
                return cache.computeIfAbsent(id, this::loadDescriptor);
            }

            // Fetches the descriptor with the given id via the blocking control stub.
            private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) {
                return blockingControlStub.getProcessBundleDescriptor(BeamFnApi.GetProcessBundleDescriptorRequest.newBuilder().setProcessBundleDescriptorId(id).build());
            }
        };
        MetricsEnvironment.setProcessWideContainer(MetricsContainerImpl.createProcessWideContainer());
        ProcessBundleHandler processBundleHandler = new ProcessBundleHandler(options, runnerCapabilites, getProcessBundleDescriptor, beamFnDataMultiplexer, beamFnStateGrpcClientCache, finalizeBundleHandler, metricsShortIds, processWideCache);
        // The status client is optional: created only when a status endpoint was supplied.
        BeamFnStatusClient beamFnStatusClient = null;
        if (statusApiServiceDescriptor != null) {
            beamFnStatusClient = new BeamFnStatusClient(statusApiServiceDescriptor, channelFactory::forDescriptor, processBundleHandler.getBundleProcessorCache(), options, processWideCache);
        }
        // TODO(BEAM-9729): Remove once runners no longer send this instruction.
        handlers.put(BeamFnApi.InstructionRequest.RequestCase.REGISTER, request -> BeamFnApi.InstructionResponse.newBuilder().setRegister(BeamFnApi.RegisterResponse.getDefaultInstance()));
        handlers.put(BeamFnApi.InstructionRequest.RequestCase.FINALIZE_BUNDLE, finalizeBundleHandler::finalizeBundle);
        handlers.put(BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE, processBundleHandler::processBundle);
        handlers.put(BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE_PROGRESS, processBundleHandler::progress);
        handlers.put(BeamFnApi.InstructionRequest.RequestCase.PROCESS_BUNDLE_SPLIT, processBundleHandler::trySplit);
        // Resolves each requested monitoring-info id through metricsShortIds.
        handlers.put(InstructionRequest.RequestCase.MONITORING_INFOS, request -> BeamFnApi.InstructionResponse.newBuilder().setMonitoringInfos(BeamFnApi.MonitoringInfosMetadataResponse.newBuilder().putAllMonitoringInfo(StreamSupport.stream(request.getMonitoringInfos().getMonitoringInfoIdList().spliterator(), false).collect(Collectors.toMap(Function.identity(), metricsShortIds::get)))));
        HarnessMonitoringInfosInstructionHandler processWideHandler = new HarnessMonitoringInfosInstructionHandler(metricsShortIds);
        handlers.put(InstructionRequest.RequestCase.HARNESS_MONITORING_INFOS, processWideHandler::harnessMonitoringInfos);
        JvmInitializers.runBeforeProcessing(options);
        // Optional override of the state sampling period, read from the experiments.
        String samplingPeriodMills = ExperimentalOptions.getExperimentValue(options, ExperimentalOptions.STATE_SAMPLING_PERIOD_MILLIS);
        if (samplingPeriodMills != null) {
            ExecutionStateSampler.setSamplingPeriod(Integer.parseInt(samplingPeriodMills));
        }
        ExecutionStateSampler.instance().start();
        LOG.info("Entering instruction processing loop");
        // The control client immediately dispatches requests to an executor so we execute on the
        // direct executor. If we created separate channels for different stubs we could use
        // directExecutor() when building the channel.
        BeamFnControlClient control = new BeamFnControlClient(controlStub.withExecutor(MoreExecutors.directExecutor()), outboundObserverFactory, executorService, handlers);
        // Blocks here until the control client terminates.
        control.waitForTermination();
        // NOTE(review): the two cleanup steps below run only on normal termination; they are
        // skipped if waitForTermination() throws - confirm this is intended.
        if (beamFnStatusClient != null) {
            beamFnStatusClient.close();
        }
        processBundleHandler.shutdown();
    } finally {
        // try-with-resources closes the logging client before this finally block runs.
        // NOTE(review): presumably stdout is used here for that reason - confirm.
        System.out.println("Shutting SDK harness down.");
        ExecutionStateSampler.instance().stop();
        executorService.shutdown();
    }
}
Also used : BeamFnControlClient(org.apache.beam.fn.harness.control.BeamFnControlClient) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ProcessBundleHandler(org.apache.beam.fn.harness.control.ProcessBundleHandler) BeamFnStateGrpcClientCache(org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache) ThrowingFunction(org.apache.beam.sdk.function.ThrowingFunction) Function(java.util.function.Function) BeamFnStatusClient(org.apache.beam.fn.harness.status.BeamFnStatusClient) InstructionRequest(org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest) ManagedChannel(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ManagedChannel) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) EnumMap(java.util.EnumMap) BeamFnControlGrpc(org.apache.beam.model.fnexecution.v1.BeamFnControlGrpc) HarnessMonitoringInfosInstructionHandler(org.apache.beam.fn.harness.control.HarnessMonitoringInfosInstructionHandler) ThrowingFunction(org.apache.beam.sdk.function.ThrowingFunction) BeamFnDataGrpcClient(org.apache.beam.fn.harness.data.BeamFnDataGrpcClient) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) IdGenerator(org.apache.beam.sdk.fn.IdGenerator) FinalizeBundleHandler(org.apache.beam.fn.harness.control.FinalizeBundleHandler) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) ExecutorService(java.util.concurrent.ExecutorService) BeamFnLoggingClient(org.apache.beam.fn.harness.logging.BeamFnLoggingClient) BeamFnStateGrpcClientCache(org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache)

Example 24 with ProcessBundleDescriptor

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor in project beam by apache.

the class RegisterNodeFunction method apply.

@Override
public Node apply(MutableNetwork<Node, Edge> input) {
    for (Node node : input.nodes()) {
        if (node instanceof RemoteGrpcPortNode || node instanceof ParallelInstructionNode || node instanceof InstructionOutputNode) {
            continue;
        }
        throw new IllegalArgumentException(String.format("Network contains unknown type of node: %s", input));
    }
    // Fix all non output nodes to have named edges.
    for (Node node : input.nodes()) {
        if (node instanceof InstructionOutputNode) {
            continue;
        }
        for (Node successor : input.successors(node)) {
            for (Edge edge : input.edgesConnecting(node, successor)) {
                if (edge instanceof DefaultEdge) {
                    input.removeEdge(edge);
                    input.addEdge(node, successor, MultiOutputInfoEdge.create(new MultiOutputInfo().setTag(idGenerator.getId())));
                }
            }
        }
    }
    // We start off by replacing all edges within the graph with edges that have the named
    // outputs from the predecessor step. For ParallelInstruction Source nodes and RemoteGrpcPort
    // nodes this is a generated port id. All ParDoInstructions will have already
    ProcessBundleDescriptor.Builder processBundleDescriptor = ProcessBundleDescriptor.newBuilder().setId(idGenerator.getId()).setStateApiServiceDescriptor(stateApiServiceDescriptor);
    // For intermediate PCollections we fabricate, we make a bogus WindowingStrategy
    // TODO: create a correct windowing strategy, including coders and environment
    SdkComponents sdkComponents = SdkComponents.create(pipeline.getComponents(), null);
    // Default to use the Java environment if pipeline doesn't have environment specified.
    if (pipeline.getComponents().getEnvironmentsMap().isEmpty()) {
        sdkComponents.registerEnvironment(Environments.JAVA_SDK_HARNESS_ENVIRONMENT);
    }
    String fakeWindowingStrategyId = "fakeWindowingStrategy" + idGenerator.getId();
    try {
        RunnerApi.MessageWithComponents fakeWindowingStrategyProto = WindowingStrategyTranslation.toMessageProto(WindowingStrategy.globalDefault(), sdkComponents);
        processBundleDescriptor.putWindowingStrategies(fakeWindowingStrategyId, fakeWindowingStrategyProto.getWindowingStrategy()).putAllCoders(fakeWindowingStrategyProto.getComponents().getCodersMap()).putAllEnvironments(fakeWindowingStrategyProto.getComponents().getEnvironmentsMap());
    } catch (IOException exc) {
        throw new RuntimeException("Could not convert default windowing stratey to proto", exc);
    }
    Map<Node, String> nodesToPCollections = new HashMap<>();
    ImmutableMap.Builder<String, NameContext> ptransformIdToNameContexts = ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<SideInputInfo>> ptransformIdToSideInputInfos = ImmutableMap.builder();
    ImmutableMap.Builder<String, Iterable<PCollectionView<?>>> ptransformIdToPCollectionViews = ImmutableMap.builder();
    ImmutableMap.Builder<String, NameContext> pcollectionIdToNameContexts = ImmutableMap.builder();
    ImmutableMap.Builder<InstructionOutputNode, String> instructionOutputNodeToCoderIdBuilder = ImmutableMap.builder();
    // 2. Generate new PCollectionId and register it with ProcessBundleDescriptor.
    for (InstructionOutputNode node : Iterables.filter(input.nodes(), InstructionOutputNode.class)) {
        InstructionOutput instructionOutput = node.getInstructionOutput();
        String coderId = "generatedCoder" + idGenerator.getId();
        instructionOutputNodeToCoderIdBuilder.put(node, coderId);
        try (ByteString.Output output = ByteString.newOutput()) {
            try {
                Coder<?> javaCoder = CloudObjects.coderFromCloudObject(CloudObject.fromSpec(instructionOutput.getCodec()));
                sdkComponents.registerCoder(javaCoder);
                RunnerApi.Coder coderProto = CoderTranslation.toProto(javaCoder, sdkComponents);
                processBundleDescriptor.putCoders(coderId, coderProto);
            } catch (IOException e) {
                throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
            } catch (Exception e) {
                // Coder probably wasn't a java coder
                OBJECT_MAPPER.writeValue(output, instructionOutput.getCodec());
                processBundleDescriptor.putCoders(coderId, RunnerApi.Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setPayload(output.toByteString())).build());
            }
        } catch (IOException e) {
            throw new IllegalArgumentException(String.format("Unable to encode coder %s for output %s", instructionOutput.getCodec(), instructionOutput), e);
        }
        // Generate new PCollection ID and map it to relevant node.
        // Will later be used to fill PTransform inputs/outputs information.
        String pcollectionId = "generatedPcollection" + idGenerator.getId();
        processBundleDescriptor.putPcollections(pcollectionId, RunnerApi.PCollection.newBuilder().setCoderId(coderId).setWindowingStrategyId(fakeWindowingStrategyId).build());
        nodesToPCollections.put(node, pcollectionId);
        pcollectionIdToNameContexts.put(pcollectionId, NameContext.create(null, instructionOutput.getOriginalName(), instructionOutput.getSystemName(), instructionOutput.getName()));
    }
    processBundleDescriptor.putAllCoders(sdkComponents.toComponents().getCodersMap());
    Map<InstructionOutputNode, String> instructionOutputNodeToCoderIdMap = instructionOutputNodeToCoderIdBuilder.build();
    for (ParallelInstructionNode node : Iterables.filter(input.nodes(), ParallelInstructionNode.class)) {
        ParallelInstruction parallelInstruction = node.getParallelInstruction();
        String ptransformId = "generatedPtransform" + idGenerator.getId();
        ptransformIdToNameContexts.put(ptransformId, NameContext.create(null, parallelInstruction.getOriginalName(), parallelInstruction.getSystemName(), parallelInstruction.getName()));
        RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
        RunnerApi.FunctionSpec.Builder transformSpec = RunnerApi.FunctionSpec.newBuilder();
        if (parallelInstruction.getParDo() != null) {
            ParDoInstruction parDoInstruction = parallelInstruction.getParDo();
            CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
            String userFnClassName = userFnSpec.getClassName();
            if ("CombineValuesFn".equals(userFnClassName) || "KeyedCombineFn".equals(userFnClassName)) {
                transformSpec = transformCombineValuesFnToFunctionSpec(userFnSpec);
                ptransformIdToPCollectionViews.put(ptransformId, Collections.emptyList());
            } else {
                String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
                RunnerApi.PTransform parDoPTransform = pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);
                // TODO: only the non-null branch should exist; for migration ease only
                if (parDoPTransform != null) {
                    checkArgument(parDoPTransform.getSpec().getUrn().equals(PTransformTranslation.PAR_DO_TRANSFORM_URN), "Found transform \"%s\" for ParallelDo instruction, " + " but that transform had unexpected URN \"%s\" (expected \"%s\")", parDoPTransformId, parDoPTransform.getSpec().getUrn(), PTransformTranslation.PAR_DO_TRANSFORM_URN);
                    RunnerApi.ParDoPayload parDoPayload;
                    try {
                        parDoPayload = RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
                    } catch (InvalidProtocolBufferException exc) {
                        throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
                    }
                    ImmutableList.Builder<PCollectionView<?>> pcollectionViews = ImmutableList.builder();
                    for (Map.Entry<String, SideInput> sideInputEntry : parDoPayload.getSideInputsMap().entrySet()) {
                        pcollectionViews.add(transformSideInputForRunner(pipeline, parDoPTransform, sideInputEntry.getKey(), sideInputEntry.getValue()));
                        transformSideInputForSdk(pipeline, parDoPTransform, sideInputEntry.getKey(), processBundleDescriptor, pTransform);
                    }
                    ptransformIdToPCollectionViews.put(ptransformId, pcollectionViews.build());
                    transformSpec.setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString());
                } else {
                    // legacy path - bytes are the FunctionSpec's payload field, basically, and
                    // SDKs expect it in the PTransform's payload field
                    byte[] userFnBytes = getBytes(userFnSpec, PropertyNames.SERIALIZED_FN);
                    transformSpec.setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(userFnBytes));
                }
                // Add side input information for batch pipelines
                if (parDoInstruction.getSideInputs() != null) {
                    ptransformIdToSideInputInfos.put(ptransformId, forSideInputInfos(parDoInstruction.getSideInputs(), true));
                }
            }
        } else if (parallelInstruction.getRead() != null) {
            ReadInstruction readInstruction = parallelInstruction.getRead();
            CloudObject sourceSpec = CloudObject.fromSpec(CloudSourceUtils.flattenBaseSpecs(readInstruction.getSource()).getSpec());
            // TODO: Need to plumb through the SDK specific function spec.
            transformSpec.setUrn(JAVA_SOURCE_URN);
            try {
                byte[] serializedSource = Base64.getDecoder().decode(getString(sourceSpec, SERIALIZED_SOURCE));
                ByteString sourceByteString = ByteString.copyFrom(serializedSource);
                transformSpec.setPayload(sourceByteString);
            } catch (Exception e) {
                throw new IllegalArgumentException(String.format("Unable to process Read %s", parallelInstruction), e);
            }
        } else if (parallelInstruction.getFlatten() != null) {
            transformSpec.setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN);
        } else {
            throw new IllegalArgumentException(String.format("Unknown type of ParallelInstruction %s", parallelInstruction));
        }
        for (Node predecessorOutput : input.predecessors(node)) {
            pTransform.putInputs("generatedInput" + idGenerator.getId(), nodesToPCollections.get(predecessorOutput));
        }
        for (Edge edge : input.outEdges(node)) {
            Node nodeOutput = input.incidentNodes(edge).target();
            MultiOutputInfoEdge edge2 = (MultiOutputInfoEdge) edge;
            pTransform.putOutputs(edge2.getMultiOutputInfo().getTag(), nodesToPCollections.get(nodeOutput));
        }
        pTransform.setSpec(transformSpec);
        processBundleDescriptor.putTransforms(ptransformId, pTransform.build());
    }
    // Add the PTransforms representing the remote gRPC nodes
    for (RemoteGrpcPortNode node : Iterables.filter(input.nodes(), RemoteGrpcPortNode.class)) {
        RunnerApi.PTransform.Builder pTransform = RunnerApi.PTransform.newBuilder();
        Set<Node> predecessors = input.predecessors(node);
        Set<Node> successors = input.successors(node);
        if (predecessors.isEmpty() && !successors.isEmpty()) {
            Node instructionOutputNode = Iterables.getOnlyElement(successors);
            pTransform.putOutputs("generatedOutput" + idGenerator.getId(), nodesToPCollections.get(instructionOutputNode));
            pTransform.setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).setPayload(node.getRemoteGrpcPort().toBuilder().setCoderId(instructionOutputNodeToCoderIdMap.get(instructionOutputNode)).build().toByteString()).build());
        } else if (!predecessors.isEmpty() && successors.isEmpty()) {
            Node instructionOutputNode = Iterables.getOnlyElement(predecessors);
            pTransform.putInputs("generatedInput" + idGenerator.getId(), nodesToPCollections.get(instructionOutputNode));
            pTransform.setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_OUTPUT_URN).setPayload(node.getRemoteGrpcPort().toBuilder().setCoderId(instructionOutputNodeToCoderIdMap.get(instructionOutputNode)).build().toByteString()).build());
        } else {
            throw new IllegalStateException("Expected either one input OR one output " + "InstructionOutputNode for this RemoteGrpcPortNode");
        }
        processBundleDescriptor.putTransforms(node.getPrimitiveTransformId(), pTransform.build());
    }
    return RegisterRequestNode.create(RegisterRequest.newBuilder().addProcessBundleDescriptor(processBundleDescriptor).build(), ptransformIdToNameContexts.build(), ptransformIdToSideInputInfos.build(), ptransformIdToPCollectionViews.build(), pcollectionIdToNameContexts.build());
}
Also used : HashMap(java.util.HashMap) MultiOutputInfo(com.google.api.services.dataflow.model.MultiOutputInfo) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RegisterRequestNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RegisterRequestNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) InstructionOutput(com.google.api.services.dataflow.model.InstructionOutput) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) DataflowPortabilityPCollectionView(org.apache.beam.runners.dataflow.worker.DataflowPortabilityPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) HashMap(java.util.HashMap) RemoteGrpcPortNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.RemoteGrpcPortNode) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) 
ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) NameContext(org.apache.beam.runners.dataflow.worker.counters.NameContext) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) IOException(java.io.IOException) ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge) MultiOutputInfoEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.MultiOutputInfoEdge) DefaultEdge(org.apache.beam.runners.dataflow.worker.graph.Edges.DefaultEdge)

Example 25 with ProcessBundleDescriptor

use of org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor in project beam by apache.

the class FnApiControlClientPoolService method getProcessBundleDescriptor.

/**
 * Answers a request for a single {@code ProcessBundleDescriptor}.
 *
 * <p>The id is read from {@code request} and looked up in {@code processBundleDescriptors}.
 * When the lookup yields a descriptor it is emitted through {@code responseObserver.onNext}
 * and the stream is completed. When the lookup yields {@code null} the observer is failed
 * with a {@code StatusException} built from {@code Status.NOT_FOUND}, and the same message
 * is then logged at error level.
 *
 * @param request the request carrying the id of the descriptor to fetch
 * @param responseObserver the observer that receives either the descriptor or the error
 */
@Override
public void getProcessBundleDescriptor(BeamFnApi.GetProcessBundleDescriptorRequest request, StreamObserver<BeamFnApi.ProcessBundleDescriptor> responseObserver) {
    final String requestedId = request.getProcessBundleDescriptorId();
    LOG.info("getProcessBundleDescriptor request with id {}", requestedId);

    final BeamFnApi.ProcessBundleDescriptor found = processBundleDescriptors.get(requestedId);
    if (found != null) {
        // Happy path: hand back the single descriptor and close the stream.
        responseObserver.onNext(found);
        responseObserver.onCompleted();
        return;
    }

    // Nothing stored under this id: fail the call first, then record the miss in the log.
    final String notFoundMessage = String.format("ProcessBundleDescriptor with id %s not found", requestedId);
    responseObserver.onError(new StatusException(Status.NOT_FOUND.withDescription(notFoundMessage)));
    LOG.error(notFoundMessage);
}
Also used : StatusException(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.StatusException) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi)

Aggregations

ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)21 Test (org.junit.Test)19 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)18 ShortIdMap (org.apache.beam.runners.core.metrics.ShortIdMap)14 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)14 PTransformRunnerFactory (org.apache.beam.fn.harness.PTransformRunnerFactory)13 BundleProcessorCache (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache)13 Map (java.util.Map)8 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)8 IOException (java.io.IOException)7 BeamFnStateClient (org.apache.beam.fn.harness.state.BeamFnStateClient)6 Elements (org.apache.beam.model.fnexecution.v1.BeamFnApi.Elements)6 InstructionRequest (org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest)6 InstructionResponse (org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse)6 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)6 Collections (java.util.Collections)5 List (java.util.List)5 Iterables (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables)5 ArrayList (java.util.ArrayList)4 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)4