Search in sources :

Example 16 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ExpansionService method expand.

/**
 * Expands the transform described by {@code request} into a fully specified transform plus the
 * components it needs.
 *
 * <p>The request's components are rehydrated into a fresh {@link Pipeline}, the transform
 * provider matching the request's URN is applied to the rehydrated inputs, and the resulting
 * pipeline is translated back to its proto form. The single root transform that was not already
 * present in the request is returned as the expanded transform; all remaining components are
 * returned alongside it.
 *
 * @param request the expansion request carrying the transform to expand, the components it
 *     refers to, and the namespace used as a prefix for newly generated ids
 * @return the expanded transform, its supporting components and the pipeline requirements
 * @throws UnsupportedOperationException if no transform provider is registered for the URN
 * @throws IllegalStateException if the Java class lookup URN is requested but no allow list is
 *     configured
 * @throws RuntimeException wrapping any {@link IOException} raised while rehydrating inputs or
 *     registering outputs
 */
@VisibleForTesting
/*package*/
ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
    RunnerApi.PTransform requestedTransform = request.getTransform();
    String urn = requestedTransform.getSpec().getUrn();
    LOG.info("Expanding '{}' with URN '{}'", requestedTransform.getUniqueName(), urn);
    LOG.debug("Full transform: {}", requestedTransform);
    // Remember which transforms the caller already knows about so the new one can be identified.
    Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
    Pipeline pipeline = createPipeline();
    boolean isUseDeprecatedRead = ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read") || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
    if (!isUseDeprecatedRead) {
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
        // TODO(BEAM-10670): Remove this when we address performance issue.
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
    } else {
        LOG.warn("Using use_deprecated_read in portable runners is runner-dependent. The " + "ExpansionService will respect that, but if your runner does not have support for " + "native Read transform, your Pipeline will fail during Pipeline submission.");
    }
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
    // Resolve every declared input id to the PCollection rehydrated into the new pipeline.
    Map<String, PCollection<?>> inputs = requestedTransform.getInputsMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, input -> {
        try {
            return rehydratedComponents.getPCollection(input.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    TransformProvider transformProvider;
    if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
        AllowList allowList = pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
        // An explicit check is used instead of `assert` so that a missing allow list is rejected
        // even when the JVM runs with assertions disabled.
        if (allowList == null) {
            throw new IllegalStateException("No Java class lookup allow list is configured for urn: " + urn);
        }
        transformProvider = new JavaClassLookupTransformProvider(allowList);
    } else {
        transformProvider = getRegisteredTransforms().get(urn);
        if (transformProvider == null) {
            throw new UnsupportedOperationException("Unknown urn: " + urn);
        }
    }
    List<String> classpathResources = transformProvider.getDependencies(requestedTransform.getSpec(), pipeline.getOptions());
    pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);
    Map<String, PCollection<?>> outputs = transformProvider.apply(pipeline, requestedTransform.getUniqueName(), requestedTransform.getSpec(), inputs);
    // Needed to find which transform was new...
    SdkComponents sdkComponents = rehydratedComponents.getSdkComponents(Collections.emptyList()).withNewIdPrefix(request.getNamespace());
    sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(pipeline.getOptions().as(PortablePipelineOptions.class)));
    // Map each output tag to the id under which its PCollection is registered.
    Map<String, String> outputMap = outputs.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, output -> {
        try {
            return sdkComponents.registerPCollection(output.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    if (isUseDeprecatedRead) {
        SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
    }
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
    // The expanded transform is the one root transform that the request did not already contain.
    String expandedTransformId = Iterables.getOnlyElement(pipelineProto.getRootTransformIdsList().stream().filter(id -> !existingTransformIds.contains(id)).collect(Collectors.toList()));
    RunnerApi.Components components = pipelineProto.getComponents();
    RunnerApi.PTransform expandedTransform = components.getTransformsOrThrow(expandedTransformId).toBuilder().setUniqueName(expandedTransformId).clearOutputs().putAllOutputs(outputMap).build();
    LOG.debug("Expanded to {}", expandedTransform);
    // The expanded transform is returned separately, so it is removed from the components.
    return ExpansionApi.ExpansionResponse.newBuilder().setComponents(components.toBuilder().removeTransforms(expandedTransformId)).setTransform(expandedTransform).addAllRequirements(pipelineProto.getRequirementsList()).build();
}
Also used : Arrays(java.util.Arrays) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ServerBuilder(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ServerBuilder) PipelineResult(org.apache.beam.sdk.PipelineResult) SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) LoggerFactory(org.slf4j.LoggerFactory) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Throwables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) PCollectionList(org.apache.beam.sdk.values.PCollectionList) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Method(java.lang.reflect.Method) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) ServiceLoader(java.util.ServiceLoader) Converter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Converter) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) ExpansionMethods(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExpansionMethods) POutput(org.apache.beam.sdk.values.POutput) List(java.util.List) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) ExpansionServiceGrpc(org.apache.beam.model.expansion.v1.ExpansionServiceGrpc) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Optional(java.util.Optional) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) ExternalTransformBuilder(org.apache.beam.sdk.transforms.ExternalTransformBuilder) SchemaTranslation(org.apache.beam.sdk.schemas.SchemaTranslation) 
NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) CaseFormat(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Constructor(java.lang.reflect.Constructor) Environments(org.apache.beam.runners.core.construction.Environments) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) Preconditions.checkArgumentNotNull(org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull) PTransform(org.apache.beam.sdk.transforms.PTransform) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) PipelineRunner(org.apache.beam.sdk.PipelineRunner) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) PInput(org.apache.beam.sdk.values.PInput) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Field(org.apache.beam.sdk.schemas.Schema.Field) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ExternalConfigurationPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExternalConfigurationPayload) PDone(org.apache.beam.sdk.values.PDone) Logger(org.slf4j.Logger) PipelineResources.detectClassPathResourcesToStage(org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage) IOException(java.io.IOException) SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) 
ExternalTransformRegistrar(org.apache.beam.sdk.expansion.ExternalTransformRegistrar) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) AutoService(com.google.auto.service.AutoService) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) ArtifactRetrievalService(org.apache.beam.runners.fnexecution.artifact.ArtifactRetrievalService) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) IOException(java.io.IOException) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 17 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ExpansionServiceTest method testConstruct.

/**
 * Expands the test transform on top of a pipeline containing a single Impulse and checks the
 * shape and namespacing of the response.
 */
@Test
public void testConstruct() {
    // Build the upstream pipeline whose only transform supplies the input PCollection.
    Pipeline upstream = Pipeline.create();
    upstream.apply(Impulse.create());
    RunnerApi.Components upstreamComponents = PipelineTranslation.toProto(upstream).getComponents();
    RunnerApi.PTransform onlyTransform = Iterables.getOnlyElement(upstreamComponents.getTransformsMap().values());
    String inputPcollId = Iterables.getOnlyElement(onlyTransform.getOutputsMap().values());

    // Describe the transform to expand and wrap it in a request.
    RunnerApi.FunctionSpec.Builder spec = RunnerApi.FunctionSpec.newBuilder().setUrn(TEST_URN);
    RunnerApi.PTransform.Builder toExpand = RunnerApi.PTransform.newBuilder().setUniqueName(TEST_NAME).setSpec(spec).putInputs("input", inputPcollId);
    ExpansionApi.ExpansionRequest request = ExpansionApi.ExpansionRequest.newBuilder().setComponents(upstreamComponents).setTransform(toExpand).setNamespace(TEST_NAMESPACE).build();

    ExpansionApi.ExpansionResponse response = expansionService.expand(request);
    RunnerApi.PTransform expanded = response.getTransform();
    RunnerApi.Components expandedComponents = response.getComponents();

    assertEquals(TEST_NAMESPACE + TEST_NAME, expanded.getUniqueName());
    // The expanded transform consumes exactly the PCollection it was given.
    assertThat(expanded.getInputsMap().values(), contains(inputPcollId));
    // ... and exposes a single output tagged "output".
    assertThat(expanded.getOutputsMap().keySet(), contains("output"));
    // Loose composite check: it has children, and each child is present in the response.
    assertThat(expanded.getSubtransformsCount(), greaterThan(0));
    for (String childId : expanded.getSubtransformsList()) {
        assertTrue(expandedComponents.containsTransforms(childId));
    }
    // Every id in the response is either namespaced or was already part of the request.
    Set<String> requestIds = allIds(request.getComponents());
    for (String responseId : allIds(expandedComponents)) {
        boolean namespaced = responseId.startsWith(TEST_NAMESPACE);
        assertTrue(responseId, namespaced || requestIds.contains(responseId));
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 18 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ExpansionServiceTest method testConstructGenerateSequenceWithRegistration.

/**
 * Expands the GenerateSequence external transform from a row-encoded configuration and checks
 * the input, output and subtransform counts of the result.
 */
@Test
public void testConstructGenerateSequenceWithRegistration() {
    // Configuration row: a required "start" and a nullable "stop", both INT64.
    Schema configSchema = Schema.of(Field.of("start", FieldType.INT64), Field.nullable("stop", FieldType.INT64));
    Row configRow = Row.withSchema(configSchema).withFieldValue("start", 0L).withFieldValue("stop", 1L).build();
    ExternalTransforms.ExternalConfigurationPayload payload = encodeRowIntoExternalConfigurationPayload(configRow);

    // The request is built against the components of an empty pipeline.
    Pipeline emptyPipeline = Pipeline.create();
    RunnerApi.Pipeline emptyProto = PipelineTranslation.toProto(emptyPipeline);
    RunnerApi.FunctionSpec.Builder spec = RunnerApi.FunctionSpec.newBuilder().setUrn(GenerateSequence.External.URN).setPayload(payload.toByteString());
    RunnerApi.PTransform.Builder toExpand = RunnerApi.PTransform.newBuilder().setUniqueName(TEST_NAME).setSpec(spec);
    ExpansionApi.ExpansionRequest request = ExpansionApi.ExpansionRequest.newBuilder().setComponents(emptyProto.getComponents()).setTransform(toExpand).setNamespace(TEST_NAMESPACE).build();

    RunnerApi.PTransform expanded = expansionService.expand(request).getTransform();

    assertEquals(TEST_NAMESPACE + TEST_NAME, expanded.getUniqueName());
    assertThat(expanded.getInputsCount(), Matchers.is(0));
    assertThat(expanded.getOutputsCount(), Matchers.is(1));
    assertThat(expanded.getSubtransformsCount(), greaterThan(0));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExternalTransforms(org.apache.beam.model.pipeline.v1.ExternalTransforms) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 19 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowRunnerHarness method main.

/**
 * Fetches and processes work units from the Dataflow service.
 *
 * <p>Reads the optional pipeline proto and the control, logging and status service descriptors
 * via {@code DataflowWorkerHarnessHelper}, initializes logging and pipeline options, creates the
 * gRPC servers hosting the harness services, and hands control to {@code start(...)}. After
 * {@code start(...)} returns, the servers are shut down and given up to 30 seconds each to
 * terminate; any server still running afterwards is shut down forcibly.
 *
 * @param unusedArgs command-line arguments; not read by this method
 * @throws Exception if any step of the setup, the run, or the shutdown fails
 */
public static void main(String[] unusedArgs) throws Exception {
    // May be null; the value is passed through to start(...) unchanged.
    RunnerApi.@Nullable Pipeline pipeline = DataflowWorkerHarnessHelper.getPipelineFromEnv();
    // This descriptor is used for all services except logging. They are isolated to keep
    // critical traffic protected from best effort traffic.
    ApiServiceDescriptor controlApiService = DataflowWorkerHarnessHelper.getControlDescriptor();
    ApiServiceDescriptor loggingApiService = DataflowWorkerHarnessHelper.getLoggingDescriptor();
    // The status descriptor is treated as optional: every use below is guarded by a null check.
    ApiServiceDescriptor statusApiService = DataflowWorkerHarnessHelper.getStatusDescriptor();
    // NOTE(review): this message is logged before initializeLogging(...) is called - confirm
    // that this ordering is intended.
    LOG.info("{} started, using port {} for control, {} for logging.", DataflowRunnerHarness.class, controlApiService, loggingApiService);
    DataflowWorkerHarnessHelper.initializeLogging(DataflowRunnerHarness.class);
    DataflowWorkerHarnessOptions pipelineOptions = DataflowWorkerHarnessHelper.initializeGlobalStateAndPipelineOptions(DataflowRunnerHarness.class);
    DataflowWorkerHarnessHelper.configureLogging(pipelineOptions);
    // Initialize registered file systems.
    FileSystems.setDefaultPipelineOptions(pipelineOptions);
    DataflowPipelineDebugOptions dataflowOptions = pipelineOptions.as(DataflowPipelineDebugOptions.class);
    // Pick the server factory from the experiments set on the options; the epoll domain socket
    // experiment takes precedence over the plain epoll experiment.
    ServerFactory serverFactory;
    if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll_domain_socket")) {
        serverFactory = ServerFactory.createEpollDomainSocket();
    } else if (DataflowRunner.hasExperiment(dataflowOptions, "beam_fn_api_epoll")) {
        serverFactory = ServerFactory.createEpollSocket();
    } else {
        serverFactory = ServerFactory.createDefault();
    }
    ServerStreamObserverFactory streamObserverFactory = ServerStreamObserverFactory.fromOptions(pipelineOptions);
    // Declared outside the try block so the finally clause can force-stop whatever was started.
    Server servicesServer = null;
    Server loggingServer = null;
    Server statusServer = null;
    // All services are managed by try-with-resources and are closed when the block exits. The
    // status service resource is null when no status descriptor is available; try-with-resources
    // permits null resources and does not attempt to close them.
    try (BeamFnLoggingService beamFnLoggingService = new BeamFnLoggingService(loggingApiService, DataflowWorkerLoggingInitializer.getSdkLoggingHandler()::publish, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamFnControlService beamFnControlService = new BeamFnControlService(controlApiService, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamFnDataGrpcService beamFnDataService = new BeamFnDataGrpcService(pipelineOptions, controlApiService, streamObserverFactory::from, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        BeamWorkerStatusGrpcService beamWorkerStatusGrpcService = statusApiService == null ? null : BeamWorkerStatusGrpcService.create(statusApiService, GrpcContextHeaderAccessorProvider.getHeaderAccessor());
        GrpcStateService beamFnStateService = GrpcStateService.create()) {
        // Control, data and state services share one server on the control descriptor; logging
        // gets its own server on the logging descriptor.
        servicesServer = serverFactory.create(ImmutableList.of(beamFnControlService, beamFnDataService, beamFnStateService), controlApiService);
        loggingServer = serverFactory.create(ImmutableList.of(beamFnLoggingService), loggingApiService);
        // gRPC server for obtaining SDK harness runtime status information.
        if (beamWorkerStatusGrpcService != null) {
            statusServer = serverFactory.create(ImmutableList.of(beamWorkerStatusGrpcService), statusApiService);
        }
        // Presumably blocks until the harness has finished its work - TODO confirm against start().
        start(pipeline, pipelineOptions, beamFnControlService, beamFnDataService, controlApiService, beamFnStateService, beamWorkerStatusGrpcService);
        // Request shutdown of every server first, then wait on each of them in turn.
        if (statusServer != null) {
            statusServer.shutdown();
        }
        servicesServer.shutdown();
        loggingServer.shutdown();
        // wait 30 secs for outstanding requests to finish.
        if (statusServer != null) {
            statusServer.awaitTermination(30, TimeUnit.SECONDS);
        }
        servicesServer.awaitTermination(30, TimeUnit.SECONDS);
        loggingServer.awaitTermination(30, TimeUnit.SECONDS);
    } finally {
        // Runs on both normal and exceptional exit: force-stop any server that was created but
        // has not terminated yet.
        if (statusServer != null && !statusServer.isTerminated()) {
            statusServer.shutdownNow();
        }
        if (servicesServer != null && !servicesServer.isTerminated()) {
            servicesServer.shutdownNow();
        }
        if (loggingServer != null && !loggingServer.isTerminated()) {
            loggingServer.shutdownNow();
        }
    }
}
Also used : GrpcStateService(org.apache.beam.runners.fnexecution.state.GrpcStateService) ApiServiceDescriptor(org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) DataflowWorkerHarnessOptions(org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions) ServerFactory(org.apache.beam.sdk.fn.server.ServerFactory) BeamFnControlService(org.apache.beam.runners.dataflow.worker.fn.BeamFnControlService) ServerStreamObserverFactory(org.apache.beam.runners.dataflow.worker.fn.stream.ServerStreamObserverFactory) BeamFnDataGrpcService(org.apache.beam.runners.dataflow.worker.fn.data.BeamFnDataGrpcService) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BeamFnLoggingService(org.apache.beam.runners.dataflow.worker.fn.logging.BeamFnLoggingService) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) BeamWorkerStatusGrpcService(org.apache.beam.runners.fnexecution.status.BeamWorkerStatusGrpcService) Nullable(org.checkerframework.checker.nullness.qual.Nullable)

Example 20 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class DataflowWorkerHarnessHelper method getPipelineFromEnv.

// TODO: make env logic private to main() so it is never done outside of initializing the process
/**
 * Loads the portable pipeline proto from the file named by the {@code PIPELINE_PATH}
 * environment variable.
 *
 * @return the parsed pipeline, or {@code null} if the environment variable is not set or the
 *     file it names does not exist (a warning is logged in both cases)
 * @throws IOException if the file cannot be opened or its contents cannot be parsed
 */
public static RunnerApi.@Nullable Pipeline getPipelineFromEnv() throws IOException {
    String pipelinePath = System.getenv(PIPELINE_PATH);
    if (pipelinePath == null) {
        LOG.warn("Missing pipeline environment variable '{}'", PIPELINE_PATH);
        return null;
    }
    // Reuse the value that was just null-checked rather than reading the environment again.
    File pipelineFile = new File(pipelinePath);
    if (!pipelineFile.exists()) {
        LOG.warn("Pipeline path '{}' does not exist", pipelineFile);
        return null;
    }
    try (FileInputStream inputStream = new FileInputStream(pipelineFile)) {
        RunnerApi.Pipeline pipelineProto = RunnerApi.Pipeline.parseFrom(inputStream);
        LOG.info("Found portable pipeline:\n{}", TextFormat.printToString(pipelineProto));
        return pipelineProto;
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) File(java.io.File) FileInputStream(java.io.FileInputStream)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)117 Test (org.junit.Test)87 Pipeline (org.apache.beam.sdk.Pipeline)82 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)44 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)43 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)38 Map (java.util.Map)32 KV (org.apache.beam.sdk.values.KV)26 Job (com.google.api.services.dataflow.model.Job)25 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)24 KvCoder (org.apache.beam.sdk.coders.KvCoder)24 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)23 Coder (org.apache.beam.sdk.coders.Coder)23 ArrayList (java.util.ArrayList)22 WindowedValue (org.apache.beam.sdk.util.WindowedValue)22 HashMap (java.util.HashMap)20 List (java.util.List)20 ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)19 IOException (java.io.IOException)18 PCollection (org.apache.beam.sdk.values.PCollection)18