Search in sources :

Example 1 with RehydratedComponents

use of org.apache.beam.runners.core.construction.RehydratedComponents in project beam by apache.

the class ExpansionService method expand.

/**
 * Expands the transform described by {@code request} and returns the resulting transform together
 * with the components produced by the expansion.
 *
 * <p>The request's components are rehydrated against a freshly created pipeline, the transform
 * provider matching the request URN is applied to the rehydrated inputs, and the pipeline is then
 * translated back to its proto form. The single root transform that was not already present in the
 * request is reported as the expanded transform.
 *
 * @param request the expansion request carrying the transform, its components and a namespace
 * @return a response holding the expanded transform, the remaining components and the pipeline
 *     requirements
 * @throws UnsupportedOperationException if no transform provider is registered for the request URN
 * @throws RuntimeException if rehydrating an input or registering an output PCollection fails with
 *     an {@link IOException}
 */
@VisibleForTesting
/*package*/
ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
    LOG.info("Expanding '{}' with URN '{}'", request.getTransform().getUniqueName(), request.getTransform().getSpec().getUrn());
    LOG.debug("Full transform: {}", request.getTransform());
    // Remember which transforms the caller already knows so the newly added one can be identified.
    Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
    Pipeline pipeline = createPipeline();
    boolean isUseDeprecatedRead = ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read") || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
    if (!isUseDeprecatedRead) {
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
        // TODO(BEAM-10670): Remove this when we address performance issue.
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
    } else {
        LOG.warn("Using use_deprecated_read in portable runners is runner-dependent. The " + "ExpansionService will respect that, but if your runner does not have support for " + "native Read transform, your Pipeline will fail during Pipeline submission.");
    }
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
    // Resolve every input of the requested transform to a PCollection of the new pipeline.
    Map<String, PCollection<?>> inputs = request.getTransform().getInputsMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, input -> {
        try {
            return rehydratedComponents.getPCollection(input.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    String urn = request.getTransform().getSpec().getUrn();
    // Assigned on every path below: either the class-lookup provider or a registered one.
    TransformProvider transformProvider;
    if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
        AllowList allowList = pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
        // NOTE(review): this assert only fires when the JVM runs with assertions enabled (-ea);
        // otherwise a null allow list is handed to the provider unchecked.
        assert allowList != null;
        transformProvider = new JavaClassLookupTransformProvider(allowList);
    } else {
        transformProvider = getRegisteredTransforms().get(urn);
        if (transformProvider == null) {
            throw new UnsupportedOperationException("Unknown urn: " + urn);
        }
    }
    List<String> classpathResources = transformProvider.getDependencies(request.getTransform().getSpec(), pipeline.getOptions());
    pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);
    Map<String, PCollection<?>> outputs = transformProvider.apply(pipeline, request.getTransform().getUniqueName(), request.getTransform().getSpec(), inputs);
    // Needed to find which transform was new...
    SdkComponents sdkComponents = rehydratedComponents.getSdkComponents(Collections.emptyList()).withNewIdPrefix(request.getNamespace());
    sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(pipeline.getOptions().as(PortablePipelineOptions.class)));
    // Map each output tag to the id under which its PCollection is registered in the components.
    Map<String, String> outputMap = outputs.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, output -> {
        try {
            return sdkComponents.registerPCollection(output.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    if (isUseDeprecatedRead) {
        SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
    }
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
    // Exactly one root transform is expected to be new; getOnlyElement fails otherwise.
    String expandedTransformId = Iterables.getOnlyElement(pipelineProto.getRootTransformIdsList().stream().filter(id -> !existingTransformIds.contains(id)).collect(Collectors.toList()));
    RunnerApi.Components components = pipelineProto.getComponents();
    RunnerApi.PTransform expandedTransform = components.getTransformsOrThrow(expandedTransformId).toBuilder().setUniqueName(expandedTransformId).clearOutputs().putAllOutputs(outputMap).build();
    LOG.debug("Expanded to {}", expandedTransform);
    // The expanded transform is returned separately, so it is removed from the component map.
    return ExpansionApi.ExpansionResponse.newBuilder().setComponents(components.toBuilder().removeTransforms(expandedTransformId)).setTransform(expandedTransform).addAllRequirements(pipelineProto.getRequirementsList()).build();
}
Also used : Arrays(java.util.Arrays) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ServerBuilder(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ServerBuilder) PipelineResult(org.apache.beam.sdk.PipelineResult) SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) LoggerFactory(org.slf4j.LoggerFactory) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Throwables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) PCollectionList(org.apache.beam.sdk.values.PCollectionList) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Method(java.lang.reflect.Method) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) ServiceLoader(java.util.ServiceLoader) Converter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Converter) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) ExpansionMethods(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExpansionMethods) POutput(org.apache.beam.sdk.values.POutput) List(java.util.List) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) ExpansionServiceGrpc(org.apache.beam.model.expansion.v1.ExpansionServiceGrpc) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Optional(java.util.Optional) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) ExternalTransformBuilder(org.apache.beam.sdk.transforms.ExternalTransformBuilder) SchemaTranslation(org.apache.beam.sdk.schemas.SchemaTranslation) 
NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) CaseFormat(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Constructor(java.lang.reflect.Constructor) Environments(org.apache.beam.runners.core.construction.Environments) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) Preconditions.checkArgumentNotNull(org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull) PTransform(org.apache.beam.sdk.transforms.PTransform) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) PipelineRunner(org.apache.beam.sdk.PipelineRunner) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) PInput(org.apache.beam.sdk.values.PInput) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Field(org.apache.beam.sdk.schemas.Schema.Field) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ExternalConfigurationPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExternalConfigurationPayload) PDone(org.apache.beam.sdk.values.PDone) Logger(org.slf4j.Logger) PipelineResources.detectClassPathResourcesToStage(org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage) IOException(java.io.IOException) SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) 
ExternalTransformRegistrar(org.apache.beam.sdk.expansion.ExternalTransformRegistrar) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) AutoService(com.google.auto.service.AutoService) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) ArtifactRetrievalService(org.apache.beam.runners.fnexecution.artifact.ArtifactRetrievalService) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) IOException(java.io.IOException) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 2 with RehydratedComponents

use of org.apache.beam.runners.core.construction.RehydratedComponents in project beam by apache.

the class WindowUtils method getWindowStrategy.

/**
 * Looks up the windowing strategy referenced by a PCollection in {@link RunnerApi.Components} and
 * converts it from its proto form into a {@link WindowingStrategy}.
 *
 * @param collectionId id of the PCollection whose windowing strategy is wanted
 * @param components the components holding both the PCollection and its windowing strategy
 * @return the rehydrated windowing strategy, viewed as operating on {@link BoundedWindow}
 * @throws IllegalStateException if the proto cannot be converted
 */
public static WindowingStrategy<?, BoundedWindow> getWindowStrategy(String collectionId, RunnerApi.Components components) {
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    String strategyId = components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId();
    RunnerApi.WindowingStrategy strategyProto = components.getWindowingStrategiesOrThrow(strategyId);
    try {
        // The cast only narrows the window type parameter; it is erased at runtime.
        @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> hydrated = (WindowingStrategy<?, BoundedWindow>) WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
        return hydrated;
    } catch (Exception cause) {
        throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto), cause);
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) IOException(java.io.IOException) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 3 with RehydratedComponents

use of org.apache.beam.runners.core.construction.RehydratedComponents in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method getSdkCoder.

/**
 * Resolves the SDK-side coder of a PCollection, i.e. the coder the SDK harness would have used to
 * encode elements before handing them to the runner through {@link SdkHarnessClient}.
 *
 * @param pCollectionId id of the PCollection inside {@code components}
 * @param components the pipeline components (proto); they are copied, not modified
 * @return the SDK-side windowed-value coder for that PCollection
 * @throws IllegalStateException if the coder cannot be rehydrated
 */
private static <T> WindowedValue.FullWindowedValueCoder<T> getSdkCoder(String pCollectionId, RunnerApi.Components components) {
    PipelineNode.PCollectionNode node = PipelineNode.pCollection(pCollectionId, components.getPcollectionsOrThrow(pCollectionId));
    // Work on a builder copy so the wire coder can be added alongside the existing components.
    RunnerApi.Components.Builder withWireCoder = components.toBuilder();
    String wireCoderId = WireCoders.addSdkWireCoder(node, withWireCoder, RunnerApi.ExecutableStagePayload.WireCoderSetting.getDefaultInstance());
    try {
        @SuppressWarnings("unchecked") WindowedValue.FullWindowedValueCoder<T> sdkCoder = (WindowedValue.FullWindowedValueCoder<T>) RehydratedComponents.forComponents(withWireCoder.build()).getCoder(wireCoderId);
        return sdkCoder;
    } catch (IOException e) {
        throw new IllegalStateException("Could not get SDK coder.", e);
    }
}
Also used : IOException(java.io.IOException) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) WindowedValue(org.apache.beam.sdk.util.WindowedValue) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)

Example 4 with RehydratedComponents

use of org.apache.beam.runners.core.construction.RehydratedComponents in project beam by apache.

the class CreateExecutableStageNodeFunction method getEnvironmentFromPTransform.

/**
 * Returns the environment of the first transform in {@code sdkTransforms} for which one can be
 * resolved, or {@code null} when none of them yields an environment.
 *
 * @param components the pipeline components used to resolve environments
 * @param sdkTransforms the candidate transforms, examined in iteration order
 * @return the first environment found, or {@code null}
 */
private Environment getEnvironmentFromPTransform(RunnerApi.Components components, Set<PTransformNode> sdkTransforms) {
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    for (PTransformNode candidate : sdkTransforms) {
        Environment found = Environments.getEnvironment(candidate.getTransform(), rehydrated).orElse(null);
        if (found != null) {
            return found;
        }
    }
    return null;
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)

Example 5 with RehydratedComponents

use of org.apache.beam.runners.core.construction.RehydratedComponents in project beam by apache.

the class InsertFetchAndFilterStreamingSideInputNodes method forNetwork.

/**
 * Rewrites {@code network} so that each SDK-harness ParDo with side inputs is preceded by a
 * {@link FetchAndFilterStreamingSideInputsNode}.
 *
 * <p>When {@code pipeline} is {@code null} the network is returned untouched. Otherwise every
 * {@link ParallelInstructionNode} is inspected; nodes that are not ParDos, do not run in the SDK
 * harness, are combine functions, have no matching transform in the pipeline components, or have
 * no side inputs are skipped. For the remaining nodes the single incoming edge is replaced by the
 * chain {@code predecessor -> side-input handler -> copy of predecessor -> node}.
 *
 * @param network the instruction graph to rewrite; it is mutated in place
 * @return the same {@code network} instance that was passed in
 * @throws RuntimeException if a transform's payload cannot be parsed as a ParDoPayload
 * @throws IllegalStateException if the main input's windowing strategy cannot be decoded
 */
public MutableNetwork<Node, Edge> forNetwork(MutableNetwork<Node, Edge> network) {
    if (pipeline == null) {
        return network;
    }
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(pipeline.getComponents());
    // Iterate over a copy of the matching nodes because the loop body adds nodes and edges to the
    // network.
    for (ParallelInstructionNode node : ImmutableList.copyOf(Iterables.filter(network.nodes(), ParallelInstructionNode.class))) {
        // Nodes that are not ParDos, or that do not execute in the SDK harness, are left untouched.
        if (node.getParallelInstruction().getParDo() == null || !ExecutionLocation.SDK_HARNESS.equals(node.getExecutionLocation())) {
            continue;
        }
        ParDoInstruction parDoInstruction = node.getParallelInstruction().getParDo();
        CloudObject userFnSpec = CloudObject.fromSpec(parDoInstruction.getUserFn());
        // The serialized-fn property is used below as the transform id in the pipeline components.
        String parDoPTransformId = getString(userFnSpec, PropertyNames.SERIALIZED_FN);
        // Skip ParDoInstruction nodes that contain payloads without side inputs.
        String userFnClassName = userFnSpec.getClassName();
        if ("CombineValuesFn".equals(userFnClassName) || "KeyedCombineFn".equals(userFnClassName)) {
            // These nodes have CombinePayloads which have no side inputs.
            continue;
        }
        RunnerApi.PTransform parDoPTransform = pipeline.getComponents().getTransformsOrDefault(parDoPTransformId, null);
        // TODO: only the non-null branch should exist; for migration ease only
        if (parDoPTransform == null) {
            continue;
        }
        RunnerApi.ParDoPayload parDoPayload;
        try {
            parDoPayload = RunnerApi.ParDoPayload.parseFrom(parDoPTransform.getSpec().getPayload());
        } catch (InvalidProtocolBufferException exc) {
            throw new RuntimeException("ParDo did not have a ParDoPayload", exc);
        }
        // Skip any ParDo that doesn't have a side input.
        if (parDoPayload.getSideInputsMap().isEmpty()) {
            continue;
        }
        // The main input is the one input name that is not a side input; getOnlyElement fails if
        // there is not exactly one such name.
        String mainInputPCollectionLocalName = Iterables.getOnlyElement(Sets.difference(parDoPTransform.getInputsMap().keySet(), parDoPayload.getSideInputsMap().keySet()));
        // Look up the windowing strategy of the main input PCollection.
        RunnerApi.WindowingStrategy windowingStrategyProto = pipeline.getComponents().getWindowingStrategiesOrThrow(pipeline.getComponents().getPcollectionsOrThrow(parDoPTransform.getInputsOrThrow(mainInputPCollectionLocalName)).getWindowingStrategyId());
        WindowingStrategy windowingStrategy;
        try {
            windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
        } catch (InvalidProtocolBufferException e) {
            throw new IllegalStateException(String.format("Unable to decode windowing strategy %s.", windowingStrategyProto), e);
        }
        // Gather all the side input window mapping fns which we need to request the SDK to map
        ImmutableMap.Builder<PCollectionView<?>, RunnerApi.FunctionSpec> pCollectionViewsToWindowMapingsFns = ImmutableMap.builder();
        parDoPayload.getSideInputsMap().forEach((sideInputTag, sideInput) -> pCollectionViewsToWindowMapingsFns.put(RegisterNodeFunction.transformSideInputForRunner(pipeline, parDoPTransform, sideInputTag, sideInput), sideInput.getWindowMappingFn()));
        // The handler node reuses the original, system and user names of the ParDo instruction.
        Node streamingSideInputWindowHandlerNode = FetchAndFilterStreamingSideInputsNode.create(windowingStrategy, pCollectionViewsToWindowMapingsFns.build(), NameContext.create(null, node.getParallelInstruction().getOriginalName(), node.getParallelInstruction().getSystemName(), node.getParallelInstruction().getName()));
        // Rewire the graph such that streaming side inputs ParDos are preceded by a
        // node which filters any side inputs that aren't ready and fetches any ready side inputs.
        // getOnlyElement requires the ParDo node to have exactly one incoming edge.
        Edge mainInput = Iterables.getOnlyElement(network.inEdges(node));
        InstructionOutputNode predecessor = (InstructionOutputNode) network.incidentNodes(mainInput).source();
        // A copy of the predecessor output node sits between the handler and the ParDo.
        InstructionOutputNode predecessorCopy = InstructionOutputNode.create(predecessor.getInstructionOutput(), predecessor.getPcollectionId());
        network.removeEdge(mainInput);
        network.addNode(streamingSideInputWindowHandlerNode);
        network.addNode(predecessorCopy);
        // Each new edge is a separate clone of the removed main-input edge.
        network.addEdge(predecessor, streamingSideInputWindowHandlerNode, mainInput.clone());
        network.addEdge(streamingSideInputWindowHandlerNode, predecessorCopy, mainInput.clone());
        network.addEdge(predecessorCopy, node, mainInput.clone());
    }
    return network;
}
Also used : FetchAndFilterStreamingSideInputsNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.FetchAndFilterStreamingSideInputsNode) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) Node(org.apache.beam.runners.dataflow.worker.graph.Nodes.Node) ParallelInstructionNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.ParallelInstructionNode) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) ParDoInstruction(com.google.api.services.dataflow.model.ParDoInstruction) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) InstructionOutputNode(org.apache.beam.runners.dataflow.worker.graph.Nodes.InstructionOutputNode) PCollectionView(org.apache.beam.sdk.values.PCollectionView) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Edge(org.apache.beam.runners.dataflow.worker.graph.Edges.Edge)

Aggregations

RehydratedComponents (org.apache.beam.runners.core.construction.RehydratedComponents)8 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)6 IOException (java.io.IOException)4 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)4 WindowedValue (org.apache.beam.sdk.util.WindowedValue)3 List (java.util.List)2 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)2 KV (org.apache.beam.sdk.values.KV)2 PCollectionView (org.apache.beam.sdk.values.PCollectionView)2 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)1 AutoService (com.google.auto.service.AutoService)1 Constructor (java.lang.reflect.Constructor)1 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 Optional (java.util.Optional)1 ServiceLoader (java.util.ServiceLoader)1