Search in sources :

Example 6 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DoFnFunction method prepareSerialization.

/**
 * Converts the state of this DoFnFunction into serialization-friendly forms: strings, byte
 * arrays, and collections keyed by tag id. These are meant to be turned back into the original
 * objects during deserialization.
 *
 * <p>An {@link IOException} raised inside the conversion section is only logged; fields that
 * were populated before the failure keep their values and the remaining ones are not touched.
 * NOTE(review): only the exception message is logged, so the stack trace is lost and callers
 * are not told that serialization was incomplete - confirm this is intended.
 */
private void prepareSerialization() {
    SdkComponents sdkComponents = SdkComponents.create();
    PortablePipelineOptions portableOptions = pipelineOptions.as(PortablePipelineOptions.class);
    sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(portableOptions));

    this.serializedOptions = new SerializablePipelineOptions(pipelineOptions).toString();

    // Translate the DoFn once and keep both the translated spec and its raw payload bytes.
    doFnwithEx = ParDoTranslation.translateDoFn(this.doFn, mainOutput, sideInputMapping, doFnSchemaInformation, sdkComponents);
    doFnwithExBytes = doFnwithEx.getPayload().toByteArray();

    outputCodersBytes = new HashMap<>();
    try {
        // Input coder and main windowing strategy.
        coderBytes = SerializableUtils.serializeToByteArray(inputCoder);
        windowStrategyProto = WindowingStrategyTranslation.toMessageProto(windowingStrategy, sdkComponents);
        windowBytes = windowStrategyProto.toByteArray();

        // Output coders, keyed by the id of their tuple tag.
        for (Map.Entry<TupleTag<?>, Coder<?>> coderByTag : outputCoders.entrySet()) {
            String tagId = coderByTag.getKey().getId();
            byte[] encodedCoder = SerializableUtils.serializeToByteArray(coderByTag.getValue());
            outputCodersBytes.put(tagId, encodedCoder);
        }

        // Side-input windowing strategies, keyed by tag id. windowStrategyProto is not a local of
        // this method and is reassigned on every iteration, so after the loop it refers to the
        // proto of the last side input rather than the main windowing strategy.
        sideInputBytes = new HashMap<>();
        for (Map.Entry<TupleTag<?>, WindowingStrategy<?, ?>> strategyByTag : sideInputs.entrySet()) {
            windowStrategyProto = WindowingStrategyTranslation.toMessageProto(strategyByTag.getValue(), sdkComponents);
            sideInputBytes.put(strategyByTag.getKey().getId(), windowStrategyProto.toByteArray());
        }

        // Side outputs are reduced to their tag ids.
        serializedSideOutputs = new ArrayList<>();
        for (TupleTag<?> outputTag : sideOutputs) {
            serializedSideOutputs.add(outputTag.getId());
        }

        // Output map with the tuple-tag keys replaced by their ids.
        serializedOutputMap = new HashMap<>();
        outputMap.forEach((tag, index) -> serializedOutputMap.put(tag.getId(), index));
    } catch (IOException ioFailure) {
        LOG.info(ioFailure.getMessage());
    }
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) TupleTag(org.apache.beam.sdk.values.TupleTag) IOException(java.io.IOException) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class GroupByWindowFunction method initTransient.

/**
 * Rebuilds the transient windowing strategy from the serialized {@code windowBytes}.
 *
 * <p>Does nothing once {@code isInitialized} is set. A protobuf parse failure is only logged,
 * and the instance is still marked as initialized afterwards.
 */
private void initTransient() {
    if (!isInitialized) {
        // A fresh, empty component set is used to rehydrate the windowing strategy.
        SdkComponents emptyComponents = SdkComponents.create();
        try {
            windowStrategyProto = RunnerApi.MessageWithComponents.parseFrom(windowBytes);
            // Unchecked cast: the window type parameter W cannot be verified at runtime.
            windowingStrategy =
                (WindowingStrategy<?, W>)
                    WindowingStrategyTranslation.fromProto(
                        windowStrategyProto.getWindowingStrategy(),
                        RehydratedComponents.forComponents(emptyComponents.toComponents()));
        } catch (InvalidProtocolBufferException parseFailure) {
            LOG.info(parseFailure.getMessage());
        }
        this.isInitialized = true;
    }
}
Also used : InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents)

Example 8 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class ExpansionService method expand.

/**
 * Expands the transform carried by {@code request} and returns its portable representation.
 *
 * <p>A fresh pipeline is created, the request's input PCollections are rehydrated into it, a
 * transform provider is selected by the transform's URN and applied, and the resulting pipeline
 * is translated to a proto from which the newly added root transform is extracted.
 *
 * @param request the expansion request; its components, transform and namespace are read
 * @return a response holding the expanded transform, the pipeline components without the
 *     expanded transform itself, and the pipeline requirements
 * @throws UnsupportedOperationException if no provider is registered for the transform's URN
 * @throws IllegalStateException if the Java class lookup URN is requested but the allowlist
 *     option yields {@code null}
 * @throws RuntimeException wrapping an {@link IOException} raised while rehydrating an input or
 *     registering an output PCollection
 */
@VisibleForTesting
/*package*/
ExpansionApi.ExpansionResponse expand(ExpansionApi.ExpansionRequest request) {
    LOG.info("Expanding '{}' with URN '{}'", request.getTransform().getUniqueName(), request.getTransform().getSpec().getUrn());
    LOG.debug("Full transform: {}", request.getTransform());
    // Remembered so the transform added by this expansion can be told apart from existing ones.
    Set<String> existingTransformIds = request.getComponents().getTransformsMap().keySet();
    Pipeline pipeline = createPipeline();
    boolean isUseDeprecatedRead = ExperimentalOptions.hasExperiment(pipelineOptions, "use_deprecated_read") || ExperimentalOptions.hasExperiment(pipelineOptions, "beam_fn_api_use_deprecated_read");
    if (!isUseDeprecatedRead) {
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
        // TODO(BEAM-10670): Remove this when we address performance issue.
        ExperimentalOptions.addExperiment(pipeline.getOptions().as(ExperimentalOptions.class), "use_sdf_read");
    } else {
        LOG.warn("Using use_deprecated_read in portable runners is runner-dependent. The " + "ExpansionService will respect that, but if your runner does not have support for " + "native Read transform, your Pipeline will fail during Pipeline submission.");
    }
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(request.getComponents()).withPipeline(pipeline);
    // Resolve every input PCollection id of the requested transform against the new pipeline.
    Map<String, PCollection<?>> inputs = request.getTransform().getInputsMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, input -> {
        try {
            return rehydratedComponents.getPCollection(input.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    String urn = request.getTransform().getSpec().getUrn();
    final TransformProvider transformProvider;
    if (getUrn(ExpansionMethods.Enum.JAVA_CLASS_LOOKUP).equals(urn)) {
        AllowList allowList = pipelineOptions.as(ExpansionServiceOptions.class).getJavaClassLookupAllowlist();
        // Explicit check instead of `assert`: assertions are disabled by default at runtime, so
        // an assert would let a null allowlist reach the provider unnoticed.
        if (allowList == null) {
            throw new IllegalStateException("Java class lookup allowlist is not configured; cannot expand URN: " + urn);
        }
        transformProvider = new JavaClassLookupTransformProvider(allowList);
    } else {
        transformProvider = getRegisteredTransforms().get(urn);
        if (transformProvider == null) {
            throw new UnsupportedOperationException("Unknown urn: " + urn);
        }
    }
    List<String> classpathResources = transformProvider.getDependencies(request.getTransform().getSpec(), pipeline.getOptions());
    pipeline.getOptions().as(PortablePipelineOptions.class).setFilesToStage(classpathResources);
    Map<String, PCollection<?>> outputs = transformProvider.apply(pipeline, request.getTransform().getUniqueName(), request.getTransform().getSpec(), inputs);
    // Needed to find which transform was new...
    SdkComponents sdkComponents = rehydratedComponents.getSdkComponents(Collections.emptyList()).withNewIdPrefix(request.getNamespace());
    sdkComponents.registerEnvironment(Environments.createOrGetDefaultEnvironment(pipeline.getOptions().as(PortablePipelineOptions.class)));
    // Register each output PCollection and keep the id it was registered under.
    Map<String, String> outputMap = outputs.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, output -> {
        try {
            return sdkComponents.registerPCollection(output.getValue());
        } catch (IOException exn) {
            throw new RuntimeException(exn);
        }
    }));
    if (isUseDeprecatedRead) {
        SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(pipeline);
    }
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents);
    // Exactly one root transform id that was not part of the request is expected here.
    String expandedTransformId = Iterables.getOnlyElement(pipelineProto.getRootTransformIdsList().stream().filter(id -> !existingTransformIds.contains(id)).collect(Collectors.toList()));
    RunnerApi.Components components = pipelineProto.getComponents();
    // The expanded transform is returned on its own, with its outputs replaced by the ids
    // registered above, and is therefore removed from the returned components.
    RunnerApi.PTransform expandedTransform = components.getTransformsOrThrow(expandedTransformId).toBuilder().setUniqueName(expandedTransformId).clearOutputs().putAllOutputs(outputMap).build();
    LOG.debug("Expanded to {}", expandedTransform);
    return ExpansionApi.ExpansionResponse.newBuilder().setComponents(components.toBuilder().removeTransforms(expandedTransformId)).setTransform(expandedTransform).addAllRequirements(pipelineProto.getRequirementsList()).build();
}
Also used : Arrays(java.util.Arrays) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) ServerBuilder(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.ServerBuilder) PipelineResult(org.apache.beam.sdk.PipelineResult) SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) LoggerFactory(org.slf4j.LoggerFactory) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Throwables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Throwables) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) PCollectionList(org.apache.beam.sdk.values.PCollectionList) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Method(java.lang.reflect.Method) SchemaCoder(org.apache.beam.sdk.schemas.SchemaCoder) Set(java.util.Set) ServiceLoader(java.util.ServiceLoader) Converter(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Converter) Collectors(java.util.stream.Collectors) InvocationTargetException(java.lang.reflect.InvocationTargetException) ExpansionMethods(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExpansionMethods) POutput(org.apache.beam.sdk.values.POutput) List(java.util.List) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) ExpansionServiceGrpc(org.apache.beam.model.expansion.v1.ExpansionServiceGrpc) StreamObserver(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.stub.StreamObserver) Optional(java.util.Optional) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) ExternalTransformBuilder(org.apache.beam.sdk.transforms.ExternalTransformBuilder) SchemaTranslation(org.apache.beam.sdk.schemas.SchemaTranslation) 
NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) CaseFormat(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.CaseFormat) Coder(org.apache.beam.sdk.coders.Coder) RowCoder(org.apache.beam.sdk.coders.RowCoder) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Constructor(java.lang.reflect.Constructor) Environments(org.apache.beam.runners.core.construction.Environments) Server(org.apache.beam.vendor.grpc.v1p43p2.io.grpc.Server) Preconditions.checkArgumentNotNull(org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull) PTransform(org.apache.beam.sdk.transforms.PTransform) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) PipelineRunner(org.apache.beam.sdk.PipelineRunner) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) PInput(org.apache.beam.sdk.values.PInput) Row(org.apache.beam.sdk.values.Row) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Field(org.apache.beam.sdk.schemas.Schema.Field) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) ExternalConfigurationPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExternalConfigurationPayload) PDone(org.apache.beam.sdk.values.PDone) Logger(org.slf4j.Logger) PipelineResources.detectClassPathResourcesToStage(org.apache.beam.runners.core.construction.resources.PipelineResources.detectClassPathResourcesToStage) IOException(java.io.IOException) SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) 
ExternalTransformRegistrar(org.apache.beam.sdk.expansion.ExternalTransformRegistrar) PCollection(org.apache.beam.sdk.values.PCollection) Schema(org.apache.beam.sdk.schemas.Schema) AutoService(com.google.auto.service.AutoService) Preconditions(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions) ArtifactRetrievalService(org.apache.beam.runners.fnexecution.artifact.ArtifactRetrievalService) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) IOException(java.io.IOException) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) AllowList(org.apache.beam.sdk.expansion.service.JavaClassLookupTransformProvider.AllowList) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 9 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class DataflowPipelineTranslator method serializeWindowingStrategy.

/**
 * Serializes {@code windowingStrategy} to the bytes of its message proto.
 *
 * <p>The proto is built against a fresh {@link SdkComponents} in which a Docker environment for
 * the job's container image (looked up from {@code options}) has been registered.
 *
 * @param windowingStrategy the strategy to serialize
 * @param options pipeline options, viewed as {@link DataflowPipelineOptions}
 * @return the serialized proto bytes
 * @throws RuntimeException wrapping any exception raised during the conversion
 */
private static byte[] serializeWindowingStrategy(WindowingStrategy<?, ?> windowingStrategy, PipelineOptions options) {
    try {
        SdkComponents components = SdkComponents.create();
        DataflowPipelineOptions dataflowOptions = options.as(DataflowPipelineOptions.class);
        String containerImage = DataflowRunner.getContainerImageForJob(dataflowOptions);
        components.registerEnvironment(Environments.createDockerEnvironment(containerImage));
        return WindowingStrategyTranslation.toMessageProto(windowingStrategy, components).toByteArray();
    } catch (Exception failure) {
        String message = String.format("Unable to format windowing strategy %s as bytes", windowingStrategy);
        throw new RuntimeException(message, failure);
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) EncoderException(org.apache.commons.codec.EncoderException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)

Example 10 with SdkComponents

use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.

the class WindowUtilsTest method testInstantiateWindowedCoder.

/**
 * Registers a bounded, globally windowed PCollection with a KV coder and checks that
 * {@code WindowUtils.instantiateWindowedCoder} yields a coder whose value coder equals it.
 */
@Test
public void testInstantiateWindowedCoder() throws IOException {
    Coder<KV<Long, String>> expectedValueCoder = KvCoder.of(VarLongCoder.of(), StringUtf8Coder.of());

    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));

    PCollection<KV<Long, String>> collection =
        PCollection.createPrimitiveOutputInternal(
                Pipeline.create(),
                WindowingStrategy.globalDefault(),
                PCollection.IsBounded.BOUNDED,
                expectedValueCoder)
            .setName("name");
    String collectionId = components.registerPCollection(collection);

    assertEquals(
        expectedValueCoder,
        WindowUtils.instantiateWindowedCoder(collectionId, components.toComponents()).getValueCoder());
}
Also used : KV(org.apache.beam.sdk.values.KV) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Test(org.junit.Test)

Aggregations

SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)61 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)48 Test (org.junit.Test)46 Pipeline (org.apache.beam.sdk.Pipeline)37 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)36 Job (com.google.api.services.dataflow.model.Job)25 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)25 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)21 KV (org.apache.beam.sdk.values.KV)14 Map (java.util.Map)12 Step (com.google.api.services.dataflow.model.Step)11 ArrayList (java.util.ArrayList)11 List (java.util.List)9 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)9 HashMap (java.util.HashMap)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 WindowedValue (org.apache.beam.sdk.util.WindowedValue)7 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)7 InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput)6 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)6