Search in sources :

Example 86 with PTransform

use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method transformSideInputs.

/**
 * Materializes the side inputs of an executable stage and merges them into a single stream.
 *
 * <p>Each side input is assigned an integer index in the iteration order of the side-input map.
 * Its data stream is re-keyed with a {@code Void} key, grouped through {@code addGBK}, wrapped
 * into {@link RawUnionValue}s carrying that index, and finally unioned with the other side inputs.
 *
 * @param stagePayload payload of the stage whose side inputs are translated
 * @param components pipeline components used to resolve the side-input transforms and collections
 * @param context translation context supplying the data streams and pipeline options
 * @return the index-to-view mapping together with the unioned stream; the stream is {@code null}
 *     when the stage declares no side inputs
 * @throws IllegalStateException if a side-input stream's type information is not a {@code
 *     CoderTypeInformation}
 */
private TransformedSideInputs transformSideInputs(RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components, StreamingTranslationContext context) {
    LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> viewsById = getSideInputIdToPCollectionViewMap(stagePayload, components);

    Map<TupleTag<?>, Integer> indexByTag = new HashMap<>();
    Map<Integer, PCollectionView<?>> viewByIndex = new HashMap<>();
    List<WindowedValueCoder<KV<Void, Object>>> keyedCoders = new ArrayList<>();
    List<Coder<?>> materializedCoders = new ArrayList<>();

    // Pass 1: number the side inputs and derive the coders each one needs.
    int nextIndex = 0;
    for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> entry : viewsById.entrySet()) {
        RunnerApi.ExecutableStagePayload.SideInputId sideInputId = entry.getKey();
        PCollectionView<?> view = entry.getValue();

        TupleTag<?> viewTag = view.getTagInternal();
        viewByIndex.put(nextIndex, view);
        indexByTag.put(viewTag, nextIndex);
        nextIndex++;

        String pCollectionId = components.getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(sideInputId.getLocalName());
        DataStream<Object> rawStream = context.getDataStreamOrThrow(pCollectionId);
        TypeInformation<Object> typeInfo = rawStream.getType();
        if (!(typeInfo instanceof CoderTypeInformation)) {
            throw new IllegalStateException("Input Stream TypeInformation is no CoderTypeInformation.");
        }

        WindowedValueCoder<Object> windowedCoder = (WindowedValueCoder) ((CoderTypeInformation) typeInfo).getCoder();
        Coder<KV<Void, Object>> voidKeyedCoder = KvCoder.of(VoidCoder.of(), windowedCoder.getValueCoder());
        keyedCoders.add(windowedCoder.withValueCoder(voidKeyedCoder));

        // The materialized view is the grouped form, so its value side is an Iterable.
        WindowedValueCoder<KV<Void, Iterable<Object>>> groupedCoder = windowedCoder.withValueCoder(KvCoder.of(VoidCoder.of(), IterableCoder.of(windowedCoder.getValueCoder())));
        materializedCoders.add(groupedCoder);
    }

    // The union coder can only be built once every view coder is known.
    UnionCoder unionCoder = UnionCoder.of(materializedCoders);
    CoderTypeInformation<RawUnionValue> unionTypeInfo = new CoderTypeInformation<>(unionCoder, context.getPipelineOptions());

    // Pass 2: group each side input, tag it with its index and fold it into the union.
    DataStream<RawUnionValue> unioned = null;
    for (Map.Entry<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> entry : viewsById.entrySet()) {
        RunnerApi.ExecutableStagePayload.SideInputId sideInputId = entry.getKey();
        PCollectionView<?> view = entry.getValue();

        TupleTag<?> viewTag = view.getTagInternal();
        final int index = indexByTag.get(viewTag);

        RunnerApi.PTransform consumer = components.getTransformsOrThrow(sideInputId.getTransformId());
        String pCollectionId = consumer.getInputsOrThrow(sideInputId.getLocalName());
        DataStream<WindowedValue<?>> windowedStream = context.getDataStreamOrThrow(pCollectionId);

        // A GroupByKey over the void key gathers the side input's elements per window.
        String gbkName = sideInputId.getTransformId() + "-" + sideInputId.getLocalName();
        WindowedValueCoder<KV<Void, Object>> keyedCoder = keyedCoders.get(index);
        DataStream<WindowedValue<KV<Void, Object>>> voidKeyedStream = windowedStream.map(new ToVoidKeyValue(context.getPipelineOptions()));
        SingleOutputStreamOperator<WindowedValue<KV<Void, Iterable<Object>>>> groupedStream = addGBK(voidKeyedStream, view.getWindowingStrategyInternal(), keyedCoder, gbkName, context);

        // A stable operator uid lets operator state be re-mapped across job versions.
        groupedStream.uid(consumer.getUniqueName() + "-" + sideInputId.getLocalName());

        DataStream<RawUnionValue> taggedStream = groupedStream.map(new FlinkStreamingTransformTranslators.ToRawUnion<>(index, context.getPipelineOptions())).returns(unionTypeInfo);
        unioned = (unioned == null) ? taggedStream : unioned.union(taggedStream);
    }
    return new TransformedSideInputs(viewByIndex, unioned);
}
Also used : LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) SingletonKeyedWorkItemCoder(org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) KV(org.apache.beam.sdk.values.KV) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) BiMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.BiMap) TreeMap(java.util.TreeMap) PipelineTranslatorUtils.createOutputMap(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.createOutputMap) HashMap(java.util.HashMap)

Example 87 with PTransform

use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method translateGroupByKey.

/**
 * Translates a GroupByKey transform into a grouped Flink data stream.
 *
 * <p>The windowing strategy is taken from the transform's single input PCollection and the
 * resulting stream is registered in the context under the transform's single output id.
 *
 * @param id id of the GroupByKey transform within the pipeline components
 * @param pipeline pipeline proto holding the transform, its PCollections and windowing strategies
 * @param context translation context that provides the input stream and receives the output
 * @throws IllegalStateException if the windowing strategy proto cannot be rehydrated
 */
private <K, V> void translateGroupByKey(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    RunnerApi.Components components = pipeline.getComponents();
    RunnerApi.PTransform groupByKey = components.getTransformsOrThrow(id);
    String inputId = Iterables.getOnlyElement(groupByKey.getInputsMap().values());

    RehydratedComponents rehydrated = RehydratedComponents.forComponents(components);
    String strategyId = components.getPcollectionsOrThrow(inputId).getWindowingStrategyId();
    RunnerApi.WindowingStrategy strategyProto = components.getWindowingStrategiesOrThrow(strategyId);

    WindowingStrategy<?, ?> strategy;
    try {
        strategy = WindowingStrategyTranslation.fromProto(strategyProto, rehydrated);
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", strategyProto), e);
    }

    WindowedValueCoder<KV<K, V>> inputCoder = (WindowedValueCoder) instantiateCoder(inputId, components);
    DataStream<WindowedValue<KV<K, V>>> input = context.getDataStreamOrThrow(inputId);

    String transformName = groupByKey.getUniqueName();
    SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> grouped = addGBK(input, strategy, inputCoder, transformName, context);
    // A stable operator uid lets operator state be re-mapped across job versions.
    grouped.uid(transformName);

    String outputId = Iterables.getOnlyElement(groupByKey.getOutputsMap().values());
    context.addDataStream(outputId, grouped);
}
Also used : InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) KV(org.apache.beam.sdk.values.KV) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)

Example 88 with PTransform

use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class ProcessBundleDescriptors method addStageInput.

/**
 * Registers the gRPC read transform that feeds a stage's input PCollection.
 *
 * <p>Adds the SDK-side wire coder to {@code components}, instantiates the matching runner-side
 * coder, and inserts a {@code RemoteGrpcPortRead} transform under a unique {@code fn/read/...} id.
 *
 * @param dataEndpoint descriptor of the data service the read port is bound to
 * @param inputPCollection PCollection consumed as the stage input
 * @param components components builder that is extended with the wire coder and read transform
 * @param wireCoderSetting setting forwarded to the wire-coder helpers
 * @return the input destination pairing the runner-side coder with the new transform id
 * @throws IOException declared by this method; which helper raises it is not visible here
 */
private static RemoteInputDestination<WindowedValue<?>> addStageInput(ApiServiceDescriptor dataEndpoint, PCollectionNode inputPCollection, Components.Builder components, WireCoderSetting wireCoderSetting) throws IOException {
    // The SDK wire coder must be added before the components are built for the runner coder.
    String sdkCoderId = WireCoders.addSdkWireCoder(inputPCollection, components, wireCoderSetting);
    @SuppressWarnings("unchecked") Coder<WindowedValue<?>> runnerCoder = (Coder) WireCoders.instantiateRunnerWireCoder(inputPCollection, components.build(), wireCoderSetting);

    RemoteGrpcPort readPort = RemoteGrpcPort.newBuilder()
            .setApiServiceDescriptor(dataEndpoint)
            .setCoderId(sdkCoderId)
            .build();

    String readTransformId = uniqueId(String.format("fn/read/%s", inputPCollection.getId()), components::containsTransforms);
    components.putTransforms(readTransformId, RemoteGrpcPortRead.readFromPort(readPort, inputPCollection.getId()).toPTransform());
    return RemoteInputDestination.of(runnerCoder, readTransformId);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteGrpcPort(org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 89 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class JavaClassLookupTransformProvider method getMethod.

/**
 * Finds the one method on the transform's class that corresponds to a builder method request.
 *
 * <p>A method qualifies when it passes {@code isBuilderMethodForName} for the requested name, its
 * parameters are compatible with the decoded payload row, and its return type is assignable to
 * {@code PTransform}.
 *
 * @param transform transform whose class is searched
 * @param builderMethod requested builder method, carrying name, schema and payload
 * @param allowListClass allow-list entry forwarded to the name check
 * @return the single matching method
 * @throws RuntimeException if no method matches or more than one method matches
 */
private Method getMethod(PTransform<PInput, POutput> transform, BuilderMethod builderMethod, AllowedClass allowListClass) {
    Row argumentRow = decodeRow(builderMethod.getSchema(), builderMethod.getPayload());

    List<Method> candidates = new ArrayList<>();
    for (Method candidate : transform.getClass().getMethods()) {
        // Checks short-circuit in the same order as the original filter chain.
        if (isBuilderMethodForName(candidate, builderMethod.getName(), allowListClass)
                && parametersCompatible(candidate.getParameters(), argumentRow)
                && PTransform.class.isAssignableFrom(candidate.getReturnType())) {
            candidates.add(candidate);
        }
    }

    if (candidates.isEmpty()) {
        throw new RuntimeException("Could not find a matching method in transform " + transform + " for BuilderMethod" + builderMethod + ". When using field names, make sure they are available in the compiled" + " Java class.");
    }
    if (candidates.size() > 1) {
        throw new RuntimeException("Expected to find exactly one matching method in transform " + transform + " for BuilderMethod" + builderMethod + " but found " + candidates.size());
    }
    return candidates.get(0);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Arrays(java.util.Arrays) Array(java.lang.reflect.Array) NoSuchSchemaException(org.apache.beam.sdk.schemas.NoSuchSchemaException) SchemaApi(org.apache.beam.model.pipeline.v1.SchemaApi) RowCoder(org.apache.beam.sdk.coders.RowCoder) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) Constructor(java.lang.reflect.Constructor) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) SchemaRegistry(org.apache.beam.sdk.schemas.SchemaRegistry) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) JavaFieldSchema(org.apache.beam.sdk.schemas.JavaFieldSchema) PInput(org.apache.beam.sdk.values.PInput) Row(org.apache.beam.sdk.values.Row) Method(java.lang.reflect.Method) Nullable(org.checkerframework.checker.nullness.qual.Nullable) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Field(org.apache.beam.sdk.schemas.Schema.Field) TransformProvider(org.apache.beam.sdk.expansion.service.ExpansionService.TransformProvider) Collection(java.util.Collection) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Schema(org.apache.beam.sdk.schemas.Schema) TypeName(org.apache.beam.sdk.schemas.Schema.TypeName) JavaClassLookupPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.JavaClassLookupPayload) InvocationTargetException(java.lang.reflect.InvocationTargetException) ExpansionMethods(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExpansionMethods) ClassUtils(org.apache.beam.repackaged.core.org.apache.commons.lang3.ClassUtils) POutput(org.apache.beam.sdk.values.POutput) List(java.util.List) ParameterizedType(java.lang.reflect.ParameterizedType) Type(java.lang.reflect.Type) 
ReflectHelpers(org.apache.beam.sdk.util.common.ReflectHelpers) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) AutoValue(com.google.auto.value.AutoValue) Annotation(java.lang.annotation.Annotation) Pattern(java.util.regex.Pattern) SchemaTranslation(org.apache.beam.sdk.schemas.SchemaTranslation) Collections(java.util.Collections) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) Row(org.apache.beam.sdk.values.Row) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Method(java.lang.reflect.Method)

Example 90 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class JavaClassLookupTransformProvider method applyBuilderMethods.

/**
 * Applies the payload's builder methods to a transform, one after another.
 *
 * <p>Each builder method is resolved against the current transform, invoked reflectively with
 * arguments derived from its decoded payload row, and its result becomes the transform used for
 * the next builder method.
 *
 * @param transform transform the first builder method is applied to
 * @param payload payload listing the builder methods in application order
 * @param allowListClass allow-list entry forwarded to method resolution
 * @return the transform returned by the last builder method, or {@code transform} itself when the
 *     payload lists none
 * @throws IllegalArgumentException if a builder method cannot be accessed or throws when invoked
 */
private PTransform<PInput, POutput> applyBuilderMethods(PTransform<PInput, POutput> transform, JavaClassLookupPayload payload, AllowedClass allowListClass) {
    PTransform<PInput, POutput> configured = transform;
    for (BuilderMethod step : payload.getBuilderMethodsList()) {
        Method builder = getMethod(configured, step, allowListClass);
        try {
            Row argumentRow = decodeRow(step.getSchema(), step.getPayload());
            Object result = builder.invoke(configured, getParameterValues(builder.getParameters(), argumentRow, builder.getGenericParameterTypes()));
            configured = (PTransform<PInput, POutput>) result;
        } catch (IllegalAccessException | InvocationTargetException e) {
            // configured still refers to the transform the failed call was made on.
            throw new IllegalArgumentException("Could not invoke the builder method " + step + " on transform " + configured + " with parameter schema " + step.getSchema(), e);
        }
    }
    return configured;
}
Also used : PInput(org.apache.beam.sdk.values.PInput) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) POutput(org.apache.beam.sdk.values.POutput) BuilderMethod(org.apache.beam.model.pipeline.v1.ExternalTransforms.BuilderMethod) Method(java.lang.reflect.Method) Row(org.apache.beam.sdk.values.Row) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Aggregations

PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)58 Test (org.junit.Test)41 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)28 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)23 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)19 Map (java.util.Map)18 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)18 ArrayList (java.util.ArrayList)16 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)11 Collection (java.util.Collection)9 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)9 Collectors (java.util.stream.Collectors)8 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)8 IOException (java.io.IOException)7 HashSet (java.util.HashSet)7 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)7 Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)7