Search in sources :

Example 46 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class SideInputReference method fromSideInputId.

/**
 * Create a side input reference from a SideInputId proto and components.
 */
public static SideInputReference fromSideInputId(SideInputId sideInputId, RunnerApi.Components components) {
    String transformId = sideInputId.getTransformId();
    String localName = sideInputId.getLocalName();
    String collectionId = components.getTransformsOrThrow(transformId).getInputsOrThrow(localName);
    PTransform transform = components.getTransformsOrThrow(transformId);
    PCollection collection = components.getPcollectionsOrThrow(collectionId);
    return SideInputReference.of(PipelineNode.pTransform(transformId, transform), localName, PipelineNode.pCollection(collectionId, collection));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 47 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class PipelineTranslationTest method getAccumulatorCoder.

private static Coder<?> getAccumulatorCoder(AppliedPTransform<?, ?, ?> transform) throws IOException {
    SdkComponents sdkComponents = SdkComponents.create(transform.getPipeline().getOptions());
    String id = getCombinePayload(transform, sdkComponents).map(CombinePayload::getAccumulatorCoderId).orElseThrow(() -> new IOException("Transform does not contain an AccumulatorCoder"));
    Components components = sdkComponents.toComponents();
    return CoderTranslation.fromProto(components.getCodersOrThrow(id), RehydratedComponents.forComponents(components), TranslationContext.DEFAULT);
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException)

Example 48 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class WindowUtils method getWindowStrategy.

/**
 * Get {@link WindowingStrategy} of given collection id from {@link RunnerApi.Components}.
 */
public static WindowingStrategy<?, BoundedWindow> getWindowStrategy(String collectionId, RunnerApi.Components components) {
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);
    RunnerApi.WindowingStrategy windowingStrategyProto = components.getWindowingStrategiesOrThrow(components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId());
    WindowingStrategy<?, ?> windowingStrategy;
    try {
        windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
    } catch (Exception e) {
        throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), e);
    }
    @SuppressWarnings("unchecked") WindowingStrategy<?, BoundedWindow> ret = (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
    return ret;
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) IOException(java.io.IOException) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 49 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class FlinkStreamingPortablePipelineTranslator method translateExecutableStage.

private <InputT, OutputT> void translateExecutableStage(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    // TODO: Fail on splittable DoFns.
    // TODO: Special-case single outputs to avoid multiplexing PCollections.
    RunnerApi.Components components = pipeline.getComponents();
    RunnerApi.PTransform transform = components.getTransformsOrThrow(id);
    Map<String, String> outputs = transform.getOutputsMap();
    final RunnerApi.ExecutableStagePayload stagePayload;
    try {
        stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String inputPCollectionId = stagePayload.getInput();
    final TransformedSideInputs transformedSideInputs;
    if (stagePayload.getSideInputsCount() > 0) {
        transformedSideInputs = transformSideInputs(stagePayload, components, context);
    } else {
        transformedSideInputs = new TransformedSideInputs(Collections.emptyMap(), null);
    }
    Map<TupleTag<?>, OutputTag<WindowedValue<?>>> tagsToOutputTags = Maps.newLinkedHashMap();
    Map<TupleTag<?>, Coder<WindowedValue<?>>> tagsToCoders = Maps.newLinkedHashMap();
    // TODO: does it matter which output we designate as "main"
    final TupleTag<OutputT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
    // associate output tags with ids, output manager uses these Integer ids to serialize state
    BiMap<String, Integer> outputIndexMap = createOutputMap(outputs.keySet());
    Map<String, Coder<WindowedValue<?>>> outputCoders = Maps.newHashMap();
    Map<TupleTag<?>, Integer> tagsToIds = Maps.newHashMap();
    Map<String, TupleTag<?>> collectionIdToTupleTag = Maps.newHashMap();
    // order output names for deterministic mapping
    for (String localOutputName : new TreeMap<>(outputIndexMap).keySet()) {
        String collectionId = outputs.get(localOutputName);
        Coder<WindowedValue<?>> windowCoder = (Coder) instantiateCoder(collectionId, components);
        outputCoders.put(localOutputName, windowCoder);
        TupleTag<?> tupleTag = new TupleTag<>(localOutputName);
        CoderTypeInformation<WindowedValue<?>> typeInformation = new CoderTypeInformation(windowCoder, context.getPipelineOptions());
        tagsToOutputTags.put(tupleTag, new OutputTag<>(localOutputName, typeInformation));
        tagsToCoders.put(tupleTag, windowCoder);
        tagsToIds.put(tupleTag, outputIndexMap.get(localOutputName));
        collectionIdToTupleTag.put(collectionId, tupleTag);
    }
    final SingleOutputStreamOperator<WindowedValue<OutputT>> outputStream;
    DataStream<WindowedValue<InputT>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
    CoderTypeInformation<WindowedValue<OutputT>> outputTypeInformation = !outputs.isEmpty() ? new CoderTypeInformation(outputCoders.get(mainOutputTag.getId()), context.getPipelineOptions()) : null;
    ArrayList<TupleTag<?>> additionalOutputTags = Lists.newArrayList();
    for (TupleTag<?> tupleTag : tagsToCoders.keySet()) {
        if (!mainOutputTag.getId().equals(tupleTag.getId())) {
            additionalOutputTags.add(tupleTag);
        }
    }
    final Coder<WindowedValue<InputT>> windowedInputCoder = instantiateCoder(inputPCollectionId, components);
    final boolean stateful = stagePayload.getUserStatesCount() > 0 || stagePayload.getTimersCount() > 0;
    final boolean hasSdfProcessFn = stagePayload.getComponents().getTransformsMap().values().stream().anyMatch(pTransform -> pTransform.getSpec().getUrn().equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN));
    Coder keyCoder = null;
    KeySelector<WindowedValue<InputT>, ?> keySelector = null;
    if (stateful || hasSdfProcessFn) {
        // Stateful/SDF stages are only allowed of KV input.
        Coder valueCoder = ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder();
        if (!(valueCoder instanceof KvCoder)) {
            throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for stateful DoFn '%s' must be KvCoder but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
        }
        if (stateful) {
            keyCoder = ((KvCoder) valueCoder).getKeyCoder();
            keySelector = new KvToByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        } else {
            // as the key.
            if (!(((KvCoder) valueCoder).getKeyCoder() instanceof KvCoder)) {
                throw new IllegalStateException(String.format(Locale.ENGLISH, "The element coder for splittable DoFn '%s' must be KVCoder(KvCoder, DoubleCoder) but is: %s", inputPCollectionId, valueCoder.getClass().getSimpleName()));
            }
            keyCoder = ((KvCoder) ((KvCoder) valueCoder).getKeyCoder()).getKeyCoder();
            keySelector = new SdfByteBufferKeySelector(keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
        }
        inputDataStream = inputDataStream.keyBy(keySelector);
    }
    DoFnOperator.MultiOutputOutputManagerFactory<OutputT> outputManagerFactory = new DoFnOperator.MultiOutputOutputManagerFactory<>(mainOutputTag, tagsToOutputTags, tagsToCoders, tagsToIds, new SerializablePipelineOptions(context.getPipelineOptions()));
    DoFnOperator<InputT, OutputT> doFnOperator = new ExecutableStageDoFnOperator<>(transform.getUniqueName(), windowedInputCoder, Collections.emptyMap(), mainOutputTag, additionalOutputTags, outputManagerFactory, transformedSideInputs.unionTagToView, new ArrayList<>(transformedSideInputs.unionTagToView.values()), getSideInputIdToPCollectionViewMap(stagePayload, components), context.getPipelineOptions(), stagePayload, context.getJobInfo(), FlinkExecutableStageContextFactory.getInstance(), collectionIdToTupleTag, getWindowingStrategy(inputPCollectionId, components), keyCoder, keySelector);
    final String operatorName = generateNameFromStagePayload(stagePayload);
    if (transformedSideInputs.unionTagToView.isEmpty()) {
        outputStream = inputDataStream.transform(operatorName, outputTypeInformation, doFnOperator);
    } else {
        DataStream<RawUnionValue> sideInputStream = transformedSideInputs.unionedSideInputs.broadcast();
        if (stateful || hasSdfProcessFn) {
            // We have to manually construct the two-input transform because we're not
            // allowed to have only one input keyed, normally. Since Flink 1.5.0 it's
            // possible to use the Broadcast State Pattern which provides a more elegant
            // way to process keyed main input with broadcast state, but it's not feasible
            // here because it breaks the DoFnOperator abstraction.
            TwoInputTransformation<WindowedValue<KV<?, InputT>>, RawUnionValue, WindowedValue<OutputT>> rawFlinkTransform = new TwoInputTransformation(inputDataStream.getTransformation(), sideInputStream.getTransformation(), transform.getUniqueName(), doFnOperator, outputTypeInformation, inputDataStream.getParallelism());
            rawFlinkTransform.setStateKeyType(((KeyedStream) inputDataStream).getKeyType());
            rawFlinkTransform.setStateKeySelectors(((KeyedStream) inputDataStream).getKeySelector(), null);
            outputStream = new SingleOutputStreamOperator(inputDataStream.getExecutionEnvironment(), // we have to cheat around the ctor being protected
            rawFlinkTransform) {
            };
        } else {
            outputStream = inputDataStream.connect(sideInputStream).transform(operatorName, outputTypeInformation, doFnOperator);
        }
    }
    // Assign a unique but consistent id to re-map operator state
    outputStream.uid(transform.getUniqueName());
    if (mainOutputTag != null) {
        context.addDataStream(outputs.get(mainOutputTag.getId()), outputStream);
    }
    for (TupleTag<?> tupleTag : additionalOutputTags) {
        context.addDataStream(outputs.get(tupleTag.getId()), outputStream.getSideOutput(tagsToOutputTags.get(tupleTag)));
    }
}
Also used : KvToByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.KvToByteBufferKeySelector) TupleTag(org.apache.beam.sdk.values.TupleTag) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) OutputTag(org.apache.flink.util.OutputTag) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) WindowDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) SdfByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.SdfByteBufferKeySelector) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) SingletonKeyedWorkItemCoder(org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IOException(java.io.IOException)

Example 50 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class CreateExecutableStageNodeFunction method getEnvironmentFromPTransform.

private Environment getEnvironmentFromPTransform(RunnerApi.Components components, Set<PTransformNode> sdkTransforms) {
    RehydratedComponents sdkComponents = RehydratedComponents.forComponents(components);
    Environment env = null;
    for (PTransformNode pTransformNode : sdkTransforms) {
        env = Environments.getEnvironment(pTransformNode.getTransform(), sdkComponents).orElse(null);
        if (env != null) {
            break;
        }
    }
    return env;
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10