
Example 11 with WindowingStrategy

Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by apache.

The class ProtoOverridesTest, method replacesOnlyMatching.

@Test
public void replacesOnlyMatching() {
    RunnerApi.Pipeline p =
        Pipeline.newBuilder()
            .addAllRootTransformIds(ImmutableList.of("first", "second"))
            .setComponents(
                Components.newBuilder()
                    .putTransforms(
                        "first",
                        PTransform.newBuilder()
                            .setSpec(FunctionSpec.newBuilder().setUrn("beam:first"))
                            .build())
                    .putTransforms(
                        "second",
                        PTransform.newBuilder()
                            .setSpec(FunctionSpec.newBuilder().setUrn("beam:second"))
                            .build())
                    .putPcollections(
                        "intermediatePc",
                        PCollection.newBuilder().setUniqueName("intermediate").build())
                    .putCoders(
                        "coder",
                        Coder.newBuilder().setSpec(FunctionSpec.getDefaultInstance()).build()))
            .build();
    PTransform secondReplacement =
        PTransform.newBuilder()
            .addSubtransforms("second_sub")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn("beam:second:replacement")
                    .setPayload(
                        ByteString.copyFrom("foo-bar-baz".getBytes(StandardCharsets.UTF_8))))
            .build();
    WindowingStrategy introducedWS =
        WindowingStrategy.newBuilder()
            .setAccumulationMode(AccumulationMode.Enum.ACCUMULATING)
            .build();
    RunnerApi.Components extraComponents =
        Components.newBuilder()
            .putPcollections(
                "intermediatePc",
                PCollection.newBuilder().setUniqueName("intermediate_replacement").build())
            .putWindowingStrategies("new_ws", introducedWS)
            .putTransforms("second_sub", PTransform.getDefaultInstance())
            .build();
    Pipeline updated = ProtoOverrides.updateTransform("beam:second", p, new TestReplacer(secondReplacement, extraComponents));
    PTransform updatedSecond = updated.getComponents().getTransformsOrThrow("second");
    assertThat(updatedSecond, equalTo(secondReplacement));
    assertThat(updated.getComponents().getWindowingStrategiesOrThrow("new_ws"), equalTo(introducedWS));
    assertThat(updated.getComponents().getTransformsOrThrow("second_sub"), equalTo(PTransform.getDefaultInstance()));
    // TODO: This might not be appropriate. Merging in the other direction might force that callers
    // are well behaved.
    assertThat(updated.getComponents().getPcollectionsOrThrow("intermediatePc").getUniqueName(), equalTo("intermediate_replacement"));
    // Assert that the untouched components are unchanged.
    assertThat(updated.getComponents().getTransformsOrThrow("first"), equalTo(p.getComponents().getTransformsOrThrow("first")));
    assertThat(updated.getComponents().getCodersOrThrow("coder"), equalTo(p.getComponents().getCodersOrThrow("coder")));
    assertThat(updated.getRootTransformIdsList(), equalTo(p.getRootTransformIdsList()));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) WindowingStrategy(org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) Test(org.junit.Test)
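The TestReplacer passed to ProtoOverrides.updateTransform is referenced but not shown above. A minimal sketch of such a replacer, assuming the ProtoOverrides.TransformReplacement interface exposes a single getReplacement(transformId, existingComponents) method returning a RunnerApi.MessageWithComponents (check the actual interface in org.apache.beam.runners.core.construction before relying on this):

// Hypothetical replacer mirroring what the TestReplacer above appears to do:
// ignore the existing components and hand back a fixed PTransform plus the
// extra components to merge in. Interface and message names are assumptions.
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.ProtoOverrides;

class FixedReplacer implements ProtoOverrides.TransformReplacement {
    private final RunnerApi.PTransform replacement;
    private final RunnerApi.Components extraComponents;

    FixedReplacer(RunnerApi.PTransform replacement, RunnerApi.Components extraComponents) {
        this.replacement = replacement;
        this.extraComponents = extraComponents;
    }

    @Override
    public RunnerApi.MessageWithComponents getReplacement(
            String transformId, RunnerApi.ComponentsOrBuilder existingComponents) {
        // Always return the fixed replacement and extra components, regardless of
        // which matching transform the override machinery is currently visiting.
        return RunnerApi.MessageWithComponents.newBuilder()
            .setPtransform(replacement)
            .setComponents(extraComponents)
            .build();
    }
}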

Example 12 with WindowingStrategy

Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by apache.

The class FlinkBatchPortablePipelineTranslator, method translateGroupByKey.

private static <K, V> void translateGroupByKey(PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
    RunnerApi.Components components = pipeline.getComponents();
    String inputPCollectionId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values());
    PCollectionNode inputCollection = PipelineNode.pCollection(inputPCollectionId, components.getPcollectionsOrThrow(inputPCollectionId));
    DataSet<WindowedValue<KV<K, V>>> inputDataSet = context.getDataSetOrThrow(inputPCollectionId);
    RunnerApi.WindowingStrategy windowingStrategyProto = pipeline.getComponents().getWindowingStrategiesOrThrow(pipeline.getComponents().getPcollectionsOrThrow(inputPCollectionId).getWindowingStrategyId());
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(pipeline.getComponents());
    WindowingStrategy<Object, BoundedWindow> windowingStrategy;
    try {
        windowingStrategy = (WindowingStrategy<Object, BoundedWindow>) WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), e);
    }
    WindowedValueCoder<KV<K, V>> inputCoder;
    try {
        inputCoder = (WindowedValueCoder) WireCoders.instantiateRunnerWireCoder(inputCollection, pipeline.getComponents());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) inputCoder.getValueCoder();
    Concatenate<V> combineFn = new Concatenate<>();
    Coder<List<V>> accumulatorCoder = combineFn.getAccumulatorCoder(CoderRegistry.createDefault(), inputElementCoder.getValueCoder());
    Coder<WindowedValue<KV<K, List<V>>>> outputCoder = WindowedValue.getFullCoder(KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder), windowingStrategy.getWindowFn().windowCoder());
    TypeInformation<WindowedValue<KV<K, List<V>>>> partialReduceTypeInfo = new CoderTypeInformation<>(outputCoder, context.getPipelineOptions());
    Grouping<WindowedValue<KV<K, V>>> inputGrouping = inputDataSet.groupBy(new KvKeySelector<>(inputElementCoder.getKeyCoder()));
    FlinkPartialReduceFunction<K, V, List<V>, ?> partialReduceFunction = new FlinkPartialReduceFunction<>(combineFn, windowingStrategy, Collections.emptyMap(), context.getPipelineOptions());
    FlinkReduceFunction<K, List<V>, List<V>, ?> reduceFunction = new FlinkReduceFunction<>(combineFn, windowingStrategy, Collections.emptyMap(), context.getPipelineOptions());
    // Partially GroupReduce the values into the intermediate format AccumT (combine)
    GroupCombineOperator<WindowedValue<KV<K, V>>, WindowedValue<KV<K, List<V>>>> groupCombine = new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getTransform().getUniqueName());
    Grouping<WindowedValue<KV<K, List<V>>>> intermediateGrouping = groupCombine.groupBy(new KvKeySelector<>(inputElementCoder.getKeyCoder()));
    // Fully reduce the values and create output format VO
    GroupReduceOperator<WindowedValue<KV<K, List<V>>>, WindowedValue<KV<K, List<V>>>> outputDataSet = new GroupReduceOperator<>(intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getTransform().getUniqueName());
    context.addDataSet(Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), outputDataSet);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) FlinkReduceFunction(org.apache.beam.runners.flink.translation.functions.FlinkReduceFunction) List(java.util.List) GroupCombineOperator(org.apache.flink.api.java.operators.GroupCombineOperator) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) FlinkPartialReduceFunction(org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) IOException(java.io.IOException) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) GroupReduceOperator(org.apache.flink.api.java.operators.GroupReduceOperator) Concatenate(org.apache.beam.runners.core.Concatenate) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)
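The lookup-and-hydrate step at the top of this method (strategy id from the PCollection proto, strategy proto from the components, then WindowingStrategyTranslation.fromProto) recurs across these translators; the Aggregations section below also lists a PipelineTranslatorUtils.getWindowingStrategy helper that presumably packages the same step. A self-contained sketch of that pattern, using only calls shown above (the class and method names here are illustrative, and import paths not listed in the snippet's "Also used" line are assumptions):

// Illustrative helper: resolve and hydrate the windowing strategy of a PCollection id.
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.RehydratedComponents;
import org.apache.beam.runners.core.construction.WindowingStrategyTranslation;
import org.apache.beam.sdk.values.WindowingStrategy;
import org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException;

final class WindowingStrategies {
    private WindowingStrategies() {}

    static WindowingStrategy<?, ?> resolveWindowingStrategy(
            RunnerApi.Components components, String pCollectionId) {
        // The PCollection proto carries only the id of its windowing strategy;
        // the strategy proto itself lives in the pipeline components.
        RunnerApi.WindowingStrategy proto =
            components.getWindowingStrategiesOrThrow(
                components.getPcollectionsOrThrow(pCollectionId).getWindowingStrategyId());
        try {
            return WindowingStrategyTranslation.fromProto(
                proto, RehydratedComponents.forComponents(components));
        } catch (InvalidProtocolBufferException e) {
            throw new IllegalStateException(
                String.format("Unable to hydrate windowing strategy %s.", proto), e);
        }
    }
}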

Example 13 with WindowingStrategy

Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by apache.

The class FlinkStreamingPortablePipelineTranslator, method getSideInputIdToPCollectionViewMap.

private static LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> getSideInputIdToPCollectionViewMap(RunnerApi.ExecutableStagePayload stagePayload, RunnerApi.Components components) {
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components);
    LinkedHashMap<RunnerApi.ExecutableStagePayload.SideInputId, PCollectionView<?>> sideInputs = new LinkedHashMap<>();
    // for PCollectionView compatibility, not used to transform materialization
    ViewFn<Iterable<WindowedValue<?>>, ?> viewFn =
        (ViewFn)
            new PCollectionViews.MultimapViewFn<>(
                (PCollectionViews.TypeDescriptorSupplier<Iterable<WindowedValue<Void>>>)
                    () -> TypeDescriptors.iterables(new TypeDescriptor<WindowedValue<Void>>() {}),
                (PCollectionViews.TypeDescriptorSupplier<Void>) TypeDescriptors::voids);
    for (RunnerApi.ExecutableStagePayload.SideInputId sideInputId : stagePayload.getSideInputsList()) {
        // TODO: local name is unique as long as only one transform with side input can be within a
        // stage
        String sideInputTag = sideInputId.getLocalName();
        String collectionId = components.getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(sideInputId.getLocalName());
        RunnerApi.WindowingStrategy windowingStrategyProto = components.getWindowingStrategiesOrThrow(components.getPcollectionsOrThrow(collectionId).getWindowingStrategyId());
        final WindowingStrategy<?, ?> windowingStrategy;
        try {
            windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
        } catch (InvalidProtocolBufferException e) {
            throw new IllegalStateException(String.format("Unable to hydrate side input windowing strategy %s.", windowingStrategyProto), e);
        }
        Coder<WindowedValue<Object>> coder = instantiateCoder(collectionId, components);
        // side input materialization via GBK (T -> Iterable<T>)
        WindowedValueCoder wvCoder = (WindowedValueCoder) coder;
        coder = wvCoder.withValueCoder(IterableCoder.of(wvCoder.getValueCoder()));
        sideInputs.put(
            sideInputId,
            new RunnerPCollectionView<>(
                null,
                new TupleTag<>(sideInputTag),
                viewFn,
                // TODO: support custom mapping fn
                windowingStrategy.getWindowFn().getDefaultWindowMappingFn(),
                windowingStrategy,
                coder));
    }
    return sideInputs;
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) LinkedHashMap(java.util.LinkedHashMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PCollectionViews(org.apache.beam.sdk.values.PCollectionViews) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)
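instantiateCoder is a helper of this translator that is referenced here but not shown. Judging from the batch translator in Example 12, it presumably wraps WireCoders.instantiateRunnerWireCoder; a hypothetical reconstruction under that assumption (the WireCoders import path is also assumed):

// Hypothetical reconstruction of instantiateCoder; verify against the actual
// helper in FlinkStreamingPortablePipelineTranslator before relying on it.
import java.io.IOException;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.graph.PipelineNode;
import org.apache.beam.runners.fnexecution.wire.WireCoders;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.util.WindowedValue;

final class RunnerCoders {
    private RunnerCoders() {}

    @SuppressWarnings("unchecked")
    static <T> Coder<WindowedValue<T>> instantiateCoder(
            String collectionId, RunnerApi.Components components) {
        // Build a PCollectionNode for the collection, then ask WireCoders for the
        // runner-side wire coder, exactly as Example 12 does inline.
        PipelineNode.PCollectionNode collectionNode =
            PipelineNode.pCollection(collectionId, components.getPcollectionsOrThrow(collectionId));
        try {
            return (Coder<WindowedValue<T>>)
                (Coder<?>) WireCoders.instantiateRunnerWireCoder(collectionNode, components);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}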

Example 14 with WindowingStrategy

Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by apache.

The class FlinkStreamingPortablePipelineTranslator, method translateGroupByKey.

private <K, V> void translateGroupByKey(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    RunnerApi.PTransform pTransform = pipeline.getComponents().getTransformsOrThrow(id);
    String inputPCollectionId = Iterables.getOnlyElement(pTransform.getInputsMap().values());
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(pipeline.getComponents());
    RunnerApi.WindowingStrategy windowingStrategyProto = pipeline.getComponents().getWindowingStrategiesOrThrow(pipeline.getComponents().getPcollectionsOrThrow(inputPCollectionId).getWindowingStrategyId());
    WindowingStrategy<?, ?> windowingStrategy;
    try {
        windowingStrategy = WindowingStrategyTranslation.fromProto(windowingStrategyProto, rehydratedComponents);
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalStateException(String.format("Unable to hydrate GroupByKey windowing strategy %s.", windowingStrategyProto), e);
    }
    WindowedValueCoder<KV<K, V>> windowedInputCoder = (WindowedValueCoder) instantiateCoder(inputPCollectionId, pipeline.getComponents());
    DataStream<WindowedValue<KV<K, V>>> inputDataStream = context.getDataStreamOrThrow(inputPCollectionId);
    SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> outputDataStream = addGBK(inputDataStream, windowingStrategy, windowedInputCoder, pTransform.getUniqueName(), context);
    // Assign a unique but consistent id to re-map operator state
    outputDataStream.uid(pTransform.getUniqueName());
    context.addDataStream(Iterables.getOnlyElement(pTransform.getOutputsMap().values()), outputDataStream);
}
Also used : InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) KV(org.apache.beam.sdk.values.KV) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents)
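addGBK is not shown here; what the snippet does pin down is that the output element type becomes KV&lt;K, Iterable&lt;V&gt;&gt; under the same windowing strategy. A sketch of the coder that output type implies, assembled only from calls already shown in Examples 12 and 13 (this is an illustration, not the body of addGBK):

// Illustrative only: derive the coder for WindowedValue<KV<K, Iterable<V>>> from the
// windowed input coder and the hydrated windowing strategy.
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.IterableCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.WindowingStrategy;

final class GbkCoders {
    private GbkCoders() {}

    static <K, V> Coder<WindowedValue<KV<K, Iterable<V>>>> gbkOutputCoder(
            WindowedValueCoder<KV<K, V>> windowedInputCoder,
            WindowingStrategy<?, ?> windowingStrategy) {
        // Reuse the key coder, wrap the value coder in IterableCoder, and keep the
        // window coder dictated by the windowing strategy.
        KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
        return WindowedValue.getFullCoder(
            KvCoder.of(kvCoder.getKeyCoder(), IterableCoder.of(kvCoder.getValueCoder())),
            windowingStrategy.getWindowFn().windowCoder());
    }
}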

Example 15 with WindowingStrategy

Use of org.apache.beam.model.pipeline.v1.RunnerApi.WindowingStrategy in project beam by apache.

The class ExecutableStageDoFnOperatorTest, method getOperator.

@SuppressWarnings("rawtypes")
private ExecutableStageDoFnOperator getOperator(TupleTag<Integer> mainOutput, List<TupleTag<?>> additionalOutputs, DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory, WindowingStrategy windowingStrategy, @Nullable Coder keyCoder, Coder windowedInputCoder) {
    FlinkExecutableStageContextFactory contextFactory = Mockito.mock(FlinkExecutableStageContextFactory.class);
    when(contextFactory.get(any())).thenReturn(stageContext);
    final ExecutableStagePayload stagePayload;
    if (keyCoder != null) {
        stagePayload = this.stagePayloadWithUserState;
    } else {
        stagePayload = this.stagePayload;
    }
    ExecutableStageDoFnOperator<Integer, Integer> operator =
        new ExecutableStageDoFnOperator<>(
            "transform",
            windowedInputCoder,
            Collections.emptyMap(),
            mainOutput,
            additionalOutputs,
            outputManagerFactory,
            Collections.emptyMap(), /* sideInputTagMapping */
            Collections.emptyList(), /* sideInputs */
            Collections.emptyMap(), /* sideInputId mapping */
            FlinkPipelineOptions.defaults(),
            stagePayload,
            jobInfo,
            contextFactory,
            createOutputMap(mainOutput, additionalOutputs),
            windowingStrategy,
            keyCoder,
            keyCoder != null ? new KvToByteBufferKeySelector<>(keyCoder, null) : null);
    Whitebox.setInternalState(operator, "stateRequestHandler", stateRequestHandler);
    return operator;
}
Also used : ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) FlinkExecutableStageContextFactory(org.apache.beam.runners.flink.translation.functions.FlinkExecutableStageContextFactory)
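A hedged usage sketch for this factory method; the output-manager mock, the coders, and the windowing strategy chosen below are illustrative stand-ins, not what the actual tests in ExecutableStageDoFnOperatorTest pass:

// Hypothetical caller inside the same test class. The real tests build concrete
// DoFnOperator.MultiOutputOutputManagerFactory instances and stage-specific coders.
@Test
public void buildsOperatorWithoutUserState() {
    TupleTag<Integer> mainOutput = new TupleTag<>("main");
    @SuppressWarnings("unchecked")
    DoFnOperator.MultiOutputOutputManagerFactory<Integer> outputManagerFactory =
        Mockito.mock(DoFnOperator.MultiOutputOutputManagerFactory.class);
    ExecutableStageDoFnOperator<Integer, Integer> operator =
        getOperator(
            mainOutput,
            Collections.emptyList(),
            outputManagerFactory,
            WindowingStrategy.globalDefault(),
            /* keyCoder= */ null,
            WindowedValue.getValueOnlyCoder(VarIntCoder.of()));
    assertNotNull(operator);
}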

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 15
WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy): 9
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 7
RehydratedComponents (org.apache.beam.runners.core.construction.RehydratedComponents): 6
KvCoder (org.apache.beam.sdk.coders.KvCoder): 6
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 6
KV (org.apache.beam.sdk.values.KV): 6
InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException): 6
IOException (java.io.IOException): 5
PCollectionView (org.apache.beam.sdk.values.PCollectionView): 5
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 5
List (java.util.List): 4
Map (java.util.Map): 4
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 4
WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder): 4
Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 3
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 3
PipelineTranslatorUtils.getWindowingStrategy (org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.getWindowingStrategy): 3
Coder (org.apache.beam.sdk.coders.Coder): 3
TupleTag (org.apache.beam.sdk.values.TupleTag): 3