Search in sources :

Example 1 with WorkItemKeySelector

Use of org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector in project beam by apache.

From the class FlinkStreamingPortablePipelineTranslator, method addGBK.

/**
 * Appends a group-by-key stage to {@code inputDataStream}.
 *
 * <p>The input elements are first converted to keyed work items by a {@code ToKeyedWorkItem} flat
 * map, the resulting stream is keyed with a {@link WorkItemKeySelector}, and a
 * {@link WindowDoFnOperator} driven by a buffering {@link SystemReduceFn} is applied to the keyed
 * stream.
 *
 * @param inputDataStream stream of windowed key/value pairs to group
 * @param windowingStrategy supplies the window coder and is handed to the operator
 * @param windowedInputCoder coder of the input; its value coder is cast to a {@link KvCoder}
 * @param operatorName name passed to the operator and to the resulting stream transform
 * @param context source of the pipeline options used throughout
 * @return the stream produced by the transform, typed as windowed {@code KV<K, Iterable<V>>}
 */
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(
        DataStream<WindowedValue<KV<K, V>>> inputDataStream,
        WindowingStrategy<?, ?> windowingStrategy,
        WindowedValueCoder<KV<K, V>> windowedInputCoder,
        String operatorName,
        StreamingTranslationContext context) {
    // Pull the component coders out once; every coder built below is assembled from these.
    KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
    Coder<K> keyCoder = kvCoder.getKeyCoder();
    Coder<V> valueCoder = kvCoder.getValueCoder();
    Coder<? extends BoundedWindow> windowCoder = windowingStrategy.getWindowFn().windowCoder();

    // Coder and type information for the intermediate keyed-work-item stream.
    SingletonKeyedWorkItemCoder<K, V> itemCoder =
            SingletonKeyedWorkItemCoder.of(keyCoder, valueCoder, windowCoder);
    WindowedValue.FullWindowedValueCoder<KeyedWorkItem<K, V>> windowedItemCoder =
            WindowedValue.getFullCoder(itemCoder, windowCoder);
    CoderTypeInformation<WindowedValue<KeyedWorkItem<K, V>>> itemTypeInfo =
            new CoderTypeInformation<>(windowedItemCoder, context.getPipelineOptions());

    // Wrap each element in a work item, then partition the stream by the encoded key.
    DataStream<WindowedValue<KeyedWorkItem<K, V>>> itemStream =
            inputDataStream
                    .flatMap(new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(context.getPipelineOptions()))
                    .returns(itemTypeInfo)
                    .name("ToKeyedWorkItem");
    WorkItemKeySelector<K, V> keySelector =
            new WorkItemKeySelector<>(
                    keyCoder, new SerializablePipelineOptions(context.getPipelineOptions()));
    KeyedStream<WindowedValue<KeyedWorkItem<K, V>>, ByteBuffer> keyedItemStream =
            itemStream.keyBy(keySelector);

    // The reduce function buffers the values of a key; the output pairs the key with an Iterable.
    SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> bufferingFn =
            SystemReduceFn.buffering(valueCoder);
    Coder<Iterable<V>> groupedValuesCoder = IterableCoder.of(valueCoder);
    Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder =
            WindowedValue.getFullCoder(KvCoder.of(keyCoder, groupedValuesCoder), windowCoder);
    TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo =
            new CoderTypeInformation<>(outputCoder, context.getPipelineOptions());

    TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");
    DoFnOperator.MultiOutputOutputManagerFactory<KV<K, Iterable<V>>> outputManagerFactory =
            new DoFnOperator.MultiOutputOutputManagerFactory<>(
                    mainTag,
                    outputCoder,
                    new SerializablePipelineOptions(context.getPipelineOptions()));

    WindowDoFnOperator<K, V, Iterable<V>> gbkOperator =
            new WindowDoFnOperator<>(
                    bufferingFn,
                    operatorName,
                    windowedItemCoder,
                    mainTag,
                    Collections.emptyList(),
                    outputManagerFactory,
                    windowingStrategy,
                    new HashMap<>(), // side-input mapping: empty
                    Collections.emptyList(), // side inputs: none
                    context.getPipelineOptions(),
                    keyCoder,
                    keySelector);

    return keyedItemStream.transform(operatorName, outputTypeInfo, gbkOperator);
}
Also used : WorkItemKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector) TupleTag(org.apache.beam.sdk.values.TupleTag) WindowDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) WindowDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) ByteBuffer(java.nio.ByteBuffer)

Aggregations

ByteBuffer (java.nio.ByteBuffer)1 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)1 SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions)1 CoderTypeInformation (org.apache.beam.runners.flink.translation.types.CoderTypeInformation)1 DoFnOperator (org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator)1 ExecutableStageDoFnOperator (org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator)1 WindowDoFnOperator (org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator)1 WorkItemKeySelector (org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector)1 KvCoder (org.apache.beam.sdk.coders.KvCoder)1 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)1 WindowedValue (org.apache.beam.sdk.util.WindowedValue)1 KV (org.apache.beam.sdk.values.KV)1 TupleTag (org.apache.beam.sdk.values.TupleTag)1