use of org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector in project beam by apache.
the class FlinkStreamingPortablePipelineTranslator method addGBK.
private <K, V> SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<V>>>> addGBK(DataStream<WindowedValue<KV<K, V>>> inputDataStream, WindowingStrategy<?, ?> windowingStrategy, WindowedValueCoder<KV<K, V>> windowedInputCoder, String operatorName, StreamingTranslationContext context) {
KvCoder<K, V> inputElementCoder = (KvCoder<K, V>) windowedInputCoder.getValueCoder();
SingletonKeyedWorkItemCoder<K, V> workItemCoder = SingletonKeyedWorkItemCoder.of(inputElementCoder.getKeyCoder(), inputElementCoder.getValueCoder(), windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<KeyedWorkItem<K, V>> windowedWorkItemCoder = WindowedValue.getFullCoder(workItemCoder, windowingStrategy.getWindowFn().windowCoder());
CoderTypeInformation<WindowedValue<KeyedWorkItem<K, V>>> workItemTypeInfo = new CoderTypeInformation<>(windowedWorkItemCoder, context.getPipelineOptions());
DataStream<WindowedValue<KeyedWorkItem<K, V>>> workItemStream = inputDataStream.flatMap(new FlinkStreamingTransformTranslators.ToKeyedWorkItem<>(context.getPipelineOptions())).returns(workItemTypeInfo).name("ToKeyedWorkItem");
WorkItemKeySelector<K, V> keySelector = new WorkItemKeySelector<>(inputElementCoder.getKeyCoder(), new SerializablePipelineOptions(context.getPipelineOptions()));
KeyedStream<WindowedValue<KeyedWorkItem<K, V>>, ByteBuffer> keyedWorkItemStream = workItemStream.keyBy(keySelector);
SystemReduceFn<K, V, Iterable<V>, Iterable<V>, BoundedWindow> reduceFn = SystemReduceFn.buffering(inputElementCoder.getValueCoder());
Coder<Iterable<V>> accumulatorCoder = IterableCoder.of(inputElementCoder.getValueCoder());
Coder<WindowedValue<KV<K, Iterable<V>>>> outputCoder = WindowedValue.getFullCoder(KvCoder.of(inputElementCoder.getKeyCoder(), accumulatorCoder), windowingStrategy.getWindowFn().windowCoder());
TypeInformation<WindowedValue<KV<K, Iterable<V>>>> outputTypeInfo = new CoderTypeInformation<>(outputCoder, context.getPipelineOptions());
TupleTag<KV<K, Iterable<V>>> mainTag = new TupleTag<>("main output");
WindowDoFnOperator<K, V, Iterable<V>> doFnOperator = new WindowDoFnOperator<>(reduceFn, operatorName, windowedWorkItemCoder, mainTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(mainTag, outputCoder, new SerializablePipelineOptions(context.getPipelineOptions())), windowingStrategy, new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
context.getPipelineOptions(), inputElementCoder.getKeyCoder(), keySelector);
return keyedWorkItemStream.transform(operatorName, outputTypeInfo, doFnOperator);
}
Aggregations