Use of org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunction in project beam by apache.
Class GroupByKeyTranslatorBatch, method translateNode.
/**
 * Translates a {@link GroupByKey} into Twister2 batch TSet operations.
 *
 * <p>The input is first mapped to {@code byte[]} key/value tuples, gathered by key, mapped back
 * through {@code ByteToWindowFunction}, and finally passed through {@code GroupByWindowFunction}
 * before being registered on the context as the data set of the transform's output.
 *
 * @param transform the group-by-key transform being translated
 * @param context the batch translation context supplying inputs, outputs and pipeline options
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> source = context.getInput(transform);

  // Windowing information of the input collection.
  WindowingStrategy strategy = source.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFunction =
      (WindowFn<KV<K, V>, BoundedWindow>) strategy.getWindowFn();

  // Coders for the key, the value and the windowed value.
  final KvCoder<K, V> kvCoder = (KvCoder<K, V>) source.getCoder();
  Coder<K> keyCoder = kvCoder.getKeyCoder();
  Coder<V> valueCoder = kvCoder.getValueCoder();
  final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
      WindowedValue.FullWindowedValueCoder.of(valueCoder, windowFunction.windowCoder());

  // Map every element to a (byte[] key, byte[] value) tuple.
  BatchTSetImpl<WindowedValue<KV<K, V>>> sourceTSet = context.getInputDataSet(source);
  MapToTupleFunction<K, V> toByteTuples =
      new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder);
  KeyedTSet<byte[], byte[]> byteTuples = sourceTSet.mapToTuple(toByteTuples);

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      perKey =
          byteTuples.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

  // --- now group also by window.
  SystemReduceFnBuffering bufferingReduceFn = new SystemReduceFnBuffering(valueCoder);
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      perKeyAndWindow =
          perKey
              .direct()
              .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                  new GroupByWindowFunction(strategy, bufferingReduceFn, context.getOptions()));

  // Register the result as the data set backing the transform's output.
  PCollection result = context.getOutput(transform);
  context.setOutputDataSet(result, perKeyAndWindow);
}
Use of org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunction in project twister2 by DSC-SPIDAL.
Class GroupByKeyTranslatorBatch, method translateNode.
/**
 * Translates a {@link GroupByKey} into Twister2 batch TSet operations.
 *
 * <p>The input is mapped to {@code byte[]} key/value tuples, gathered by key, mapped back through
 * {@code ByteToWindowFunction}, and then passed through {@code GroupByWindowFunction}. The
 * resulting TSet is registered on the context as the data set of the transform's output.
 *
 * @param transform the group-by-key transform being translated
 * @param context the batch translation context supplying the input and output collections
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);

  // Resolve the input coder once from the already-fetched input and derive the key coder from
  // it, instead of repeating the context lookup and the KvCoder cast.
  final KvCoder<K, V> coder = (KvCoder<K, V>) input.getCoder();
  Coder<K> inputKeyCoder = coder.getKeyCoder();

  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

  // Map every element to a (byte[] key, byte[] value) tuple.
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>> groupedbyKeyTset =
      keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>> outputTset =
      groupedbyKeyTset
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy, SystemReduceFn.buffering(coder.getValueCoder())));

  // Register the result as the data set backing the transform's output.
  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
Aggregations