use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
the class PCollectionViewTranslatorBatch method translateNode.
/**
 * Translates a {@code View.CreatePCollectionView} by registering a re-encoded copy of the
 * transform's input as a side-input data set on the translation context.
 *
 * <p>The view is resolved once from the currently applied transform. Its materialization URN
 * selects which function pair is applied around the {@code allGather()} step:
 *
 * <ul>
 *   <li>multimap: {@code MapToTupleFunction} before and {@code ByteToWindowFunctionPrimitive}
 *       after, both built from the key coder and a windowed-value coder for the KV value;
 *   <li>iterable: {@code ElemToBytesFunction} before and {@code ByteToElemFunction} after, both
 *       built from a windowed-value coder for the whole element.
 * </ul>
 *
 * <p>The resulting data set is stored under the id of the view's internal tag.
 *
 * @param transform the view-creation transform being translated
 * @param context the batch translation context that supplies the input and receives the side
 *     input data set
 * @throws RuntimeException wrapping the {@link IOException} raised while reading the view from
 *     the applied transform
 * @throws UnsupportedOperationException if the materialization URN is neither the multimap nor
 *     the iterable URN
 */
@Override
public void translateNode(
    View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
  // Look the input up once and reuse it for both the data set and the coder/windowing info.
  PCollection<ElemT> inputPCol = context.getInput(transform);
  BatchTSet<WindowedValue<ElemT>> inputDataSet = context.getInputDataSet(inputPCol);

  @SuppressWarnings("unchecked")
  AppliedPTransform<
          PCollection<ElemT>,
          PCollection<ElemT>,
          PTransform<PCollection<ElemT>, PCollection<ElemT>>>
      application =
          (AppliedPTransform<
                  PCollection<ElemT>,
                  PCollection<ElemT>,
                  PTransform<PCollection<ElemT>, PCollection<ElemT>>>)
              context.getCurrentTransform();

  final Coder coder = inputPCol.getCoder();
  WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
  WindowFn windowFn = windowingStrategy.getWindowFn();

  // The view is resolved exactly once; every branch below reuses this instance.
  final org.apache.beam.sdk.values.PCollectionView<ViewT> view;
  try {
    view = CreatePCollectionViewTranslation.getView(application);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  final String urn = view.getViewFn().getMaterialization().getUrn();
  switch (urn) {
    case Materializations.MULTIMAP_MATERIALIZATION_URN:
      {
        // A multimap view requires KV elements: the key and the windowed value are encoded
        // with separate coders.
        KvCoder kvCoder = (KvCoder<?, ?>) coder;
        final Coder keyCoder = kvCoder.getKeyCoder();
        final WindowedValue.WindowedValueCoder kvwvCoder =
            WindowedValue.FullWindowedValueCoder.of(
                kvCoder.getValueCoder(), windowFn.windowCoder());
        BatchTSet<WindowedValue<ElemT>> multimapMaterialization =
            inputDataSet
                .direct()
                .map(new MapToTupleFunction<>(keyCoder, kvwvCoder))
                .allGather()
                .map(new ByteToWindowFunctionPrimitive(keyCoder, kvwvCoder));
        context.setSideInputDataSet(view.getTagInternal().getId(), multimapMaterialization);
        break;
      }
    case Materializations.ITERABLE_MATERIALIZATION_URN:
      {
        // An iterable view encodes each element as a whole with a single windowed-value coder.
        final WindowedValue.WindowedValueCoder wvCoder =
            WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());
        BatchTSet<WindowedValue<ElemT>> iterableMaterialization =
            inputDataSet
                .direct()
                .map(new ElemToBytesFunction<>(wvCoder))
                .allGather()
                .map(new ByteToElemFunction(wvCoder));
        context.setSideInputDataSet(view.getTagInternal().getId(), iterableMaterialization);
        break;
      }
    default:
      throw new UnsupportedOperationException("Unknown side input materialization " + urn);
  }
}
use of org.apache.beam.sdk.coders.KvCoder in project twister2 by DSC-SPIDAL.
the class Twister2SideInputReader method getSideInput.
/**
 * Returns the value of the given side-input view for a single window.
 *
 * <p>The side-input partition registered under the view's internal tag id is scanned once. Only
 * the values whose windows equal {@code window} are kept, because that is the only window whose
 * view is returned; no per-window views are built for the other windows in the partition. A
 * value is added once per matching window it reports.
 *
 * <p>If no value belongs to {@code window} (or the view function yields {@code null} for it),
 * the view function is applied to {@code EMPTY_MULTMAP_VIEW} instead.
 *
 * @param view the side-input view to evaluate; its view function is treated as a multimap view
 *     function, and its coder is cast to {@code KvCoder} when there are values to index
 * @param window the window whose side-input value is requested
 * @param <T> the type produced by the view function
 * @return the view function's result for {@code window}, or its result on the empty multimap
 *     view when the window has no data
 */
private <T> T getSideInput(PCollectionView<T> view, BoundedWindow window) {
  DataPartition<?> sideInput = runtimeContext.getInput(view.getTagInternal().getId());
  DataPartitionConsumer<?> dataPartitionConsumer = sideInput.getConsumer();

  // Keep only the values that belong to the requested window, in arrival order.
  List<KV<?, ?>> valuesInWindow = new ArrayList<>();
  while (dataPartitionConsumer.hasNext()) {
    WindowedValue<KV<?, ?>> winValue = (WindowedValue<KV<?, ?>>) dataPartitionConsumer.next();
    for (BoundedWindow elementWindow : winValue.getWindows()) {
      if (elementWindow.equals(window)) {
        valuesInWindow.add(winValue.getValue());
      }
    }
  }

  ViewFn<Materializations.MultimapView, T> viewFn =
      (ViewFn<Materializations.MultimapView, T>) view.getViewFn();

  T result = null;
  if (!valuesInWindow.isEmpty()) {
    // The key coder is only needed (and the coder only cast) when there is data to index.
    Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
    result =
        (T)
            viewFn.apply(
                InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) valuesInWindow));
  }
  if (result == null) {
    result = viewFn.apply(EMPTY_MULTMAP_VIEW);
  }
  return result;
}
use of org.apache.beam.sdk.coders.KvCoder in project twister2 by DSC-SPIDAL.
the class GroupByKeyTranslatorBatch method translateNode.
/**
 * Translates a {@code GroupByKey} into a keyed gather followed by a per-window grouping step.
 *
 * <p>The input data set is first mapped to a {@code KeyedTSet<byte[], byte[]>} with
 * {@code MapToTupleFunction}, using the input's key coder and a full windowed-value coder for
 * the value. After {@code keyedGather()}, {@code ByteToWindowFunction} turns the gathered
 * entries back into {@code KV<K, Iterable<WindowedValue<V>>>}, and {@code GroupByWindowFunction}
 * (with a buffering {@code SystemReduceFn}) produces the final windowed
 * {@code KV<K, Iterable<V>>} values. The result is registered as the output data set of the
 * transform.
 *
 * @param transform the {@code GroupByKey} transform being translated
 * @param context the batch translation context that supplies the input and receives the output
 *     data set
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);

  // Cast the input coder once; both the key and the value coder are derived from it.
  final KvCoder<K, V> coder = (KvCoder<K, V>) input.getCoder();
  Coder<K> inputKeyCoder = coder.getKeyCoder();

  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // todo add support for a partition function to be specified, this would use
  // todo keyedPartition function instead of KeyedGather
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>> groupedbyKeyTset =
      keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>> outputTset =
      groupedbyKeyTset
          .direct()
          .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
              new GroupByWindowFunction(
                  windowingStrategy, SystemReduceFn.buffering(coder.getValueCoder())));

  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
use of org.apache.beam.sdk.coders.KvCoder in project proxima-platform by O2-Czech-Republic.
the class BeamStream method integratePerKey.
/**
 * Builds a descendant stream of key/value pairs by running {@code IntegrateDoFn} over the
 * elements of this stream.
 *
 * <p>The input collection is re-windowed with this stream's windowing strategy when its own
 * strategy differs. Each element is mapped to a {@code KV} using the key and value extractors
 * and passed through a {@code ParDo} of {@code IntegrateDoFn} created from the combiner, the
 * initial-value closure and the KV coder. The output is moved into {@code GlobalWindows} unless
 * it already uses the global default windowing strategy.
 *
 * @param name optional name; used as the prefix of the {@code ".mapToKv"} step name
 * @param keyExtractor closure extracting the key from an element
 * @param valueExtractor closure extracting the value from an element
 * @param initialValue closure handed to {@code IntegrateDoFn} as the initial value
 * @param combiner closure handed to {@code IntegrateDoFn} as the combiner
 * @param <K> key type
 * @param <V> value type
 * @return stream of pairs emitted by {@code IntegrateDoFn}
 */
@Override
public <K, V> Stream<Pair<K, V>> integratePerKey(
    @Nullable String name,
    Closure<K> keyExtractor,
    Closure<V> valueExtractor,
    Closure<V> initialValue,
    Closure<V> combiner) {

  final Closure<K> keyFn = dehydrate(keyExtractor);
  final Closure<V> valueFn = dehydrate(valueExtractor);
  final Closure<V> combineFn = dehydrate(combiner);
  final Closure<V> initialFn = dehydrate(initialValue);

  return descendant(
      pipeline -> {
        final PCollection<T> materialized = collection.materialize(pipeline);
        final Coder<K> keyCoder = coderOf(pipeline, keyFn);
        final Coder<V> valueCoder = coderOf(pipeline, valueFn);

        // Align the input with this stream's windowing strategy if it is not already.
        final PCollection<T> windowed;
        if (materialized.getWindowingStrategy().equals(windowingStrategy)) {
          windowed = materialized;
        } else {
          @SuppressWarnings("unchecked")
          WindowingStrategy<T, ?> strategy = (WindowingStrategy<T, ?>) windowingStrategy;
          windowed = materialized.apply(withWindowingStrategy(strategy));
        }

        final PCollection<KV<K, V>> keyed =
            MapElements.named(withSuffix(name, ".mapToKv"))
                .of(windowed)
                .using(e -> KV.of(keyFn.call(e), valueFn.call(e)))
                .output()
                .setCoder(KvCoder.of(keyCoder, valueCoder));
        final KvCoder<K, V> kvCoder = (KvCoder<K, V>) keyed.getCoder();

        final PCollection<Pair<K, V>> integrated =
            keyed
                .apply(ParDo.of(IntegrateDoFn.of(combineFn, initialFn, kvCoder)))
                .setCoder(PairCoder.of(keyCoder, valueCoder));

        // The result is always handed back in the global window.
        final PCollection<Pair<K, V>> result;
        if (integrated.getWindowingStrategy().equals(WindowingStrategy.globalDefault())) {
          result = integrated;
        } else {
          result = integrated.apply(Window.into(new GlobalWindows()));
        }
        return result;
      });
}
Aggregations