Search in sources :

Example 41 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class PCollectionViewTranslatorBatch method translateNode.

/**
 * Translates a {@code View.CreatePCollectionView} transform by building a side-input data set
 * from the transform's input and registering it on the context under the view's tag id.
 *
 * <p>The shape of the side-input data set depends on the materialization URN of the view:
 * <ul>
 *   <li>multimap: the input coder is cast to {@link KvCoder}; elements are mapped with
 *       {@code MapToTupleFunction}, passed through {@code allGather()} and mapped back with
 *       {@code ByteToWindowFunctionPrimitive};
 *   <li>iterable: elements are mapped with {@code ElemToBytesFunction}, passed through
 *       {@code allGather()} and mapped back with {@code ByteToElemFunction}.
 * </ul>
 *
 * @param transform the view-creating transform being translated
 * @param context the batch translation context that supplies inputs and receives the side input
 * @throws RuntimeException if the view cannot be read from the current transform (wraps the
 *     {@link IOException})
 * @throws UnsupportedOperationException if the view uses any other materialization URN
 */
@Override
public void translateNode(View.CreatePCollectionView<ElemT, ViewT> transform, Twister2BatchTranslationContext context) {
    BatchTSet<WindowedValue<ElemT>> inputDataSet = context.getInputDataSet(context.getInput(transform));
    @SuppressWarnings("unchecked") AppliedPTransform<PCollection<ElemT>, PCollection<ElemT>, PTransform<PCollection<ElemT>, PCollection<ElemT>>> application = (AppliedPTransform<PCollection<ElemT>, PCollection<ElemT>, PTransform<PCollection<ElemT>, PCollection<ElemT>>>) context.getCurrentTransform();
    org.apache.beam.sdk.values.PCollectionView<ViewT> input;
    PCollection<ElemT> inputPCol = context.getInput(transform);
    final Coder coder = inputPCol.getCoder();
    WindowingStrategy windowingStrategy = inputPCol.getWindowingStrategy();
    WindowFn windowFn = windowingStrategy.getWindowFn();
    // The view is resolved exactly once; every branch below reuses this instance.
    try {
        input = CreatePCollectionViewTranslation.getView(application);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // Computed once so the switch and the error message are guaranteed to agree.
    final String materializationUrn = input.getViewFn().getMaterialization().getUrn();
    switch(materializationUrn) {
        case Materializations.MULTIMAP_MATERIALIZATION_URN:
            // A multimap view needs the key coder separately from the (windowed) value coder.
            KvCoder kvCoder = (KvCoder<?, ?>) coder;
            final Coder keyCoder = kvCoder.getKeyCoder();
            final WindowedValue.WindowedValueCoder kvwvCoder = WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), windowFn.windowCoder());
            BatchTSet<WindowedValue<ElemT>> multimapMaterialization = inputDataSet.direct().map(new MapToTupleFunction<>(keyCoder, kvwvCoder)).allGather().map(new ByteToWindowFunctionPrimitive(keyCoder, kvwvCoder));
            context.setSideInputDataSet(input.getTagInternal().getId(), multimapMaterialization);
            break;
        case Materializations.ITERABLE_MATERIALIZATION_URN:
            // An iterable view encodes whole elements with the input coder plus the window coder.
            final WindowedValue.WindowedValueCoder wvCoder = WindowedValue.FullWindowedValueCoder.of(coder, windowFn.windowCoder());
            BatchTSet<WindowedValue<ElemT>> iterableMaterialization = inputDataSet.direct().map(new ElemToBytesFunction<>(wvCoder)).allGather().map(new ByteToElemFunction(wvCoder));
            context.setSideInputDataSet(input.getTagInternal().getId(), iterableMaterialization);
            break;
        default:
            throw new UnsupportedOperationException("Unknown side input materialization " + materializationUrn);
    }
}
Also used : WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) WindowedValue(org.apache.beam.sdk.util.WindowedValue) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PTransform(org.apache.beam.sdk.transforms.PTransform) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteToElemFunction(org.apache.beam.runners.twister2.translators.functions.ByteToElemFunction) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) ByteToWindowFunctionPrimitive(org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunctionPrimitive)

Example 42 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project twister2 by DSC-SPIDAL.

the class Twister2SideInputReader method getSideInput.

/**
 * Builds the side-input value of {@code view} for a single {@code window}.
 *
 * <p>All windowed key/value elements are read from the runtime input registered under the
 * view's tag id. Only the elements assigned to {@code window} are retained, and one multimap
 * view is built from them. Views for other windows are never constructed, since they would be
 * discarded anyway.
 *
 * @param view the side-input view to materialize; its view function is applied to a multimap
 *     view, and its internal coder is cast to {@link KvCoder} when matching elements exist
 * @param window the window whose side-input value is requested
 * @return the view function applied to the elements of {@code window}, or applied to
 *     {@code EMPTY_MULTMAP_VIEW} when no element belongs to that window (or when the view
 *     function yields {@code null} for them)
 */
private <T> T getSideInput(PCollectionView<T> view, BoundedWindow window) {
    List<KV<?, ?>> elementsInWindow = new ArrayList<>();
    DataPartition<?> sideInput = runtimeContext.getInput(view.getTagInternal().getId());
    DataPartitionConsumer<?> dataPartitionConsumer = sideInput.getConsumer();
    while (dataPartitionConsumer.hasNext()) {
        WindowedValue<KV<?, ?>> winValue = (WindowedValue<KV<?, ?>>) dataPartitionConsumer.next();
        for (BoundedWindow tbw : winValue.getWindows()) {
            // Keep one entry per matching window occurrence; a null window matches nothing
            // and therefore falls through to the empty view below.
            if (window != null && window.equals(tbw)) {
                elementsInWindow.add(winValue.getValue());
            }
        }
    }
    ViewFn<Materializations.MultimapView, T> viewFn = (ViewFn<Materializations.MultimapView, T>) view.getViewFn();
    T result = null;
    if (!elementsInWindow.isEmpty()) {
        // The key coder is only needed (and only resolved) when there is something to index.
        Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
        result = (T) viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) elementsInWindow));
    }
    if (result == null) {
        result = viewFn.apply(EMPTY_MULTMAP_VIEW);
    }
    return result;
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) Materializations(org.apache.beam.sdk.transforms.Materializations) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 43 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project twister2 by DSC-SPIDAL.

the class GroupByKeyTranslatorBatch method translateNode.

/**
 * Translates a {@code GroupByKey} transform into a chain of TSet operations and registers the
 * result as the output data set of the transform.
 *
 * <p>The input is mapped to byte-array tuples with {@code MapToTupleFunction}, gathered by key,
 * mapped back with {@code ByteToWindowFunction}, and finally flat-mapped through
 * {@code GroupByWindowFunction} using the input's windowing strategy.
 *
 * @param transform the group-by-key transform being translated
 * @param context the batch translation context that supplies the input and receives the output
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
    PCollection<KV<K, V>> inputPCol = context.getInput(transform);
    BatchTSetImpl<WindowedValue<KV<K, V>>> inputDataSet = context.getInputDataSet(inputPCol);

    // Coders: the full KV coder, its key part, and a windowed coder over its value part.
    final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
    Coder<K> keyCoder = ((KvCoder<K, V>) inputPCol.getCoder()).getKeyCoder();
    WindowingStrategy strategy = inputPCol.getWindowingStrategy();
    WindowFn<KV<K, V>, BoundedWindow> inputWindowFn = (WindowFn<KV<K, V>, BoundedWindow>) strategy.getWindowFn();
    final WindowedValue.WindowedValueCoder<V> windowedValueCoder = WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), inputWindowFn.windowCoder());

    MapToTupleFunction<K, V> toByteTuples = new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder);
    KeyedTSet<byte[], byte[]> byteTuples = inputDataSet.mapToTuple(toByteTuples);

    // todo add support for a partition function to be specified, this would use
    // todo keyedPartition function instead of KeyedGather
    ComputeTSet<KV<K, Iterable<WindowedValue<V>>>> groupedByKey = byteTuples.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

    // Second grouping stage: split each key's values by window.
    ComputeTSet<WindowedValue<KV<K, Iterable<V>>>> groupedByKeyAndWindow = groupedByKey.direct().<WindowedValue<KV<K, Iterable<V>>>>flatmap(new GroupByWindowFunction(strategy, SystemReduceFn.buffering(kvCoder.getValueCoder())));

    PCollection outputPCol = context.getOutput(transform);
    context.setOutputDataSet(outputPCol, groupedByKeyAndWindow);
}
Also used : WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) PCollection(org.apache.beam.sdk.values.PCollection) ByteToWindowFunction(org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) GroupByWindowFunction(org.apache.beam.runners.twister2.translators.functions.GroupByWindowFunction)

Example 44 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project proxima-platform by O2-Czech-Republic.

the class BeamStream method integratePerKey.

/**
 * Creates a descendant stream of key/value pairs produced by {@code IntegrateDoFn} from this
 * stream's elements.
 *
 * <p>Each element is mapped to a {@code KV} of the extracted key and value, then passed through
 * a {@code ParDo} of {@code IntegrateDoFn} built from the combiner, the initial value and the
 * KV coder. The input is first re-windowed to this stream's windowing strategy if it differs,
 * and the result is put into global windows unless it already uses the global default strategy.
 *
 * @param name optional name; used with the {@code ".mapToKv"} suffix for the mapping step
 * @param keyExtractor closure extracting the key from an element
 * @param valueExtractor closure extracting the value from an element
 * @param initialValue closure handed to {@code IntegrateDoFn} as the initial value
 * @param combiner closure handed to {@code IntegrateDoFn} as the combiner
 * @return stream of {@code Pair<K, V>} encoded with {@code PairCoder}
 */
@Override
public <K, V> Stream<Pair<K, V>> integratePerKey(@Nullable String name, Closure<K> keyExtractor, Closure<V> valueExtractor, Closure<V> initialValue, Closure<V> combiner) {
    Closure<K> keyFn = dehydrate(keyExtractor);
    Closure<V> valueFn = dehydrate(valueExtractor);
    Closure<V> combineFn = dehydrate(combiner);
    Closure<V> initialFn = dehydrate(initialValue);
    return descendant(p -> {
        PCollection<T> source = collection.materialize(p);
        Coder<K> keyCoder = coderOf(p, keyFn);
        Coder<V> valueCoder = coderOf(p, valueFn);
        // Align the input with this stream's windowing before mapping.
        if (!source.getWindowingStrategy().equals(windowingStrategy)) {
            @SuppressWarnings("unchecked") WindowingStrategy<T, ?> targetStrategy = (WindowingStrategy<T, ?>) windowingStrategy;
            source = source.apply(withWindowingStrategy(targetStrategy));
        }
        PCollection<KV<K, V>> keyed = MapElements.named(withSuffix(name, ".mapToKv"))
            .of(source)
            .using(element -> KV.of(keyFn.call(element), valueFn.call(element)))
            .output()
            .setCoder(KvCoder.of(keyCoder, valueCoder));
        KvCoder<K, V> keyedCoder = (KvCoder<K, V>) keyed.getCoder();
        PCollection<Pair<K, V>> integrated = keyed
            .apply(ParDo.of(IntegrateDoFn.of(combineFn, initialFn, keyedCoder)))
            .setCoder(PairCoder.of(keyCoder, valueCoder));
        // Already globally windowed: nothing more to do.
        if (integrated.getWindowingStrategy().equals(WindowingStrategy.globalDefault())) {
            return integrated;
        }
        PCollection<Pair<K, V>> globallyWindowed = integrated.apply(Window.into(new GlobalWindows()));
        return globallyWindowed;
    });
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KV(org.apache.beam.sdk.values.KV) Pair(cz.o2.proxima.util.Pair)

Aggregations

KvCoder (org.apache.beam.sdk.coders.KvCoder) 44, Coder (org.apache.beam.sdk.coders.Coder) 26, WindowedValue (org.apache.beam.sdk.util.WindowedValue) 25, KV (org.apache.beam.sdk.values.KV) 21, BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow) 20, Map (java.util.Map) 17, List (java.util.List) 16, ArrayList (java.util.ArrayList) 15, RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi) 15, IOException (java.io.IOException) 14, HashMap (java.util.HashMap) 14, WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy) 13, StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder) 11, IterableCoder (org.apache.beam.sdk.coders.IterableCoder) 10, VoidCoder (org.apache.beam.sdk.coders.VoidCoder) 10, PCollectionView (org.apache.beam.sdk.values.PCollectionView) 10, Test (org.junit.Test) 10, WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) 8, ViewFn (org.apache.beam.sdk.transforms.ViewFn) 7, PCollection (org.apache.beam.sdk.values.PCollection) 7