Search in sources:

Example 1 with KvCoder

Use of org.apache.beam.sdk.coders.KvCoder in the Apache Beam project.

Class StreamingTransformTranslator, method groupByKey.

/**
 * Creates the streaming evaluator for {@link GroupByKey}.
 *
 * <p>The evaluator first groups each RDD of the input stream by key only, then hands the
 * result to {@code SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow} to group by window,
 * and finally registers the output as an {@link UnboundedDataset} that carries the same
 * stream sources as the input.
 *
 * @return a {@link TransformEvaluator} whose native string is {@code "groupByKey()"}
 */
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
    return new TransformEvaluator<GroupByKey<K, V>>() {

        @Override
        public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
            // Input dataset and the stream it wraps.
            @SuppressWarnings("unchecked")
            UnboundedDataset<KV<K, V>> source =
                    (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
            List<Integer> sourceIds = source.getStreamSources();
            JavaDStream<WindowedValue<KV<K, V>>> inputStream = source.getDStream();

            // Coder, runtime context and windowing information of the transform's input.
            @SuppressWarnings("unchecked")
            final KvCoder<K, V> kvCoder = (KvCoder<K, V>) context.getInput(transform).getCoder();
            final SparkRuntimeContext sparkRuntimeContext = context.getRuntimeContext();
            @SuppressWarnings("unchecked")
            final WindowingStrategy<?, W> strategy =
                    (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
            @SuppressWarnings("unchecked")
            final WindowFn<Object, W> fn = (WindowFn<Object, W>) strategy.getWindowFn();

            // Coder for the windowed values, built from the value coder and the window coder.
            final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
                    WindowedValue.FullWindowedValueCoder.of(kvCoder.getValueCoder(), fn.windowCoder());

            // Step 1: group by key only, applied to every RDD of the stream.
            JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> keyedStream =
                    inputStream.transform(
                            new Function<
                                    JavaRDD<WindowedValue<KV<K, V>>>,
                                    JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>() {

                                @Override
                                public JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> call(
                                        JavaRDD<WindowedValue<KV<K, V>>> batch) throws Exception {
                                    return GroupCombineFunctions.groupByKeyOnly(
                                            batch, kvCoder.getKeyCoder(), windowedValueCoder);
                                }
                            });

            // Step 2: group also by window.
            JavaDStream<WindowedValue<KV<K, Iterable<V>>>> groupedStream =
                    SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow(
                            keyedStream,
                            kvCoder.getKeyCoder(),
                            windowedValueCoder,
                            strategy,
                            sparkRuntimeContext,
                            sourceIds);

            context.putDataset(transform, new UnboundedDataset<>(groupedStream, sourceIds));
        }

        @Override
        public String toNativeString() {
            return "groupByKey()";
        }
    };
}
Also used : GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) SparkRuntimeContext(org.apache.beam.runners.spark.translation.SparkRuntimeContext) WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) SparkAssignWindowFn(org.apache.beam.runners.spark.translation.SparkAssignWindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) TransformEvaluator(org.apache.beam.runners.spark.translation.TransformEvaluator) JavaRDD(org.apache.spark.api.java.JavaRDD) EvaluationContext(org.apache.beam.runners.spark.translation.EvaluationContext)

Example 2 with KvCoder

Use of org.apache.beam.sdk.coders.KvCoder in the Apache Beam project.

Class KryoCoderTest, method testCodingWithKvCoderKeyIsKryoCoder.

/**
 * Round-trips a {@code KV} through a {@link KvCoder} whose key coder is a {@code KryoCoder}
 * and whose value coder is a list-of-{@code Void} coder, then checks that the decoded key
 * and value equal the originals.
 */
@Test
public void testCodingWithKvCoderKeyIsKryoCoder() throws IOException {
    final KryoRegistrar kryoRegistrar = kryo -> kryo.register(TestClass.class);
    final ListCoder<Void> voidListCoder = ListCoder.of(VoidCoder.of());
    final KvCoder<TestClass, List<Void>> coderUnderTest =
            KvCoder.of(KryoCoder.of(OPTIONS, kryoRegistrar), voidListCoder);

    // The value is a list holding two null elements.
    final List<Void> expectedValue = new ArrayList<>();
    expectedValue.add(null);
    expectedValue.add(null);

    final ByteArrayOutputStream encodedBytes = new ByteArrayOutputStream();
    final TestClass expectedKey = new TestClass("something");
    coderUnderTest.encode(KV.of(expectedKey, expectedValue), encodedBytes);

    final ByteArrayInputStream encodedInput = new ByteArrayInputStream(encodedBytes.toByteArray());
    final KV<TestClass, List<Void>> roundTripped = coderUnderTest.decode(encodedInput);

    assertNotNull(roundTripped);
    assertNotNull(roundTripped.getKey());
    assertEquals(expectedKey, roundTripped.getKey());
    assertNotNull(roundTripped.getValue());
    assertEquals(expectedValue, roundTripped.getValue());
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectInputStream(java.io.ObjectInputStream) ListCoder(org.apache.beam.sdk.coders.ListCoder) IOException(java.io.IOException) Test(org.junit.Test) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Objects(java.util.Objects) CoderException(org.apache.beam.sdk.coders.CoderException) List(java.util.List) ByteArrayInputStream(java.io.ByteArrayInputStream) ObjectOutputStream(java.io.ObjectOutputStream) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Assert.assertEquals(org.junit.Assert.assertEquals) Nullable(org.checkerframework.checker.nullness.qual.Nullable) ByteArrayInputStream(java.io.ByteArrayInputStream) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 3 with KvCoder

Use of org.apache.beam.sdk.coders.KvCoder in the Apache Beam project.

Class KryoCoderTest, method testCodingWithKvCoderValueIsKryoCoder.

/**
 * Round-trips a {@code KV} through a {@link KvCoder} whose key coder is
 * {@code StringUtf8Coder} and whose value coder is a {@code KryoCoder}, then checks that the
 * decoded key and value equal the originals.
 */
@Test
public void testCodingWithKvCoderValueIsKryoCoder() throws IOException {
    final KryoRegistrar kryoRegistrar = kryo -> kryo.register(TestClass.class);
    final KvCoder<String, TestClass> coderUnderTest =
            KvCoder.of(StringUtf8Coder.of(), KryoCoder.of(OPTIONS, kryoRegistrar));

    final ByteArrayOutputStream encodedBytes = new ByteArrayOutputStream();
    final String expectedKey = "key";
    final TestClass expectedValue = new TestClass("something");
    coderUnderTest.encode(KV.of(expectedKey, expectedValue), encodedBytes);

    final ByteArrayInputStream encodedInput = new ByteArrayInputStream(encodedBytes.toByteArray());
    final KV<String, TestClass> roundTripped = coderUnderTest.decode(encodedInput);

    assertNotNull(roundTripped);
    assertNotNull(roundTripped.getKey());
    assertEquals(expectedKey, roundTripped.getKey());
    assertNotNull(roundTripped.getValue());
    assertEquals(expectedValue, roundTripped.getValue());
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Assert.assertNotNull(org.junit.Assert.assertNotNull) ObjectInputStream(java.io.ObjectInputStream) ListCoder(org.apache.beam.sdk.coders.ListCoder) IOException(java.io.IOException) Test(org.junit.Test) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) ArrayList(java.util.ArrayList) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Objects(java.util.Objects) CoderException(org.apache.beam.sdk.coders.CoderException) List(java.util.List) ByteArrayInputStream(java.io.ByteArrayInputStream) ObjectOutputStream(java.io.ObjectOutputStream) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Assert.assertEquals(org.junit.Assert.assertEquals) Nullable(org.checkerframework.checker.nullness.qual.Nullable) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 4 with KvCoder

Use of org.apache.beam.sdk.coders.KvCoder in the Apache Beam project.

Class GroupByKeyTranslatorBatch, method translateNode.

/**
 * Translates a {@link GroupByKey} into Twister2 TSet operations.
 *
 * <p>The input is mapped to byte-array tuples using the key coder and a windowed-value coder,
 * gathered by key, converted back through {@code ByteToWindowFunction}, and then flat-mapped
 * through {@code GroupByWindowFunction}. The resulting TSet is registered on the context as
 * the data set of the transform's output.
 *
 * @param transform the transform being translated
 * @param context translation context supplying the input/output collections and options
 */
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
    PCollection<KV<K, V>> inputCollection = context.getInput(transform);
    BatchTSetImpl<WindowedValue<KV<K, V>>> inputTSet = context.getInputDataSet(inputCollection);

    // Coders derived from the input's KvCoder and its window function.
    final KvCoder<K, V> kvCoder = (KvCoder<K, V>) inputCollection.getCoder();
    Coder<K> keyCoder = kvCoder.getKeyCoder();
    Coder<V> valueCoder = kvCoder.getValueCoder();
    WindowingStrategy strategy = inputCollection.getWindowingStrategy();
    WindowFn<KV<K, V>, BoundedWindow> fn =
            (WindowFn<KV<K, V>, BoundedWindow>) strategy.getWindowFn();
    final WindowedValue.WindowedValueCoder<V> windowedValueCoder =
            WindowedValue.FullWindowedValueCoder.of(valueCoder, fn.windowCoder());

    KeyedTSet<byte[], byte[]> tuples =
            inputTSet.mapToTuple(new MapToTupleFunction<K, V>(keyCoder, windowedValueCoder));

    // TODO: add support for specifying a partition function; that would use a
    // keyedPartition function instead of keyedGather.
    ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>> gatheredByKey =
            tuples.keyedGather().map(new ByteToWindowFunction(keyCoder, windowedValueCoder));

    // Group also by window.
    SystemReduceFnBuffering buffering = new SystemReduceFnBuffering(valueCoder);
    ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>> groupedByWindow =
            gatheredByKey
                    .direct()
                    .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                            new GroupByWindowFunction(strategy, buffering, context.getOptions()));

    PCollection outputCollection = context.getOutput(transform);
    context.setOutputDataSet(outputCollection, groupedByWindow);
}
Also used : WindowFn(org.apache.beam.sdk.transforms.windowing.WindowFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) SystemReduceFnBuffering(org.apache.beam.runners.twister2.translators.functions.internal.SystemReduceFnBuffering) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) PCollection(org.apache.beam.sdk.values.PCollection) ByteToWindowFunction(org.apache.beam.runners.twister2.translators.functions.ByteToWindowFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) Iterator(java.util.Iterator) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) GroupByWindowFunction(org.apache.beam.runners.twister2.translators.functions.GroupByWindowFunction)

Example 5 with KvCoder

Use of org.apache.beam.sdk.coders.KvCoder in the Apache Beam project.

Class SparkSideInputReader, method initializeBroadcastVariable.

/**
 * Builds the per-window view values for a side input.
 *
 * <p>The input values are first bucketed by every window they belong to. For each window the
 * view's {@link ViewFn} is then applied to the unwrapped values, using the representation
 * selected by the view's materialization URN (iterable or multimap).
 *
 * @param inputValues the windowed values backing the side input
 * @param view the view whose {@link ViewFn} and coder are used to build each result
 * @return a map from window to the view value computed for that window
 * @throws IllegalStateException if the materialization URN is neither the iterable nor the
 *     multimap URN
 */
private <T> Map<BoundedWindow, T> initializeBroadcastVariable(Iterable<WindowedValue<?>> inputValues, PCollectionView<T> view) {
    // Bucket every windowed value under each window it is assigned to.
    Map<BoundedWindow, List<WindowedValue<?>>> valuesByWindow = new HashMap<>();
    for (WindowedValue<?> windowedValue : inputValues) {
        for (BoundedWindow window : windowedValue.getWindows()) {
            valuesByWindow.computeIfAbsent(window, ignored -> new ArrayList<>()).add(windowedValue);
        }
    }

    Map<BoundedWindow, T> viewByWindow = new HashMap<>();
    for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> entry : valuesByWindow.entrySet()) {
        switch (view.getViewFn().getMaterialization().getUrn()) {
            case Materializations.ITERABLE_MATERIALIZATION_URN: {
                ViewFn<IterableView, T> iterableViewFn = (ViewFn<IterableView, T>) view.getViewFn();
                // The view is backed by a supplier, so the list is rebuilt on each access.
                T materialized = iterableViewFn.apply(
                        () -> entry.getValue().stream().map(WindowedValue::getValue).collect(Collectors.toList()));
                viewByWindow.put(entry.getKey(), materialized);
                break;
            }
            case Materializations.MULTIMAP_MATERIALIZATION_URN: {
                ViewFn<MultimapView, T> multimapViewFn = (ViewFn<MultimapView, T>) view.getViewFn();
                Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
                T materialized = multimapViewFn.apply(
                        InMemoryMultimapSideInputView.fromIterable(
                                keyCoder,
                                (Iterable) entry.getValue().stream().map(WindowedValue::getValue).collect(Collectors.toList())));
                viewByWindow.put(entry.getKey(), materialized);
                break;
            }
            default:
                throw new IllegalStateException(String.format("Unknown side input materialization format requested '%s'", view.getViewFn().getMaterialization().getUrn()));
        }
    }
    return viewByWindow;
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) IterableView(org.apache.beam.sdk.transforms.Materializations.IterableView) HashMap(java.util.HashMap) MultimapView(org.apache.beam.sdk.transforms.Materializations.MultimapView) ViewFn(org.apache.beam.sdk.transforms.ViewFn) WindowedValue(org.apache.beam.sdk.util.WindowedValue) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

KvCoder (org.apache.beam.sdk.coders.KvCoder): 41; Coder (org.apache.beam.sdk.coders.Coder): 25; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 23; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 18; KV (org.apache.beam.sdk.values.KV): 18; Map (java.util.Map): 16; List (java.util.List): 15; RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 15; IOException (java.io.IOException): 14; ArrayList (java.util.ArrayList): 14; HashMap (java.util.HashMap): 13; StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder): 11; WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy): 11; IterableCoder (org.apache.beam.sdk.coders.IterableCoder): 10; VoidCoder (org.apache.beam.sdk.coders.VoidCoder): 10; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 10; Test (org.junit.Test): 10; WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder): 8; TupleTag (org.apache.beam.sdk.values.TupleTag): 7; Nullable (org.checkerframework.checker.nullness.qual.Nullable): 7