Use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
From the class StreamingTransformTranslator, method groupByKey().
private static <K, V, W extends BoundedWindow> TransformEvaluator<GroupByKey<K, V>> groupByKey() {
  return new TransformEvaluator<GroupByKey<K, V>>() {

    @Override
    public void evaluate(GroupByKey<K, V> transform, EvaluationContext context) {
      @SuppressWarnings("unchecked")
      UnboundedDataset<KV<K, V>> inputDataset =
          (UnboundedDataset<KV<K, V>>) context.borrowDataset(transform);
      List<Integer> streamSources = inputDataset.getStreamSources();
      JavaDStream<WindowedValue<KV<K, V>>> dStream = inputDataset.getDStream();
      @SuppressWarnings("unchecked")
      final KvCoder<K, V> coder = (KvCoder<K, V>) context.getInput(transform).getCoder();
      final SparkRuntimeContext runtimeContext = context.getRuntimeContext();
      @SuppressWarnings("unchecked")
      final WindowingStrategy<?, W> windowingStrategy =
          (WindowingStrategy<?, W>) context.getInput(transform).getWindowingStrategy();
      @SuppressWarnings("unchecked")
      final WindowFn<Object, W> windowFn = (WindowFn<Object, W>) windowingStrategy.getWindowFn();

      // --- coders.
      final WindowedValue.WindowedValueCoder<V> wvCoder =
          WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());

      // --- group by key only.
      JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupedByKeyStream =
          dStream.transform(
              new Function<
                  JavaRDD<WindowedValue<KV<K, V>>>,
                  JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>>>() {
                @Override
                public JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> call(
                    JavaRDD<WindowedValue<KV<K, V>>> rdd) throws Exception {
                  return GroupCombineFunctions.groupByKeyOnly(rdd, coder.getKeyCoder(), wvCoder);
                }
              });

      // --- now group also by window.
      JavaDStream<WindowedValue<KV<K, Iterable<V>>>> outStream =
          SparkGroupAlsoByWindowViaWindowSet.groupAlsoByWindow(
              groupedByKeyStream,
              coder.getKeyCoder(),
              wvCoder,
              windowingStrategy,
              runtimeContext,
              streamSources);

      context.putDataset(transform, new UnboundedDataset<>(outStream, streamSources));
    }

    @Override
    public String toNativeString() {
      return "groupByKey()";
    }
  };
}
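The translator never builds the KvCoder itself; it casts the input PCollection's coder and then pulls the key and value coders back out. A minimal, self-contained sketch of that pattern with built-in coders (StringUtf8Coder, VarLongCoder, and the global window coder are stand-ins chosen for illustration, not taken from the runner code):

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.WindowedValue;

public class KvCoderComponentsSketch {
  public static void main(String[] args) {
    // Compose a KvCoder from standard coders, as a pipeline would for KV<String, Long>.
    KvCoder<String, Long> coder = KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of());

    // The translator pulls the component coders back out of the composite ...
    Coder<String> keyCoder = coder.getKeyCoder();

    // ... and wraps the value coder so grouped values keep their window metadata.
    WindowedValue.WindowedValueCoder<Long> wvCoder =
        WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), GlobalWindow.Coder.INSTANCE);

    System.out.println(keyCoder + " / " + wvCoder);
  }
}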
Use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
From the class KryoCoderTest, method testCodingWithKvCoderKeyIsKryoCoder().
@Test
public void testCodingWithKvCoderKeyIsKryoCoder() throws IOException {
  final KryoRegistrar registrar = k -> k.register(TestClass.class);
  final ListCoder<Void> listCoder = ListCoder.of(VoidCoder.of());
  final KvCoder<TestClass, List<Void>> kvCoder =
      KvCoder.of(KryoCoder.of(OPTIONS, registrar), listCoder);
  final List<Void> inputValue = new ArrayList<>();
  inputValue.add(null);
  inputValue.add(null);
  final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  final TestClass inputKey = new TestClass("something");
  kvCoder.encode(KV.of(inputKey, inputValue), byteArrayOutputStream);
  final KV<TestClass, List<Void>> decoded =
      kvCoder.decode(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));
  assertNotNull(decoded);
  assertNotNull(decoded.getKey());
  assertEquals(inputKey, decoded.getKey());
  assertNotNull(decoded.getValue());
  assertEquals(inputValue, decoded.getValue());
}
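The Kryo-specific part of this test is only which key coder is plugged into the composite; the encode/decode round trip itself works the same with any component coders. A minimal, self-contained sketch (not from the Beam test suite) using StringUtf8Coder and ListCoder.of(VarIntCoder.of()):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.values.KV;

public class KvCoderRoundTripSketch {
  public static void main(String[] args) throws IOException {
    // Compose the KV coder from a String key coder and a List<Integer> value coder.
    KvCoder<String, List<Integer>> kvCoder =
        KvCoder.of(StringUtf8Coder.of(), ListCoder.of(VarIntCoder.of()));

    // Encode one KV pair to bytes.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    kvCoder.encode(KV.of("key", Arrays.asList(1, 2, 3)), out);

    // Decode it back; the round trip should reproduce the input pair.
    KV<String, List<Integer>> decoded =
        kvCoder.decode(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(decoded.getKey() + " -> " + decoded.getValue());
  }
}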
Use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
From the class KryoCoderTest, method testCodingWithKvCoderValueIsKryoCoder().
@Test
public void testCodingWithKvCoderValueIsKryoCoder() throws IOException {
  final KryoRegistrar registrar = k -> k.register(TestClass.class);
  final KvCoder<String, TestClass> kvCoder =
      KvCoder.of(StringUtf8Coder.of(), KryoCoder.of(OPTIONS, registrar));
  final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
  final String inputKey = "key";
  final TestClass inputValue = new TestClass("something");
  kvCoder.encode(KV.of(inputKey, inputValue), byteArrayOutputStream);
  final KV<String, TestClass> decoded =
      kvCoder.decode(new ByteArrayInputStream(byteArrayOutputStream.toByteArray()));
  assertNotNull(decoded);
  assertNotNull(decoded.getKey());
  assertEquals(inputKey, decoded.getKey());
  assertNotNull(decoded.getValue());
  assertEquals(inputValue, decoded.getValue());
}
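Both tests exercise only the encode/decode round trip. One further property that matters when a KvCoder ends up as the key coder of a GroupByKey, as in the translators shown here, is determinism: KvCoder.verifyDeterministic() delegates to its component coders. A minimal sketch with built-in coders (illustrative only; it makes no claim about KryoCoder itself):

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.SerializableCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;

public class KvCoderDeterminismSketch {
  public static void main(String[] args) {
    try {
      // Both components are deterministic, so the composite passes the check.
      KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()).verifyDeterministic();
      System.out.println("KvCoder<String, Integer> is deterministic");
    } catch (Coder.NonDeterministicException e) {
      System.out.println("unexpected: " + e.getMessage());
    }

    try {
      // Java serialization is not guaranteed deterministic, so this composite is rejected.
      KvCoder.of(SerializableCoder.of(java.time.Instant.class), VarIntCoder.of())
          .verifyDeterministic();
      System.out.println("unexpectedly deterministic");
    } catch (Coder.NonDeterministicException e) {
      System.out.println("non-deterministic key coder: " + e.getMessage());
    }
  }
}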
Use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
From the class GroupByKeyTranslatorBatch, method translateNode().
@Override
public void translateNode(GroupByKey<K, V> transform, Twister2BatchTranslationContext context) {
  PCollection<KV<K, V>> input = context.getInput(transform);
  BatchTSetImpl<WindowedValue<KV<K, V>>> inputTTSet = context.getInputDataSet(input);
  final KvCoder<K, V> coder = (KvCoder<K, V>) input.getCoder();
  Coder<K> inputKeyCoder = coder.getKeyCoder();
  WindowingStrategy windowingStrategy = input.getWindowingStrategy();
  WindowFn<KV<K, V>, BoundedWindow> windowFn =
      (WindowFn<KV<K, V>, BoundedWindow>) windowingStrategy.getWindowFn();
  final WindowedValue.WindowedValueCoder<V> wvCoder =
      WindowedValue.FullWindowedValueCoder.of(coder.getValueCoder(), windowFn.windowCoder());
  KeyedTSet<byte[], byte[]> keyedTSet =
      inputTTSet.mapToTuple(new MapToTupleFunction<K, V>(inputKeyCoder, wvCoder));

  // TODO: add support for specifying a partition function; that would use
  // keyedPartition instead of keyedGather.
  ComputeTSet<KV<K, Iterable<WindowedValue<V>>>, Iterator<Tuple<byte[], Iterator<byte[]>>>>
      groupedbyKeyTset =
          keyedTSet.keyedGather().map(new ByteToWindowFunction(inputKeyCoder, wvCoder));

  // --- now group also by window.
  SystemReduceFnBuffering reduceFnBuffering = new SystemReduceFnBuffering(coder.getValueCoder());
  ComputeTSet<WindowedValue<KV<K, Iterable<V>>>, Iterable<KV<K, Iterator<WindowedValue<V>>>>>
      outputTset =
          groupedbyKeyTset
              .direct()
              .<WindowedValue<KV<K, Iterable<V>>>>flatmap(
                  new GroupByWindowFunction(windowingStrategy, reduceFnBuffering, context.getOptions()));

  PCollection output = context.getOutput(transform);
  context.setOutputDataSet(output, outputTset);
}
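The MapToTupleFunction and ByteToWindowFunction used above move keys and windowed values through the shuffle as byte arrays, and the byte representation of a key is defined entirely by the key coder extracted from the KvCoder. A minimal sketch of that idea using the SDK's CoderUtils helpers (illustrative only, not the runner's actual code path):

import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;

public class KeyBytesSketch {
  public static void main(String[] args) throws CoderException {
    // The key coder defines the byte representation the runner shuffles and groups on.
    byte[] keyBytes = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "user-42");
    String roundTripped = CoderUtils.decodeFromByteArray(StringUtf8Coder.of(), keyBytes);
    System.out.println(roundTripped + " -> " + keyBytes.length + " bytes");
  }
}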
Use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.
From the class SparkSideInputReader, method initializeBroadcastVariable().
private <T> Map<BoundedWindow, T> initializeBroadcastVariable(
    Iterable<WindowedValue<?>> inputValues, PCollectionView<T> view) {
  // first partition into windows
  Map<BoundedWindow, List<WindowedValue<?>>> partitionedElements = new HashMap<>();
  for (WindowedValue<?> value : inputValues) {
    for (BoundedWindow window : value.getWindows()) {
      List<WindowedValue<?>> windowedValues =
          partitionedElements.computeIfAbsent(window, k -> new ArrayList<>());
      windowedValues.add(value);
    }
  }

  Map<BoundedWindow, T> resultMap = new HashMap<>();
  for (Map.Entry<BoundedWindow, List<WindowedValue<?>>> elements : partitionedElements.entrySet()) {
    switch (view.getViewFn().getMaterialization().getUrn()) {
      case Materializations.ITERABLE_MATERIALIZATION_URN:
        {
          ViewFn<IterableView, T> viewFn = (ViewFn<IterableView, T>) view.getViewFn();
          resultMap.put(
              elements.getKey(),
              viewFn.apply(
                  () ->
                      elements.getValue().stream()
                          .map(WindowedValue::getValue)
                          .collect(Collectors.toList())));
        }
        break;
      case Materializations.MULTIMAP_MATERIALIZATION_URN:
        {
          ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn();
          Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder();
          resultMap.put(
              elements.getKey(),
              viewFn.apply(
                  InMemoryMultimapSideInputView.fromIterable(
                      keyCoder,
                      (Iterable)
                          elements.getValue().stream()
                              .map(WindowedValue::getValue)
                              .collect(Collectors.toList()))));
        }
        break;
      default:
        throw new IllegalStateException(
            String.format(
                "Unknown side input materialization format requested '%s'",
                view.getViewFn().getMaterialization().getUrn()));
    }
  }
  return resultMap;
}
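In the multimap branch, the view's internal coder is expected to be a KvCoder, and only its key coder is needed: the in-memory multimap view matches keys by the key coder's notion of equality (structural value) rather than raw object identity. A minimal sketch of that key-coder behaviour using built-in coders (illustrative only):

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarIntCoder;

public class SideInputKeyCoderSketch {
  public static void main(String[] args) {
    // A multimap side input's coder is a KvCoder; the reader unwraps it to get the key coder.
    KvCoder<String, Integer> viewCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
    Coder<String> keyCoder = viewCoder.getKeyCoder();

    // Structural values let keys match by coder-defined equality, not object identity.
    Object a = keyCoder.structuralValue("user-42");
    Object b = keyCoder.structuralValue(new String("user-42"));
    System.out.println(a.equals(b)); // true
  }
}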