Use of org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn in the Apache Beam project.
From the class PartialGroupByKeyParDoFnsTest, method testPartialGroupByKey.
/**
 * Feeds six key/value inputs spread over three keys through a
 * {@link SimplePartialGroupByKeyParDoFn} backed by {@code GroupingTables.buffering}, then checks
 * that the receiver ends up with one element per key carrying that key's values, and that the
 * receiver's output counters were reported.
 */
@Test
public void testPartialGroupByKey() throws Exception {
  Coder stringCoder = StringUtf8Coder.of();
  Coder intCoder = BigEndianIntegerCoder.of();

  // Coder handed to the receiver: value-only windowed KV of key -> iterable of values.
  ElementByteSizeObservableCoder observableCoder =
      new ElementByteSizeObservableCoder(
          WindowedValue.getValueOnlyCoder(KvCoder.of(stringCoder, IterableCoder.of(intCoder))));
  TestOutputReceiver outputReceiver =
      new TestOutputReceiver(
          observableCoder, counterSet, NameContextsForTests.nameContextForTest());

  ParDoFn fn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.buffering(
              new WindowingCoderGroupingKeyCreator(stringCoder),
              PairInfo.create(),
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(stringCoder)),
              new CoderSizeEstimator(intCoder)),
          outputReceiver);

  fn.startBundle(outputReceiver);
  // Inputs are submitted in exactly this order, each wrapped in the global window.
  for (KV<String, Integer> input :
      Arrays.asList(
          KV.of("hi", 4),
          KV.of("there", 5),
          KV.of("hi", 6),
          KV.of("joe", 7),
          KV.of("there", 8),
          KV.of("hi", 9))) {
    fn.processElement(WindowedValue.valueInGlobalWindow(input));
  }
  fn.finishBundle();

  assertThat(
      outputReceiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          WindowedValue.valueInGlobalWindow(KV.of("hi", Arrays.asList(4, 6, 9))),
          WindowedValue.valueInGlobalWindow(KV.of("there", Arrays.asList(5, 8))),
          WindowedValue.valueInGlobalWindow(KV.of("joe", Arrays.asList(7)))));

  // Exact counter values depend on size of encoded data. If encoding
  // changes, then these expected counters should change to match.
  final String receiverName = "test_receiver_out";
  CounterUpdateExtractor<?> extractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, extractor);
  verify(extractor).longSum(getObjectCounterName(receiverName), false, 3L);
  verify(extractor)
      .longMean(
          getMeanByteCounterName(receiverName), false, LongCounterMean.ZERO.addValue(49L, 3));
  verifyNoMoreInteractions(extractor);
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn in the Apache Beam project.
From the class PartialGroupByKeyParDoFnsTest, method testCreateWithCombinerAndStreaming.
/**
 * Checks that {@code PartialGroupByKeyParDoFns.create}, called with streaming enabled and an
 * integer-sum combine function, returns a {@link SimplePartialGroupByKeyParDoFn}.
 */
@Test
public void testCreateWithCombinerAndStreaming() throws Exception {
  StreamingOptions streamingOptions = PipelineOptionsFactory.as(StreamingOptions.class);
  streamingOptions.setStreaming(true);

  KvCoder<String, Integer> inputCoder =
      KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());

  TestOutputReceiver outputReceiver =
      new TestOutputReceiver(
          new ElementByteSizeObservableCoder(WindowedValue.getValueOnlyCoder(inputCoder)),
          counterSet,
          NameContextsForTests.nameContextForTest());

  ParDoFn created =
      PartialGroupByKeyParDoFns.create(
          streamingOptions,
          inputCoder,
          AppliedCombineFn.withInputCoder(
              Sum.ofIntegers(), CoderRegistry.createDefault(), inputCoder),
          NullSideInputReader.empty(),
          outputReceiver,
          null);

  assertTrue(created instanceof SimplePartialGroupByKeyParDoFn);
}
Use of org.apache.beam.runners.dataflow.worker.util.common.worker.SimplePartialGroupByKeyParDoFn in the Apache Beam project.
From the class PartialGroupByKeyParDoFnsTest, method testPartialGroupByKeyWithCombiner.
/**
 * Feeds six key/value inputs spread over three keys through a
 * {@link SimplePartialGroupByKeyParDoFn} backed by {@code GroupingTables.combining} with a
 * {@code TestCombiner}, then checks that the receiver ends up with one combined element per key
 * and that the receiver's output counters were reported.
 */
@Test
public void testPartialGroupByKeyWithCombiner() throws Exception {
  Coder stringCoder = StringUtf8Coder.of();
  Coder intCoder = BigEndianIntegerCoder.of();

  // Coder handed to the receiver: value-only windowed KV of key -> single value.
  ElementByteSizeObservableCoder observableCoder =
      new ElementByteSizeObservableCoder(
          WindowedValue.getValueOnlyCoder(KvCoder.of(stringCoder, intCoder)));
  TestOutputReceiver outputReceiver =
      new TestOutputReceiver(
          observableCoder, counterSet, NameContextsForTests.nameContextForTest());

  Combiner<WindowedValue<String>, Integer, Integer, Integer> combiner = new TestCombiner();
  ParDoFn fn =
      new SimplePartialGroupByKeyParDoFn(
          GroupingTables.combining(
              new WindowingCoderGroupingKeyCreator(stringCoder),
              PairInfo.create(),
              combiner,
              new CoderSizeEstimator(WindowedValue.getValueOnlyCoder(stringCoder)),
              new CoderSizeEstimator(intCoder)),
          outputReceiver);

  fn.startBundle(outputReceiver);
  // Inputs are submitted in exactly this order, each wrapped in the global window.
  for (KV<String, Integer> input :
      Arrays.asList(
          KV.of("hi", 4),
          KV.of("there", 5),
          KV.of("hi", 6),
          KV.of("joe", 7),
          KV.of("there", 8),
          KV.of("hi", 9))) {
    fn.processElement(WindowedValue.valueInGlobalWindow(input));
  }
  fn.finishBundle();

  assertThat(
      outputReceiver.outputElems,
      IsIterableContainingInAnyOrder.<Object>containsInAnyOrder(
          WindowedValue.valueInGlobalWindow(KV.of("hi", 19)),
          WindowedValue.valueInGlobalWindow(KV.of("there", 13)),
          WindowedValue.valueInGlobalWindow(KV.of("joe", 7))));

  // Exact counter values depend on size of encoded data. If encoding
  // changes, then these expected counters should change to match.
  final String receiverName = "test_receiver_out";
  CounterUpdateExtractor<?> extractor = Mockito.mock(CounterUpdateExtractor.class);
  counterSet.extractUpdates(false, extractor);
  verify(extractor).longSum(getObjectCounterName(receiverName), false, 3L);
  verify(extractor)
      .longMean(
          getMeanByteCounterName(receiverName), false, LongCounterMean.ZERO.addValue(25L, 3));
  verifyNoMoreInteractions(extractor);
}
Aggregations