use of org.apache.druid.query.groupby.epinephelinae.Grouper.Entry in project druid by druid-io.
the class BufferArrayGrouperTest method testAggregate.
/**
 * Aggregates one fixed row under interleaved keys and checks the per-key results after sorting
 * the grouper's unsorted output by key.
 */
@Test
public void testAggregate() {
  final TestColumnSelectorFactory selectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final IntGrouper grouper = newGrouper(selectorFactory, 32768);

  // Every aggregate() call below reads this same row, whose "value" column is 10.
  selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));

  // Keys arrive interleaved: 6 appears three times, 12 twice, 10 once.
  for (int key : new int[]{12, 6, 10, 6, 12, 6}) {
    grouper.aggregate(key);
  }

  // For each key: first value is 10 x occurrences, second is the number of occurrences
  // (presumably a sum and a count aggregator set up by newGrouper -- confirm there).
  final List<Entry<Integer>> expected = ImmutableList.of(
      new Grouper.Entry<>(6, new Object[]{30L, 3L}),
      new Grouper.Entry<>(10, new Object[]{10L, 1L}),
      new Grouper.Entry<>(12, new Object[]{20L, 2L})
  );

  // iterator(false) is requested, so order the entries by key before comparing.
  final Comparator<Entry<Integer>> byKey = (left, right) -> Ints.compare(left.getKey(), right.getKey());
  final List<Entry<Integer>> actualEntries = Lists.newArrayList(grouper.iterator(false));
  final List<Entry<Integer>> actualSortedByKey = Ordering.from(byKey).sortedCopy(actualEntries);

  Assert.assertEquals(expected, actualSortedByKey);
}
use of org.apache.druid.query.groupby.epinephelinae.Grouper.Entry in project druid by druid-io.
the class StreamingMergeSortedGrouperTest method testAggregate.
/**
 * Feeds keys in ascending order into the streaming grouper, finishes it, and checks that the
 * iterator yields the expected per-key results without any additional sorting in the test.
 */
@Test
public void testAggregate() {
  final TestColumnSelectorFactory selectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final StreamingMergeSortedGrouper<Integer> grouper = newGrouper(selectorFactory, 1024);

  // Every aggregate() call below reads this same row, whose "value" column is 10.
  selectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));

  // Keys are supplied already in ascending order: 6 x3, 10 x1, 12 x2.
  for (int key : new int[]{6, 6, 6, 10, 12, 12}) {
    grouper.aggregate(key);
  }
  grouper.finish();

  // For each key: first value is 10 x occurrences, second is the number of occurrences.
  final List<Entry<Integer>> expected = ImmutableList.of(
      new Grouper.Entry<>(6, new Object[]{30L, 3L}),
      new Grouper.Entry<>(10, new Object[]{10L, 1L}),
      new Grouper.Entry<>(12, new Object[]{20L, 2L})
  );

  // The result of iterator(true) is compared as-is, in iteration order.
  final List<Entry<Integer>> actualEntries = Lists.newArrayList(grouper.iterator(true));
  Assert.assertEquals(expected, actualEntries);
}
use of org.apache.druid.query.groupby.epinephelinae.Grouper.Entry in project druid by druid-io.
the class StreamingMergeSortedGrouperTest method testStreamingAggregate.
/**
 * Runs a producer task that aggregates keys 0..1023 (ten times each) into a streaming grouper
 * while the calling thread concurrently drains the grouper's iterator, then verifies the result.
 *
 * @param bufferSize size passed to {@code newGrouper} for the grouper under test
 * @throws ExecutionException   if the producer task failed; its failure is surfaced in preference
 *                              to a plain result mismatch
 * @throws InterruptedException if interrupted while waiting for the producer task
 */
private void testStreamingAggregate(int bufferSize) throws ExecutionException, InterruptedException {
  final ExecutorService exec = Execs.multiThreaded(2, "merge-sorted-grouper-test-%d");
  final TestColumnSelectorFactory columnSelectorFactory = GrouperTestUtil.newColumnSelectorFactory();
  final StreamingMergeSortedGrouper<Integer> grouper = newGrouper(columnSelectorFactory, bufferSize);

  // Each key is aggregated 10 times against a row whose "value" is 10, hence {100, 10} per key.
  final List<Entry<Integer>> expected = new ArrayList<>(1024);
  for (int i = 0; i < 1024; i++) {
    expected.add(new Entry<>(i, new Object[]{100L, 10L}));
  }

  try {
    // Producer: writes into the grouper on a pool thread while this thread reads the iterator.
    final Future<?> future = exec.submit(() -> {
      columnSelectorFactory.setRow(new MapBasedRow(0, ImmutableMap.of("value", 10L)));
      for (int i = 0; i < 1024; i++) {
        for (int j = 0; j < 10; j++) {
          grouper.aggregate(i);
        }
      }
      grouper.finish();
    });

    final List<Entry<Integer>> unsortedEntries = Lists.newArrayList(grouper.iterator(true));
    final List<Entry<Integer>> actual = Ordering
        .from((Comparator<Entry<Integer>>) (o1, o2) -> Ints.compare(o1.getKey(), o2.getKey()))
        .sortedCopy(unsortedEntries);

    if (!actual.equals(expected)) {
      // Check whether an exception occurred in the producer; if so, get() rethrows it wrapped in
      // an ExecutionException, which explains the mismatch better than an assertion failure.
      future.get();
      // No producer failure: fail with a message that shows how the results differ.
      Assert.assertEquals("Streaming aggregation produced unexpected entries", expected, actual);
    }
  }
  finally {
    exec.shutdownNow();
  }
}
use of org.apache.druid.query.groupby.epinephelinae.Grouper.Entry in project druid by druid-io.
the class ParallelCombiner method runCombiner.
/**
 * Submits a background task that merges the given sorted iterators and re-aggregates their
 * entries into a {@link StreamingMergeSortedGrouper} backed by {@code combineBuffer}.
 *
 * @param iterators          sorted input iterators, merged using {@code keyObjComparator}
 * @param combineBuffer      buffer handed to the grouper created for this combine step
 * @param combiningFactories aggregator factories used by the selector factory and the grouper
 * @param dictionary         dictionary used to build the grouper's key serde
 * @return the grouper's result iterator paired with the future of the submitted task
 */
private Pair<CloseableIterator<Entry<KeyType>>, Future> runCombiner(
    List<? extends CloseableIterator<Entry<KeyType>>> iterators,
    ByteBuffer combineBuffer,
    AggregatorFactory[] combiningFactories,
    List<String> dictionary
) {
  final SettableColumnSelectorFactory selectorFactory = new SettableColumnSelectorFactory(combiningFactories);
  final StreamingMergeSortedGrouper<KeyType> grouper = new StreamingMergeSortedGrouper<>(
      Suppliers.ofInstance(combineBuffer),
      combineKeySerdeFactory.factorizeWithDictionary(dictionary),
      selectorFactory,
      combiningFactories,
      queryTimeoutAt
  );

  // The grouper has to be initialized before iterator() is called, so init() happens here on the
  // calling thread rather than inside the background task.
  grouper.init();

  final AbstractPrioritizedCallable<Void> combineTask = new AbstractPrioritizedCallable<Void>(priority) {
    @Override
    public Void call() {
      try (
          CloseableIterator<Entry<KeyType>> merged = CloseableIterators.mergeSorted(iterators, keyObjComparator);
          // Declared only so that try-with-resources closes the releaser automatically.
          @SuppressWarnings("unused")
          final Releaser bufferReleaser = combineBufferHolder.increment()
      ) {
        while (merged.hasNext()) {
          final Entry<KeyType> entry = merged.next();
          // Expose the entry's values to the aggregators for the duration of this aggregate() call.
          selectorFactory.set(entry.values);
          // The result is not inspected: the grouper either returns ok or throws.
          grouper.aggregate(entry.key);
          selectorFactory.set(null);
        }
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }

      grouper.finish();
      return null;
    }
  };

  final ListenableFuture combineFuture = executor.submit(combineTask);
  return new Pair<>(grouper.iterator(), combineFuture);
}
use of org.apache.druid.query.groupby.epinephelinae.Grouper.Entry in project druid by druid-io.
the class ParallelCombiner method combine.
/**
 * Builds a tree of combining tasks over the given sorted iterators. Every node of the tree is a
 * task that reads its child iterators, aggregates their entries asynchronously, and exposes the
 * aggregated result as an iterator of its own.
 * <p>
 * This method is used when data has been spilled, where a streaming combine is preferred because
 * it avoids too many disk accesses.
 *
 * @param sortedIterators  sorted iterators to combine
 * @param mergedDictionary dictionary used to build the key serde for the combining groupers
 * @return an iterator over the output of the root grouper of the combining tree
 */
public CloseableIterator<Entry<KeyType>> combine(
    List<? extends CloseableIterator<Entry<KeyType>>> sortedIterators,
    List<String> mergedDictionary
) {
  // The closer is attached to the returned iterator; if anything fails before that point it is
  // closed in the catch block below instead.
  final Closer closer = Closer.create();
  try {
    final ByteBuffer buffer = combineBufferHolder.get();
    final int minBufferCapacity = StreamingMergeSortedGrouper.requiredBufferCapacity(
        combineKeySerdeFactory.factorizeWithDictionary(mergedDictionary),
        combiningFactories
    );

    // Maximize parallelism while each buffer slice stays at least as large as the minimum size
    // StreamingMergeSortedGrouper requires: find the leaf combine degree of the combining tree
    // together with the number of buffers that achieves this.
    final Pair<Integer, Integer> degreeAndBuffers = findLeafCombineDegreeAndNumBuffers(
        buffer,
        minBufferCapacity,
        concurrencyHint,
        sortedIterators.size()
    );
    final int leafCombineDegree = degreeAndBuffers.lhs;
    final int numBuffers = degreeAndBuffers.rhs;

    // Split the combine buffer into numBuffers equally sized slices.
    final int bytesPerSlice = buffer.capacity() / numBuffers;
    final Supplier<ByteBuffer> sliceSupplier = createCombineBufferSupplier(buffer, numBuffers, bytesPerSlice);

    final Pair<List<CloseableIterator<Entry<KeyType>>>, List<Future>> tree = buildCombineTree(
        sortedIterators,
        sliceSupplier,
        combiningFactories,
        leafCombineDegree,
        mergedDictionary
    );

    // Exactly one iterator is expected back: the root of the tree.
    final CloseableIterator<Entry<KeyType>> rootIterator = Iterables.getOnlyElement(tree.lhs);
    final List<Future> pendingFutures = tree.rhs;

    // Closing the returned iterator also runs the check on the combining tasks' futures.
    closer.register(() -> checkCombineFutures(pendingFutures));
    return CloseableIterators.wrap(rootIterator, closer);
  }
  catch (Throwable failure) {
    try {
      closer.close();
    }
    catch (Throwable closeFailure) {
      // Keep the original failure as the primary one and attach the close failure to it.
      failure.addSuppressed(closeFailure);
    }
    throw failure;
  }
}
Aggregations