Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.
The class StringTopNColumnAggregatesProcessor, method scanAndAggregateWithCardinalityKnown.
private long scanAndAggregateWithCardinalityKnown(
    TopNQuery query,
    Cursor cursor,
    DimensionSelector selector,
    Aggregator[][] rowSelector
)
{
  long processedRows = 0;
  while (!cursor.isDone()) {
    final IndexedInts dimValues = selector.getRow();
    for (int i = 0, size = dimValues.size(); i < size; ++i) {
      final int dimIndex = dimValues.get(i);
      // rowSelector is a dense cache indexed by dictionary id; a null slot means
      // this id has not been seen yet during the scan.
      Aggregator[] aggs = rowSelector[dimIndex];
      if (aggs == null) {
        final Comparable<?> key = dimensionValueConverter.apply(selector.lookupName(dimIndex));
        aggs = aggregatesStore.computeIfAbsent(
            key,
            k -> BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs())
        );
        rowSelector[dimIndex] = aggs;
      }
      for (Aggregator aggregator : aggs) {
        aggregator.aggregate();
      }
    }
    cursor.advance();
    processedRows++;
  }
  return processedRows;
}
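The pattern worth noting here is the two-level aggregator lookup: the dense rowSelector array, indexed by dictionary id, acts as a fast cache, while the value-keyed aggregatesStore map stays the source of truth, so distinct dictionary ids that convert to the same key share one set of aggregators. Below is a minimal, self-contained sketch of that pattern; the Counter class, dictionary array, and scanned ids are hypothetical stand-ins, not Druid's classes.

import java.util.HashMap;
import java.util.Map;

public class TwoLevelCacheSketch
{
  // Hypothetical stand-in for an Aggregator: just counts aggregate() calls.
  static class Counter
  {
    long count;

    void aggregate()
    {
      count++;
    }
  }

  public static void main(String[] args)
  {
    // Dictionary ids 1 and 2 both decode to "b", mimicking two encodings
    // that converge on one grouping key after value conversion.
    String[] dictionary = {"a", "b", "b"};

    Counter[] rowSelector = new Counter[dictionary.length]; // dense cache by dictionary id
    Map<String, Counter> aggregatesStore = new HashMap<>(); // canonical store by value

    for (int id : new int[] {0, 1, 2, 1, 0, 2}) {
      Counter agg = rowSelector[id];
      if (agg == null) {
        // Fall back to the value-keyed map so ids 1 and 2 share one Counter.
        agg = aggregatesStore.computeIfAbsent(dictionary[id], k -> new Counter());
        rowSelector[id] = agg;
      }
      agg.aggregate();
    }

    System.out.println(aggregatesStore.get("a").count); // 2
    System.out.println(aggregatesStore.get("b").count); // 4
  }
}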
Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.
The class OnheapIncrementalIndex, method addToFacts.
@Override
protected AddToFactsResult addToFacts(
    InputRow row,
    IncrementalIndexRow key,
    ThreadLocal<InputRow> rowContainer,
    Supplier<InputRow> rowSupplier,
    boolean skipMaxRowsInMemoryCheck
) throws IndexSizeExceededException
{
  final List<String> parseExceptionMessages = new ArrayList<>();
  final int priorIndex = facts.getPriorIndex(key);

  Aggregator[] aggs;
  final AggregatorFactory[] metrics = getMetrics();
  final AtomicInteger numEntries = getNumEntries();
  final AtomicLong totalSizeInBytes = getBytesInMemory();

  if (IncrementalIndexRow.EMPTY_ROW_INDEX != priorIndex) {
    // The key already has a row: aggregate into its existing aggregators.
    aggs = concurrentGet(priorIndex);
    long aggSizeDelta = doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);
    totalSizeInBytes.addAndGet(useMaxMemoryEstimates ? 0 : aggSizeDelta);
  } else {
    // New key: factorize fresh aggregators and speculatively claim a row index.
    aggs = new Aggregator[metrics.length];
    long aggSizeForRow = factorizeAggs(metrics, aggs, rowContainer, row);
    aggSizeForRow += doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);

    final int rowIndex = indexIncrement.getAndIncrement();
    concurrentSet(rowIndex, aggs);

    // Last ditch sanity checks
    if ((numEntries.get() >= maxRowCount || totalSizeInBytes.get() >= maxBytesInMemory)
        && facts.getPriorIndex(key) == IncrementalIndexRow.EMPTY_ROW_INDEX
        && !skipMaxRowsInMemoryCheck) {
      throw new IndexSizeExceededException(
          "Maximum number of rows [%d] or max size in bytes [%d] reached",
          maxRowCount,
          maxBytesInMemory
      );
    }

    final int prev = facts.putIfAbsent(key, rowIndex);
    if (IncrementalIndexRow.EMPTY_ROW_INDEX == prev) {
      numEntries.incrementAndGet();
    } else {
      // We lost a race: another write thread (possible under the GroupBy v1
      // strategy) published this key first, so re-aggregate into its row.
      parseExceptionMessages.clear();
      aggs = concurrentGet(prev);
      aggSizeForRow = doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);

      // Free up the misfire
      concurrentRemove(rowIndex);
      // This is expected to occur ~80% of the time in the worst scenarios
    }

    // For a new key, row size = key size + aggregator size + overhead
    final long estimatedSizeOfAggregators =
        useMaxMemoryEstimates ? maxBytesPerRowForAggregators : aggSizeForRow;
    final long rowSize = key.estimateBytesInMemory() + estimatedSizeOfAggregators + ROUGH_OVERHEAD_PER_MAP_ENTRY;
    totalSizeInBytes.addAndGet(rowSize);
  }

  return new AddToFactsResult(numEntries.get(), totalSizeInBytes.get(), parseExceptionMessages);
}
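The else branch is a speculate-then-publish scheme: claim a fresh row index, aggregate into it, publish the mapping with putIfAbsent, and if another writer won the race, re-aggregate into the winner's slot and discard the misfire. A stripped-down sketch of that scheme, assuming a plain ConcurrentHashMap and simple long-sum slots (PutIfAbsentRaceSketch and its fields are hypothetical, not Druid's types):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLongArray;

public class PutIfAbsentRaceSketch
{
  private final ConcurrentHashMap<String, Integer> facts = new ConcurrentHashMap<>();
  private final AtomicInteger indexIncrement = new AtomicInteger();
  private final AtomicLongArray slots = new AtomicLongArray(1024); // per-row sums

  public void add(String key, long value)
  {
    Integer prior = facts.get(key);
    if (prior != null) {
      slots.addAndGet(prior, value); // existing row: aggregate in place
      return;
    }

    // Speculatively claim a slot and aggregate into it before publishing.
    int rowIndex = indexIncrement.getAndIncrement();
    slots.addAndGet(rowIndex, value);

    Integer prev = facts.putIfAbsent(key, rowIndex);
    if (prev != null) {
      // Lost the race: redirect the value to the winner's slot and free the misfire.
      slots.addAndGet(prev, value);
      slots.set(rowIndex, 0);
    }
  }
}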
Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.
The class OnheapIncrementalIndex, method doAggregate.
/**
* Performs aggregation for all of the aggregators.
*
* @return Total incremental memory in bytes required by this step of the
* aggregation. The returned value is non-zero only if
* {@link #useMaxMemoryEstimates} is false.
*/
private long doAggregate(
    AggregatorFactory[] metrics,
    Aggregator[] aggs,
    ThreadLocal<InputRow> rowContainer,
    InputRow row,
    List<String> parseExceptionsHolder
)
{
  // Expose the current row to the aggregators' column selectors for the
  // duration of this call.
  rowContainer.set(row);

  long totalIncrementalBytes = 0L;
  for (int i = 0; i < aggs.length; i++) {
    final Aggregator agg = aggs[i];
    synchronized (agg) {
      try {
        if (useMaxMemoryEstimates) {
          agg.aggregate();
        } else {
          totalIncrementalBytes += agg.aggregateWithSize();
        }
      }
      catch (ParseException e) {
        // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
        log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
        parseExceptionsHolder.add(e.getMessage());
      }
    }
  }

  rowContainer.set(null);
  return totalIncrementalBytes;
}
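When useMaxMemoryEstimates is false, each aggregator reports through aggregateWithSize() the incremental bytes it retained, and the summed result feeds the index's memory accounting. A hedged sketch of what such a size-reporting aggregator could look like; the SizedAggregator interface and StringSetAggregator class are hypothetical stand-ins, not Druid's API.

import java.util.HashSet;
import java.util.Set;
import java.util.function.Supplier;

public class SizeReportingSketch
{
  // Hypothetical analogue of the aggregateWithSize() contract: do the work,
  // then report how many additional bytes this call caused to be retained.
  interface SizedAggregator
  {
    long aggregateWithSize();
  }

  static class StringSetAggregator implements SizedAggregator
  {
    private final Set<String> seen = new HashSet<>();
    private final Supplier<String> selector;

    StringSetAggregator(Supplier<String> selector)
    {
      this.selector = selector;
    }

    @Override
    public long aggregateWithSize()
    {
      String value = selector.get();
      // Only a newly retained value grows the footprint; the per-entry
      // estimate here (16 bytes overhead + 2 bytes per char) is a rough guess.
      return seen.add(value) ? 16L + 2L * value.length() : 0L;
    }
  }
}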
Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.
The class OnheapIncrementalIndex, method iterableWithPostAggregations.
@Override
public Iterable<Row> iterableWithPostAggregations(
    @Nullable final List<PostAggregator> postAggs,
    final boolean descending
)
{
  final AggregatorFactory[] metrics = getMetricAggs();
  return () -> {
    final List<DimensionDesc> dimensions = getDimensions();
    return Iterators.transform(
        getFacts().iterator(descending),
        incrementalIndexRow -> {
          final int rowOffset = incrementalIndexRow.getRowIndex();
          Object[] theDims = incrementalIndexRow.getDims();

          Map<String, Object> theVals = Maps.newLinkedHashMap();

          // Decode each dimension's encoded key component back into actual values.
          for (int i = 0; i < theDims.length; ++i) {
            Object dim = theDims[i];
            DimensionDesc dimensionDesc = dimensions.get(i);
            if (dimensionDesc == null) {
              continue;
            }
            String dimensionName = dimensionDesc.getName();
            DimensionHandler handler = dimensionDesc.getHandler();
            if (dim == null || handler.getLengthOfEncodedKeyComponent(dim) == 0) {
              theVals.put(dimensionName, null);
              continue;
            }
            final DimensionIndexer indexer = dimensionDesc.getIndexer();
            Object rowVals = indexer.convertUnsortedEncodedKeyComponentToActualList(dim);
            theVals.put(dimensionName, rowVals);
          }

          // Read the finalized metric values for this row.
          Aggregator[] aggs = getAggsForRow(rowOffset);
          for (int i = 0; i < aggs.length; ++i) {
            theVals.put(metrics[i].getName(), aggs[i].get());
          }

          // Post-aggregators compute over the values gathered above.
          if (postAggs != null) {
            for (PostAggregator postAgg : postAggs) {
              theVals.put(postAgg.getName(), postAgg.compute(theVals));
            }
          }

          return new MapBasedRow(incrementalIndexRow.getTimestamp(), theVals);
        }
    );
  };
}
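Post-aggregators run last and compute derived values from the map of dimension and metric values gathered above, which is why postAgg.compute(theVals) receives the whole row map and may read earlier entries. A minimal sketch of that contract; the NamedPostAgg record and the metric names are hypothetical, not Druid's API.

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public class PostAggSketch
{
  // Hypothetical stand-in for a PostAggregator: a named function over the row map.
  record NamedPostAgg(String name, Function<Map<String, Object>, Object> fn) {}

  public static void main(String[] args)
  {
    Map<String, Object> theVals = new LinkedHashMap<>();
    theVals.put("rows", 10L);
    theVals.put("totalBytes", 5000L);

    List<NamedPostAgg> postAggs = List.of(
        new NamedPostAgg(
            "avgBytes",
            vals -> ((Long) vals.get("totalBytes")).doubleValue() / (Long) vals.get("rows")
        )
    );

    // Mirrors the loop above: each post-aggregator's result is added to the row map.
    for (NamedPostAgg postAgg : postAggs) {
      theVals.put(postAgg.name(), postAgg.fn().apply(theVals));
    }

    System.out.println(theVals); // {rows=10, totalBytes=5000, avgBytes=500.0}
  }
}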
Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.
The class DoublesSketchToHistogramPostAggregatorTest, method splitPoints.
@Test
public void splitPoints()
{
  final double[] values = new double[] {1, 2, 3, 4, 5, 6};
  final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);

  final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
  //noinspection ForLoopReplaceableByForEach
  for (int i = 0; i < values.length; i++) {
    agg.aggregate();
    selector.increment();
  }

  final Map<String, Object> fields = new HashMap<>();
  fields.put("sketch", agg.get());

  final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
      "histogram",
      new FieldAccessPostAggregator("field", "sketch"),
      new double[] {3.5}, // splits the distribution into two bins of equal mass
      null
  );

  final double[] histogram = (double[]) postAgg.compute(fields);
  Assert.assertNotNull(histogram);
  Assert.assertEquals(2, histogram.length);
  Assert.assertEquals(3.0, histogram[0], 0);
  Assert.assertEquals(3.0, histogram[1], 0);
}
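The expected counts follow directly from the sketch's PMF: six values with one split point at 3.5 put half the mass in each bin, and mass times the stream length n gives 3.0 per bin. The same numbers can be reproduced against the Apache DataSketches quantiles API, which this post-aggregator wraps; the exact builder calls below are an assumption based on that library, not code from the test.

import org.apache.datasketches.quantiles.DoublesSketch;
import org.apache.datasketches.quantiles.UpdateDoublesSketch;

public class HistogramFromSketch
{
  public static void main(String[] args)
  {
    UpdateDoublesSketch sketch = DoublesSketch.builder().setK(8).build();
    for (double v : new double[] {1, 2, 3, 4, 5, 6}) {
      sketch.update(v);
    }

    // getPMF returns normalized masses per bin; scaling by the stream length
    // n reproduces the counts the histogram post-aggregator reports.
    double[] pmf = sketch.getPMF(new double[] {3.5});
    for (int i = 0; i < pmf.length; i++) {
      System.out.println("bin " + i + ": " + pmf[i] * sketch.getN()); // 3.0 then 3.0
    }
  }
}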