Example 51 with Aggregator

Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.

From class StringTopNColumnAggregatesProcessor, method scanAndAggregateWithCardinalityKnown:

private long scanAndAggregateWithCardinalityKnown(TopNQuery query, Cursor cursor, DimensionSelector selector, Aggregator[][] rowSelector) {
    long processedRows = 0;
    while (!cursor.isDone()) {
        final IndexedInts dimValues = selector.getRow();
        for (int i = 0, size = dimValues.size(); i < size; ++i) {
            final int dimIndex = dimValues.get(i);
            // rowSelector is a fast path indexed by dictionary ID; fall back to the
            // keyed aggregatesStore only the first time a given ID is seen.
            Aggregator[] aggs = rowSelector[dimIndex];
            if (aggs == null) {
                final Comparable<?> key = dimensionValueConverter.apply(selector.lookupName(dimIndex));
                aggs = aggregatesStore.computeIfAbsent(key, k -> BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()));
                rowSelector[dimIndex] = aggs;
            }
            // Accumulate the current cursor row into every aggregator for this dimension value.
            for (Aggregator aggregator : aggs) {
                aggregator.aggregate();
            }
        }
        cursor.advance();
        processedRows++;
    }
    return processedRows;
}
Also used: DimensionHandlerUtils (org.apache.druid.segment.DimensionHandlerUtils), HashMap (java.util.HashMap), Aggregator (org.apache.druid.query.aggregation.Aggregator), IndexedInts (org.apache.druid.segment.data.IndexedInts), StorageAdapter (org.apache.druid.segment.StorageAdapter), Function (java.util.function.Function), TopNQuery (org.apache.druid.query.topn.TopNQuery), DimensionDictionarySelector (org.apache.druid.segment.DimensionDictionarySelector), Cursor (org.apache.druid.segment.Cursor), BaseTopNAlgorithm (org.apache.druid.query.topn.BaseTopNAlgorithm), Map (java.util.Map), ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), ColumnType (org.apache.druid.segment.column.ColumnType), DimensionSelector (org.apache.druid.segment.DimensionSelector), TopNResultBuilder (org.apache.druid.query.topn.TopNResultBuilder), TopNParams (org.apache.druid.query.topn.TopNParams)
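
The method's speed comes from using the dictionary ID as a direct array index into rowSelector, falling back to the keyed aggregatesStore only the first time an ID is seen. Below is a minimal, self-contained sketch of that caching technique; IdKeyedCache and all of its names are hypothetical stand-ins for illustration, not Druid API.

import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

// Hypothetical sketch: an array fast path in front of a keyed map, indexed by a dense dictionary ID.
class IdKeyedCache<V> {
    private final V[] fastPath;                  // index = dictionary ID; null = not cached yet
    private final Map<String, V> store = new HashMap<>();

    @SuppressWarnings("unchecked")
    IdKeyedCache(int cardinality) {
        this.fastPath = (V[]) new Object[cardinality];
    }

    V get(int id, String key, Function<String, V> factory) {
        V value = fastPath[id];
        if (value == null) {
            // Miss: create (at most once per distinct key) in the keyed store, then cache by ID.
            value = store.computeIfAbsent(key, factory);
            fastPath[id] = value;
        }
        return value;
    }

    public static void main(String[] args) {
        IdKeyedCache<long[]> cache = new IdKeyedCache<>(4);
        cache.get(2, "page", k -> new long[1])[0]++;                   // first access creates the value
        System.out.println(cache.get(2, "page", k -> new long[1])[0]); // 1: served from the fast path
    }
}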

Example 52 with Aggregator

Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.

From class OnheapIncrementalIndex, method addToFacts:

@Override
protected AddToFactsResult addToFacts(InputRow row, IncrementalIndexRow key, ThreadLocal<InputRow> rowContainer, Supplier<InputRow> rowSupplier, boolean skipMaxRowsInMemoryCheck) throws IndexSizeExceededException {
    final List<String> parseExceptionMessages = new ArrayList<>();
    final int priorIndex = facts.getPriorIndex(key);
    Aggregator[] aggs;
    final AggregatorFactory[] metrics = getMetrics();
    final AtomicInteger numEntries = getNumEntries();
    final AtomicLong totalSizeInBytes = getBytesInMemory();
    if (IncrementalIndexRow.EMPTY_ROW_INDEX != priorIndex) {
        aggs = concurrentGet(priorIndex);
        long aggSizeDelta = doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);
        totalSizeInBytes.addAndGet(useMaxMemoryEstimates ? 0 : aggSizeDelta);
    } else {
        aggs = new Aggregator[metrics.length];
        long aggSizeForRow = factorizeAggs(metrics, aggs, rowContainer, row);
        aggSizeForRow += doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);
        final int rowIndex = indexIncrement.getAndIncrement();
        concurrentSet(rowIndex, aggs);
        // Last ditch sanity checks
        if ((numEntries.get() >= maxRowCount || totalSizeInBytes.get() >= maxBytesInMemory) && facts.getPriorIndex(key) == IncrementalIndexRow.EMPTY_ROW_INDEX && !skipMaxRowsInMemoryCheck) {
            throw new IndexSizeExceededException("Maximum number of rows [%d] or max size in bytes [%d] reached", maxRowCount, maxBytesInMemory);
        }
        final int prev = facts.putIfAbsent(key, rowIndex);
        if (IncrementalIndexRow.EMPTY_ROW_INDEX == prev) {
            numEntries.incrementAndGet();
        } else {
            // This would happen in a race condition where there are multiple write threads
            // which could be possible in case of GroupBy v1 strategy
            parseExceptionMessages.clear();
            aggs = concurrentGet(prev);
            aggSizeForRow = doAggregate(metrics, aggs, rowContainer, row, parseExceptionMessages);
            // Free up the misfire
            concurrentRemove(rowIndex);
        // This is expected to occur ~80% of the time in the worst scenarios
        }
        // For a new key, row size = key size + aggregator size + overhead
        final long estimatedSizeOfAggregators = useMaxMemoryEstimates ? maxBytesPerRowForAggregators : aggSizeForRow;
        final long rowSize = key.estimateBytesInMemory() + estimatedSizeOfAggregators + ROUGH_OVERHEAD_PER_MAP_ENTRY;
        totalSizeInBytes.addAndGet(rowSize);
    }
    return new AddToFactsResult(numEntries.get(), totalSizeInBytes.get(), parseExceptionMessages);
}
Also used: ArrayList (java.util.ArrayList), PostAggregator (org.apache.druid.query.aggregation.PostAggregator), Aggregator (org.apache.druid.query.aggregation.Aggregator), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), AtomicLong (java.util.concurrent.atomic.AtomicLong), AtomicInteger (java.util.concurrent.atomic.AtomicInteger)
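
The else branch resolves a write race: the thread speculatively allocates a row index, and if another thread wins facts.putIfAbsent it discards the misfire and aggregates into the winner's row instead. A hedged sketch of that pattern follows; RowTable and rowFor are illustrative names under assumed semantics, not Druid code.

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical sketch: speculative insert resolved by putIfAbsent, mirroring addToFacts.
class RowTable {
    private final ConcurrentHashMap<String, Integer> facts = new ConcurrentHashMap<>();
    private final ConcurrentHashMap<Integer, long[]> rows = new ConcurrentHashMap<>();
    private final AtomicInteger indexIncrement = new AtomicInteger();

    long[] rowFor(String key) {
        Integer prior = facts.get(key);
        if (prior != null) {
            return rows.get(prior);          // fast path: key already indexed
        }
        int rowIndex = indexIncrement.getAndIncrement();
        rows.put(rowIndex, new long[8]);     // speculative allocation for the new key
        Integer prev = facts.putIfAbsent(key, rowIndex);
        if (prev == null) {
            return rows.get(rowIndex);       // we won the race; keep our row
        }
        rows.remove(rowIndex);               // free up the misfire, like concurrentRemove
        return rows.get(prev);               // aggregate into the winner's row
    }

    public static void main(String[] args) {
        RowTable table = new RowTable();
        table.rowFor("key")[0] += 5;                 // first call allocates
        System.out.println(table.rowFor("key")[0]);  // 5: second call reuses the same row
    }
}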

Example 53 with Aggregator

Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.

From class OnheapIncrementalIndex, method doAggregate:

/**
 * Performs aggregation for all of the aggregators.
 *
 * @return Total incremental memory in bytes required by this step of the
 * aggregation. The returned value is non-zero only if
 * {@link #useMaxMemoryEstimates} is false.
 */
private long doAggregate(AggregatorFactory[] metrics, Aggregator[] aggs, ThreadLocal<InputRow> rowContainer, InputRow row, List<String> parseExceptionsHolder) {
    rowContainer.set(row);
    long totalIncrementalBytes = 0L;
    for (int i = 0; i < aggs.length; i++) {
        final Aggregator agg = aggs[i];
        synchronized (agg) {
            try {
                if (useMaxMemoryEstimates) {
                    agg.aggregate();
                } else {
                    totalIncrementalBytes += agg.aggregateWithSize();
                }
            } catch (ParseException e) {
                // "aggregate" can throw ParseExceptions if a selector expects something but gets something else.
                log.debug(e, "Encountered parse error, skipping aggregator[%s].", metrics[i].getName());
                parseExceptionsHolder.add(e.getMessage());
            }
        }
    }
    rowContainer.set(null);
    return totalIncrementalBytes;
}
Also used: PostAggregator (org.apache.druid.query.aggregation.PostAggregator), Aggregator (org.apache.druid.query.aggregation.Aggregator), ParseException (org.apache.druid.java.util.common.parsers.ParseException)
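
The loop switches between aggregate() and aggregateWithSize() depending on whether memory was estimated up front, and it records per-aggregator parse failures instead of letting one bad column abort the whole row. A minimal sketch of that accounting contract, assuming a hypothetical SizedAgg interface rather than Druid's Aggregator:

import java.util.ArrayList;
import java.util.List;

// Hypothetical minimal aggregator: aggregate() mutates state; aggregateWithSize() also reports bytes added.
interface SizedAgg {
    void aggregate();
    long aggregateWithSize();
}

class AggregationLoop {
    // Returns incremental bytes; non-zero only when sizes are tracked, mirroring doAggregate's contract.
    static long run(SizedAgg[] aggs, boolean useMaxMemoryEstimates, List<String> parseErrors) {
        long totalIncrementalBytes = 0L;
        for (SizedAgg agg : aggs) {
            try {
                if (useMaxMemoryEstimates) {
                    agg.aggregate();                          // size was pre-estimated; no per-call delta
                } else {
                    totalIncrementalBytes += agg.aggregateWithSize();
                }
            } catch (RuntimeException e) {
                // A selector mismatch would surface here; record it and keep the other aggregators going.
                parseErrors.add(e.getMessage());
            }
        }
        return totalIncrementalBytes;
    }

    public static void main(String[] args) {
        long[] state = new long[1];
        SizedAgg agg = new SizedAgg() {
            public void aggregate() { state[0]++; }
            public long aggregateWithSize() { state[0]++; return Long.BYTES; }
        };
        List<String> errors = new ArrayList<>();
        System.out.println(run(new SizedAgg[] { agg }, false, errors)); // 8 bytes reported
    }
}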

Example 54 with Aggregator

Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.

From class OnheapIncrementalIndex, method iterableWithPostAggregations:

@Override
public Iterable<Row> iterableWithPostAggregations(@Nullable final List<PostAggregator> postAggs, final boolean descending) {
    final AggregatorFactory[] metrics = getMetricAggs();
    return () -> {
        final List<DimensionDesc> dimensions = getDimensions();
        return Iterators.transform(getFacts().iterator(descending), incrementalIndexRow -> {
            final int rowOffset = incrementalIndexRow.getRowIndex();
            Object[] theDims = incrementalIndexRow.getDims();
            Map<String, Object> theVals = Maps.newLinkedHashMap();
            for (int i = 0; i < theDims.length; ++i) {
                Object dim = theDims[i];
                DimensionDesc dimensionDesc = dimensions.get(i);
                if (dimensionDesc == null) {
                    continue;
                }
                String dimensionName = dimensionDesc.getName();
                DimensionHandler handler = dimensionDesc.getHandler();
                if (dim == null || handler.getLengthOfEncodedKeyComponent(dim) == 0) {
                    theVals.put(dimensionName, null);
                    continue;
                }
                final DimensionIndexer indexer = dimensionDesc.getIndexer();
                Object rowVals = indexer.convertUnsortedEncodedKeyComponentToActualList(dim);
                theVals.put(dimensionName, rowVals);
            }
            Aggregator[] aggs = getAggsForRow(rowOffset);
            for (int i = 0; i < aggs.length; ++i) {
                theVals.put(metrics[i].getName(), aggs[i].get());
            }
            if (postAggs != null) {
                for (PostAggregator postAgg : postAggs) {
                    theVals.put(postAgg.getName(), postAgg.compute(theVals));
                }
            }
            return new MapBasedRow(incrementalIndexRow.getTimestamp(), theVals);
        });
    };
}
Also used: DimensionHandler (org.apache.druid.segment.DimensionHandler), PostAggregator (org.apache.druid.query.aggregation.PostAggregator), Aggregator (org.apache.druid.query.aggregation.Aggregator), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), MapBasedRow (org.apache.druid.data.input.MapBasedRow), DimensionIndexer (org.apache.druid.segment.DimensionIndexer)
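
Post-aggregators run only after every raw metric for the row is in place, so each one can read both the metrics and any post-aggregated value inserted before it. A small hedged sketch of that ordering in plain Java; PostAggDemo, the row contents, and the ctr function are illustrative only:

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Function;

class PostAggDemo {
    public static void main(String[] args) {
        // Row values as the aggregators would have produced them for one row.
        Map<String, Object> row = new LinkedHashMap<>();
        row.put("clicks", 30L);
        row.put("impressions", 1000L);

        // A post-aggregator is just a named function over the row map; because it runs
        // last, it sees every metric (and every earlier post-aggregation) already computed.
        Map<String, Function<Map<String, Object>, Object>> postAggs = new LinkedHashMap<>();
        postAggs.put("ctr", r -> ((Long) r.get("clicks")).doubleValue() / (Long) r.get("impressions"));

        postAggs.forEach((name, fn) -> row.put(name, fn.apply(row)));
        System.out.println(row); // {clicks=30, impressions=1000, ctr=0.03}
    }
}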

Example 55 with Aggregator

Use of org.apache.druid.query.aggregation.Aggregator in project druid by druid-io.

From class DoublesSketchToHistogramPostAggregatorTest, method splitPoints:

@Test
public void splitPoints() {
    final double[] values = new double[] { 1, 2, 3, 4, 5, 6 };
    final TestDoubleColumnSelectorImpl selector = new TestDoubleColumnSelectorImpl(values);
    final Aggregator agg = new DoublesSketchBuildAggregator(selector, 8);
    // noinspection ForLoopReplaceableByForEach
    for (int i = 0; i < values.length; i++) {
        agg.aggregate();
        selector.increment();
    }
    final Map<String, Object> fields = new HashMap<>();
    fields.put("sketch", agg.get());
    final PostAggregator postAgg = new DoublesSketchToHistogramPostAggregator(
        "histogram",
        new FieldAccessPostAggregator("field", "sketch"),
        // splits the distribution into two bins of equal mass
        new double[] { 3.5 },
        null
    );
    final double[] histogram = (double[]) postAgg.compute(fields);
    Assert.assertNotNull(histogram);
    Assert.assertEquals(2, histogram.length);
    Assert.assertEquals(3.0, histogram[0], 0);
    Assert.assertEquals(3.0, histogram[1], 0);
}
Also used: FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator), PostAggregator (org.apache.druid.query.aggregation.PostAggregator), HashMap (java.util.HashMap), Aggregator (org.apache.druid.query.aggregation.Aggregator), TestDoubleColumnSelectorImpl (org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl), Test (org.junit.Test)
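
The test follows the usual selector/aggregator handshake: aggregate the selector's current row, then advance the selector. A generic sketch of that loop with hypothetical stand-ins (DoubleSelector, SumAgg) rather than Druid's selector and sketch classes:

// Hypothetical stand-ins mirroring the test's selector/aggregator handshake.
class DoubleSelector {
    private final double[] values;
    private int cursor;
    DoubleSelector(double[] values) { this.values = values; }
    double getDouble() { return values[cursor]; }   // value at the current row
    void increment() { cursor++; }                  // advance to the next row
}

class SumAgg {
    private final DoubleSelector selector;
    private double sum;
    SumAgg(DoubleSelector selector) { this.selector = selector; }
    void aggregate() { sum += selector.getDouble(); } // folds in the selector's current row
    double get() { return sum; }

    public static void main(String[] args) {
        DoubleSelector selector = new DoubleSelector(new double[] { 1, 2, 3, 4, 5, 6 });
        SumAgg agg = new SumAgg(selector);
        for (int i = 0; i < 6; i++) {
            agg.aggregate();      // aggregate the current row...
            selector.increment(); // ...then advance, exactly as the test does
        }
        System.out.println(agg.get()); // 21.0
    }
}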

Aggregations

Aggregator (org.apache.druid.query.aggregation.Aggregator): 63 usages
Test (org.junit.Test): 50 usages
BufferAggregator (org.apache.druid.query.aggregation.BufferAggregator): 35 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 30 usages
Pair (org.apache.druid.java.util.common.Pair): 24 usages
SerializablePair (org.apache.druid.collections.SerializablePair): 18 usages
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 18 usages
HashMap (java.util.HashMap): 12 usages
FieldAccessPostAggregator (org.apache.druid.query.aggregation.post.FieldAccessPostAggregator): 12 usages
TestDoubleColumnSelectorImpl (org.apache.druid.query.aggregation.TestDoubleColumnSelectorImpl): 9 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 7 usages
SerializablePairLongString (org.apache.druid.query.aggregation.SerializablePairLongString): 6 usages
TestObjectColumnSelector (org.apache.druid.query.aggregation.TestObjectColumnSelector): 4 usages
ArrayList (java.util.ArrayList): 3 usages
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 3 usages
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 3 usages
Cursor (org.apache.druid.segment.Cursor): 3 usages
InputRow (org.apache.druid.data.input.InputRow): 2 usages
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 2 usages
ParseException (org.apache.druid.java.util.common.parsers.ParseException): 2 usages