
Example 26 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

The class HyperUniqueFinalizingPostAggregatorTest, method testCompute.

@Test
public void testCompute() {
    Random random = new Random(0L);
    HyperUniqueFinalizingPostAggregator postAggregator = new HyperUniqueFinalizingPostAggregator("uniques", "uniques");
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < 100; ++i) {
        // fn is the test class's shared Murmur3 128-bit HashFunction (Hashing.murmur3_128())
        byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();
        collector.add(hashedVal);
    }
    double cardinality = (Double) postAggregator.compute(ImmutableMap.of("uniques", collector));
    // The seeded Random makes the estimate deterministic, so the exact double comparison is intentional
    Assert.assertTrue(cardinality == 99.37233005831612);
}
Also used: Random(java.util.Random) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) Test(org.junit.Test)
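
For context on what compute() does here: HyperUniqueFinalizingPostAggregator essentially looks up the collector stored under its field name and returns its cardinality estimate as a double. A minimal standalone sketch of that finalization step (Murmur3-128 mirrors the test class's fn field; variable names are illustrative):

HashFunction fn = Hashing.murmur3_128();
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
for (long i = 0; i < 100; i++) {
    collector.add(fn.hashLong(i).asBytes());
}
// Finalizing a hyperUnique aggregation yields the collector's estimate as a double
double estimate = collector.estimateCardinality();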

Example 27 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

The class HyperUniquesAggregatorFactoryTest, method testCompare1.

@Test
public void testCompare1() {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    // fn is Hashing.murmur3_128(); seeding collector1 here keeps it one distinct hash ahead throughout the loop
    collector1.add(fn.hashLong(0).asBytes());
    HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
    Comparator comparator = factory.getComparator();
    for (int i = 1; i < 100; i = i + 2) {
        collector1.add(fn.hashLong(i).asBytes());
        collector2.add(fn.hashLong(i + 1).asBytes());
        Assert.assertEquals(1, comparator.compare(collector1, collector2));
        Assert.assertEquals(1, Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()));
    }
}
Also used: HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) VersionZeroHyperLogLogCollector(org.apache.druid.hll.VersionZeroHyperLogLogCollector) Comparator(java.util.Comparator) Test(org.junit.Test)
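
The ordering asserted above comes from comparing estimated cardinalities; since collector1 starts one hash ahead, it stays ahead on every iteration. A small sketch of that invariant, using the raw Comparator the same way the test does (names are illustrative):

HashFunction fn = Hashing.murmur3_128();
HyperLogLogCollector bigger = HyperLogLogCollector.makeLatestCollector();
HyperLogLogCollector smaller = HyperLogLogCollector.makeLatestCollector();
bigger.add(fn.hashLong(1L).asBytes());
bigger.add(fn.hashLong(2L).asBytes());
smaller.add(fn.hashLong(1L).asBytes());
Comparator comparator = new HyperUniquesAggregatorFactory("foo", "bar").getComparator();
// More distinct hashes means a larger estimate, so bigger compares greater
Assert.assertEquals(1, comparator.compare(bigger, smaller));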

Example 28 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

The class TopNQueryQueryToolChestTest, method doTestCacheStrategyOrderByPost.

private void doTestCacheStrategyOrderByPost(final ColumnType valueType, final Object dimValue) throws IOException {
    CacheStrategy<Result<TopNResultValue>, Object, TopNQuery> strategy = new TopNQueryQueryToolChest(null, null).getCacheStrategy(
        new TopNQuery(
            new TableDataSource("dummy"),
            VirtualColumns.EMPTY,
            new DefaultDimensionSpec("test", "test", valueType),
            new NumericTopNMetricSpec("post"),
            3,
            new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2015-01-01/2015-01-02"))),
            null,
            Granularities.ALL,
            ImmutableList.of(
                new HyperUniquesAggregatorFactory("metric1", "test", false, false),
                new CountAggregatorFactory("metric2")
            ),
            ImmutableList.of(
                new ArithmeticPostAggregator(
                    "post",
                    "+",
                    ImmutableList.of(
                        new FinalizingFieldAccessPostAggregator("metric1", "metric1"),
                        new FieldAccessPostAggregator("metric2", "metric2")
                    )
                )
            ),
            null
        )
    );
    HyperLogLogCollector collector = getIntermediateHllCollector(valueType.getType(), dimValue);
    // Test timestamps that result in integer-size millis.
    // The segment-level cache stores the intermediate collector under "metric1".
    final Result<TopNResultValue> result1 = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector, "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedValue = strategy.prepareForSegmentLevelCache().apply(result1);
    ObjectMapper objectMapper = TestHelper.makeJsonMapper();
    Object fromCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromCacheResult = strategy.pullFromSegmentLevelCache().apply(fromCacheValue);
    Assert.assertEquals(result1, fromCacheResult);
    // The result-level cache stores the finalized estimate (a double) under "metric1" instead of the collector.
    final Result<TopNResultValue> resultLevelCacheResult = new Result<>(
        DateTimes.utc(123L),
        new TopNResultValue(Collections.singletonList(
            ImmutableMap.of("test", dimValue, "metric1", collector.estimateCardinality(), "metric2", 2, "post", collector.estimateCardinality() + 2)
        ))
    );
    Object preparedResultCacheValue = strategy.prepareForCache(true).apply(resultLevelCacheResult);
    Object fromResultCacheValue = objectMapper.readValue(objectMapper.writeValueAsBytes(preparedResultCacheValue), strategy.getCacheObjectClazz());
    Result<TopNResultValue> fromResultCacheResult = strategy.pullFromCache(true).apply(fromResultCacheValue);
    Assert.assertEquals(resultLevelCacheResult, fromResultCacheResult);
}
Also used: ArithmeticPostAggregator(org.apache.druid.query.aggregation.post.ArithmeticPostAggregator) FinalizingFieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FinalizingFieldAccessPostAggregator) FieldAccessPostAggregator(org.apache.druid.query.aggregation.post.FieldAccessPostAggregator) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Result(org.apache.druid.query.Result) TableDataSource(org.apache.druid.query.TableDataSource) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
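
The contrast between the two cached results above is the point of the test: the segment-level cache holds the intermediate collector, while the result-level cache holds the finalized estimate. A hedged sketch of where that finalized value comes from, reusing the same factory arguments as the query above:

HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
collector.add(Hashing.murmur3_128().hashLong(5L).asBytes());
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("metric1", "test", false, false);
// With round=false this is the raw double estimate, matching "metric1" in resultLevelCacheResult
Object finalized = factory.finalizeComputation(collector);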

Example 29 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

The class DoubleCardinalityVectorProcessor, method aggregate.

@Override
public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) {
    // Save position, limit and restore later instead of allocating a new ByteBuffer object
    final int oldPosition = buf.position();
    final int oldLimit = buf.limit();
    try {
        final double[] vector = selector.getDoubleVector();
        final boolean[] nullVector = selector.getNullVector();
        for (int i = 0; i < numRows; i++) {
            final int idx = rows != null ? rows[i] : i;
            // Default-value mode aggregates nulls as defaults; SQL-compatible mode skips true null rows
            if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[idx]) {
                final int position = positions[i] + positionOffset;
                // Narrow the buffer to this row's collector slot so makeCollector wraps it without copying
                buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage());
                buf.position(position);
                final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf);
                DoubleCardinalityAggregatorColumnSelectorStrategy.addDoubleToCollector(collector, vector[idx]);
            }
        }
    } finally {
        buf.limit(oldLimit);
        buf.position(oldPosition);
    }
}
Also used: HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector)
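
The limit-then-position dance above narrows the shared buffer so that makeCollector wraps exactly one aggregation slot with no copying. A standalone sketch of the pattern; the slot offset and the hand-rolled initialization are illustrative stand-ins (in the real aggregator, init() writes an empty collector into the slot first):

ByteBuffer buf = ByteBuffer.allocate(1 << 16);
int position = 1024;  // hypothetical slot offset
byte[] empty = HyperLogLogCollector.makeLatestCollector().toByteArray();
for (int i = 0; i < empty.length; i++) {
    buf.put(position + i, empty[i]);  // stand-in for the aggregator's init()
}
int oldPosition = buf.position();
int oldLimit = buf.limit();
try {
    buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage());
    buf.position(position);
    // Wraps the [position, limit) window; writes go straight into buf
    HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf);
    collector.add(Hashing.murmur3_128().hashLong(7L).asBytes());
} finally {
    // Restore so the caller sees position and limit unchanged
    buf.limit(oldLimit);
    buf.position(oldPosition);
}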

Example 30 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

The class LongCardinalityVectorProcessor, method aggregate.

@Override
public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) {
    // Save position, limit and restore later instead of allocating a new ByteBuffer object
    final int oldPosition = buf.position();
    final int oldLimit = buf.limit();
    try {
        final long[] vector = selector.getLongVector();
        final boolean[] nullVector = selector.getNullVector();
        for (int i = 0; i < numRows; i++) {
            final int idx = rows != null ? rows[i] : i;
            if (NullHandling.replaceWithDefault() || nullVector == null || !nullVector[idx]) {
                final int position = positions[i] + positionOffset;
                buf.limit(position + HyperLogLogCollector.getLatestNumBytesForDenseStorage());
                buf.position(position);
                final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(buf);
                LongCardinalityAggregatorColumnSelectorStrategy.addLongToCollector(collector, vector[idx]);
            }
        }
    } finally {
        buf.limit(oldLimit);
        buf.position(oldPosition);
    }
}
Also used: HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector)
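
Each slot filled this way holds an independent sketch that a later merge phase combines. A minimal in-heap sketch of that combine-and-estimate step using fold (buffer-backed collectors merge the same way; names are illustrative):

HashFunction fn = Hashing.murmur3_128();
HyperLogLogCollector left = HyperLogLogCollector.makeLatestCollector();
HyperLogLogCollector right = HyperLogLogCollector.makeLatestCollector();
left.add(fn.hashLong(1L).asBytes());
right.add(fn.hashLong(2L).asBytes());
// fold merges right into left, giving the sketch of the union
left.fold(right);
double estimate = left.estimateCardinality();  // close to 2.0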

Aggregations

Types used across these examples, with usage counts:

HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 41
Test (org.junit.Test): 12
Random (java.util.Random): 4
InputRow (org.apache.druid.data.input.InputRow): 4
ByteBuffer (java.nio.ByteBuffer): 3
ArrayList (java.util.ArrayList): 3
Comparator (java.util.Comparator): 3
HashMap (java.util.HashMap): 3
VersionZeroHyperLogLogCollector (org.apache.druid.hll.VersionZeroHyperLogLogCollector): 3
Expr (org.apache.druid.math.expr.Expr): 3
ExprEval (org.apache.druid.math.expr.ExprEval): 3
HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 3
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 3
Interval (org.joda.time.Interval): 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2
Optional (com.google.common.base.Optional): 2
ImmutableList (com.google.common.collect.ImmutableList): 2
HashFunction (com.google.common.hash.HashFunction): 2
File (java.io.File): 2
List (java.util.List): 2