Search in sources :

Example 31 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in the project druid by druid-io.

Class SingleValueStringCardinalityVectorProcessor, method aggregate.

/**
 * Folds one string value per row into the HyperLogLog collector located at that row's
 * position inside {@code buf}.
 *
 * <p>For each {@code i} in {@code [0, numRows)} the row vector is indexed through
 * {@code rows[i]} when {@code rows} is non-null and directly with {@code i} otherwise; the
 * resulting entry is resolved with {@code selector.lookupName}. A {@code null} name is only
 * added when {@code NullHandling.replaceWithDefault()} is true. The collector for row
 * {@code i} starts at {@code positions[i] + positionOffset}.
 *
 * <p>The buffer's position and limit are modified while aggregating and are put back to
 * their entry values before returning, including when an exception is thrown.
 *
 * @param buf            buffer holding the collectors
 * @param numRows        number of entries of {@code positions} (and {@code rows}) to process
 * @param positions      per-row base offsets into {@code buf}
 * @param rows           optional indirection into the row vector, or {@code null}
 * @param positionOffset offset added to every entry of {@code positions}
 */
@Override
public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) {
    // Re-window the caller's buffer per row rather than allocating a view; remember the
    // original window so it can be reinstated afterwards.
    final int savedPosition = buf.position();
    final int savedLimit = buf.limit();
    try {
        final int[] rowVector = selector.getRowVector();
        for (int row = 0; row < numRows; row++) {
            final int vectorIndex;
            if (rows == null) {
                vectorIndex = row;
            } else {
                vectorIndex = rows[row];
            }
            final String value = selector.lookupName(rowVector[vectorIndex]);
            if (!NullHandling.replaceWithDefault() && value == null) {
                // Null values are skipped unless nulls are replaced with defaults.
                continue;
            }
            final int start = positions[row] + positionOffset;
            // Limit is set before position so the new position is never beyond the limit.
            buf.limit(start + HyperLogLogCollector.getLatestNumBytesForDenseStorage());
            buf.position(start);
            final HyperLogLogCollector target = HyperLogLogCollector.makeCollector(buf);
            StringCardinalityAggregatorColumnSelectorStrategy.addStringToCollector(target, value);
        }
    } finally {
        // Same ordering rule on the way out: widen the limit first, then move the position.
        buf.limit(savedLimit);
        buf.position(savedPosition);
    }
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector)

Example 32 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in the project druid by druid-io.

Class HyperUniquesSerde, method getExtractor.

/**
 * Creates the extractor that produces a {@link HyperLogLogCollector} for a metric of an
 * input row.
 *
 * <p>If the row's raw value for the metric already is a collector, that instance is returned
 * unchanged. Otherwise a fresh collector is created and the hash of every value returned by
 * {@code inputRow.getDimension(metricName)} is added to it; a {@code null} value list yields
 * the empty collector.
 *
 * @return an extractor whose extracted class is {@code HyperLogLogCollector}
 */
@Override
public ComplexMetricExtractor<HyperLogLogCollector> getExtractor() {
    return new ComplexMetricExtractor<HyperLogLogCollector>() {

        @Override
        public Class<HyperLogLogCollector> extractedClass() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
            final Object raw = inputRow.getRaw(metricName);
            if (raw instanceof HyperLogLogCollector) {
                // Already aggregated: hand the existing collector back as-is.
                return (HyperLogLogCollector) raw;
            }

            final HyperLogLogCollector result = HyperLogLogCollector.makeLatestCollector();
            final List<String> values = inputRow.getDimension(metricName);
            if (values != null) {
                for (String value : values) {
                    result.add(hyperLogLogHash.hash(value));
                }
            }
            return result;
        }
    };
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) ComplexMetricExtractor(org.apache.druid.segment.serde.ComplexMetricExtractor) InputRow(org.apache.druid.data.input.InputRow)

Example 33 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in the project druid by druid-io.

Class HyperUniquesSerde, method getObjectStrategy.

/**
 * Creates the strategy used to read, write and compare {@link HyperLogLogCollector} values.
 *
 * <ul>
 *   <li>{@code fromByteBuffer} wraps {@code numBytes} bytes, starting at the buffer's current
 *       position, in a collector backed by a read-only view of the buffer.</li>
 *   <li>{@code toBytes} copies the collector's serialized form into a new array; a
 *       {@code null} collector becomes an empty array.</li>
 *   <li>{@code compare} delegates to {@code comparator}.</li>
 * </ul>
 */
@Override
public ObjectStrategy getObjectStrategy() {
    return new ObjectStrategy<HyperLogLogCollector>() {

        @Override
        public Class<HyperLogLogCollector> getClazz() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector fromByteBuffer(ByteBuffer buffer, int numBytes) {
            // HyperLogLogCollector.makeCollector() does not duplicate the buffer it is given and
            // keeps it in a field, so hand it a separate read-only view instead of the caller's
            // buffer.
            final ByteBuffer view = buffer.asReadOnlyBuffer();
            final int end = view.position() + numBytes;
            view.limit(end);
            return HyperLogLogCollector.makeCollector(view);
        }

        @Override
        public byte[] toBytes(HyperLogLogCollector collector) {
            if (collector == null) {
                return new byte[0];
            }
            final ByteBuffer serialized = collector.toByteBuffer();
            final byte[] bytes = new byte[serialized.remaining()];
            // Read through a view so the position of the collector's own buffer is not advanced.
            final ByteBuffer reader = serialized.asReadOnlyBuffer();
            reader.get(bytes);
            return bytes;
        }

        @Override
        public int compare(HyperLogLogCollector o1, HyperLogLogCollector o2) {
            return comparator.compare(o1, o2);
        }
    };
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) ObjectStrategy(org.apache.druid.segment.data.ObjectStrategy) ByteBuffer(java.nio.ByteBuffer)

Example 34 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in the project druid by druid-io.

Class HyperUniqueExpressionsTest, method testLong.

/**
 * Checks {@code hyper_unique_add} with long inputs: a literal, two nested literals, a
 * bound long variable and a null long variable. Every result must have the hyper-unique
 * type, hold a {@link HyperLogLogCollector}, and report the expected cardinality estimate.
 */
@Test
public void testLong() {
    // One literal value -> estimate of 1.
    final Expr literalExpr = Parser.parse("hyper_unique_add(1234, hyper_unique())", MACRO_TABLE);
    final ExprEval literalEval = literalExpr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, literalEval.type());
    Assert.assertTrue(literalEval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) literalEval.value()).estimateCardinality(), 0.01);

    // Two distinct literals added through nested calls -> estimate of 2.
    final Expr nestedExpr = Parser.parse("hyper_unique_add(1234, hyper_unique_add(5678, hyper_unique()))", MACRO_TABLE);
    final ExprEval nestedEval = nestedExpr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, nestedEval.type());
    Assert.assertTrue(nestedEval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(2.0, ((HyperLogLogCollector) nestedEval.value()).estimateCardinality(), 0.01);

    // Value taken from the "long" binding -> estimate of 1.
    final Expr bindingExpr = Parser.parse("hyper_unique_add(long, hyper_unique())", MACRO_TABLE);
    final ExprEval bindingEval = bindingExpr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, bindingEval.type());
    Assert.assertTrue(bindingEval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) bindingEval.value()).estimateCardinality(), 0.01);

    // Value taken from the "nullLong" binding: expected estimate depends on the null-handling mode.
    final Expr nullExpr = Parser.parse("hyper_unique_add(nullLong, hyper_unique())", MACRO_TABLE);
    final ExprEval nullEval = nullExpr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, nullEval.type());
    Assert.assertTrue(nullEval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 1.0 : 0.0, ((HyperLogLogCollector) nullEval.value()).estimateCardinality(), 0.01);
}
Also used : ExprEval(org.apache.druid.math.expr.ExprEval) Expr(org.apache.druid.math.expr.Expr) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 35 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in the project druid by druid-io.

Class FloatCardinalityVectorProcessor, method aggregate.

/**
 * Folds one float value per row into the HyperLogLog collector located at that row's
 * position inside {@code buf}.
 *
 * <p>For each {@code i} in {@code [0, numRows)} the value index is {@code rows[i]} when
 * {@code rows} is non-null and {@code i} otherwise. A value flagged in the selector's null
 * vector is only added when {@code NullHandling.replaceWithDefault()} is true; when the null
 * vector itself is {@code null}, every value is added. The collector for row {@code i}
 * starts at {@code positions[i] + positionOffset}.
 *
 * <p>The buffer's position and limit are modified while aggregating and are put back to
 * their entry values before returning, including when an exception is thrown.
 *
 * @param buf            buffer holding the collectors
 * @param numRows        number of entries of {@code positions} (and {@code rows}) to process
 * @param positions      per-row base offsets into {@code buf}
 * @param rows           optional indirection into the value vector, or {@code null}
 * @param positionOffset offset added to every entry of {@code positions}
 */
@Override
public void aggregate(ByteBuffer buf, int numRows, int[] positions, @Nullable int[] rows, int positionOffset) {
    // Re-window the caller's buffer per row rather than allocating a view; remember the
    // original window so it can be reinstated afterwards.
    final int savedPosition = buf.position();
    final int savedLimit = buf.limit();
    try {
        final float[] values = selector.getFloatVector();
        final boolean[] nulls = selector.getNullVector();
        for (int row = 0; row < numRows; row++) {
            final int valueIndex;
            if (rows == null) {
                valueIndex = row;
            } else {
                valueIndex = rows[row];
            }
            if (!NullHandling.replaceWithDefault() && nulls != null && nulls[valueIndex]) {
                // Null values are skipped unless nulls are replaced with defaults.
                continue;
            }
            final int start = positions[row] + positionOffset;
            // Limit is set before position so the new position is never beyond the limit.
            buf.limit(start + HyperLogLogCollector.getLatestNumBytesForDenseStorage());
            buf.position(start);
            final HyperLogLogCollector target = HyperLogLogCollector.makeCollector(buf);
            FloatCardinalityAggregatorColumnSelectorStrategy.addFloatToCollector(target, values[valueIndex]);
        }
    } finally {
        // Same ordering rule on the way out: widen the limit first, then move the position.
        buf.limit(savedLimit);
        buf.position(savedPosition);
    }
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector)

Aggregations

HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector)41 Test (org.junit.Test)12 Random (java.util.Random)4 InputRow (org.apache.druid.data.input.InputRow)4 ByteBuffer (java.nio.ByteBuffer)3 ArrayList (java.util.ArrayList)3 Comparator (java.util.Comparator)3 HashMap (java.util.HashMap)3 VersionZeroHyperLogLogCollector (org.apache.druid.hll.VersionZeroHyperLogLogCollector)3 Expr (org.apache.druid.math.expr.Expr)3 ExprEval (org.apache.druid.math.expr.ExprEval)3 HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)3 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)3 Interval (org.joda.time.Interval)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 Optional (com.google.common.base.Optional)2 ImmutableList (com.google.common.collect.ImmutableList)2 HashFunction (com.google.common.hash.HashFunction)2 File (java.io.File)2 List (java.util.List)2