Search in sources:

Example 1 with HyperLogLogCollector

Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:

@Test
public void testSanity() throws IOException {
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Longs.BYTES, 2500 * Longs.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            File tmpFile = FileUtils.getTempDirectory();
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
            try (IOPeon peon = new TmpFileIOPeon();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(peon, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
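                    // fn is the test class's murmur3_128 HashFunction field (not shown in this snippet).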
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    serializer.serialize(collector);
                }
                serializer.close();
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeToChannel(channel, v9Smoosher);
                }
            }
            SmooshedFileMapper mapper = Smoosh.map(tmpFile);
            final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
            serde.deserializeColumn(mapper.mapFile("test"), builder);
            Column column = builder.build();
            ComplexColumn complexColumn = column.getComplexColumn();
            HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
            for (int i = 0; i < aCase; i++) {
                collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
            }
            Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
        }
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) Column(io.druid.segment.column.Column) ComplexColumn(io.druid.segment.column.ComplexColumn) FileSmoosher(io.druid.java.util.common.io.smoosh.FileSmoosher) ColumnBuilder(io.druid.segment.column.ColumnBuilder) File(java.io.File) SmooshedFileMapper(io.druid.java.util.common.io.smoosh.SmooshedFileMapper) Test(org.junit.Test)
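The smoosh plumbing aside, the invariant this test checks is that folding many single-value collectors reproduces the estimate of one collector fed the same hashes. A minimal standalone sketch of that fold invariant, assuming only Guava and the druid-hll module (the class name and counts here are illustrative):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;

public class FoldInvariantSketch {
    public static void main(String[] args) {
        HashFunction fn = Hashing.murmur3_128();
        HyperLogLogCollector base = HyperLogLogCollector.makeLatestCollector();
        HyperLogLogCollector folded = HyperLogLogCollector.makeLatestCollector();
        for (int i = 0; i < 1000; i++) {
            byte[] hash = fn.hashLong(i).asBytes();
            // Feed the same hash to a direct collector...
            base.add(hash);
            // ...and to a fresh single-value collector that is folded in,
            // mirroring what the serializer loop above does per row.
            HyperLogLogCollector single = HyperLogLogCollector.makeLatestCollector();
            single.add(hash);
            folded.fold(single);
        }
        // The two estimates should agree exactly, as the test asserts with delta 0.0.
        System.out.println(base.estimateCardinality() == folded.estimateCardinality());
    }
}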

Example 2 with HyperLogLogCollector

Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class CachingClusteredClientTest, method testGroupByCaching:

@Test
public void testGroupByCaching() throws Exception {
    List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder().addAll(AGGS).add(new HyperUniquesAggregatorFactory("uniques", "uniques")).build();
    final HashFunction hashFn = Hashing.murmur3_128();
    GroupByQuery.Builder builder = new GroupByQuery.Builder()
        .setDataSource(DATA_SOURCE)
        .setQuerySegmentSpec(SEG_SPEC)
        .setDimFilter(DIM_FILTER)
        .setGranularity(GRANULARITY)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
        .setAggregatorSpecs(aggsWithUniques)
        .setPostAggregatorSpecs(POST_AGGS)
        .setContext(CONTEXT);
    final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
    collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
    testQueryCaching(
        client,
        builder.build(),
        new Interval("2011-01-01/2011-01-02"),
        makeGroupByResults(
            new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)),
        new Interval("2011-01-02/2011-01-03"),
        makeGroupByResults(
            new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)),
        new Interval("2011-01-05/2011-01-10"),
        makeGroupByResults(
            new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
        new Interval("2011-01-05/2011-01-10"),
        makeGroupByResults(
            new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)));
    QueryRunner runner = new FinalizeResultsQueryRunner(client, GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
    HashMap<String, Object> context = new HashMap<String, Object>();
    TestHelper.assertExpectedObjects(
        makeGroupByResults(
            new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
            new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
            new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
            new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
            new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
            new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),
        runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
        "");
}
Also used: DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) HashMap(java.util.HashMap) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) DateTime(org.joda.time.DateTime) QueryRunner(io.druid.query.QueryRunner) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) GroupByQuery(io.druid.query.groupby.GroupByQuery) HashFunction(com.google.common.hash.HashFunction) Interval(org.joda.time.Interval) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
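The expected "uniques" values above are just collectors fed UTF-8 murmur3 hashes of the raw strings, the same convention the test applies with hashFn. A minimal sketch of that hashing convention, assuming only Guava and the druid-hll module (collectorOf is a hypothetical helper):

import com.google.common.base.Charsets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;

public class StringUniquesSketch {
    // Hash each string with murmur3_128 before adding it to the collector.
    static HyperLogLogCollector collectorOf(String... values) {
        HashFunction hashFn = Hashing.murmur3_128();
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        for (String value : values) {
            collector.add(hashFn.hashString(value, Charsets.UTF_8).asBytes());
        }
        return collector;
    }

    public static void main(String[] args) {
        // Two distinct inputs, as in the test's "abc123" / "123abc" pair.
        System.out.println(collectorOf("abc123", "123abc").estimateCardinality());
    }
}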

Example 3 with HyperLogLogCollector

Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniquesSerde, method getExtractor:

@Override
public ComplexMetricExtractor getExtractor() {
    return new ComplexMetricExtractor() {

        @Override
        public Class<HyperLogLogCollector> extractedClass() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
            Object rawValue = inputRow.getRaw(metricName);
            if (rawValue instanceof HyperLogLogCollector) {
                return (HyperLogLogCollector) rawValue;
            } else {
                HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                List<String> dimValues = inputRow.getDimension(metricName);
                if (dimValues == null) {
                    return collector;
                }
                for (String dimensionValue : dimValues) {
                    collector.add(hyperLogLogHash.hash(dimensionValue));
                }
                return collector;
            }
        }
    };
}
Also used: HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) ComplexMetricExtractor(io.druid.segment.serde.ComplexMetricExtractor) InputRow(io.druid.data.input.InputRow)
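The extractor has two paths: pass a pre-built HyperLogLogCollector through unchanged, or hash raw dimension strings into a fresh one. A sketch exercising the second path, under the assumption that MapBasedInputRow and HyperLogLogHash.getDefault() are available as in this era of the codebase:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.druid.data.input.InputRow;
import io.druid.data.input.MapBasedInputRow;
import io.druid.hll.HyperLogLogCollector;
import io.druid.hll.HyperLogLogHash;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;

public class ExtractorSketch {
    public static void main(String[] args) {
        // A row whose "user" column holds raw strings rather than a collector,
        // so extractValue takes the hashing branch.
        InputRow row = new MapBasedInputRow(
            0L,
            ImmutableList.of("user"),
            ImmutableMap.<String, Object>of("user", ImmutableList.of("alice", "bob"))
        );
        HyperUniquesSerde serde = new HyperUniquesSerde(HyperLogLogHash.getDefault());
        HyperLogLogCollector collector =
            (HyperLogLogCollector) serde.getExtractor().extractValue(row, "user");
        System.out.println(collector.estimateCardinality()); // close to 2.0
    }
}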

Example 4 with HyperLogLogCollector

Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniqueFinalizingPostAggregatorTest, method testCompute:

@Test
public void testCompute() throws Exception {
    Random random = new Random(0L);
    HyperUniqueFinalizingPostAggregator postAggregator = new HyperUniqueFinalizingPostAggregator("uniques", "uniques");
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < 100; ++i) {
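        // fn is the test class's shared murmur3_128 HashFunction field (not shown in this snippet).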
        byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();
        collector.add(hashedVal);
    }
    double cardinality = (Double) postAggregator.compute(ImmutableMap.<String, Object>of("uniques", collector));
    Assert.assertEquals(99.37233005831612, cardinality, 0.0);
}
Also used: Random(java.util.Random) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) Test(org.junit.Test)
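The pinned value 99.37233005831612 is reproducible only because the Random is seeded and murmur3_128 is fixed. A less brittle variant checks the estimate against the HLL error bound instead; a sketch assuming Druid's collector uses 2^11 buckets (standard error about 1.04/sqrt(2048), roughly 2.3%):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;
import java.util.Random;

public class ToleranceSketch {
    public static void main(String[] args) {
        HashFunction fn = Hashing.murmur3_128();
        Random random = new Random(0L);
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        for (int i = 0; i < 100; ++i) {
            collector.add(fn.hashLong(random.nextLong()).asBytes());
        }
        // Assumed bucket count: 2^11 = 2048, giving stdError of about 0.023.
        double stdError = 1.04 / Math.sqrt(2048);
        double estimate = collector.estimateCardinality();
        // Accept the estimate if it is within three standard errors of the true count (100).
        System.out.println(Math.abs(estimate - 100.0) / 100.0 <= 3 * stdError);
    }
}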

Example 5 with HyperLogLogCollector

Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniquesAggregatorFactoryTest, method testCompare2:

@Test
public void testCompare2() throws Exception {
    Random rand = new Random(0);
    HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
    Comparator comparator = factory.getComparator();
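    // fn is the test class's shared murmur3_128 HashFunction field (not shown in this snippet).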
    for (int i = 1; i < 1000; ++i) {
        HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
        int j = rand.nextInt(50);
        for (int l = 0; l < j; ++l) {
            collector1.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
        int k = j + 1 + rand.nextInt(5);
        for (int l = 0; l < k; ++l) {
            collector2.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
    }
    for (int i = 1; i < 100; ++i) {
        HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
        int j = rand.nextInt(500);
        for (int l = 0; l < j; ++l) {
            collector1.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
        int k = j + 2 + rand.nextInt(5);
        for (int l = 0; l < k; ++l) {
            collector2.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
    }
    for (int i = 1; i < 10; ++i) {
        HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
        int j = rand.nextInt(100000);
        for (int l = 0; l < j; ++l) {
            collector1.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
        int k = j + 20000 + rand.nextInt(100000);
        for (int l = 0; l < k; ++l) {
            collector2.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
    }
}
Also used: Random(java.util.Random) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) Comparator(java.util.Comparator) Test(org.junit.Test)
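The three loops only pin down the comparator's contract: it must order collectors the same way Double.compare orders their cardinality estimates. A comparator consistent with that contract, as a sketch rather than the factory's actual implementation (which is free to compare by other means so long as the ordering matches):

import io.druid.hll.HyperLogLogCollector;
import java.util.Comparator;

public class EstimateComparator implements Comparator<HyperLogLogCollector> {
    @Override
    public int compare(HyperLogLogCollector left, HyperLogLogCollector right) {
        // Order by estimated cardinality, matching what the test asserts.
        return Double.compare(left.estimateCardinality(), right.estimateCardinality());
    }
}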

Aggregations

HyperLogLogCollector (io.druid.hll.HyperLogLogCollector): 14 usages
Test (org.junit.Test): 6
InputRow (io.druid.data.input.InputRow): 3
Comparator (java.util.Comparator): 3
Random (java.util.Random): 3
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 2
ObjectStrategy (io.druid.segment.data.ObjectStrategy): 2
ByteBuffer (java.nio.ByteBuffer): 2
Interval (org.joda.time.Interval): 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
Optional (com.google.common.base.Optional): 1
ImmutableList (com.google.common.collect.ImmutableList): 1
ImmutableMap (com.google.common.collect.ImmutableMap): 1
ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap): 1
HashFunction (com.google.common.hash.HashFunction): 1
Firehose (io.druid.data.input.Firehose): 1
Granularity (io.druid.java.util.common.granularity.Granularity): 1
FileSmoosher (io.druid.java.util.common.io.smoosh.FileSmoosher): 1
SmooshedFileMapper (io.druid.java.util.common.io.smoosh.SmooshedFileMapper): 1
SmooshedWriter (io.druid.java.util.common.io.smoosh.SmooshedWriter): 1