Search in sources :

Example 6 with HyperLogLogCollector

use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

the class HyperUniquesAggregatorFactoryTest method testCompare2.

/**
 * Verifies that the comparator returned by {@code HyperUniquesAggregatorFactory} orders two
 * collectors exactly as {@link Double#compare(double, double)} orders their cardinality estimates.
 *
 * <p>Three rounds are run with increasingly large collectors. In every round the second collector
 * receives strictly more hashed random longs than the first. The {@link Random} is seeded with 0,
 * so the generated sequence is the same on every run.
 */
@Test
public void testCompare2() {
    final Random random = new Random(0);
    final HyperUniquesAggregatorFactory aggFactory = new HyperUniquesAggregatorFactory("foo", "bar");
    final Comparator comparator = aggFactory.getComparator();

    // Round 1: 999 pairs; the first collector gets 0..49 values, the second gets 1..5 more.
    for (int trial = 1; trial < 1000; trial++) {
        final HyperLogLogCollector smaller = HyperLogLogCollector.makeLatestCollector();
        final int smallerCount = random.nextInt(50);
        for (int remaining = smallerCount; remaining > 0; remaining--) {
            smaller.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final HyperLogLogCollector larger = HyperLogLogCollector.makeLatestCollector();
        final int largerCount = smallerCount + 1 + random.nextInt(5);
        for (int remaining = largerCount; remaining > 0; remaining--) {
            larger.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final int expectedOrder = Double.compare(smaller.estimateCardinality(), larger.estimateCardinality());
        final int actualOrder = comparator.compare(smaller, larger);
        Assert.assertEquals(expectedOrder, actualOrder);
    }

    // Round 2: 99 pairs; the first collector gets 0..499 values, the second gets 2..6 more.
    for (int trial = 1; trial < 100; trial++) {
        final HyperLogLogCollector smaller = HyperLogLogCollector.makeLatestCollector();
        final int smallerCount = random.nextInt(500);
        for (int remaining = smallerCount; remaining > 0; remaining--) {
            smaller.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final HyperLogLogCollector larger = HyperLogLogCollector.makeLatestCollector();
        final int largerCount = smallerCount + 2 + random.nextInt(5);
        for (int remaining = largerCount; remaining > 0; remaining--) {
            larger.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final int expectedOrder = Double.compare(smaller.estimateCardinality(), larger.estimateCardinality());
        final int actualOrder = comparator.compare(smaller, larger);
        Assert.assertEquals(expectedOrder, actualOrder);
    }

    // Round 3: 9 pairs; the first collector gets 0..99999 values, the second at least 20000 more.
    for (int trial = 1; trial < 10; trial++) {
        final HyperLogLogCollector smaller = HyperLogLogCollector.makeLatestCollector();
        final int smallerCount = random.nextInt(100000);
        for (int remaining = smallerCount; remaining > 0; remaining--) {
            smaller.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final HyperLogLogCollector larger = HyperLogLogCollector.makeLatestCollector();
        final int largerCount = smallerCount + 20000 + random.nextInt(100000);
        for (int remaining = largerCount; remaining > 0; remaining--) {
            larger.add(fn.hashLong(random.nextLong()).asBytes());
        }

        final int expectedOrder = Double.compare(smaller.estimateCardinality(), larger.estimateCardinality());
        final int actualOrder = comparator.compare(smaller, larger);
        Assert.assertEquals(expectedOrder, actualOrder);
    }
}
Also used : Random(java.util.Random) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) VersionZeroHyperLogLogCollector(org.apache.druid.hll.VersionZeroHyperLogLogCollector) Comparator(java.util.Comparator) Test(org.junit.Test)

Example 7 with HyperLogLogCollector

use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

the class HyperUniquesAggregatorFactoryTest method testEstimateCardinalityForZeroCardinality.

/**
 * Checks that {@code HyperUniquesAggregatorFactory.estimateCardinality} yields zero for both a
 * {@code null} input and a collector read from a freshly allocated buffer, and that the result
 * type is the same for the two inputs in rounded and unrounded mode.
 */
@Test
public void testEstimateCardinalityForZeroCardinality() {
    // Build a collector on top of a freshly allocated (all-zero) dense-sized buffer.
    final int denseBytes = HyperLogLogCollector.getLatestNumBytesForDenseStorage();
    final ByteBuffer zeroedBuffer = ByteBuffer.allocate(denseBytes);
    final HyperLogLogCollector emptyCollector = HyperUniquesBufferAggregator.doGet(zeroedBuffer, 0);

    // A null input: rounded result compares equal to 0L, unrounded to 0d.
    Assert.assertEquals(0L, HyperUniquesAggregatorFactory.estimateCardinality(null, true));
    Assert.assertEquals(0d, HyperUniquesAggregatorFactory.estimateCardinality(null, false));

    // The empty collector must behave the same way.
    Assert.assertEquals(0L, HyperUniquesAggregatorFactory.estimateCardinality(emptyCollector, true));
    Assert.assertEquals(0d, HyperUniquesAggregatorFactory.estimateCardinality(emptyCollector, false));

    // Result types must not depend on whether the input was null or empty.
    final Class<?> roundedEmptyType = HyperUniquesAggregatorFactory.estimateCardinality(emptyCollector, true).getClass();
    final Class<?> roundedNullType = HyperUniquesAggregatorFactory.estimateCardinality(null, true).getClass();
    Assert.assertEquals(roundedEmptyType, roundedNullType);

    final Class<?> unroundedEmptyType = HyperUniquesAggregatorFactory.estimateCardinality(emptyCollector, false).getClass();
    final Class<?> unroundedNullType = HyperUniquesAggregatorFactory.estimateCardinality(null, false).getClass();
    Assert.assertEquals(unroundedEmptyType, unroundedNullType);
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) VersionZeroHyperLogLogCollector(org.apache.druid.hll.VersionZeroHyperLogLogCollector) Test(org.junit.Test)

Example 8 with HyperLogLogCollector

use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

the class LargeColumnSupportedComplexColumnSerializerTest method testSanity.

/**
 * Round-trips HyperLogLog collectors through {@code LargeColumnSupportedComplexColumnSerializer}
 * for every combination of row count and column size, then checks that folding the deserialized
 * rows gives the same cardinality estimate as folding the originals.
 *
 * @throws IOException if creating the temporary folder, writing the smoosh file, or mapping it
 *                     back fails
 */
@Test
public void testSanity() throws IOException {
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            File tmpFile = temporaryFolder.newFolder();
            // Reference result: every per-row collector is folded into this one while writing.
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();

            // Write phase: serialize aCase single-value collectors into a smoosh file under tmpFile.
            try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    // The serializer pulls its value through a selector, so wrap the collector in one.
                    serializer.serialize(new ObjectColumnSelector() {

                        @Nullable
                        @Override
                        public Object getObject() {
                            return collector;
                        }

                        @Override
                        public Class classOfObject() {
                            return HyperLogLogCollector.class;
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                        // doesn't matter in tests
                        }
                    });
                }
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeTo(channel, v9Smoosher);
                }
            }

            // Read phase: map the smoosh file back and fold every stored row. The mapper is
            // closed when the comparison is done so the mapped files are released on every
            // iteration instead of accumulating for the lifetime of the test.
            try (SmooshedFileMapper mapper = Smoosh.map(tmpFile)) {
                final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
                serde.deserializeColumn(mapper.mapFile("test"), builder, null);
                ColumnHolder columnHolder = builder.build();
                ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
                HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                for (int i = 0; i < aCase; i++) {
                    collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
                }
                // Delta 0.0: the estimates are required to match exactly.
                Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
            }
        }
    }
}
Also used : SmooshedWriter(org.apache.druid.java.util.common.io.smoosh.SmooshedWriter) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder) File(java.io.File) Nullable(javax.annotation.Nullable) SmooshedFileMapper(org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper) ComplexColumn(org.apache.druid.segment.column.ComplexColumn) ObjectColumnSelector(org.apache.druid.segment.ObjectColumnSelector) Test(org.junit.Test)

Example 9 with HyperLogLogCollector

use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

the class HyperUniquesSerdeForTest method getExtractor.

/**
 * Returns an extractor that produces a {@code HyperLogLogCollector} for a metric of an input row.
 *
 * <p>If the row's raw value for the metric is already a collector it is returned as is. Otherwise
 * a new collector is created and fed the hash of each value of the same-named dimension; when the
 * row has no such dimension the new collector is returned empty.
 */
@Override
public ComplexMetricExtractor getExtractor() {
    return new ComplexMetricExtractor() {

        @Override
        public Class<HyperLogLogCollector> extractedClass() {
            return HyperLogLogCollector.class;
        }

        @Override
        public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
            final Object raw = inputRow.getRaw(metricName);
            // Already aggregated: hand the collector back untouched.
            if (raw instanceof HyperLogLogCollector) {
                return (HyperLogLogCollector) raw;
            }

            // Otherwise build a collector from the dimension values, if there are any.
            final HyperLogLogCollector built = HyperLogLogCollector.makeLatestCollector();
            final List<String> values = inputRow.getDimension(metricName);
            if (values != null) {
                for (String value : values) {
                    built.add(hashFn.hashBytes(StringUtils.toUtf8(value)).asBytes());
                }
            }
            return built;
        }
    };
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) InputRow(org.apache.druid.data.input.InputRow)

Example 10 with HyperLogLogCollector

use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

the class BatchDeltaIngestionTest method verifyRows.

/**
 * Asserts that {@code actualRows} matches {@code expectedRows} row by row.
 *
 * <p>For each row the timestamp (expected under key {@code "time"}), the dimension list and every
 * expected dimension value are compared. Metrics whose raw value is a {@code HyperLogLogCollector}
 * are compared by unrounded cardinality estimate with a tolerance of 0.001; all other metrics are
 * compared via {@code getMetric}.
 *
 * @param expectedRows       expected rows, keyed by column name
 * @param actualRows         rows to verify, in the same order as {@code expectedRows}
 * @param expectedDimensions dimension names every actual row must report
 * @param expectedMetrics    metric names to compare on every row
 */
private void verifyRows(List<ImmutableMap<String, Object>> expectedRows, List<InputRow> actualRows, List<String> expectedDimensions, List<String> expectedMetrics) {
    final int rowCount = expectedRows.size();
    Assert.assertEquals(rowCount, actualRows.size());

    for (int row = 0; row < rowCount; row++) {
        final Map<String, Object> expectedRow = expectedRows.get(row);
        final InputRow actualRow = actualRows.get(row);

        Assert.assertEquals(expectedRow.get("time"), actualRow.getTimestamp());
        Assert.assertEquals(expectedDimensions, actualRow.getDimensions());
        for (String dimension : expectedDimensions) {
            Assert.assertEquals(expectedRow.get(dimension), actualRow.getDimension(dimension));
        }

        for (String metric : expectedMetrics) {
            final Object rawMetric = actualRow.getRaw(metric);
            if (!(rawMetric instanceof HyperLogLogCollector)) {
                Assert.assertEquals(expectedRow.get(metric), actualRow.getMetric(metric));
                continue;
            }
            // Collectors are compared through their cardinality estimate, not object equality.
            final Double expectedCardinality = (Double) expectedRow.get(metric);
            final Double actualCardinality = (Double) HyperUniquesAggregatorFactory.estimateCardinality(rawMetric, false);
            Assert.assertEquals(expectedCardinality, actualCardinality, 0.001);
        }
    }
}
Also used : HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) InputRow(org.apache.druid.data.input.InputRow)

Aggregations

HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 41; Test (org.junit.Test): 12; Random (java.util.Random): 4; InputRow (org.apache.druid.data.input.InputRow): 4; ByteBuffer (java.nio.ByteBuffer): 3; ArrayList (java.util.ArrayList): 3; Comparator (java.util.Comparator): 3; HashMap (java.util.HashMap): 3; VersionZeroHyperLogLogCollector (org.apache.druid.hll.VersionZeroHyperLogLogCollector): 3; Expr (org.apache.druid.math.expr.Expr): 3; ExprEval (org.apache.druid.math.expr.ExprEval): 3; HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 3; InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 3; Interval (org.joda.time.Interval): 3; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2; Optional (com.google.common.base.Optional): 2; ImmutableList (com.google.common.collect.ImmutableList): 2; HashFunction (com.google.common.hash.HashFunction): 2; File (java.io.File): 2; List (java.util.List): 2