use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
the class LargeColumnSupportedComplexColumnSerializerTest method testSanity.
/**
 * Round-trip test for LargeColumnSupportedComplexColumnSerializer: for every combination of
 * column size limit and row count, a serializer is created, written out through a FileSmoosher,
 * deserialized again via the serde, and the cardinality estimate folded from the read-back rows
 * is compared with the estimate of a baseline collector.
 *
 * NOTE(review): this listing looks like an abridged excerpt. Closing braces and several
 * statements/expressions are missing, so it does not compile as shown; confirm against the
 * full test class before relying on it.
 */
public void testSanity() throws IOException {
HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
// Row counts exercised per run; each value bounds the write loop and the read-back loop below.
int[] cases = { 1000, 5000, 10000, 20000 };
// Column size limits handed to createWithColumnSize, from Integer.MAX_VALUE down to 2500 longs' worth of bytes.
int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Longs.BYTES, 2500 * Longs.BYTES };
for (int columnSize : columnSizes) {
for (int aCase : cases) {
File tmpFile = FileUtils.getTempDirectory();
// Baseline collector whose estimate is the expected value in the final assertion.
// NOTE(review): no statement adding values to baseCollector is visible in this excerpt.
HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
// Both the IOPeon and the FileSmoosher are closed by try-with-resources.
try (IOPeon peon = new TmpFileIOPeon();
FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
// The trailing ";;" is an empty statement.
LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(peon, "test", serde.getObjectStrategy(), columnSize);;
for (int i = 0; i < aCase; i++) {
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
// NOTE(review): `fn` is not declared anywhere in this excerpt; presumably a hash function field of the test class. Confirm.
byte[] hashBytes = fn.hashLong(i).asBytes();
// Reserves a smooshed entry named "test" sized to the serializer's reported size, then writes the column into it.
try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
serializer.writeToChannel(channel, v9Smoosher);
// NOTE(review): the right-hand side of this assignment is missing in the excerpt.
SmooshedFileMapper mapper =;
final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
serde.deserializeColumn(mapper.mapFile("test"), builder);
// NOTE(review): the right-hand side of this assignment is missing in the excerpt.
Column column =;
ComplexColumn complexColumn = column.getComplexColumn();
// Folds every read-back row into one collector so its estimate can be compared with the baseline.
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
for (int i = 0; i < aCase; i++) {
collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
// A delta of 0.0 means the two estimates must match exactly.
Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
the class CachingClusteredClientTest method testGroupByCaching.
/**
 * Exercises group-by query caching with a "uniques" HyperUniques aggregator added to the shared
 * aggregator list. A single HyperLogLogCollector holding two hashed strings is used as the
 * "uniques" value of every expected result row.
 *
 * NOTE(review): this listing looks like an abridged excerpt. One argument of the
 * testQueryCaching call is missing and the final assertion statement is only partially present,
 * so it does not compile as shown; confirm against the full test class.
 */
public void testGroupByCaching() throws Exception {
// AGGS plus one HyperUniquesAggregatorFactory named "uniques" reading field "uniques".
List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder().addAll(AGGS).add(new HyperUniquesAggregatorFactory("uniques", "uniques")).build();
final HashFunction hashFn = Hashing.murmur3_128();
GroupByQuery.Builder builder = new GroupByQuery.Builder().setDataSource(DATA_SOURCE).setQuerySegmentSpec(SEG_SPEC).setDimFilter(DIM_FILTER).setGranularity(GRANULARITY).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a"))).setAggregatorSpecs(aggsWithUniques).setPostAggregatorSpecs(POST_AGGS).setContext(CONTEXT);
// Collector populated with the murmur3_128 hashes of two UTF-8 strings; reused in every expected row below.
final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
// Alternating (interval, expected results) arguments; the interval 2011-01-05/2011-01-10 appears twice,
// once with midnight timestamps and once with T01 timestamps.
// NOTE(review): the second argument ("client,, ...") is missing in this excerpt.
testQueryCaching(client,, new Interval("2011-01-01/2011-01-02"), makeGroupByResults(new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)), new Interval("2011-01-02/2011-01-03"), makeGroupByResults(new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)), new Interval("2011-01-05/2011-01-10"), makeGroupByResults(new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)), new Interval("2011-01-05/2011-01-10"), makeGroupByResults(new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)));
// Wraps the client in a FinalizeResultsQueryRunner using the group-by toolchest.
QueryRunner runner = new FinalizeResultsQueryRunner(client, GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest());
// Empty response-context map; it appears as `context` in the fragment below.
HashMap<String, Object> context = new HashMap<String, Object>();
// Expected rows interleave the midnight and T01 timestamps for each day from 2011-01-05 to 2011-01-09.
// NOTE(review): the actual-results argument is truncated here; only its tail
// ("...\"2011-01-05/2011-01-10\").build(), context), \"\")") survives in this excerpt.
TestHelper.assertExpectedObjects(makeGroupByResults(new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector), new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector), new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector), new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector), new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector), new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)),"2011-01-05/2011-01-10").build(), context), "");
use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
the class HyperUniquesSerde method getExtractor.
/**
 * Returns an anonymous ComplexMetricExtractor that produces HyperLogLogCollector values from
 * input rows.
 *
 * NOTE(review): this listing looks like an abridged excerpt. Closing braces and the body of the
 * loop over dimension values are missing, so it does not compile as shown.
 */
public ComplexMetricExtractor getExtractor() {
return new ComplexMetricExtractor() {
// The extracted value type is always HyperLogLogCollector.
public Class<HyperLogLogCollector> extractedClass() {
return HyperLogLogCollector.class;
// Returns the raw value unchanged when it is already a HyperLogLogCollector; otherwise starts
// from a fresh collector and consults the row's dimension values for metricName.
public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
Object rawValue = inputRow.getRaw(metricName);
if (rawValue instanceof HyperLogLogCollector) {
return (HyperLogLogCollector) rawValue;
} else {
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
List<String> dimValues = inputRow.getDimension(metricName);
// No dimension values: return the empty collector.
if (dimValues == null) {
return collector;
// NOTE(review): the loop body is missing in this excerpt; presumably each value is hashed
// and added to the collector. Confirm against the full source.
for (String dimensionValue : dimValues) {
return collector;
use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
the class HyperUniqueFinalizingPostAggregatorTest method testCompute.
/**
 * Checks that HyperUniqueFinalizingPostAggregator.compute, given a map holding a collector under
 * the key "uniques", yields a specific cardinality value.
 *
 * NOTE(review): this listing looks like an abridged excerpt. Closing braces are missing and no
 * statement adding hashedVal to the collector is visible, so it does not compile as shown.
 */
public void testCompute() throws Exception {
// Fixed seed, so the generated longs are the same on every run.
Random random = new Random(0L);
HyperUniqueFinalizingPostAggregator postAggregator = new HyperUniqueFinalizingPostAggregator("uniques", "uniques");
HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
for (int i = 0; i < 100; ++i) {
// NOTE(review): `fn` is not declared in this excerpt; presumably a hash function field of the test class. Confirm.
byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();
// The post-aggregator result is cast to Double and unboxed.
double cardinality = (Double) postAggregator.compute(ImmutableMap.<String, Object>of("uniques", collector));
// Exact `==` comparison on doubles: the estimate must equal this literal bit-for-bit.
Assert.assertTrue(cardinality == 99.37233005831612);
use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
the class HyperUniquesAggregatorFactoryTest method testCompare2.
/**
 * Exercises the comparator returned by HyperUniquesAggregatorFactory.getComparator() on pairs of
 * collectors across three rounds with increasing size ranges.
 *
 * NOTE(review): this listing looks like an abridged excerpt. Closing braces and the bodies of the
 * inner loops are missing and the assertEquals calls are only partially present, so it does not
 * compile as shown. Presumably collector1 receives j values and collector2 receives k values;
 * confirm against the full test class.
 */
public void testCompare2() throws Exception {
// Fixed seed, so the generated sizes are the same on every run.
Random rand = new Random(0);
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
// Declared as a raw Comparator type.
Comparator comparator = factory.getComparator();
// Round 1: 999 iterations; j is in [0, 50) and k = j + 1 + [0, 5), so k is always greater than j.
for (int i = 1; i < 1000; ++i) {
HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
int j = rand.nextInt(50);
for (int l = 0; l < j; ++l) {
HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
int k = j + 1 + rand.nextInt(5);
for (int l = 0; l < k; ++l) {
// NOTE(review): the arguments of this assertion are partially missing in the excerpt.
Assert.assertEquals(, collector2.estimateCardinality()),, collector2));
// Round 2: 99 iterations; j is in [0, 500) and k = j + 2 + [0, 5).
for (int i = 1; i < 100; ++i) {
HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
int j = rand.nextInt(500);
for (int l = 0; l < j; ++l) {
HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
int k = j + 2 + rand.nextInt(5);
for (int l = 0; l < k; ++l) {
// NOTE(review): the arguments of this assertion are partially missing in the excerpt.
Assert.assertEquals(, collector2.estimateCardinality()),, collector2));
// Round 3: 9 iterations; j is in [0, 100000) and k = j + 20000 + [0, 100000).
for (int i = 1; i < 10; ++i) {
HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
int j = rand.nextInt(100000);
for (int l = 0; l < j; ++l) {
HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
int k = j + 20000 + rand.nextInt(100000);
for (int l = 0; l < k; ++l) {
// NOTE(review): the arguments of this assertion are partially missing in the excerpt.
Assert.assertEquals(, collector2.estimateCardinality()),, collector2));