Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
Class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
@Test
public void testSanity() throws IOException {
  // 'fn' below is a HashFunction field defined on the test class, outside this snippet.
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = { 1000, 5000, 10000, 20000 };
  int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Longs.BYTES, 2500 * Longs.BYTES };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = FileUtils.getTempDirectory();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (IOPeon peon = new TmpFileIOPeon();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(peon, "test", serde.getObjectStrategy(), columnSize);
        serializer.open();
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(collector);
        }
        serializer.close();
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeToChannel(channel, v9Smoosher);
        }
      }
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder);
      Column column = builder.build();
      ComplexColumn complexColumn = column.getComplexColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
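The test above exercises the column serializer end to end, but the HyperLogLogCollector usage underneath is simply hash, add, fold, estimate. A minimal standalone sketch of that lifecycle, using only the Guava murmur3 hash and the collector calls already shown above (the class name is illustrative):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;

public class HllFoldSketch {
  public static void main(String[] args) {
    HashFunction fn = Hashing.murmur3_128();
    HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < 10000; i++) {
      // Hash each value into a fresh per-row collector, then fold it into the base collector.
      HyperLogLogCollector rowCollector = HyperLogLogCollector.makeLatestCollector();
      rowCollector.add(fn.hashLong(i).asBytes());
      baseCollector.fold(rowCollector);
    }
    // The estimate is approximate; expect a value close to, but not exactly, 10000.
    System.out.println(baseCollector.estimateCardinality());
  }
}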
Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
Class CachingClusteredClientTest, method testGroupByCaching:
@Test
public void testGroupByCaching() throws Exception {
  List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
      .addAll(AGGS)
      .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
      .build();
  final HashFunction hashFn = Hashing.murmur3_128();
  GroupByQuery.Builder builder = new GroupByQuery.Builder()
      .setDataSource(DATA_SOURCE)
      .setQuerySegmentSpec(SEG_SPEC)
      .setDimFilter(DIM_FILTER)
      .setGranularity(GRANULARITY)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
      .setAggregatorSpecs(aggsWithUniques)
      .setPostAggregatorSpecs(POST_AGGS)
      .setContext(CONTEXT);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
  collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
  testQueryCaching(
      client,
      builder.build(),
      new Interval("2011-01-01/2011-01-02"),
      makeGroupByResults(
          new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)
      ),
      new Interval("2011-01-02/2011-01-03"),
      makeGroupByResults(
          new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      )
  );
  QueryRunner runner = new FinalizeResultsQueryRunner(
      client,
      GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest()
  );
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedObjects(
      makeGroupByResults(
          new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
      ""
  );
}
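Note that the same collector instance serves as the expected "uniques" value for every row above. That is safe because folding identical register contents is idempotent, so merging cached partial results does not inflate the estimate. A small sketch of that property, using only the calls already shown in these snippets (the class name is illustrative):

import com.google.common.base.Charsets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import io.druid.hll.HyperLogLogCollector;

public class HllIdempotentFoldSketch {
  public static void main(String[] args) {
    final HashFunction hashFn = Hashing.murmur3_128();

    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
    collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());

    // A second collector holding exactly the same values.
    HyperLogLogCollector copy = HyperLogLogCollector.makeLatestCollector();
    copy.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
    copy.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());

    double before = collector.estimateCardinality();
    collector.fold(copy);
    double after = collector.estimateCardinality();

    // Folding a collector with identical contents should not change the estimate.
    System.out.println(before + " == " + after);
  }
}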
Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
Class HyperUniquesSerde, method getExtractor:
@Override
public ComplexMetricExtractor getExtractor() {
  return new ComplexMetricExtractor() {
    @Override
    public Class<HyperLogLogCollector> extractedClass() {
      return HyperLogLogCollector.class;
    }

    @Override
    public HyperLogLogCollector extractValue(InputRow inputRow, String metricName) {
      Object rawValue = inputRow.getRaw(metricName);
      if (rawValue instanceof HyperLogLogCollector) {
        return (HyperLogLogCollector) rawValue;
      } else {
        // 'hyperLogLogHash' is a field of HyperUniquesSerde, not shown in this snippet.
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        List<String> dimValues = inputRow.getDimension(metricName);
        if (dimValues == null) {
          return collector;
        }
        for (String dimensionValue : dimValues) {
          collector.add(hyperLogLogHash.hash(dimensionValue));
        }
        return collector;
      }
    }
  };
}
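The extractor passes an existing collector through unchanged and otherwise falls back to hashing the row's dimension values. A minimal sketch of that fallback path; HyperLogLogHash.getDefault() is an assumption here (the snippet only shows the serde's hyperLogLogHash field), and the class name is illustrative:

import java.util.Arrays;
import java.util.List;

import io.druid.hll.HyperLogLogCollector;
import io.druid.hll.HyperLogLogHash;

public class DimensionHashSketch {
  public static void main(String[] args) {
    // Assumed stand-in for the serde's hyperLogLogHash field.
    HyperLogLogHash hyperLogLogHash = HyperLogLogHash.getDefault();
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

    // Stand-in for inputRow.getDimension(metricName): the raw string values of the column.
    List<String> dimValues = Arrays.asList("alpha", "beta", "alpha");
    for (String dimensionValue : dimValues) {
      collector.add(hyperLogLogHash.hash(dimensionValue));
    }

    // Duplicate strings collapse, so the estimate should be close to 2 rather than 3.
    System.out.println(collector.estimateCardinality());
  }
}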
Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
Class HyperUniqueFinalizingPostAggregatorTest, method testCompute:
@Test
public void testCompute() throws Exception {
  Random random = new Random(0L);
  HyperUniqueFinalizingPostAggregator postAggregator = new HyperUniqueFinalizingPostAggregator("uniques", "uniques");
  HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  for (int i = 0; i < 100; ++i) {
    byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();
    collector.add(hashedVal);
  }
  double cardinality = (Double) postAggregator.compute(ImmutableMap.<String, Object>of("uniques", collector));
  Assert.assertTrue(cardinality == 99.37233005831612);
}
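The expected value 99.37233005831612 reflects HyperLogLog's approximation error for 100 distinct random inputs under this fixed seed. For a non-null collector, compute() is expected to return the collector's own cardinality estimate, so the two paths below should agree (a sketch under that assumption; the import paths are assumed, since the snippet shows no imports):

import java.util.Random;

import com.google.common.collect.ImmutableMap;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import io.druid.hll.HyperLogLogCollector;
import io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator;

public class FinalizingEquivalenceSketch {
  public static void main(String[] args) {
    HashFunction fn = Hashing.murmur3_128();
    Random random = new Random(0L);

    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < 100; ++i) {
      collector.add(fn.hashLong(random.nextLong()).asBytes());
    }

    HyperUniqueFinalizingPostAggregator postAggregator =
        new HyperUniqueFinalizingPostAggregator("uniques", "uniques");
    double finalized = (Double) postAggregator.compute(ImmutableMap.<String, Object>of("uniques", collector));

    // Both values should be the same approximate cardinality (~99.4 for this seed).
    System.out.println(finalized + " vs " + collector.estimateCardinality());
  }
}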
Use of io.druid.hll.HyperLogLogCollector in project druid by druid-io.
Class HyperUniquesAggregatorFactoryTest, method testCompare2:
@Test
public void testCompare2() throws Exception {
  Random rand = new Random(0);
  HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
  Comparator comparator = factory.getComparator();
  for (int i = 1; i < 1000; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(50);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 1 + rand.nextInt(5);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
  }
  for (int i = 1; i < 100; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(500);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 2 + rand.nextInt(5);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
  }
  for (int i = 1; i < 10; ++i) {
    HyperLogLogCollector collector1 = HyperLogLogCollector.makeLatestCollector();
    int j = rand.nextInt(100000);
    for (int l = 0; l < j; ++l) {
      collector1.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    HyperLogLogCollector collector2 = HyperLogLogCollector.makeLatestCollector();
    int k = j + 20000 + rand.nextInt(100000);
    for (int l = 0; l < k; ++l) {
      collector2.add(fn.hashLong(rand.nextLong()).asBytes());
    }
    Assert.assertEquals(Double.compare(collector1.estimateCardinality(), collector2.estimateCardinality()), comparator.compare(collector1, collector2));
  }
}
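The assertions above check that the factory's comparator agrees with ordering collectors by estimateCardinality(). A standalone comparator with that contract (illustrative only, not the factory's actual implementation):

import java.util.Comparator;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

import io.druid.hll.HyperLogLogCollector;

public class CardinalityComparatorSketch {
  // Orders collectors by their estimated cardinality, mirroring the contract the test asserts.
  static final Comparator<HyperLogLogCollector> BY_ESTIMATE = new Comparator<HyperLogLogCollector>() {
    @Override
    public int compare(HyperLogLogCollector left, HyperLogLogCollector right) {
      return Double.compare(left.estimateCardinality(), right.estimateCardinality());
    }
  };

  public static void main(String[] args) {
    HashFunction fn = Hashing.murmur3_128();
    HyperLogLogCollector small = HyperLogLogCollector.makeLatestCollector();
    HyperLogLogCollector large = HyperLogLogCollector.makeLatestCollector();
    for (int i = 0; i < 10; ++i) {
      small.add(fn.hashLong(i).asBytes());
    }
    for (int i = 0; i < 10000; ++i) {
      large.add(fn.hashLong(i).asBytes());
    }
    // Expect a negative result: the collector with fewer distinct values sorts first.
    System.out.println(BY_ESTIMATE.compare(small, large));
  }
}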