Use of org.apache.druid.hll.HyperLogLogCollector in the druid project (druid-io): class ScanQueryRunnerTest, method toEvents.
/**
 * Converts tab-separated rows of test data into event maps keyed by the column names given in
 * {@code dimSpecs}.
 *
 * @param dimSpecs column specs of the form {@code "name"} or {@code "name:TYPE"}, where TYPE is
 *                 one of STRING, TIME, FLOAT, DOUBLE, LONG, NULL, STRINGS; entries may be null
 *                 to skip a column. A handful of well-known names (indexMin, indexFloat,
 *                 quality_uniques, ...) are synthesized from values already placed in the event
 *                 rather than parsed from the row.
 * @param valueSet one or more arrays of tab-separated data rows; they are concatenated in order
 * @return a single-element list containing one event map per input row
 */
private List<List<Map<String, Object>>> toEvents(final String[] dimSpecs, final String[]... valueSet)
{
  List<String> values = new ArrayList<>();
  for (String[] vSet : valueSet) {
    values.addAll(Arrays.asList(vSet));
  }
  List<List<Map<String, Object>>> events = new ArrayList<>();
  events.add(Lists.newArrayList(Iterables.transform(values, input -> {
    Map<String, Object> event = new HashMap<>();
    String[] values1 = input.split("\\t");
    for (int i = 0; i < dimSpecs.length; i++) {
      // FIX: the original also tested "i >= dimSpecs.length" here, which is always false
      // inside this loop; only the null check is meaningful.
      if (dimSpecs[i] == null) {
        continue;
      }
      // Synthesized columns: computed from values already present in the event map because
      // they don't appear in the source data.
      if (dimSpecs[i].equals(EXPR_COLUMN.getOutputName())) {
        event.put(EXPR_COLUMN.getOutputName(), (double) event.get(QueryRunnerTestHelper.INDEX_METRIC) * 2);
        continue;
      } else if (dimSpecs[i].equals("indexMin")) {
        event.put("indexMin", (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
        continue;
      } else if (dimSpecs[i].equals("indexFloat")) {
        event.put("indexFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
        continue;
      } else if (dimSpecs[i].equals("indexMaxPlusTen")) {
        event.put("indexMaxPlusTen", (double) event.get(QueryRunnerTestHelper.INDEX_METRIC) + 10);
        continue;
      } else if (dimSpecs[i].equals("indexMinFloat")) {
        event.put("indexMinFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
        continue;
      } else if (dimSpecs[i].equals("indexMaxFloat")) {
        event.put("indexMaxFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
        continue;
      } else if (dimSpecs[i].equals("quality_uniques")) {
        final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
        collector.add(Hashing.murmur3_128().hashBytes(StringUtils.toUtf8((String) event.get("quality"))).asBytes());
        event.put("quality_uniques", collector);
        // FIX: this branch previously fell through; if a source value existed at this index,
        // the raw string would overwrite the collector. Skip parsing like the other
        // synthesized columns.
        continue;
      }
      // Rows may be shorter than the spec list; absent trailing columns are simply skipped.
      if (i >= values1.length) {
        continue;
      }
      String[] specs = dimSpecs[i].split(":");
      // A bare name (no ":TYPE" suffix) defaults to STRING.
      final String type = specs.length > 1 ? specs[1] : "STRING";
      final Object eventVal;
      switch (type) {
        case "TIME":
          eventVal = toTimestamp(values1[i]);
          break;
        case "FLOAT":
          eventVal = values1[i].isEmpty() ? NullHandling.defaultFloatValue() : Float.valueOf(values1[i]);
          break;
        case "DOUBLE":
          eventVal = values1[i].isEmpty() ? NullHandling.defaultDoubleValue() : Double.valueOf(values1[i]);
          break;
        case "LONG":
          eventVal = values1[i].isEmpty() ? NullHandling.defaultLongValue() : Long.valueOf(values1[i]);
          break;
        case "NULL":
          eventVal = null;
          break;
        case "STRINGS":
          // Multi-value dimension: values are joined with the \u0001 separator in the fixture.
          eventVal = Arrays.asList(values1[i].split("\u0001"));
          break;
        case "STRING":
        default:
          eventVal = values1[i];
          break;
      }
      event.put(specs[0], eventVal);
    }
    return event;
  })));
  return events;
}
Use of org.apache.druid.hll.HyperLogLogCollector in the druid project (druid-io): class HyperUniqueExpressionsTest, method testDouble.
@Test
public void testDouble()
{
  // Adding doubles (literals, a bound variable, and a null binding) should yield a collector
  // whose estimated cardinality equals the number of distinct non-null values added.
  assertDoubleExprCardinality("hyper_unique_add(1.234, hyper_unique())", 1.0);
  assertDoubleExprCardinality("hyper_unique_add(1.234, hyper_unique_add(5.678, hyper_unique()))", 2.0);
  assertDoubleExprCardinality("hyper_unique_add(double, hyper_unique())", 1.0);
  // A null double counts only when null handling replaces nulls with defaults.
  assertDoubleExprCardinality(
      "hyper_unique_add(nullDouble, hyper_unique())",
      NullHandling.replaceWithDefault() ? 1.0 : 0.0
  );
}

/** Parses and evaluates {@code expression}, then checks type, value class, and cardinality. */
private void assertDoubleExprCardinality(String expression, double expectedCardinality)
{
  final ExprEval eval = Parser.parse(expression, MACRO_TABLE).eval(inputBindings);
  Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
  Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
  Assert.assertEquals(expectedCardinality, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
}
Use of org.apache.druid.hll.HyperLogLogCollector in the druid project (druid-io): class HyperUniqueExpressionsTest, method testCreate.
@Test
public void testCreate()
{
  // hyper_unique() with no inputs must produce an empty collector with zero cardinality.
  final ExprEval result = Parser.parse("hyper_unique()", MACRO_TABLE).eval(inputBindings);
  Assert.assertEquals(HyperUniqueExpressions.TYPE, result.type());
  final Object value = result.value();
  Assert.assertTrue(value instanceof HyperLogLogCollector);
  Assert.assertEquals(0.0, ((HyperLogLogCollector) value).estimateCardinality(), 0);
}
Use of org.apache.druid.hll.HyperLogLogCollector in the druid project (druid-io): class HyperUniqueExpressionsTest, method testString.
@Test
public void testString()
{
  // Adding strings (literals, a bound variable, and a null binding) should yield a collector
  // whose estimated cardinality equals the number of distinct non-null values added.
  assertStringExprCardinality("hyper_unique_add('foo', hyper_unique())", 1.0);
  assertStringExprCardinality("hyper_unique_add('bar', hyper_unique_add('foo', hyper_unique()))", 2.0);
  assertStringExprCardinality("hyper_unique_add(string, hyper_unique())", 1.0);
  // A null string counts only when null handling replaces nulls with defaults.
  assertStringExprCardinality(
      "hyper_unique_add(nullString, hyper_unique())",
      NullHandling.replaceWithDefault() ? 1.0 : 0.0
  );
}

/** Parses and evaluates {@code expression}, then checks type, value class, and cardinality. */
private void assertStringExprCardinality(String expression, double expectedCardinality)
{
  final ExprEval eval = Parser.parse(expression, MACRO_TABLE).eval(inputBindings);
  Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
  Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
  Assert.assertEquals(expectedCardinality, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
}
Use of org.apache.druid.hll.HyperLogLogCollector in the druid project (druid-io): class HyperUniquesAggregatorFactoryTest, method testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases.
@Test
public void testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases()
{
  // given: a fixed seed so the 1000 randomized comparisons are reproducible
  final Random random = new Random(0);
  final HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
  final Comparator comparator = factory.getComparator();
  for (int iteration = 0; iteration < 1000; ++iteration) {
    // given: two collectors, each filled with between 5000 and 13999 random hashed longs
    final HyperLogLogCollector leftCollector = newRandomCollector(random);
    final HyperLogLogCollector rightCollector = newRandomCollector(random);
    // when: ordering by estimated cardinality vs. by the factory's comparator
    final int orderedByCardinality = Double.compare(leftCollector.estimateCardinality(), rightCollector.estimateCardinality());
    final int orderedByComparator = comparator.compare(leftCollector, rightCollector);
    // then: the hyperloglog comparator must agree with the estimated cardinalities
    Assert.assertEquals(StringUtils.format("orderedByComparator=%d, orderedByCardinality=%d,\n" + "Left={cardinality=%f, hll=%s},\n" + "Right={cardinality=%f, hll=%s},\n", orderedByComparator, orderedByCardinality, leftCollector.estimateCardinality(), leftCollector, rightCollector.estimateCardinality(), rightCollector), orderedByCardinality, orderedByComparator);
  }
}

/** Builds a collector holding {@code random.nextInt(9000) + 5000} hashed random longs. */
private HyperLogLogCollector newRandomCollector(Random random)
{
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  final int count = random.nextInt(9000) + 5000;
  for (int i = 0; i < count; ++i) {
    collector.add(fn.hashLong(random.nextLong()).asBytes());
  }
  return collector;
}
Aggregations