Search in sources:

Example 1 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class ScanQueryRunnerTest, method toEvents:

private List<List<Map<String, Object>>> toEvents(final String[] dimSpecs, final String[]... valueSet) {
    List<String> values = new ArrayList<>();
    for (String[] vSet : valueSet) {
        values.addAll(Arrays.asList(vSet));
    }
    List<List<Map<String, Object>>> events = new ArrayList<>();
    events.add(Lists.newArrayList(Iterables.transform(values, input -> {
        Map<String, Object> event = new HashMap<>();
        String[] values1 = input.split("\\t");
        for (int i = 0; i < dimSpecs.length; i++) {
            if (dimSpecs[i] == null) {
                continue;
            }
            // Derived columns (expr, indexMin, indexFloat, etc.) are computed from the
            // index metric because they don't appear in the source data.
            if (dimSpecs[i].equals(EXPR_COLUMN.getOutputName())) {
                event.put(EXPR_COLUMN.getOutputName(), (double) event.get(QueryRunnerTestHelper.INDEX_METRIC) * 2);
                continue;
            } else if (dimSpecs[i].equals("indexMin")) {
                event.put("indexMin", (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
                continue;
            } else if (dimSpecs[i].equals("indexFloat")) {
                event.put("indexFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
                continue;
            } else if (dimSpecs[i].equals("indexMaxPlusTen")) {
                event.put("indexMaxPlusTen", (double) event.get(QueryRunnerTestHelper.INDEX_METRIC) + 10);
                continue;
            } else if (dimSpecs[i].equals("indexMinFloat")) {
                event.put("indexMinFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
                continue;
            } else if (dimSpecs[i].equals("indexMaxFloat")) {
                event.put("indexMaxFloat", (float) (double) event.get(QueryRunnerTestHelper.INDEX_METRIC));
                continue;
            } else if (dimSpecs[i].equals("quality_uniques")) {
                final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                collector.add(Hashing.murmur3_128().hashBytes(StringUtils.toUtf8((String) event.get("quality"))).asBytes());
                event.put("quality_uniques", collector);
            }
            if (i >= values1.length) {
                continue;
            }
            String[] specs = dimSpecs[i].split(":");
            Object eventVal;
            if (specs.length == 1 || specs[1].equals("STRING")) {
                eventVal = values1[i];
            } else if (specs[1].equals("TIME")) {
                eventVal = toTimestamp(values1[i]);
            } else if (specs[1].equals("FLOAT")) {
                eventVal = values1[i].isEmpty() ? NullHandling.defaultFloatValue() : Float.valueOf(values1[i]);
            } else if (specs[1].equals("DOUBLE")) {
                eventVal = values1[i].isEmpty() ? NullHandling.defaultDoubleValue() : Double.valueOf(values1[i]);
            } else if (specs[1].equals("LONG")) {
                eventVal = values1[i].isEmpty() ? NullHandling.defaultLongValue() : Long.valueOf(values1[i]);
            } else if (specs[1].equals("NULL")) {
                eventVal = null;
            } else if (specs[1].equals("STRINGS")) {
                eventVal = Arrays.asList(values1[i].split("\u0001"));
            } else {
                eventVal = values1[i];
            }
            event.put(specs[0], eventVal);
        }
        return event;
    })));
    return events;
}
Also used: HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList)
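
The quality_uniques branch above builds a collector inline. The same pattern works outside the test; below is a minimal, self-contained sketch assuming the Druid hll module, the Druid StringUtils used in the test above, and Guava's hashing library are on the classpath. The class name HllUsageSketch is illustrative; fold() is HyperLogLogCollector's merge method.

import com.google.common.hash.Hashing;
import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.java.util.common.StringUtils;

public class HllUsageSketch {
    public static void main(String[] args) {
        // Create an empty collector on the latest HLL format version.
        HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

        // Add hashed values; the test above hashes strings with murmur3_128.
        for (String value : new String[] {"automotive", "business", "automotive"}) {
            collector.add(Hashing.murmur3_128().hashBytes(StringUtils.toUtf8(value)).asBytes());
        }

        // Duplicates do not inflate the estimate: roughly 2 distinct values here.
        System.out.println(collector.estimateCardinality());

        // Collectors can be merged; fold() combines another collector into this one.
        HyperLogLogCollector other = HyperLogLogCollector.makeLatestCollector();
        other.add(Hashing.murmur3_128().hashBytes(StringUtils.toUtf8("entertainment")).asBytes());
        collector.fold(other);

        // Roughly 3 distinct values after the merge.
        System.out.println(collector.estimateCardinality());
    }
}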

Example 2 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniqueExpressionsTest, method testDouble:

@Test
public void testDouble() {
    Expr expr = Parser.parse("hyper_unique_add(1.234, hyper_unique())", MACRO_TABLE);
    ExprEval eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add(1.234, hyper_unique_add(5.678, hyper_unique()))", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(2.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add(double, hyper_unique())", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add(nullDouble, hyper_unique())", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 1.0 : 0.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
}
Also used: ExprEval (org.apache.druid.math.expr.ExprEval), Expr (org.apache.druid.math.expr.Expr), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)

Example 3 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniqueExpressionsTest, method testCreate:

@Test
public void testCreate() {
    Expr expr = Parser.parse("hyper_unique()", MACRO_TABLE);
    ExprEval eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(0.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0);
}
Also used: ExprEval (org.apache.druid.math.expr.ExprEval), Expr (org.apache.druid.math.expr.Expr), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)

Example 4 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniqueExpressionsTest, method testString:

@Test
public void testString() {
    Expr expr = Parser.parse("hyper_unique_add('foo', hyper_unique())", MACRO_TABLE);
    ExprEval eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add('bar', hyper_unique_add('foo', hyper_unique()))", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(2.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add(string, hyper_unique())", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(1.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
    expr = Parser.parse("hyper_unique_add(nullString, hyper_unique())", MACRO_TABLE);
    eval = expr.eval(inputBindings);
    Assert.assertEquals(HyperUniqueExpressions.TYPE, eval.type());
    Assert.assertTrue(eval.value() instanceof HyperLogLogCollector);
    Assert.assertEquals(NullHandling.replaceWithDefault() ? 1.0 : 0.0, ((HyperLogLogCollector) eval.value()).estimateCardinality(), 0.01);
}
Also used: ExprEval (org.apache.druid.math.expr.ExprEval), Expr (org.apache.druid.math.expr.Expr), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
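
Examples 2 through 4 repeat the same parse, evaluate, cast, estimate pattern. A small helper could factor it out; this is a sketch only, reusing the MACRO_TABLE and inputBindings fixtures from the tests above and assuming the bindings implement Expr.ObjectBinding as in Druid's math-expr module. The helper name estimate is illustrative, not part of Druid.

// Illustrative helper: parse a hyper_unique expression, evaluate it against the
// given bindings, and return the estimated cardinality of the resulting collector.
private static double estimate(String expression, Expr.ObjectBinding bindings) {
    Expr expr = Parser.parse(expression, MACRO_TABLE);
    ExprEval eval = expr.eval(bindings);
    return ((HyperLogLogCollector) eval.value()).estimateCardinality();
}

// Usage, mirroring testString():
// Assert.assertEquals(
//     2.0,
//     estimate("hyper_unique_add('bar', hyper_unique_add('foo', hyper_unique()))", inputBindings),
//     0.01
// );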

Example 5 with HyperLogLogCollector

Use of org.apache.druid.hll.HyperLogLogCollector in project druid by druid-io.

From class HyperUniquesAggregatorFactoryTest, method testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases:

@Test
public void testCompareToShouldBehaveConsistentlyWithEstimatedCardinalitiesEvenInToughCases() {
    // given
    Random rand = new Random(0);
    HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
    Comparator comparator = factory.getComparator();
    for (int i = 0; i < 1000; ++i) {
        // given
        HyperLogLogCollector leftCollector = HyperLogLogCollector.makeLatestCollector();
        int j = rand.nextInt(9000) + 5000;
        for (int l = 0; l < j; ++l) {
            leftCollector.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        HyperLogLogCollector rightCollector = HyperLogLogCollector.makeLatestCollector();
        int k = rand.nextInt(9000) + 5000;
        for (int l = 0; l < k; ++l) {
            rightCollector.add(fn.hashLong(rand.nextLong()).asBytes());
        }
        // when
        final int orderedByCardinality = Double.compare(leftCollector.estimateCardinality(), rightCollector.estimateCardinality());
        final int orderedByComparator = comparator.compare(leftCollector, rightCollector);
        // then, assert hyperloglog comparator behaves consistently with estimated cardinalities
        Assert.assertEquals(
            StringUtils.format(
                "orderedByComparator=%d, orderedByCardinality=%d,\n"
                + "Left={cardinality=%f, hll=%s},\n"
                + "Right={cardinality=%f, hll=%s},\n",
                orderedByComparator,
                orderedByCardinality,
                leftCollector.estimateCardinality(),
                leftCollector,
                rightCollector.estimateCardinality(),
                rightCollector
            ),
            orderedByCardinality,
            orderedByComparator
        );
    }
}
Also used: Random (java.util.Random), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), VersionZeroHyperLogLogCollector (org.apache.druid.hll.VersionZeroHyperLogLogCollector), Comparator (java.util.Comparator), Test (org.junit.Test)
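
Because the test verifies that the factory comparator agrees with estimateCardinality(), the same comparator can sort collectors directly. A minimal sketch, reusing leftCollector and rightCollector as built in the loop above; the raw Comparator type mirrors what getComparator() returns in the test, and java.util.Arrays, List, and ArrayList imports are assumed.

// Sort collectors by estimated cardinality using the factory's comparator.
HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
Comparator comparator = factory.getComparator();

List<HyperLogLogCollector> collectors = new ArrayList<>(Arrays.asList(leftCollector, rightCollector));
// After sorting, collectors are in ascending order of estimateCardinality().
collectors.sort(comparator);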

Aggregations

HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 41 usages
Test (org.junit.Test): 12 usages
Random (java.util.Random): 4 usages
InputRow (org.apache.druid.data.input.InputRow): 4 usages
ByteBuffer (java.nio.ByteBuffer): 3 usages
ArrayList (java.util.ArrayList): 3 usages
Comparator (java.util.Comparator): 3 usages
HashMap (java.util.HashMap): 3 usages
VersionZeroHyperLogLogCollector (org.apache.druid.hll.VersionZeroHyperLogLogCollector): 3 usages
Expr (org.apache.druid.math.expr.Expr): 3 usages
ExprEval (org.apache.druid.math.expr.ExprEval): 3 usages
HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 3 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 3 usages
Interval (org.joda.time.Interval): 3 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2 usages
Optional (com.google.common.base.Optional): 2 usages
ImmutableList (com.google.common.collect.ImmutableList): 2 usages
HashFunction (com.google.common.hash.HashFunction): 2 usages
File (java.io.File): 2 usages
List (java.util.List): 2 usages