Search in sources :

Example 11 with HyperUniquesAggregatorFactory

use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class MetricManipulatorFnsTest method constructorFeeder.

@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
    final ArrayList<Object[]> constructorArrays = new ArrayList<>();
    final long longVal = 13789;
    LongMinAggregator longMinAggregator = new LongMinAggregator(new TestLongColumnSelector() {

        @Override
        public long get() {
            return longVal;
        }
    });
    LongMinAggregatorFactory longMinAggregatorFactory = new LongMinAggregatorFactory(NAME, FIELD);
    constructorArrays.add(new Object[] { longMinAggregatorFactory, longMinAggregator, longMinAggregator, longMinAggregator, longVal, longVal });
    HyperUniquesAggregatorFactory hyperUniquesAggregatorFactory = new HyperUniquesAggregatorFactory(NAME, FIELD);
    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
    collector.add((short) 1, (byte) 5);
    constructorArrays.add(new Object[] { hyperUniquesAggregatorFactory, collector, collector, collector.estimateCardinality(), collector.toByteArray(), collector });
    LongSumAggregatorFactory longSumAggregatorFactory = new LongSumAggregatorFactory(NAME, FIELD);
    LongSumAggregator longSumAggregator = new LongSumAggregator(new TestLongColumnSelector() {

        @Override
        public long get() {
            return longVal;
        }
    });
    constructorArrays.add(new Object[] { longSumAggregatorFactory, longSumAggregator, longSumAggregator, longSumAggregator, longVal, longVal });
    for (Object[] argList : constructorArrays) {
        Assert.assertEquals(String.format("Arglist %s is too short. Expected 6 found %d", Arrays.toString(argList), argList.length), 6, argList.length);
    }
    return constructorArrays;
}
Also used : HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) ArrayList(java.util.ArrayList) TestLongColumnSelector(io.druid.segment.TestLongColumnSelector) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)

Example 12 with HyperUniquesAggregatorFactory

use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class SchemaEvolutionTest method setUp.

@Before
public void setUp() throws IOException {
    // Index1: c1 is a string, c2 nonexistent, "uniques" nonexistent
    index1 = IndexBuilder.create().tmpDir(temporaryFolder.newFolder()).schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("cnt") }).withRollup(false).build()).rows(inputRowsWithDimensions(ImmutableList.of("c1"))).buildMMappedIndex();
    // Index2: c1 is a long, c2 is a string, "uniques" is uniques on c2
    index2 = IndexBuilder.create().tmpDir(temporaryFolder.newFolder()).schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new LongSumAggregatorFactory("c1", "c1"), new HyperUniquesAggregatorFactory("uniques", "c2") }).withRollup(false).build()).rows(inputRowsWithDimensions(ImmutableList.of("c2"))).buildMMappedIndex();
    // Index3: c1 is a float, c2 is a string, "uniques" is uniques on c2
    index3 = IndexBuilder.create().tmpDir(temporaryFolder.newFolder()).schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("c1", "c1"), new HyperUniquesAggregatorFactory("uniques", "c2") }).withRollup(false).build()).rows(inputRowsWithDimensions(ImmutableList.of("c2"))).buildMMappedIndex();
    // Index4: c1 is nonexistent, c2 is uniques on c2
    index4 = IndexBuilder.create().tmpDir(temporaryFolder.newFolder()).schema(new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[] { new HyperUniquesAggregatorFactory("c2", "c2") }).withRollup(false).build()).rows(inputRowsWithDimensions(ImmutableList.<String>of())).buildMMappedIndex();
    if (index4.getAvailableDimensions().size() != 0) {
        // Just double-checking that the exclusions are working properly
        throw new ISE("WTF?! Expected no dimensions in index4");
    }
}
Also used : CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) ISE(io.druid.java.util.common.ISE) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) Before(org.junit.Before)

Example 13 with HyperUniquesAggregatorFactory

use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class SchemaEvolutionTest method testHyperUniqueEvolutionTimeseries.

@Test
public void testHyperUniqueEvolutionTimeseries() {
    final TimeseriesQueryRunnerFactory factory = QueryRunnerTestHelper.newTimeseriesQueryRunnerFactory();
    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource(DATA_SOURCE).intervals("1000/3000").aggregators(ImmutableList.<AggregatorFactory>of(new HyperUniquesAggregatorFactory("uniques", "uniques"))).build();
    // index1 has no "uniques" column
    Assert.assertEquals(timeseriesResult(ImmutableMap.of("uniques", 0)), runQuery(query, factory, ImmutableList.of(index1)));
    // index1 (no uniques) + index2 and index3 (yes uniques); we should be able to combine
    Assert.assertEquals(timeseriesResult(ImmutableMap.of("uniques", 4.003911343725148d)), runQuery(query, factory, ImmutableList.of(index1, index2, index3)));
}
Also used : TimeseriesQueryRunnerFactory(io.druid.query.timeseries.TimeseriesQueryRunnerFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) Test(org.junit.Test)

Example 14 with HyperUniquesAggregatorFactory

use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class TopNTypeInterfaceBenchmark method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        // Use an IdentityExtractionFn to force usage of DimExtractionTopNAlgorithm
        TopNQueryBuilder queryBuilderString = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance())).metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        // DimExtractionTopNAlgorithm is always used for numeric columns
        TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metLongUniform").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metFloatNormal").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("string", queryBuilderString);
        basicQueries.put("long", queryBuilderLong);
        basicQueries.put("float", queryBuilderFloat);
    }
    {
        // basic.numericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("numericSort", queryBuilderA);
    }
    {
        // basic.alphanumericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
        basicQueries.put("alphanumericSort", queryBuilderA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(io.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(io.druid.query.topn.DimensionTopNMetricSpec) BenchmarkSchemaInfo(io.druid.benchmark.datagen.BenchmarkSchemaInfo) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(io.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(io.druid.query.aggregation.LongMaxAggregatorFactory) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec)

Example 15 with HyperUniquesAggregatorFactory

use of io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory in project druid by druid-io.

the class GroupByQueryRunnerTest method testSubqueryWithHyperUniquesPostAggregator.

@Test
public void testSubqueryWithHyperUniquesPostAggregator() {
    GroupByQuery subquery = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList()).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"), new HyperUniquesAggregatorFactory("quality_uniques_inner", "quality_uniques"))).setPostAggregatorSpecs(Arrays.<PostAggregator>asList(new FieldAccessPostAggregator("quality_uniques_inner_post", "quality_uniques_inner"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
    GroupByQuery query = GroupByQuery.builder().setDataSource(subquery).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList()).setAggregatorSpecs(Arrays.asList(new LongSumAggregatorFactory("rows", "rows"), new LongSumAggregatorFactory("idx", "idx"), new HyperUniquesAggregatorFactory("quality_uniques_outer", "quality_uniques_inner_post"))).setPostAggregatorSpecs(Arrays.<PostAggregator>asList(new HyperUniqueFinalizingPostAggregator("quality_uniques_outer_post", "quality_uniques_outer"))).setGranularity(QueryRunnerTestHelper.allGran).build();
    List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "rows", 26L, "idx", 12446L, "quality_uniques_outer", 9.019833517963864, "quality_uniques_outer_post", 9.019833517963864));
    // Subqueries are handled by the ToolChest
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Also used : FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) Row(io.druid.data.input.Row) Test(org.junit.Test)

Aggregations

HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory)19 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)16 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)11 Test (org.junit.Test)9 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)8 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)6 ImmutableMap (com.google.common.collect.ImmutableMap)4 DimensionsSpec (io.druid.data.input.impl.DimensionsSpec)4 TimestampSpec (io.druid.data.input.impl.TimestampSpec)4 DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory)4 LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory)4 ArrayList (java.util.ArrayList)4 Interval (org.joda.time.Interval)4 BenchmarkSchemaInfo (io.druid.benchmark.datagen.BenchmarkSchemaInfo)3 InputRow (io.druid.data.input.InputRow)3 Row (io.druid.data.input.Row)3 StringInputRowParser (io.druid.data.input.impl.StringInputRowParser)3 FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory)3 CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory)3 HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)3