
Example 61 with GroupByQuery

Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

Class GroupByBenchmark, method setupQueries.

private void setupQueries() {
    // queries for the basic schema
    Map<String, GroupByQuery> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new CountAggregatorFactory("cnt"));
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("A", queryA);
    }
    {
        // basic.sorted
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setLimitSpec(new DefaultLimitSpec(
                Collections.singletonList(new OrderByColumnSpec(
                    "sumLongSequential",
                    OrderByColumnSpec.Direction.DESCENDING,
                    StringComparators.NUMERIC)),
                100))
            .build();
        basicQueries.put("sorted", queryA);
    }
    {
        // basic.nested
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        GroupByQuery subqueryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(
                new DefaultDimensionSpec("dimSequential", null),
                new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.DAY)
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource(subqueryA)
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularities.WEEK)
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("nested", queryA);
    }
    {
        // basic.filter
        final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        // Use multiple aggregators to see how the number of aggregators impacts query performance.
        List<AggregatorFactory> queryAggs = ImmutableList.of(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new LongSumAggregatorFactory("rows", "rows"),
            new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
            new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimUniform", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setDimFilter(new BoundDimFilter("dimUniform", "0", "100", true, true, null, null, null))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("filter", queryA);
    }
    {
        // basic.singleZipf
        final QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        // Use multiple aggregators to see how the number of aggregators impacts query performance.
        List<AggregatorFactory> queryAggs = ImmutableList.of(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"),
            new LongSumAggregatorFactory("rows", "rows"),
            new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"),
            new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimZipf", null))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        basicQueries.put("singleZipf", queryA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
    // simple one column schema, for testing performance difference between querying on numeric values as Strings and
    // directly as longs
    Map<String, GroupByQuery> simpleQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simple");
    {
        // simple.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.STRING))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simple", simpleQueries);
    Map<String, GroupByQuery> simpleLongQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleLongSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleLong");
    {
        // simpleLong.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleLongSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.LONG))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleLongQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleLong", simpleLongQueries);
    Map<String, GroupByQuery> simpleFloatQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo simpleFloatSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("simpleFloat");
    {
        // simpleFloat.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(simpleFloatSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("rows", "rows"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("dimSequential", "dimSequential", ColumnType.FLOAT))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        simpleFloatQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("simpleFloat", simpleFloatQueries);
    // schema with null values, for testing group-by performance on columns that contain nulls
    Map<String, GroupByQuery> nullQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo nullSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("nulls");
    {
        // simple-null
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(nullSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new DoubleSumAggregatorFactory("doubleSum", "doubleZipf"));
        GroupByQuery queryA = GroupByQuery.builder()
            .setDataSource("blah")
            .setQuerySegmentSpec(intervalSpec)
            .setDimensions(new DefaultDimensionSpec("stringZipf", "stringZipf", ColumnType.STRING))
            .setAggregatorSpecs(queryAggs)
            .setGranularity(Granularity.fromString(queryGranularity))
            .setContext(ImmutableMap.of("vectorize", vectorize))
            .build();
        nullQueries.put("A", queryA);
    }
    SCHEMA_QUERY_MAP.put("nulls", nullQueries);
}
Also used: BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter), DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec), DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory), GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec), DoubleMinAggregatorFactory (org.apache.druid.query.aggregation.DoubleMinAggregatorFactory), DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), LinkedHashMap (java.util.LinkedHashMap), OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (com.google.common.collect.ImmutableList)
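
For orientation, a minimal sketch (not part of the benchmark source) of how one of the queries registered above would be run. It assumes a QueryRunner<ResultRow> named runner has been assembled elsewhere, e.g. from the factory as in queryMultiQueryableIndexWithSpilling below, and reuses the run/toList pattern visible in those benchmark methods.

// Hypothetical usage; "runner" is assumed to exist and be wired to the indexed segments.
GroupByQuery queryA = SCHEMA_QUERY_MAP.get("basic").get("A");
Sequence<ResultRow> resultSequence = runner.run(QueryPlus.wrap(queryA), ResponseContext.createEmpty());
List<ResultRow> results = resultSequence.toList();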

Example 62 with GroupByQuery

Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

Class GroupByBenchmark, method queryMultiQueryableIndexWithSerde.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndexWithSerde(Blackhole blackhole, QueryableIndexState state) {
    QueryToolChest<ResultRow, GroupByQuery> toolChest = factory.getToolchest();
    // noinspection unchecked
    QueryRunner<ResultRow> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(
            new SerializingQueryRunner<>(
                new DefaultObjectMapper(new SmileFactory()),
                ResultRow.class,
                toolChest.mergeResults(
                    factory.mergeRunners(state.executorService, makeMultiRunners(state))))),
        (QueryToolChest) toolChest);
    Sequence<ResultRow> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    List<ResultRow> results = queryResult.toList();
    blackhole.consume(results);
}
Also used: ResultRow (org.apache.druid.query.groupby.ResultRow), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), SmileFactory (com.fasterxml.jackson.dataformat.smile.SmileFactory), FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode), Benchmark (org.openjdk.jmh.annotations.Benchmark), OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)
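
What SerializingQueryRunner adds over the plain merge path used in the next example is a per-row serialization round trip. A rough sketch of that round trip, under the assumption (implied by the ResultRow.class argument above) that each row is written to Smile-encoded bytes and read back with the same mapper; ObjectMapper here is com.fasterxml.jackson.databind.ObjectMapper.

// Sketch only: approximates the per-row cost this benchmark isolates.
ObjectMapper smileMapper = new DefaultObjectMapper(new SmileFactory());
byte[] bytes = smileMapper.writeValueAsBytes(results.get(0));            // one ResultRow from above
ResultRow roundTripped = smileMapper.readValue(bytes, ResultRow.class);  // simulated wire transfer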

Example 63 with GroupByQuery

Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

Class GroupByBenchmark, method queryMultiQueryableIndexWithSpilling.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndexWithSpilling(Blackhole blackhole, QueryableIndexState state) {
    QueryToolChest<ResultRow, GroupByQuery> toolChest = factory.getToolchest();
    QueryRunner<ResultRow> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(factory.mergeRunners(state.executorService, makeMultiRunners(state))),
        (QueryToolChest) toolChest);
    final GroupByQuery spillingQuery = query.withOverriddenContext(ImmutableMap.of("bufferGrouperMaxSize", 4000));
    Sequence<ResultRow> queryResult = theRunner.run(QueryPlus.wrap(spillingQuery), ResponseContext.createEmpty());
    List<ResultRow> results = queryResult.toList();
    blackhole.consume(results);
}
Also used: ResultRow (org.apache.druid.query.groupby.ResultRow), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner), BenchmarkMode (org.openjdk.jmh.annotations.BenchmarkMode), Benchmark (org.openjdk.jmh.annotations.Benchmark), OutputTimeUnit (org.openjdk.jmh.annotations.OutputTimeUnit)
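
The override above caps the in-memory grouper at 4000 entries so that merging spills early, which is what this benchmark measures. Other groupBy context keys can be overridden per query the same way; a hedged sketch combining it with maxOnDiskStorage, a documented spill-related key (the value here is illustrative, not from the benchmark):

// Sketch: both keys override GroupByQueryConfig defaults for this query only.
GroupByQuery tunedQuery = query.withOverriddenContext(
    ImmutableMap.of(
        "bufferGrouperMaxSize", 4000,
        "maxOnDiskStorage", 1_000_000_000));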

Example 64 with GroupByQuery

Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

Class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg.

@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(
            new IncrementalIndexSchema.Builder()
                .withQueryGranularity(Granularities.SECOND)
                .withMetrics(new CountAggregatorFactory("cnt"))
                .build())
        .setConcurrentEventAdd(true)
        .setMaxRowCount(1000)
        .build();
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    long timestamp = DateTimes.of("2010-01-01").getMillis();
    index.add(new MapBasedInputRow(
        timestamp,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(
        timestamp + 1,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(
        timestamp + 2,
        Lists.newArrayList(visitor_id, client_type),
        ImmutableMap.of(visitor_id, "2", client_type, "android")));
    GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
        .setDimensions(new DefaultDimensionSpec(client_type, client_type))
        .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
        .setLimitSpec(new DefaultLimitSpec(
            Collections.singletonList(
                new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)),
            10))
        .setAggregatorSpecs(
            QueryRunnerTestHelper.ROWS_COUNT,
            new DistinctCountAggregatorFactory("UV", visitor_id, null))
        .build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(
        factory,
        factory.createRunner(incrementalIndexSegment),
        query);
    List<ResultRow> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow(
            query, "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow(
            query, "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L));
    TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
Also used: ResultRow (org.apache.druid.query.groupby.ResultRow), DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment), DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), Segment (org.apache.druid.segment.Segment), OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema), GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
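
Why the expected rows hold: the three ingested rows carry visitor_ids {"0", "1"} for client_type "iphone" and {"2"} for "android", and DistinctCountAggregatorFactory (from the druid-distinctcount contrib extension) computes an exact per-group distinct count, so "UV" is exactly 2 and 1. A hedged restatement of the aggregator construction; the assumption here is that the null third argument selects the extension's default bitmap factory.

// Sketch: same construction as in the test above; null = default bitmap factory (assumed).
AggregatorFactory uv = new DistinctCountAggregatorFactory("UV", visitor_id, null);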

Example 65 with GroupByQuery

Use of org.apache.druid.query.groupby.GroupByQuery in project druid by druid-io.

Class SketchAggregationTest, method testSketchDataIngestAndGpByQuery.

@Test
public void testSketchDataIngestAndGpByQuery() throws Exception {
    final GroupByQuery groupByQuery = readQueryFromClasspath("sketch_test_data_group_by_query.json", helper.getObjectMapper(), vectorize);
    final Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment(
        new File(SketchAggregationTest.class.getClassLoader().getResource("sketch_test_data.tsv").getFile()),
        readFileFromClasspathAsString("sketch_test_data_record_parser.json"),
        readFileFromClasspathAsString("sketch_test_data_aggregators.json"),
        0,
        Granularities.NONE,
        1000,
        groupByQuery);
    final String expectedSummary = "\n### HeapCompactSketch SUMMARY: \n"
        + "   Estimate                : 50.0\n"
        + "   Upper Bound, 95% conf   : 50.0\n"
        + "   Lower Bound, 95% conf   : 50.0\n"
        + "   Theta (double)          : 1.0\n"
        + "   Theta (long)            : 9223372036854775807\n"
        + "   Theta (long) hex        : 7fffffffffffffff\n"
        + "   EstMode?                : false\n"
        + "   Empty?                  : false\n"
        + "   Ordered?                : true\n"
        + "   Retained Entries        : 50\n"
        + "   Seed Hash               : 93cc | 37836\n"
        + "### END SKETCH SUMMARY\n";
    List<ResultRow> results = seq.toList();
    Assert.assertEquals(1, results.size());
    Assert.assertEquals(
        ResultRow.fromLegacyRow(
            new MapBasedRow(
                DateTimes.of("2014-10-19T00:00:00.000Z"),
                ImmutableMap.<String, Object>builder()
                    .put("sids_sketch_count", 50.0)
                    .put("sids_sketch_count_with_err", new SketchEstimateWithErrorBounds(50.0, 50.0, 50.0, 2))
                    .put("sketchEstimatePostAgg", 50.0)
                    .put("sketchEstimatePostAggWithErrorBounds", new SketchEstimateWithErrorBounds(50.0, 50.0, 50.0, 2))
                    .put("sketchUnionPostAggEstimate", 50.0)
                    .put("sketchSummary", expectedSummary)
                    .put("sketchIntersectionPostAggEstimate", 50.0)
                    .put("sketchAnotBPostAggEstimate", 0.0)
                    .put("non_existing_col_validation", 0.0)
                    .build()),
            groupByQuery),
        results.get(0));
}
Also used: ResultRow (org.apache.druid.query.groupby.ResultRow), MapBasedRow (org.apache.druid.data.input.MapBasedRow), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), File (java.io.File), GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest), Test (org.junit.Test)
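
readQueryFromClasspath is a test helper; a plausible sketch of its core (a hypothetical reconstruction, not the actual implementation), assuming only that the helper's ObjectMapper has the Druid modules registered, which deserializing query types requires:

// Deserialize a JSON group-by spec with a Druid-aware mapper; file name reused from the test.
String json = readFileFromClasspathAsString("sketch_test_data_group_by_query.json");
GroupByQuery groupByQuery = helper.getObjectMapper().readValue(json, GroupByQuery.class);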

Aggregations

GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 95 usages
Test (org.junit.Test): 68 usages
ResultRow (org.apache.druid.query.groupby.ResultRow): 57 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 49 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 37 usages
GroupByQueryRunnerTest (org.apache.druid.query.groupby.GroupByQueryRunnerTest): 37 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 37 usages
List (java.util.List): 21 usages
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 21 usages
LegacySegmentSpec (org.apache.druid.query.spec.LegacySegmentSpec): 20 usages
QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment): 20 usages
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 17 usages
ArrayList (java.util.ArrayList): 16 usages
ExpressionVirtualColumn (org.apache.druid.segment.virtual.ExpressionVirtualColumn): 15 usages
Collectors (java.util.stream.Collectors): 13 usages
QueryDataSource (org.apache.druid.query.QueryDataSource): 13 usages
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 13 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12 usages
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 12 usages
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 11 usages