
Example 1 with DimensionTopNMetricSpec

Use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.

In class CalciteQueryTest, method testQueryContextOuterLimit:

@Test
public void testQueryContextOuterLimit() throws Exception {
    Map<String, Object> outerLimitContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
    outerLimitContext.put(PlannerContext.CTX_SQL_OUTER_LIMIT, 4);
    TopNQueryBuilder baseBuilder = new TopNQueryBuilder()
        .dataSource(CalciteTests.DATASOURCE1)
        .intervals(querySegmentSpec(Filtration.eternity()))
        .granularity(Granularities.ALL)
        .dimension(new DefaultDimensionSpec("dim1", "d0"))
        .metric(new InvertedTopNMetricSpec(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)))
        .context(outerLimitContext);
    List<Object[]> results1;
    if (NullHandling.replaceWithDefault()) {
        results1 = ImmutableList.of(new Object[]{""}, new Object[]{"def"}, new Object[]{"abc"}, new Object[]{"2"});
    } else {
        results1 = ImmutableList.of(new Object[]{"def"}, new Object[]{"abc"}, new Object[]{"2"}, new Object[]{"10.1"});
    }
    // no existing limit: the context limit applies
    testQuery(
        PLANNER_CONFIG_DEFAULT, outerLimitContext,
        "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC",
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(baseBuilder.threshold(4).build()), results1);
    // existing limit greater than the context limit: the context limit overrides it
    testQuery(
        PLANNER_CONFIG_DEFAULT, outerLimitContext,
        "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC LIMIT 9",
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(baseBuilder.threshold(4).build()), results1);
    List<Object[]> results2;
    if (NullHandling.replaceWithDefault()) {
        results2 = ImmutableList.of(new Object[]{""}, new Object[]{"def"});
    } else {
        results2 = ImmutableList.of(new Object[]{"def"}, new Object[]{"abc"});
    }
    // existing limit less than the context limit: the existing limit is kept
    testQuery(
        PLANNER_CONFIG_DEFAULT, outerLimitContext,
        "SELECT dim1 FROM druid.foo GROUP BY dim1 ORDER BY dim1 DESC LIMIT 2",
        CalciteTests.REGULAR_USER_AUTH_RESULT,
        ImmutableList.of(baseBuilder.threshold(2).build()), results2);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) InvertedTopNMetricSpec(org.apache.druid.query.topn.InvertedTopNMetricSpec) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HashMap(java.util.HashMap) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)
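A minimal standalone sketch of the pattern this test exercises (the data source name, dimension, and interval handling here are assumptions, not taken from the test): DimensionTopNMetricSpec sorts dimension values ascending by the given comparator, and wrapping it in InvertedTopNMetricSpec reverses the order, which is how ORDER BY dim1 DESC is expressed as a TopN.

// Hypothetical query: top 10 "page" values in descending lexicographic order.
TopNQuery descendingByDim = new TopNQueryBuilder()
    .dataSource("wikipedia")  // assumed data source name
    .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.ETERNITY)))
    .granularity(Granularities.ALL)
    .dimension(new DefaultDimensionSpec("page", "page"))
    // ascending lexicographic, inverted to get descending order
    .metric(new InvertedTopNMetricSpec(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC)))
    .threshold(10)
    .aggregators(Collections.singletonList(new CountAggregatorFactory("count")))
    .build();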

Example 2 with DimensionTopNMetricSpec

Use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.

In class DruidQuery, method toTopNQuery:

/**
 * Return this query as a TopN query, or null if this query is not compatible with TopN.
 *
 * @return query or null
 */
@Nullable
private TopNQuery toTopNQuery(final QueryFeatureInspector queryFeatureInspector) {
    // Must be allowed by the QueryMaker.
    if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TOPN)) {
        return null;
    }
    // Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, LIMIT > 0 and ≤ maxTopNLimit,
    // no OFFSET, no HAVING.
    final boolean topNOk = grouping != null
                           && grouping.getDimensions().size() == 1
                           && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs())
                           && sorting != null
                           && sorting.getOrderBys().size() <= 1
                           && sorting.getOffsetLimit().hasLimit()
                           && sorting.getOffsetLimit().getLimit() > 0
                           && sorting.getOffsetLimit().getLimit() <= plannerContext.getPlannerConfig().getMaxTopNLimit()
                           && !sorting.getOffsetLimit().hasOffset()
                           && grouping.getHavingFilter() == null;
    if (!topNOk) {
        return null;
    }
    final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec();
    // The grouping column cannot be an array type.
    if (dimensionSpec.getOutputType().isArray()) {
        return null;
    }
    final OrderByColumnSpec limitColumn;
    if (sorting.getOrderBys().isEmpty()) {
        limitColumn = new OrderByColumnSpec(
            dimensionSpec.getOutputName(),
            OrderByColumnSpec.Direction.ASCENDING,
            Calcites.getStringComparatorForValueType(dimensionSpec.getOutputType())
        );
    } else {
        limitColumn = Iterables.getOnlyElement(sorting.getOrderBys());
    }
    final TopNMetricSpec topNMetricSpec;
    if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
        // DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
        final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? baseMetricSpec : new InvertedTopNMetricSpec(baseMetricSpec);
    } else if (plannerContext.getPlannerConfig().isUseApproximateTopN()) {
        // ORDER BY metric
        final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? new InvertedTopNMetricSpec(baseMetricSpec) : baseMetricSpec;
    } else {
        return null;
    }
    final Pair<DataSource, Filtration> dataSourceFiltrationPair = getFiltration(dataSource, filter, virtualColumnRegistry);
    final DataSource newDataSource = dataSourceFiltrationPair.lhs;
    final Filtration filtration = dataSourceFiltrationPair.rhs;
    final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
    if (sorting.getProjection() != null) {
        postAggregators.addAll(sorting.getProjection().getPostAggregators());
    }
    return new TopNQuery(
        newDataSource,
        getVirtualColumns(true),
        dimensionSpec,
        topNMetricSpec,
        Ints.checkedCast(sorting.getOffsetLimit().getLimit()),
        filtration.getQuerySegmentSpec(),
        filtration.getDimFilter(),
        Granularities.ALL,
        grouping.getAggregatorFactories(),
        postAggregators,
        ImmutableSortedMap.copyOf(plannerContext.getQueryContext())
    );
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) Filtration(org.apache.druid.sql.calcite.filtration.Filtration) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ArrayList(java.util.ArrayList) InvertedTopNMetricSpec(org.apache.druid.query.topn.InvertedTopNMetricSpec) TopNMetricSpec(org.apache.druid.query.topn.TopNMetricSpec) NumericTopNMetricSpec(org.apache.druid.query.topn.NumericTopNMetricSpec) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) JoinDataSource(org.apache.druid.query.JoinDataSource) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) TopNQuery(org.apache.druid.query.topn.TopNQuery) Nullable(javax.annotation.Nullable)
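The branch on limitColumn above is the core of the translation. A condensed sketch of just that decision (chooseMetricSpec is a hypothetical helper distilling the logic, not a method in DruidQuery; the real method additionally requires useApproximateTopN for the metric branch and returns null otherwise):

// ORDER BY the grouping dimension: exact DimensionTopNMetricSpec, inverted for DESC.
// ORDER BY a metric: approximate NumericTopNMetricSpec, which sorts descending by
// default, so ASC is the inverted case.
static TopNMetricSpec chooseMetricSpec(OrderByColumnSpec limitColumn, String dimensionOutputName) {
    if (limitColumn.getDimension().equals(dimensionOutputName)) {
        TopNMetricSpec base = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
        return limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
               ? base
               : new InvertedTopNMetricSpec(base);
    }
    TopNMetricSpec base = new NumericTopNMetricSpec(limitColumn.getDimension());
    return limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING
           ? new InvertedTopNMetricSpec(base)
           : base;
}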

Example 3 with DimensionTopNMetricSpec

Use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.

In class TopNTypeInterfaceBenchmark, method setupQueries:

private void setupQueries() {
    // queries for the basic schema
    Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        // Use an IdentityExtractionFn to force usage of HeapBasedTopNAlgorithm
        TopNQueryBuilder queryBuilderString = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance()))
            .metric("sumFloatNormal")
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        // HeapBasedTopNAlgorithm is always used for numeric columns
        TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("metLongUniform")
            .metric("sumFloatNormal")
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("metFloatNormal")
            .metric("sumFloatNormal")
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("string", queryBuilderString);
        basicQueries.put("long", queryBuilderLong);
        basicQueries.put("float", queryBuilderFloat);
    }
    {
        // basic.numericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("numericSort", queryBuilderA);
    }
    {
        // basic.alphanumericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("alphanumericSort", queryBuilderA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec)
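A minimal, self-contained sketch (not part of the benchmark) contrasting the comparators the two sort variants above hand to DimensionTopNMetricSpec:

import org.apache.druid.query.ordering.StringComparators;

public class ComparatorDemo {
    public static void main(String[] args) {
        // LEXICOGRAPHIC compares character by character: "10" < "9" because '1' < '9'.
        System.out.println(StringComparators.LEXICOGRAPHIC.compare("10", "9") < 0);  // true
        // NUMERIC parses the values as numbers: 10 > 9.
        System.out.println(StringComparators.NUMERIC.compare("10", "9") > 0);        // true
        // ALPHANUMERIC compares embedded digit runs numerically: "a10" > "a9".
        System.out.println(StringComparators.ALPHANUMERIC.compare("a10", "a9") > 0); // true
    }
}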

Example 4 with DimensionTopNMetricSpec

Use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.

In class TopNBenchmark, method setupQueries:

private void setupQueries() {
    // queries for the basic schema
    Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
    GeneratorSchemaInfo basicSchema = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
    {
        // basic.A
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
        queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
        queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
        queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimSequential")
            .metric("sumFloatNormal")
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("A", queryBuilderA);
    }
    {
        // basic.numericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("numericSort", queryBuilderA);
    }
    {
        // basic.alphanumericSort
        QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(basicSchema.getDataInterval()));
        List<AggregatorFactory> queryAggs = new ArrayList<>();
        queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
        TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
            .dataSource("blah")
            .granularity(Granularities.ALL)
            .dimension("dimUniform")
            .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
            .intervals(intervalSpec)
            .aggregators(queryAggs);
        basicQueries.put("alphanumericSort", queryBuilderA);
    }
    SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DoubleMinAggregatorFactory(org.apache.druid.query.aggregation.DoubleMinAggregatorFactory) LinkedHashMap(java.util.LinkedHashMap) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) List(java.util.List) ArrayList(java.util.ArrayList) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory)
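Note that these stored builders set no threshold. A hedged sketch of how one might be completed at run time (the threshold value and lookup keys are assumptions for illustration, not taken from the benchmark source):

// Retrieve a prepared builder and finish it with a concrete threshold.
TopNQueryBuilder builder = SCHEMA_QUERY_MAP.get("basic").get("numericSort");
TopNQuery query = builder.threshold(10).build();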

Example 5 with DimensionTopNMetricSpec

Use of org.apache.druid.query.topn.DimensionTopNMetricSpec in project druid by druid-io.

In class CalciteQueryTest, method testNullDoubleTopN:

@Test
public void testNullDoubleTopN() throws Exception {
    List<Object[]> expected;
    if (useDefault) {
        expected = ImmutableList.of(new Object[] { 1.7, 1L }, new Object[] { 1.0, 1L }, new Object[] { 0.0, 4L });
    } else {
        expected = ImmutableList.of(new Object[] { null, 3L }, new Object[] { 1.7, 1L }, new Object[] { 1.0, 1L }, new Object[] { 0.0, 1L });
    }
    testQuery(
        "SELECT d1, COUNT(*) FROM druid.numfoo GROUP BY d1 ORDER BY d1 DESC LIMIT 10",
        QUERY_CONTEXT_DEFAULT,
        ImmutableList.of(
            new TopNQueryBuilder()
                .dataSource(CalciteTests.DATASOURCE3)
                .intervals(querySegmentSpec(Filtration.eternity()))
                .dimension(new DefaultDimensionSpec("d1", "_d0", ColumnType.DOUBLE))
                .threshold(10)
                .aggregators(aggregators(new CountAggregatorFactory("a0")))
                .metric(new InvertedTopNMetricSpec(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)))
                .context(QUERY_CONTEXT_DEFAULT)
                .build()
        ),
        expected);
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) InvertedTopNMetricSpec(org.apache.druid.query.topn.InvertedTopNMetricSpec) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)
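The ordering here is the same composition seen in Example 1, with a numeric comparator this time. A minimal sketch of the composition on its own (variable names are illustrative):

// Ascending numeric order over the grouped values of "d1"...
TopNMetricSpec ascending = new DimensionTopNMetricSpec(null, StringComparators.NUMERIC);
// ...inverted to express ORDER BY d1 DESC.
TopNMetricSpec descending = new InvertedTopNMetricSpec(ascending);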

Aggregations

DimensionTopNMetricSpec (org.apache.druid.query.topn.DimensionTopNMetricSpec): 11
TopNQueryBuilder (org.apache.druid.query.topn.TopNQueryBuilder): 9
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 8
Test (org.junit.Test): 8
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 6
InvertedTopNMetricSpec (org.apache.druid.query.topn.InvertedTopNMetricSpec): 6
ArrayList (java.util.ArrayList): 3
QueryDataSource (org.apache.druid.query.QueryDataSource): 3
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 3
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 3
HashMap (java.util.HashMap): 2
LinkedHashMap (java.util.LinkedHashMap): 2
List (java.util.List): 2
TableDataSource (org.apache.druid.query.TableDataSource): 2
DoubleMinAggregatorFactory (org.apache.druid.query.aggregation.DoubleMinAggregatorFactory): 2
LongMaxAggregatorFactory (org.apache.druid.query.aggregation.LongMaxAggregatorFactory): 2
HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 2
NotDimFilter (org.apache.druid.query.filter.NotDimFilter): 2
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 2
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 2