Search in sources :

Example 1 with OrderByColumnSpec

use of io.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

the class GroupByRules method applyLimit.

/**
   * Applies a sort to an aggregating druidRel, as a LimitSpec. Do not call this method unless
   * {@link #canApplyLimit(DruidRel)} returns true.
   *
   * @return new rel, or null if the sort cannot be applied
   */
private static DruidRel applyLimit(final DruidRel druidRel, final Sort sort) {
    Preconditions.checkState(canApplyLimit(druidRel), "Cannot applyLimit.");
    final Grouping grouping = druidRel.getQueryBuilder().getGrouping();
    final DefaultLimitSpec limitSpec = toLimitSpec(druidRel.getQueryBuilder().getRowOrder(), sort);
    if (limitSpec == null) {
        return null;
    }
    final List<OrderByColumnSpec> orderBys = limitSpec.getColumns();
    final List<DimensionSpec> newDimensions = Lists.newArrayList(grouping.getDimensions());
    // Reorder dimensions, maybe, to allow groupBy to consider pushing down sorting (see DefaultLimitSpec).
    if (!orderBys.isEmpty()) {
        final Map<String, Integer> dimensionOrderByOutputName = Maps.newHashMap();
        for (int i = 0; i < newDimensions.size(); i++) {
            dimensionOrderByOutputName.put(newDimensions.get(i).getOutputName(), i);
        }
        for (int i = 0; i < orderBys.size(); i++) {
            final OrderByColumnSpec orderBy = orderBys.get(i);
            final Integer dimensionOrder = dimensionOrderByOutputName.get(orderBy.getDimension());
            if (dimensionOrder != null && dimensionOrder != i && orderBy.getDirection() == OrderByColumnSpec.Direction.ASCENDING && orderBy.getDimensionComparator().equals(StringComparators.LEXICOGRAPHIC)) {
                final DimensionSpec tmp = newDimensions.get(i);
                newDimensions.set(i, newDimensions.get(dimensionOrder));
                newDimensions.set(dimensionOrder, tmp);
                dimensionOrderByOutputName.put(newDimensions.get(i).getOutputName(), i);
                dimensionOrderByOutputName.put(newDimensions.get(dimensionOrder).getOutputName(), dimensionOrder);
            }
        }
    }
    if (!orderBys.isEmpty() || limitSpec.getLimit() < Integer.MAX_VALUE) {
        return druidRel.withQueryBuilder(druidRel.getQueryBuilder().withAdjustedGrouping(Grouping.create(newDimensions, grouping.getAggregations()), druidRel.getQueryBuilder().getRowType(), druidRel.getQueryBuilder().getRowOrder()).withLimitSpec(limitSpec));
    } else {
        return druidRel;
    }
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) Grouping(io.druid.sql.calcite.rel.Grouping)

Example 2 with OrderByColumnSpec

use of io.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

the class DruidQueryBuilder method toTopNQuery.

/**
   * Return this query as a TopN query, or null if this query is not compatible with TopN.
   *
   * @param dataSource         data source to query
   * @param sourceRowSignature row signature of the dataSource
   * @param context            query context
   * @param maxTopNLimit       maxTopNLimit from a PlannerConfig
   * @param useApproximateTopN from a PlannerConfig
   *
   * @return query or null
   */
public TopNQuery toTopNQuery(final DataSource dataSource, final RowSignature sourceRowSignature, final Map<String, Object> context, final int maxTopNLimit, final boolean useApproximateTopN) {
    // Must have GROUP BY one column, ORDER BY zero or one column, limit less than maxTopNLimit, and no HAVING.
    final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 && limitSpec != null && (limitSpec.getColumns().size() <= 1 && limitSpec.getLimit() <= maxTopNLimit) && having == null;
    if (!topNOk) {
        return null;
    }
    final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions());
    final OrderByColumnSpec limitColumn;
    if (limitSpec.getColumns().isEmpty()) {
        limitColumn = new OrderByColumnSpec(dimensionSpec.getOutputName(), OrderByColumnSpec.Direction.ASCENDING, StringComparators.LEXICOGRAPHIC);
    } else {
        limitColumn = Iterables.getOnlyElement(limitSpec.getColumns());
    }
    final TopNMetricSpec topNMetricSpec;
    if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
        // DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
        final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? baseMetricSpec : new InvertedTopNMetricSpec(baseMetricSpec);
    } else if (useApproximateTopN) {
        // ORDER BY metric
        final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? new InvertedTopNMetricSpec(baseMetricSpec) : baseMetricSpec;
    } else {
        return null;
    }
    final Filtration filtration = Filtration.create(filter).optimize(sourceRowSignature);
    return new TopNQuery(dataSource, VirtualColumns.EMPTY, Iterables.getOnlyElement(grouping.getDimensions()), topNMetricSpec, limitSpec.getLimit(), filtration.getQuerySegmentSpec(), filtration.getDimFilter(), Granularities.ALL, grouping.getAggregatorFactories(), grouping.getPostAggregators(), context);
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DimensionTopNMetricSpec(io.druid.query.topn.DimensionTopNMetricSpec) InvertedTopNMetricSpec(io.druid.query.topn.InvertedTopNMetricSpec) Filtration(io.druid.sql.calcite.filtration.Filtration) NumericTopNMetricSpec(io.druid.query.topn.NumericTopNMetricSpec) TopNQuery(io.druid.query.topn.TopNQuery) TopNMetricSpec(io.druid.query.topn.TopNMetricSpec) NumericTopNMetricSpec(io.druid.query.topn.NumericTopNMetricSpec) DimensionTopNMetricSpec(io.druid.query.topn.DimensionTopNMetricSpec) InvertedTopNMetricSpec(io.druid.query.topn.InvertedTopNMetricSpec)

Example 3 with OrderByColumnSpec

use of io.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithHavingOnHyperUnique.

@Test
public void testGroupByWithHavingOnHyperUnique() {
    GroupByQuery query = new GroupByQuery.Builder().setDataSource(QueryRunnerTestHelper.dataSource).setGranularity(QueryRunnerTestHelper.allGran).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension))).setInterval(QueryRunnerTestHelper.fullOnInterval).setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec(QueryRunnerTestHelper.uniqueMetric, OrderByColumnSpec.Direction.DESCENDING)), 3)).setHavingSpec(new GreaterThanHavingSpec(QueryRunnerTestHelper.uniqueMetric, 8)).setAggregatorSpecs(Lists.<AggregatorFactory>newArrayList(QueryRunnerTestHelper.qualityUniques)).setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.uniqueMetric))).build();
    List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.uniqueMetric, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.UNIQUES_9));
    // havingSpec equalTo/greaterThan/lessThan do not work on complex aggregators, even if they could be finalized.
    // See also: https://github.com/druid-io/druid/issues/2507
    expectedException.expect(ParseException.class);
    expectedException.expectMessage("Unknown type[class io.druid.hll.HLLCV1]");
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) GreaterThanHavingSpec(io.druid.query.groupby.having.GreaterThanHavingSpec) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) Row(io.druid.data.input.Row) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 4 with OrderByColumnSpec

use of io.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithOrderOnHyperUnique.

@Test
public void testGroupByWithOrderOnHyperUnique() {
    GroupByQuery query = new GroupByQuery.Builder().setDataSource(QueryRunnerTestHelper.dataSource).setGranularity(QueryRunnerTestHelper.allGran).setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(QueryRunnerTestHelper.marketDimension, QueryRunnerTestHelper.marketDimension))).setInterval(QueryRunnerTestHelper.fullOnInterval).setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec(QueryRunnerTestHelper.uniqueMetric, OrderByColumnSpec.Direction.DESCENDING)), 3)).setAggregatorSpecs(Lists.<AggregatorFactory>newArrayList(QueryRunnerTestHelper.qualityUniques)).setPostAggregatorSpecs(Lists.<PostAggregator>newArrayList(new HyperUniqueFinalizingPostAggregator(QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.uniqueMetric))).build();
    List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "market", "spot", QueryRunnerTestHelper.uniqueMetric, QueryRunnerTestHelper.UNIQUES_9, QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.UNIQUES_9), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "market", "upfront", QueryRunnerTestHelper.uniqueMetric, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.UNIQUES_2), GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", "market", "total_market", QueryRunnerTestHelper.uniqueMetric, QueryRunnerTestHelper.UNIQUES_2, QueryRunnerTestHelper.hyperUniqueFinalizingPostAggMetric, QueryRunnerTestHelper.UNIQUES_2));
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
    TestHelper.assertExpectedObjects(expectedResults, results, "order-limit");
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) RegexFilteredDimensionSpec(io.druid.query.dimension.RegexFilteredDimensionSpec) ExtractionDimensionSpec(io.druid.query.dimension.ExtractionDimensionSpec) ListFilteredDimensionSpec(io.druid.query.dimension.ListFilteredDimensionSpec) DimensionSpec(io.druid.query.dimension.DimensionSpec) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) HyperUniqueFinalizingPostAggregator(io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) Row(io.druid.data.input.Row) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Example 5 with OrderByColumnSpec

use of io.druid.query.groupby.orderby.OrderByColumnSpec in project druid by druid-io.

the class GroupByQueryRunnerTest method testGroupByWithOrderLimitHavingSpec.

@Test
public void testGroupByWithOrderLimitHavingSpec() {
    GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setInterval("2011-01-25/2011-01-28").setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new DoubleSumAggregatorFactory("index", "index"))).setGranularity(Granularities.ALL).setHavingSpec(new GreaterThanHavingSpec("index", 310L)).setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec("index", OrderByColumnSpec.Direction.ASCENDING)), 5));
    List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-01-25", "alias", "business", "rows", 3L, "index", 312.38165283203125), GroupByQueryRunnerTestHelper.createExpectedRow("2011-01-25", "alias", "news", "rows", 3L, "index", 312.7834167480469), GroupByQueryRunnerTestHelper.createExpectedRow("2011-01-25", "alias", "technology", "rows", 3L, "index", 324.6412353515625), GroupByQueryRunnerTestHelper.createExpectedRow("2011-01-25", "alias", "travel", "rows", 3L, "index", 393.36322021484375), GroupByQueryRunnerTestHelper.createExpectedRow("2011-01-25", "alias", "health", "rows", 3L, "index", 511.2996826171875));
    GroupByQuery fullQuery = builder.build();
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, fullQuery);
    TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Also used : OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) GreaterThanHavingSpec(io.druid.query.groupby.having.GreaterThanHavingSpec) Row(io.druid.data.input.Row) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)

Aggregations

OrderByColumnSpec (io.druid.query.groupby.orderby.OrderByColumnSpec)27 Test (org.junit.Test)22 DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec)21 Row (io.druid.data.input.Row)19 DefaultLimitSpec (io.druid.query.groupby.orderby.DefaultLimitSpec)19 DimensionSpec (io.druid.query.dimension.DimensionSpec)15 LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory)13 ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec)10 ListFilteredDimensionSpec (io.druid.query.dimension.ListFilteredDimensionSpec)9 RegexFilteredDimensionSpec (io.druid.query.dimension.RegexFilteredDimensionSpec)9 PostAggregator (io.druid.query.aggregation.PostAggregator)7 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)6 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)6 FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator)6 JavaScriptAggregatorFactory (io.druid.query.aggregation.JavaScriptAggregatorFactory)5 HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator)5 ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator)5 ConstantPostAggregator (io.druid.query.aggregation.post.ConstantPostAggregator)5 ExpressionPostAggregator (io.druid.query.aggregation.post.ExpressionPostAggregator)5 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)4