
Example 1 with LimitSpec

Use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.

From class GroupByQuery, method getRowOrderingForPushDown.

/**
 * When limit push down is applied, the partial results are sorted by the ordering specified by the
 * limit/order spec (unlike the non-push-down case, where results always use the default natural ascending
 * order), so when merging these partial result streams, the merge must use the same ordering to get
 * correct results.
 */
private Ordering<ResultRow> getRowOrderingForPushDown(final boolean granular, final DefaultLimitSpec limitSpec) {
    final boolean sortByDimsFirst = getContextSortByDimsFirst();
    final IntList orderedFieldNumbers = new IntArrayList();
    final Set<Integer> dimsInOrderBy = new HashSet<>();
    final List<Boolean> needsReverseList = new ArrayList<>();
    final List<ColumnType> dimensionTypes = new ArrayList<>();
    final List<StringComparator> comparators = new ArrayList<>();
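    // First, record ordering info for each dimension that appears explicitly in the order-by columns.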
    for (OrderByColumnSpec orderSpec : limitSpec.getColumns()) {
        boolean needsReverse = orderSpec.getDirection() != OrderByColumnSpec.Direction.ASCENDING;
        int dimIndex = OrderByColumnSpec.getDimIndexForOrderBy(orderSpec, dimensions);
        if (dimIndex >= 0) {
            DimensionSpec dim = dimensions.get(dimIndex);
            orderedFieldNumbers.add(resultRowSignature.indexOf(dim.getOutputName()));
            dimsInOrderBy.add(dimIndex);
            needsReverseList.add(needsReverse);
            final ColumnType type = dim.getOutputType();
            dimensionTypes.add(type);
            comparators.add(orderSpec.getDimensionComparator());
        }
    }
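    // Dimensions not mentioned in the order-by are appended with the default ascending, lexicographic order.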
    for (int i = 0; i < dimensions.size(); i++) {
        if (!dimsInOrderBy.contains(i)) {
            orderedFieldNumbers.add(resultRowSignature.indexOf(dimensions.get(i).getOutputName()));
            needsReverseList.add(false);
            final ColumnType type = dimensions.get(i).getOutputType();
            dimensionTypes.add(type);
            comparators.add(StringComparators.LEXICOGRAPHIC);
        }
    }
    final Comparator<ResultRow> timeComparator = getTimeComparator(granular);
    if (timeComparator == null) {
        return Ordering.from((lhs, rhs) -> compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs));
    } else if (sortByDimsFirst) {
        return Ordering.from((lhs, rhs) -> {
            final int cmp = compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
            if (cmp != 0) {
                return cmp;
            }
            return timeComparator.compare(lhs, rhs);
        });
    } else {
        return Ordering.from((lhs, rhs) -> {
            final int timeCompare = timeComparator.compare(lhs, rhs);
            if (timeCompare != 0) {
                return timeCompare;
            }
            return compareDimsForLimitPushDown(orderedFieldNumbers, needsReverseList, dimensionTypes, comparators, lhs, rhs);
        });
    }
}
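
To see why the merge must reuse the push-down ordering, here is a minimal, self-contained sketch (plain Guava, not Druid code; the class name MergeOrderingSketch is made up for illustration): each partial stream is pre-sorted by the same Ordering, and Iterables.mergeSorted combines them into one correctly sorted stream only because the merge comparator matches the sort comparator.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import java.util.Arrays;
import java.util.List;

public class MergeOrderingSketch {
    public static void main(String[] args) {
        // This ordering plays the role of getRowOrderingForPushDown's result.
        Ordering<Integer> ordering = Ordering.natural().reverse();
        // Two "partial result streams", each already sorted by that ordering.
        List<Integer> partialOne = Arrays.asList(9, 5, 1);
        List<Integer> partialTwo = Arrays.asList(8, 4, 2);
        // Merging with the same ordering yields one globally sorted stream.
        Iterable<Integer> merged = Iterables.mergeSorted(Arrays.asList(partialOne, partialTwo), ordering);
        System.out.println(ImmutableList.copyOf(merged)); // [9, 8, 5, 4, 2, 1]
    }
}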

Example 2 with LimitSpec

Use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.

From class GroupByStrategyV2, method processSubtotalsSpec.

@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
    // How it works:
    // First, we accumulate the results of the top-level base query (the queryResult arg) inside a
    // resultSupplierOne object.
    //
    // Next, for each subtotalSpec:
    // - If the subtotalSpec is a prefix of the top-level dims, we iterate over the rows in
    //   resultSupplierOne, which are already sorted by the subtotalSpec, stream-merge them, and return.
    // - If the subtotalSpec is not a prefix of the top-level dims, we create a resultSupplierTwo object
    //   filled with the rows from resultSupplierOne restricted to only the dims from the subtotalSpec.
    //   We then iterate over the rows in resultSupplierTwo, which are by construction sorted by the
    //   subtotalSpec, stream-merge them, and return.
    //
    // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplierOne = null;
    try {
        // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
        // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
        // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
        // one-by-one soon enough.
        GroupByQuery baseSubtotalQuery = query
            .withDimensionSpecs(
                query.getDimensions().stream()
                     .map(dimSpec -> new DefaultDimensionSpec(
                         dimSpec.getOutputName(),
                         dimSpec.getOutputName(),
                         dimSpec.getOutputType()))
                     .collect(Collectors.toList()))
            .withAggregatorSpecs(
                query.getAggregatorSpecs().stream()
                     .map(AggregatorFactory::getCombiningFactory)
                     .collect(Collectors.toList()))
            .withVirtualColumns(VirtualColumns.EMPTY)
            .withDimFilter(null)
            .withSubtotalsSpec(null)
            .withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));
        resultSupplierOne = GroupByRowProcessor.process(
            baseSubtotalQuery,
            baseSubtotalQuery,
            queryResult,
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes());
        List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream()
            .map(DimensionSpec::getOutputName)
            .collect(Collectors.toList());
        // Only needed for the LimitSpec.filterColumns(..) call later, in case the base query has a
        // non-default LimitSpec.
        Set<String> aggsAndPostAggs = null;
        if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
            aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
        }
        List<List<String>> subtotals = query.getSubtotalsSpec();
        List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());
        // Iterate through each subtotalSpec, build results for it and add to subtotalsResults
        for (List<String> subtotalSpec : subtotals) {
            final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);
            // Dimension spec including dimension name and output name
            final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
            final List<DimensionSpec> dimensions = query.getDimensions();
            for (DimensionSpec dimensionSpec : dimensions) {
                if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
                    subTotalDimensionSpec.add(dimensionSpec);
                }
            }
            // Create appropriate LimitSpec for subtotal query
            LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
            if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
                Set<String> columns = new HashSet<>(aggsAndPostAggs);
                columns.addAll(subtotalSpec);
                subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
            }
            GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
            final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
            if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
                // subtotalSpec is a prefix of the base query dimensions, so results from the base query
                // are already sorted by subtotalSpec, as needed by stream merging.
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false));
            } else {
                // subtotalSpec is not a prefix of the base query dimensions, so results from the base
                // query are not sorted by subtotalSpec. We first feed the base query results into another
                // resultSupplier, which sorts them by subtotalSpec, and then stream-merge them.
                // Note that we can't create the ResultSupplier eagerly here, because we don't want to
                // eagerly allocate merge buffers for subtotal processing.
                Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(
                    baseSubtotalQuery,
                    subtotalQuery,
                    resultSupplierOneFinal.results(subTotalDimensionSpec),
                    configSupplier.get(),
                    resource,
                    spillMapper,
                    processingConfig.getTmpDir(),
                    processingConfig.intermediateComputeSizeBytes());
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true));
            }
        }
        return Sequences.withBaggage(
            query.postProcess(Sequences.concat(subtotalsResults)),
            // this will close resources allocated by resultSupplierOne after the sequence is read
            resultSupplierOne);
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
    }
}
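
The prefix test above is what decides whether the base query's existing sort order can be reused for a subtotal. A minimal sketch of the idea (the real check is Utils.isPrefix from org.apache.druid.java.util.common.collect; the isPrefix helper and the dimension names below are hypothetical, for illustration only):

import java.util.Arrays;
import java.util.List;

public class PrefixCheckSketch {
    // Hypothetical re-implementation of the prefix test, assuming simple name equality.
    static boolean isPrefix(List<String> candidate, List<String> full) {
        if (candidate.size() > full.size()) {
            return false;
        }
        return candidate.equals(full.subList(0, candidate.size()));
    }

    public static void main(String[] args) {
        List<String> queryDims = Arrays.asList("country", "city", "device");
        // A subtotal over ["country"] can reuse rows already sorted by (country, city, device).
        System.out.println(isPrefix(Arrays.asList("country"), queryDims));         // true: reuse sort
        System.out.println(isPrefix(Arrays.asList("country", "city"), queryDims)); // true: reuse sort
        // A subtotal over ["city"] cannot: rows must be re-sorted first.
        System.out.println(isPrefix(Arrays.asList("city"), queryDims));            // false: re-sort
    }
}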

Example 3 with LimitSpec

Use of org.apache.druid.query.groupby.orderby.LimitSpec in project druid by druid-io.

From class GroupByQueryRunnerTest, method testMergeResultsWithOrderBy.

@Test
public void testMergeResultsWithOrderBy() {
    LimitSpec[] orderBySpecs = new LimitSpec[] {
        new DefaultLimitSpec(OrderByColumnSpec.ascending("idx"), null),
        new DefaultLimitSpec(OrderByColumnSpec.ascending("rows", "idx"), null),
        new DefaultLimitSpec(OrderByColumnSpec.descending("idx"), null),
        new DefaultLimitSpec(OrderByColumnSpec.descending("rows", "idx"), null)
    };
    GroupByQuery baseQuery = makeQueryBuilder()
        .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
        .setInterval("2011-04-02/2011-04-04")
        .setDimensions(new DefaultDimensionSpec("quality", "alias"))
        .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new LongSumAggregatorFactory("idx", "index"))
        .setGranularity(new PeriodGranularity(new Period("P1M"), null, null))
        .build();
    List<ResultRow> allResults = Arrays.asList(
        makeRow(baseQuery, "2011-04-01", "alias", "automotive", "rows", 2L, "idx", 269L),
        makeRow(baseQuery, "2011-04-01", "alias", "business", "rows", 2L, "idx", 217L),
        makeRow(baseQuery, "2011-04-01", "alias", "entertainment", "rows", 2L, "idx", 319L),
        makeRow(baseQuery, "2011-04-01", "alias", "health", "rows", 2L, "idx", 216L),
        makeRow(baseQuery, "2011-04-01", "alias", "mezzanine", "rows", 6L, "idx", 4420L),
        makeRow(baseQuery, "2011-04-01", "alias", "news", "rows", 2L, "idx", 221L),
        makeRow(baseQuery, "2011-04-01", "alias", "premium", "rows", 6L, "idx", 4416L),
        makeRow(baseQuery, "2011-04-01", "alias", "technology", "rows", 2L, "idx", 177L),
        makeRow(baseQuery, "2011-04-01", "alias", "travel", "rows", 2L, "idx", 243L));
    final int idxPosition = baseQuery.getResultRowSignature().indexOf("idx");
    final int rowsPosition = baseQuery.getResultRowSignature().indexOf("rows");
    Comparator<ResultRow> idxComparator = Comparator.comparing(row -> ((Number) row.get(idxPosition)).floatValue());
    Comparator<ResultRow> rowsComparator = Comparator.comparing(row -> ((Number) row.get(rowsPosition)).floatValue());
    Comparator<ResultRow> rowsIdxComparator = Ordering.from(rowsComparator).thenComparing(idxComparator);
    List<List<ResultRow>> expectedResults = Lists.newArrayList(
        Ordering.from(idxComparator).sortedCopy(allResults),
        Ordering.from(rowsIdxComparator).sortedCopy(allResults),
        Ordering.from(idxComparator).reverse().sortedCopy(allResults),
        Ordering.from(rowsIdxComparator).reverse().sortedCopy(allResults));
    for (int i = 0; i < orderBySpecs.length; ++i) {
        doTestMergeResultsWithOrderBy(baseQuery, orderBySpecs[i], expectedResults.get(i));
    }
}
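
The expected results above are derived purely with Guava Ordering combinators. A minimal standalone sketch of the same pattern (plain Guava, not Druid; the class name OrderingSketch and the int[] rows are made up for illustration): wrap a Comparator in an Ordering, chain a tie-breaker with thenComparing, and use reverse() plus sortedCopy() for the descending variants.

import com.google.common.collect.Ordering;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class OrderingSketch {
    public static void main(String[] args) {
        // Each row is {rows, idx}, mirroring the two numeric columns compared in the test.
        List<int[]> rows = Arrays.asList(new int[]{2, 217}, new int[]{6, 4420}, new int[]{2, 177});
        Comparator<int[]> byRows = Comparator.comparingInt(r -> r[0]);
        Comparator<int[]> byIdx = Comparator.comparingInt(r -> r[1]);
        // Primary sort on rows, tie-break on idx, as in rowsIdxComparator.
        Comparator<int[]> rowsThenIdx = Ordering.from(byRows).thenComparing(byIdx);
        // Descending variant, as used for the reversed expected results.
        for (int[] row : Ordering.from(rowsThenIdx).reverse().sortedCopy(rows)) {
            System.out.println(Arrays.toString(row)); // [6, 4420] then [2, 217] then [2, 177]
        }
    }
}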
