Search in sources :

Example 1 with NoopLimitSpec

use of org.apache.druid.query.groupby.orderby.NoopLimitSpec in project druid by druid-io.

the class GroupByStrategyV2 method processSubtotalsSpec.

@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
    // How it works?
    // First we accumulate the result of top level base query aka queryResult arg inside a resultSupplierOne object.
    // Next for each subtotalSpec
    // If subtotalSpec is a prefix of top level dims then we iterate on rows in resultSupplierOne object which are still
    // sorted by subtotalSpec, stream merge them and return.
    // 
    // If subtotalSpec is not a prefix of top level dims then we create a resultSupplierTwo object filled with rows from
    // resultSupplierOne object with only dims from subtotalSpec. Then we iterate on rows in resultSupplierTwo object which are
    // of course sorted by subtotalSpec, stream merge them and return.
    // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplierOne = null;
    try {
        // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
        // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
        // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
        // one-by-one soon enough.
        GroupByQuery baseSubtotalQuery = query.withDimensionSpecs(query.getDimensions().stream().map(dimSpec -> new DefaultDimensionSpec(dimSpec.getOutputName(), dimSpec.getOutputName(), dimSpec.getOutputType())).collect(Collectors.toList())).withAggregatorSpecs(query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList())).withVirtualColumns(VirtualColumns.EMPTY).withDimFilter(null).withSubtotalsSpec(null).withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));
        resultSupplierOne = GroupByRowProcessor.process(baseSubtotalQuery, baseSubtotalQuery, queryResult, configSupplier.get(), resource, spillMapper, processingConfig.getTmpDir(), processingConfig.intermediateComputeSizeBytes());
        List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName).collect(Collectors.toList());
        // Only needed to make LimitSpec.filterColumns(..) call later in case base query has a non default LimitSpec.
        Set<String> aggsAndPostAggs = null;
        if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
            aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
        }
        List<List<String>> subtotals = query.getSubtotalsSpec();
        List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());
        // Iterate through each subtotalSpec, build results for it and add to subtotalsResults
        for (List<String> subtotalSpec : subtotals) {
            final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);
            // Dimension spec including dimension name and output name
            final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
            final List<DimensionSpec> dimensions = query.getDimensions();
            for (DimensionSpec dimensionSpec : dimensions) {
                if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
                    subTotalDimensionSpec.add(dimensionSpec);
                }
            }
            // Create appropriate LimitSpec for subtotal query
            LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
            if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
                Set<String> columns = new HashSet<>(aggsAndPostAggs);
                columns.addAll(subtotalSpec);
                subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
            }
            GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
            final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
            if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
                // Since subtotalSpec is a prefix of base query dimensions, so results from base query are also sorted
                // by subtotalSpec as needed by stream merging.
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false));
            } else {
                // Since subtotalSpec is not a prefix of base query dimensions, so results from base query are not sorted
                // by subtotalSpec. So we first add the result of base query into another resultSupplier which are sorted
                // by subtotalSpec and then stream merge them.
                // Also note, we can't create the ResultSupplier eagerly here or as we don't want to eagerly allocate
                // merge buffers for processing subtotal.
                Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(baseSubtotalQuery, subtotalQuery, resultSupplierOneFinal.results(subTotalDimensionSpec), configSupplier.get(), resource, spillMapper, processingConfig.getTmpDir(), processingConfig.intermediateComputeSizeBytes());
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true));
            }
        }
        return Sequences.withBaggage(query.postProcess(Sequences.concat(subtotalsResults)), // this will close resources allocated by resultSupplierOne after sequence read
        resultSupplierOne);
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
    }
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) ArrayList(java.util.ArrayList) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) HashSet(java.util.HashSet)

Aggregations

ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Supplier (com.google.common.base.Supplier)1 Suppliers (com.google.common.base.Suppliers)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Inject (com.google.inject.Inject)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 BinaryOperator (java.util.function.BinaryOperator)1 Collectors (java.util.stream.Collectors)1 BlockingPool (org.apache.druid.collections.BlockingPool)1 NonBlockingPool (org.apache.druid.collections.NonBlockingPool)1 ReferenceCountingResourceHolder (org.apache.druid.collections.ReferenceCountingResourceHolder)1