Example 36 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class GroupByStrategyV2, method processSubtotalsSpec.

@Override
public Sequence<ResultRow> processSubtotalsSpec(GroupByQuery query, GroupByQueryResource resource, Sequence<ResultRow> queryResult) {
    // How it works:
    // First we accumulate the result of the top-level base query (the queryResult arg) inside a
    // resultSupplierOne object. Then, for each subtotalSpec:
    //
    // If the subtotalSpec is a prefix of the top-level dims, we iterate over the rows in resultSupplierOne,
    // which are already sorted by the subtotalSpec, stream-merge them and return.
    //
    // If the subtotalSpec is not a prefix of the top-level dims, we create a resultSupplierTwo object filled
    // with rows from resultSupplierOne restricted to the dims from the subtotalSpec. We then iterate over the
    // rows in resultSupplierTwo, which are by construction sorted by the subtotalSpec, stream-merge them and
    // return.
    // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
    // while creating the sequence.
    GroupByRowProcessor.ResultSupplier resultSupplierOne = null;
    try {
        // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
        // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
        // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
        // one-by-one soon enough.
        GroupByQuery baseSubtotalQuery = query
            .withDimensionSpecs(
                query.getDimensions().stream()
                    .map(dimSpec -> new DefaultDimensionSpec(
                        dimSpec.getOutputName(),
                        dimSpec.getOutputName(),
                        dimSpec.getOutputType()))
                    .collect(Collectors.toList()))
            .withAggregatorSpecs(
                query.getAggregatorSpecs().stream()
                    .map(AggregatorFactory::getCombiningFactory)
                    .collect(Collectors.toList()))
            .withVirtualColumns(VirtualColumns.EMPTY)
            .withDimFilter(null)
            .withSubtotalsSpec(null)
            .withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));
        resultSupplierOne = GroupByRowProcessor.process(
            baseSubtotalQuery,
            baseSubtotalQuery,
            queryResult,
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes());
        List<String> queryDimNames = baseSubtotalQuery.getDimensions().stream().map(DimensionSpec::getOutputName).collect(Collectors.toList());
        // Only needed for the LimitSpec.filterColumns(..) call later, in case the base query has a non-default LimitSpec.
        Set<String> aggsAndPostAggs = null;
        if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
            aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
        }
        List<List<String>> subtotals = query.getSubtotalsSpec();
        List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());
        // Iterate through each subtotalSpec, build results for it and add to subtotalsResults
        for (List<String> subtotalSpec : subtotals) {
            final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);
            // Dimension spec including dimension name and output name
            final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
            final List<DimensionSpec> dimensions = query.getDimensions();
            for (DimensionSpec dimensionSpec : dimensions) {
                if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
                    subTotalDimensionSpec.add(dimensionSpec);
                }
            }
            // Create appropriate LimitSpec for subtotal query
            LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
            if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
                Set<String> columns = new HashSet<>(aggsAndPostAggs);
                columns.addAll(subtotalSpec);
                subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
            }
            GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
            final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;
            if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
                // subtotalSpec is a prefix of the base query dimensions, so results from the base query are
                // already sorted by subtotalSpec, as needed for stream merging.
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false));
            } else {
                // subtotalSpec is not a prefix of the base query dimensions, so results from the base query are
                // not sorted by subtotalSpec. We first feed the base query results into another resultSupplier,
                // which sorts them by subtotalSpec, and then stream-merge them.
                // Also note: we can't create the ResultSupplier eagerly here, because we don't want to eagerly
                // allocate merge buffers for processing subtotals.
                Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(
                    baseSubtotalQuery,
                    subtotalQuery,
                    resultSupplierOneFinal.results(subTotalDimensionSpec),
                    configSupplier.get(),
                    resource,
                    spillMapper,
                    processingConfig.getTmpDir(),
                    processingConfig.intermediateComputeSizeBytes());
                subtotalsResults.add(processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true));
            }
        }
        return Sequences.withBaggage(
            query.postProcess(Sequences.concat(subtotalsResults)),
            // Closes the resources allocated by resultSupplierOne once the sequence has been fully read.
            resultSupplierOne);
    } catch (Throwable e) {
        throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
    }
}
Also used : QueryPlus(org.apache.druid.query.QueryPlus) GroupByQueryEngineV2(org.apache.druid.query.groupby.epinephelinae.GroupByQueryEngineV2) Inject(com.google.inject.Inject) Smile(org.apache.druid.guice.annotations.Smile) Merging(org.apache.druid.guice.annotations.Merging) QueryProcessingPool(org.apache.druid.query.QueryProcessingPool) ResultMergeQueryRunner(org.apache.druid.query.ResultMergeQueryRunner) StorageAdapter(org.apache.druid.segment.StorageAdapter) ByteBuffer(java.nio.ByteBuffer) DefaultLimitSpec(org.apache.druid.query.groupby.orderby.DefaultLimitSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) GroupByBinaryFnV2(org.apache.druid.query.groupby.epinephelinae.GroupByBinaryFnV2) QueryWatcher(org.apache.druid.query.QueryWatcher) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) Sequence(org.apache.druid.java.util.common.guava.Sequence) LazySequence(org.apache.druid.java.util.common.guava.LazySequence) GroupByMergingQueryRunnerV2(org.apache.druid.query.groupby.epinephelinae.GroupByMergingQueryRunnerV2) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ResultRow(org.apache.druid.query.groupby.ResultRow) DataSource(org.apache.druid.query.DataSource) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) Collectors(java.util.stream.Collectors) QueryContexts(org.apache.druid.query.QueryContexts) BinaryOperator(java.util.function.BinaryOperator) BlockingPool(org.apache.druid.collections.BlockingPool) QueryDataSource(org.apache.druid.query.QueryDataSource) List(java.util.List) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) GroupByRowProcessor(org.apache.druid.query.groupby.epinephelinae.GroupByRowProcessor) NoopLimitSpec(org.apache.druid.query.groupby.orderby.NoopLimitSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) NonBlockingPool(org.apache.druid.collections.NonBlockingPool) Intervals(org.apache.druid.java.util.common.Intervals) Supplier(com.google.common.base.Supplier) GroupByQueryResource(org.apache.druid.query.groupby.resource.GroupByQueryResource) Utils(org.apache.druid.java.util.common.collect.Utils) ArrayList(java.util.ArrayList) QueryCapacityExceededException(org.apache.druid.query.QueryCapacityExceededException) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Suppliers(com.google.common.base.Suppliers) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) GroupByQuery(org.apache.druid.query.groupby.GroupByQuery) Sequences(org.apache.druid.java.util.common.guava.Sequences) VirtualColumns(org.apache.druid.segment.VirtualColumns) ResponseContext(org.apache.druid.query.context.ResponseContext) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) Global(org.apache.druid.guice.annotations.Global) LimitSpec(org.apache.druid.query.groupby.orderby.LimitSpec) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) CloseableUtils(org.apache.druid.utils.CloseableUtils) 
ReferenceCountingResourceHolder(org.apache.druid.collections.ReferenceCountingResourceHolder)
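
The branch in processSubtotalsSpec hinges on Utils.isPrefix(subtotalSpec, queryDimNames): when the subtotal dimensions form a leading prefix of the query's dimension ordering, rows sorted by the full dimension list are automatically sorted by the subtotal list as well, so they can be stream-merged without re-sorting. Below is a minimal sketch of such a prefix test against plain java.util.List; it is an assumed restatement of the helper's semantics, not Druid's implementation.

import java.util.List;

public class PrefixCheck {
    // True if candidate equals the first candidate.size() elements of full. In that case,
    // rows sorted by `full` are already sorted by `candidate`, so no re-sort is needed.
    static boolean isPrefix(List<String> candidate, List<String> full) {
        if (candidate.size() > full.size()) {
            return false;
        }
        for (int i = 0; i < candidate.size(); i++) {
            if (!candidate.get(i).equals(full.get(i))) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        List<String> dims = List.of("country", "city", "device");
        System.out.println(isPrefix(List.of("country", "city"), dims)); // true  -> merge in place
        System.out.println(isPrefix(List.of("city"), dims));            // false -> re-sort via resultSupplierTwo
    }
}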

Example 37 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class AutoStrategy, method getExecutionPlan.

@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment) {
    final QueryableIndex index = segment.asQueryableIndex();
    if (index != null) {
        final BitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), VirtualColumns.EMPTY, index);
        // An index-only plan is considered only when there is no filter or the filter supports selectivity
        // estimation. (A possible optimization for the unsupported case: generate a bitmap from the
        // non-bitmap-support filters, and then use it to compute the filtered result by intersecting bitmaps.)
        if (filter == null || filter.supportsSelectivityEstimation(index, selector)) {
            final List<DimensionSpec> dimsToSearch = getDimsToSearch(index.getAvailableDimensions(), query.getDimensions());
            // Choose a search query execution strategy depending on the query.
            // The costs of the index-only plan and the cursor-based plan can be estimated as follows:
            //
            //   c_index  = (total cardinality of all search dimensions) * (bitmap intersection cost)
            //              * (search predicate processing cost)
            //   c_cursor = (# of rows in the segment) * (filter selectivity) * (# of dimensions)
            //              * (search predicate processing cost)
            final SearchQueryDecisionHelper helper = getDecisionHelper(index);
            final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch);
            final double cursorOnlyStrategyCost = (filter == null ? 1. : filter.estimateSelectivity(selector)) * selector.getNumRows() * dimsToSearch.size();
            log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", useIndexStrategyCost, cursorOnlyStrategyCost);
            if (useIndexStrategyCost < cursorOnlyStrategyCost) {
                log.debug("Use-index execution strategy is selected, query id [%s]", query.getId());
                return UseIndexesStrategy.of(query).getExecutionPlan(query, segment);
            } else {
                log.debug("Cursor-only execution strategy is selected, query id [%s]", query.getId());
                return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
            }
        } else {
            log.debug("Filter doesn't support bitmap index. Fall back to cursor-only execution strategy, query id [%s]", query.getId());
            return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
        }
    } else {
        log.debug("Index doesn't exist. Fall back to cursor-only execution strategy, query id [%s]", query.getId());
        return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
    }
}
Also used : ColumnSelectorBitmapIndexSelector(org.apache.druid.segment.ColumnSelectorBitmapIndexSelector) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) QueryableIndex(org.apache.druid.segment.QueryableIndex) ColumnSelectorBitmapIndexSelector(org.apache.druid.segment.ColumnSelectorBitmapIndexSelector) BitmapIndexSelector(org.apache.druid.query.filter.BitmapIndexSelector)
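
To make the trade-off concrete, here is a standalone sketch of the cost comparison performed above. The bitmap-intersection constant and the sample numbers are illustrative assumptions only; in Druid the constant comes from the SearchQueryDecisionHelper, and the row count and selectivity come from the segment and filter.

public class SearchCostModel {
    static final double BITMAP_INTERSECT_COST = 10.0; // hypothetical per-cardinality cost

    // c_index = (total cardinality of all search dimensions) * (bitmap intersection cost)
    static double indexOnlyCost(long totalCardinality) {
        return totalCardinality * BITMAP_INTERSECT_COST;
    }

    // c_cursor = (# of rows in the segment) * (filter selectivity) * (# of dimensions)
    static double cursorOnlyCost(long numRows, double filterSelectivity, int numDims) {
        return numRows * filterSelectivity * numDims;
    }

    public static void main(String[] args) {
        double cIndex = indexOnlyCost(50_000);              // 500,000
        double cCursor = cursorOnlyCost(5_000_000, 0.1, 3); // 1,500,000
        System.out.println(cIndex < cCursor ? "use-index strategy" : "cursor-only strategy");
    }
}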

Example 38 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class CursorOnlyStrategy, method getExecutionPlan.

@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment) {
    final StorageAdapter adapter = segment.asStorageAdapter();
    final List<DimensionSpec> dimensionSpecs = getDimsToSearch(adapter.getAvailableDimensions(), query.getDimensions());
    return ImmutableList.of(new CursorBasedExecutor(query, segment, filter, interval, dimensionSpecs));
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter)
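
getDimsToSearch(..) itself is not shown on this page. A plausible sketch of its contract, stated purely as an assumption inferred from how the strategies call it: if the query names no dimensions, fall back to searching every available dimension.

import java.util.ArrayList;
import java.util.List;

public class DimsToSearchSketch {
    // Assumed contract, not Druid's actual code: no dimensions named -> search everything.
    static List<String> dimsToSearch(Iterable<String> availableDims, List<String> queryDims) {
        if (queryDims == null || queryDims.isEmpty()) {
            List<String> all = new ArrayList<>();
            availableDims.forEach(all::add);
            return all;
        }
        return queryDims;
    }

    public static void main(String[] args) {
        List<String> available = List.of("country", "city", "device");
        System.out.println(dimsToSearch(available, List.of()));       // [country, city, device]
        System.out.println(dimsToSearch(available, List.of("city"))); // [city]
    }
}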

Example 39 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class UseIndexesStrategy, method partitionDimensionList.

/**
 * Splits the given dimension list into bitmap-supporting dimensions and non-bitmap-supporting ones.
 * Note that callers are free to modify the returned lists.
 */
private static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList(StorageAdapter adapter, List<DimensionSpec> dimensions) {
    final List<DimensionSpec> bitmapDims = new ArrayList<>();
    final List<DimensionSpec> nonBitmapDims = new ArrayList<>();
    final List<DimensionSpec> dimsToSearch = getDimsToSearch(adapter.getAvailableDimensions(), dimensions);
    for (DimensionSpec spec : dimsToSearch) {
        ColumnCapabilities capabilities = adapter.getColumnCapabilities(spec.getDimension());
        if (capabilities == null) {
            continue;
        }
        if (capabilities.hasBitmapIndexes()) {
            bitmapDims.add(spec);
        } else {
            nonBitmapDims.add(spec);
        }
    }
    return new Pair<>(bitmapDims, nonBitmapDims);
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ArrayList(java.util.ArrayList) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Pair(org.apache.druid.java.util.common.Pair)
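
The loop above is a hand-rolled two-way partition. For comparison, the same split can be phrased with Collectors.partitioningBy; this sketch uses a stand-in record instead of Druid's DimensionSpec and ColumnCapabilities, and skips entries with unresolved capabilities just as the original's continue does.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class PartitionSketch {
    // hasBitmapIndex == null models a column whose capabilities could not be resolved.
    record Dim(String name, Boolean hasBitmapIndex) {}

    static Map<Boolean, List<Dim>> partition(List<Dim> dims) {
        return dims.stream()
                   .filter(d -> d.hasBitmapIndex() != null) // drop unresolved columns, as the original does
                   .collect(Collectors.partitioningBy(Dim::hasBitmapIndex));
    }

    public static void main(String[] args) {
        Map<Boolean, List<Dim>> split = partition(List.of(
            new Dim("country", true),
            new Dim("latencyMs", false),
            new Dim("unknownCol", null)));
        System.out.println(split.get(true));  // bitmap-backed dims
        System.out.println(split.get(false)); // cursor-only dims
    }
}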

Example 40 with DimensionSpec

Use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

The class BaseVarianceSqlAggregator, method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(PlannerContext plannerContext, RowSignature rowSignature, VirtualColumnRegistry virtualColumnRegistry, RexBuilder rexBuilder, String name, AggregateCall aggregateCall, Project project, List<Aggregation> existingAggregations, boolean finalizeAggregations) {
    final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
    final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, inputOperand);
    if (input == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final RelDataType dataType = inputOperand.getType();
    final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
    final DimensionSpec dimensionSpec;
    final String aggName = StringUtils.format("%s:agg", name);
    final SqlAggFunction func = calciteFunction();
    final String estimator;
    final String inputTypeName;
    PostAggregator postAggregator = null;
    if (inputType == null) {
        // Fail fast: inputType is used below, both for the dimension spec and for the type name.
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType", func);
    }
    if (input.isSimpleExtraction()) {
        dimensionSpec = input.getSimpleExtraction().toDimensionSpec(null, inputType);
    } else {
        String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, dataType);
        dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
    }
    if (inputType.isNumeric()) {
        inputTypeName = StringUtils.toLowerCase(inputType.getType().name());
    } else {
        throw new IAE("VarianceSqlAggregator[%s] has invalid inputType[%s]", func, inputType.asTypeString());
    }
    if (func == SqlStdOperatorTable.VAR_POP || func == SqlStdOperatorTable.STDDEV_POP) {
        estimator = "population";
    } else {
        estimator = "sample";
    }
    aggregatorFactory = new VarianceAggregatorFactory(aggName, dimensionSpec.getDimension(), estimator, inputTypeName);
    if (func == SqlStdOperatorTable.STDDEV_POP || func == SqlStdOperatorTable.STDDEV_SAMP || func == SqlStdOperatorTable.STDDEV) {
        postAggregator = new StandardDeviationPostAggregator(name, aggregatorFactory.getName(), estimator);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), postAggregator);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) StandardDeviationPostAggregator(org.apache.druid.query.aggregation.variance.StandardDeviationPostAggregator) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) IAE(org.apache.druid.java.util.common.IAE) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) VarianceAggregatorFactory(org.apache.druid.query.aggregation.variance.VarianceAggregatorFactory) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)
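
The essence of the method is the mapping from the Calcite function to an estimator string and an optional standard-deviation post-aggregator. Below is a condensed, self-contained restatement of just that decision logic; the enum is a stand-in for the SqlStdOperatorTable constants, and the names are illustrative.

public class VarianceMapping {
    enum Func { VAR_POP, VAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VARIANCE }

    // Population variants use the "population" estimator; everything else uses "sample".
    static String estimator(Func func) {
        return (func == Func.VAR_POP || func == Func.STDDEV_POP) ? "population" : "sample";
    }

    // The aggregator natively computes a variance; standard deviation needs a sqrt post-aggregation.
    static boolean needsStdDevPostAgg(Func func) {
        return func == Func.STDDEV_POP || func == Func.STDDEV_SAMP || func == Func.STDDEV;
    }

    public static void main(String[] args) {
        for (Func f : Func.values()) {
            System.out.printf("%s -> estimator=%s, postAgg=%s%n",
                f, estimator(f), needsStdDevPostAgg(f) ? "stddev" : "none");
        }
    }
}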

Aggregations

DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 53 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 27 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 20 usages
ArrayList (java.util.ArrayList): 19 usages
HashMap (java.util.HashMap): 16 usages
Nullable (javax.annotation.Nullable): 15 usages
Test (org.junit.Test): 15 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 14 usages
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 12 usages
Row (org.apache.druid.data.input.Row): 12 usages
ISE (org.apache.druid.java.util.common.ISE): 12 usages
PostAggregator (org.apache.druid.query.aggregation.PostAggregator): 11 usages
Map (java.util.Map): 10 usages
ColumnType (org.apache.druid.segment.column.ColumnType): 10 usages
List (java.util.List): 9 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 9 usages
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9 usages
HashSet (java.util.HashSet): 8 usages
Function (com.google.common.base.Function): 7 usages
ImmutableList (com.google.common.collect.ImmutableList): 7 usages