use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class GroupByStrategyV2 method processSubtotalsSpec.
@Override
public Sequence<ResultRow> processSubtotalsSpec(
    GroupByQuery query,
    GroupByQueryResource resource,
    Sequence<ResultRow> queryResult
)
{
  // How this works:
  // First we accumulate the result of the top-level base query (the queryResult arg) inside a resultSupplierOne object.
  // Then, for each subtotalSpec:
  //
  // If subtotalSpec is a prefix of the top-level dims, the rows in resultSupplierOne are already sorted
  // by subtotalSpec, so we iterate over them, stream-merge them, and return.
  //
  // If subtotalSpec is not a prefix of the top-level dims, we create a resultSupplierTwo object filled with rows from
  // resultSupplierOne that carry only the dims from subtotalSpec. The rows in resultSupplierTwo are, by construction,
  // sorted by subtotalSpec, so we iterate over them, stream-merge them, and return.

  // Keep a reference to resultSupplier outside the "try" so we can close it if something goes wrong
  // while creating the sequence.
  GroupByRowProcessor.ResultSupplier resultSupplierOne = null;

  try {
    // baseSubtotalQuery is the original query with dimensions and aggregators rewritten to apply to the *results*
    // rather than *inputs* of that query. It has its virtual columns and dim filter removed, because those only
    // make sense when applied to inputs. Finally, it has subtotalsSpec removed, since we'll be computing them
    // one-by-one soon enough.
    GroupByQuery baseSubtotalQuery = query
        .withDimensionSpecs(
            query.getDimensions()
                 .stream()
                 .map(dimSpec -> new DefaultDimensionSpec(
                     dimSpec.getOutputName(),
                     dimSpec.getOutputName(),
                     dimSpec.getOutputType()
                 ))
                 .collect(Collectors.toList())
        )
        .withAggregatorSpecs(
            query.getAggregatorSpecs()
                 .stream()
                 .map(AggregatorFactory::getCombiningFactory)
                 .collect(Collectors.toList())
        )
        .withVirtualColumns(VirtualColumns.EMPTY)
        .withDimFilter(null)
        .withSubtotalsSpec(null)
        .withOverriddenContext(ImmutableMap.of(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, ""));

    resultSupplierOne = GroupByRowProcessor.process(
        baseSubtotalQuery,
        baseSubtotalQuery,
        queryResult,
        configSupplier.get(),
        resource,
        spillMapper,
        processingConfig.getTmpDir(),
        processingConfig.intermediateComputeSizeBytes()
    );

    List<String> queryDimNames = baseSubtotalQuery.getDimensions()
                                                  .stream()
                                                  .map(DimensionSpec::getOutputName)
                                                  .collect(Collectors.toList());

    // Only needed for the LimitSpec.filterColumns(..) call below, in case the base query has a non-default LimitSpec.
    Set<String> aggsAndPostAggs = null;
    if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
      aggsAndPostAggs = getAggregatorAndPostAggregatorNames(baseSubtotalQuery);
    }

    List<List<String>> subtotals = query.getSubtotalsSpec();
    List<Sequence<ResultRow>> subtotalsResults = new ArrayList<>(subtotals.size());

    // Iterate through each subtotalSpec, build results for it, and add them to subtotalsResults.
    for (List<String> subtotalSpec : subtotals) {
      final ImmutableSet<String> dimsInSubtotalSpec = ImmutableSet.copyOf(subtotalSpec);

      // Dimension specs, including dimension name and output name, for the dims in this subtotalSpec.
      final List<DimensionSpec> subTotalDimensionSpec = new ArrayList<>(dimsInSubtotalSpec.size());
      final List<DimensionSpec> dimensions = query.getDimensions();
      for (DimensionSpec dimensionSpec : dimensions) {
        if (dimsInSubtotalSpec.contains(dimensionSpec.getOutputName())) {
          subTotalDimensionSpec.add(dimensionSpec);
        }
      }

      // Create an appropriate LimitSpec for the subtotal query.
      LimitSpec subtotalQueryLimitSpec = NoopLimitSpec.instance();
      if (!(baseSubtotalQuery.getLimitSpec() instanceof NoopLimitSpec)) {
        Set<String> columns = new HashSet<>(aggsAndPostAggs);
        columns.addAll(subtotalSpec);
        subtotalQueryLimitSpec = baseSubtotalQuery.getLimitSpec().filterColumns(columns);
      }

      GroupByQuery subtotalQuery = baseSubtotalQuery.withLimitSpec(subtotalQueryLimitSpec);
      final GroupByRowProcessor.ResultSupplier resultSupplierOneFinal = resultSupplierOne;

      if (Utils.isPrefix(subtotalSpec, queryDimNames)) {
        // Since subtotalSpec is a prefix of the base query dimensions, the results from the base query are already
        // sorted by subtotalSpec, as required by stream merging.
        subtotalsResults.add(
            processSubtotalsResultAndOptionallyClose(() -> resultSupplierOneFinal, subTotalDimensionSpec, subtotalQuery, false)
        );
      } else {
        // Since subtotalSpec is not a prefix of the base query dimensions, the results from the base query are not
        // sorted by subtotalSpec. So we first feed the base query results into another resultSupplier, which sorts
        // them by subtotalSpec, and then stream-merge them.
        // Also note, we don't create the ResultSupplier eagerly here, because we don't want to eagerly allocate
        // merge buffers for processing subtotals.
        Supplier<GroupByRowProcessor.ResultSupplier> resultSupplierTwo = () -> GroupByRowProcessor.process(
            baseSubtotalQuery,
            subtotalQuery,
            resultSupplierOneFinal.results(subTotalDimensionSpec),
            configSupplier.get(),
            resource,
            spillMapper,
            processingConfig.getTmpDir(),
            processingConfig.intermediateComputeSizeBytes()
        );
        subtotalsResults.add(
            processSubtotalsResultAndOptionallyClose(resultSupplierTwo, subTotalDimensionSpec, subtotalQuery, true)
        );
      }
    }

    return Sequences.withBaggage(
        query.postProcess(Sequences.concat(subtotalsResults)),
        resultSupplierOne // closes the resources allocated by resultSupplierOne after the sequence is read
    );
  }
  catch (Throwable e) {
    throw CloseableUtils.closeAndWrapInCatch(e, resultSupplierOne);
  }
}
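The branch taken for each subtotalSpec hinges entirely on the Utils.isPrefix check. Below is a minimal, runnable sketch of that check; the isPrefix helper is a hypothetical stand-in written for illustration, not the actual Utils.isPrefix source.

import java.util.Arrays;
import java.util.List;

class PrefixCheckSketch
{
  // Hypothetical stand-in for Utils.isPrefix: true iff maybePrefix equals the first
  // maybePrefix.size() elements of list, in order.
  static boolean isPrefix(List<String> maybePrefix, List<String> list)
  {
    return maybePrefix.size() <= list.size()
           && maybePrefix.equals(list.subList(0, maybePrefix.size()));
  }

  public static void main(String[] args)
  {
    // Example base query dimensions (illustrative names).
    List<String> dims = Arrays.asList("country", "city", "device");

    System.out.println(isPrefix(Arrays.asList("country"), dims));         // true: already sorted, merge directly
    System.out.println(isPrefix(Arrays.asList("country", "city"), dims)); // true: same
    System.out.println(isPrefix(Arrays.asList("city"), dims));            // false: must re-sort via resultSupplierTwo
  }
}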
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class AutoStrategy method getExecutionPlan.
@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment)
{
  final QueryableIndex index = segment.asQueryableIndex();
  if (index != null) {
    final BitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(
        index.getBitmapFactoryForDimensions(),
        VirtualColumns.EMPTY,
        index
    );

    // The index-based plan is considered only when there is no filter, or the filter supports selectivity
    // estimation. One possible further optimization would be to generate a bitmap index
    // from the non-bitmap-support filters, and then use it to compute the filtered result by intersecting bitmaps.
    if (filter == null || filter.supportsSelectivityEstimation(index, selector)) {
      final List<DimensionSpec> dimsToSearch = getDimsToSearch(index.getAvailableDimensions(), query.getDimensions());

      // Choose a search query execution strategy depending on the query.
      // The costs of the index-only plan and the cursor-based plan can be computed as below.
      //
      // c_index  = (total cardinality of all search dimensions) * (bitmap intersection cost)
      //          * (search predicate processing cost)
      // c_cursor = (# of rows in a segment) * (filter selectivity) * (# of dimensions)
      //          * (search predicate processing cost)
      final SearchQueryDecisionHelper helper = getDecisionHelper(index);
      final double useIndexStrategyCost = helper.getBitmapIntersectCost() * computeTotalCard(index, dimsToSearch);
      final double cursorOnlyStrategyCost =
          (filter == null ? 1. : filter.estimateSelectivity(selector)) * selector.getNumRows() * dimsToSearch.size();
      log.debug("Use-index strategy cost: %f, cursor-only strategy cost: %f", useIndexStrategyCost, cursorOnlyStrategyCost);

      if (useIndexStrategyCost < cursorOnlyStrategyCost) {
        log.debug("Use-index execution strategy is selected, query id [%s]", query.getId());
        return UseIndexesStrategy.of(query).getExecutionPlan(query, segment);
      } else {
        log.debug("Cursor-only execution strategy is selected, query id [%s]", query.getId());
        return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
      }
    } else {
      log.debug("Filter doesn't support bitmap indexes. Falling back to cursor-only execution strategy, query id [%s]", query.getId());
      return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
    }
  } else {
    log.debug("Index doesn't exist. Falling back to cursor-only execution strategy, query id [%s]", query.getId());
    return CursorOnlyStrategy.of(query).getExecutionPlan(query, segment);
  }
}
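To make the cost comparison concrete, here is a standalone sketch with made-up numbers. All constants below are illustrative assumptions, not values from any real segment; note that the common search-predicate processing cost cancels out of the comparison, which is why the code above omits it from both sides.

class SearchCostSketch
{
  public static void main(String[] args)
  {
    // Illustrative assumptions; real values come from the segment and the decision helper.
    double bitmapIntersectCost = 10.0;   // helper.getBitmapIntersectCost()
    long totalCardinality = 50_000;      // computeTotalCard(index, dimsToSearch)
    long numRows = 5_000_000;            // selector.getNumRows()
    double filterSelectivity = 0.01;     // filter.estimateSelectivity(selector), or 1.0 with no filter
    int numDims = 3;                     // dimsToSearch.size()

    double useIndexCost = bitmapIntersectCost * totalCardinality;    // c_index = 500,000
    double cursorOnlyCost = filterSelectivity * numRows * numDims;   // c_cursor = 150,000

    // 500,000 < 150,000 is false, so the cursor-only strategy wins with these numbers.
    System.out.printf(
        "c_index=%.0f, c_cursor=%.0f -> %s%n",
        useIndexCost,
        cursorOnlyCost,
        useIndexCost < cursorOnlyCost ? "use-index" : "cursor-only"
    );
  }
}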
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class CursorOnlyStrategy method getExecutionPlan.
@Override
public List<SearchQueryExecutor> getExecutionPlan(SearchQuery query, Segment segment)
{
  final StorageAdapter adapter = segment.asStorageAdapter();
  final List<DimensionSpec> dimensionSpecs = getDimsToSearch(adapter.getAvailableDimensions(), query.getDimensions());
  return ImmutableList.of(new CursorBasedExecutor(query, segment, filter, interval, dimensionSpecs));
}
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class UseIndexesStrategy method partitionDimensionList.
/**
 * Split the given dimension list into bitmap-supporting dimensions and non-bitmap-supporting ones.
 * Note that the returned lists are free to modify.
 */
private static Pair<List<DimensionSpec>, List<DimensionSpec>> partitionDimensionList(
    StorageAdapter adapter,
    List<DimensionSpec> dimensions
)
{
  final List<DimensionSpec> bitmapDims = new ArrayList<>();
  final List<DimensionSpec> nonBitmapDims = new ArrayList<>();
  final List<DimensionSpec> dimsToSearch = getDimsToSearch(adapter.getAvailableDimensions(), dimensions);

  for (DimensionSpec spec : dimsToSearch) {
    ColumnCapabilities capabilities = adapter.getColumnCapabilities(spec.getDimension());
    if (capabilities == null) {
      continue;
    }
    if (capabilities.hasBitmapIndexes()) {
      bitmapDims.add(spec);
    } else {
      nonBitmapDims.add(spec);
    }
  }

  return new Pair<>(bitmapDims, nonBitmapDims);
}
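For illustration, here is the same partitioning idea in standalone, runnable form, with plain strings standing in for DimensionSpec and a set of column names standing in for the ColumnCapabilities lookup; this is a sketch of the technique, not the UseIndexesStrategy code.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class PartitionSketch
{
  // Partition dims into (bitmap-indexed, non-bitmap-indexed), preserving input order.
  static List<List<String>> partition(List<String> dims, Set<String> bitmapIndexed)
  {
    final List<String> bitmapDims = new ArrayList<>();
    final List<String> nonBitmapDims = new ArrayList<>();
    for (String dim : dims) {
      if (bitmapIndexed.contains(dim)) {
        bitmapDims.add(dim);
      } else {
        nonBitmapDims.add(dim);
      }
    }
    return Arrays.asList(bitmapDims, nonBitmapDims);
  }

  public static void main(String[] args)
  {
    Set<String> bitmapIndexed = new HashSet<>(Arrays.asList("country", "city"));
    // Prints [[country, city], [device]]: "device" would have to be searched with a cursor.
    System.out.println(partition(Arrays.asList("country", "city", "device"), bitmapIndexed));
  }
}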
use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.
the class BaseVarianceSqlAggregator method toDruidAggregation.
@Nullable
@Override
public Aggregation toDruidAggregation(
    PlannerContext plannerContext,
    RowSignature rowSignature,
    VirtualColumnRegistry virtualColumnRegistry,
    RexBuilder rexBuilder,
    String name,
    AggregateCall aggregateCall,
    Project project,
    List<Aggregation> existingAggregations,
    boolean finalizeAggregations
)
{
  final RexNode inputOperand = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0));
  final DruidExpression input = Aggregations.toDruidExpressionForNumericAggregator(plannerContext, rowSignature, inputOperand);
  if (input == null) {
    return null;
  }

  final AggregatorFactory aggregatorFactory;
  final RelDataType dataType = inputOperand.getType();
  final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
  final DimensionSpec dimensionSpec;
  final String aggName = StringUtils.format("%s:agg", name);
  final SqlAggFunction func = calciteFunction();
  final String estimator;
  final String inputTypeName;
  PostAggregator postAggregator = null;

  if (input.isSimpleExtraction()) {
    dimensionSpec = input.getSimpleExtraction().toDimensionSpec(null, inputType);
  } else {
    String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(input, dataType);
    dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
  }

  if (inputType == null) {
    throw new IAE("VarianceSqlAggregator[%s] has invalid inputType", func);
  }

  if (inputType.isNumeric()) {
    inputTypeName = StringUtils.toLowerCase(inputType.getType().name());
  } else {
    throw new IAE("VarianceSqlAggregator[%s] has invalid inputType[%s]", func, inputType.asTypeString());
  }

  if (func == SqlStdOperatorTable.VAR_POP || func == SqlStdOperatorTable.STDDEV_POP) {
    estimator = "population";
  } else {
    estimator = "sample";
  }

  aggregatorFactory = new VarianceAggregatorFactory(aggName, dimensionSpec.getDimension(), estimator, inputTypeName);

  if (func == SqlStdOperatorTable.STDDEV_POP || func == SqlStdOperatorTable.STDDEV_SAMP || func == SqlStdOperatorTable.STDDEV) {
    postAggregator = new StandardDeviationPostAggregator(name, aggregatorFactory.getName(), estimator);
  }

  return Aggregation.create(ImmutableList.of(aggregatorFactory), postAggregator);
}
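For orientation, this is roughly what the method assembles for a STDDEV_POP call over a numeric DOUBLE column, sketched as a fragment with illustrative names ("x" and "a0" are examples, not taken from a real query plan):

// Illustrative: the aggregation built for STDDEV_POP(x) with output name "a0".
AggregatorFactory factory = new VarianceAggregatorFactory(
    "a0:agg",      // aggName: "%s:agg" applied to the output name
    "x",           // dimensionSpec.getDimension()
    "population",  // estimator, because the function is VAR_POP/STDDEV_POP
    "double"       // lower-cased input type name
);
PostAggregator postAgg = new StandardDeviationPostAggregator(
    "a0",          // final output name
    "a0:agg",      // references the variance aggregator's name
    "population"
);
Aggregation aggregation = Aggregation.create(ImmutableList.of(factory), postAgg);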