Use of org.apache.druid.sql.calcite.aggregation.DimensionExpression in project druid by druid-io.
The class DruidQuery, method computeGrouping.
@Nonnull
private static Grouping computeGrouping(
    final PartialDruidQuery partialQuery,
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry,
    final RexBuilder rexBuilder,
    final boolean finalizeAggregations
)
{
  final Aggregate aggregate = Preconditions.checkNotNull(partialQuery.getAggregate(), "aggregate");
  final Project aggregateProject = partialQuery.getAggregateProject();

  final List<DimensionExpression> dimensions =
      computeDimensions(partialQuery, plannerContext, rowSignature, virtualColumnRegistry);
  final Subtotals subtotals = computeSubtotals(partialQuery, rowSignature);
  final List<Aggregation> aggregations = computeAggregations(
      partialQuery,
      plannerContext,
      rowSignature,
      virtualColumnRegistry,
      rexBuilder,
      finalizeAggregations
  );

  final RowSignature aggregateRowSignature = RowSignatures.fromRelDataType(
      ImmutableList.copyOf(
          Iterators.concat(
              dimensions.stream().map(DimensionExpression::getOutputName).iterator(),
              aggregations.stream().map(Aggregation::getOutputName).iterator()
          )
      ),
      aggregate.getRowType()
  );

  final DimFilter havingFilter = computeHavingFilter(partialQuery, plannerContext, aggregateRowSignature);
  final Grouping grouping = Grouping.create(dimensions, subtotals, aggregations, havingFilter, aggregateRowSignature);

  if (aggregateProject == null) {
    return grouping;
  } else {
    return grouping.applyProject(plannerContext, aggregateProject);
  }
}
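The ordering inside aggregateRowSignature is load-bearing: RowSignatures.fromRelDataType pairs the generated names positionally with the fields of aggregate.getRowType(), so dimension names must precede aggregation names, matching how Calcite lays out an Aggregate's row type. A minimal, self-contained sketch of that concatenation (the "d"/"a" names are hypothetical; Guava only):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import java.util.List;

public class ColumnOrderSketch
{
  public static void main(String[] args)
  {
    // Hypothetical output names, as DimensionExpression::getOutputName and
    // Aggregation::getOutputName would produce them.
    List<String> dimensionNames = ImmutableList.of("d0", "d1");
    List<String> aggregationNames = ImmutableList.of("a0");

    // The same concatenation computeGrouping performs: dimensions first, then aggregations.
    List<String> columnOrder = ImmutableList.copyOf(
        Iterators.concat(dimensionNames.iterator(), aggregationNames.iterator())
    );

    System.out.println(columnOrder); // prints [d0, d1, a0]
  }
}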
Use of org.apache.druid.sql.calcite.aggregation.DimensionExpression in project druid by druid-io.
The class DruidQuery, method toGroupByQuery.
/**
* Return this query as a GroupBy query, or null if this query is not compatible with GroupBy.
*
* @return query or null
*/
@Nullable
private GroupByQuery toGroupByQuery(final QueryFeatureInspector queryFeatureInspector)
{
  if (grouping == null) {
    return null;
  }

  if (sorting != null && sorting.getOffsetLimit().hasLimit() && sorting.getOffsetLimit().getLimit() <= 0) {
    // Cannot handle zero or negative limits.
    return null;
  }

  final Pair<DataSource, Filtration> dataSourceFiltrationPair =
      getFiltration(dataSource, filter, virtualColumnRegistry);
  final DataSource newDataSource = dataSourceFiltrationPair.lhs;
  final Filtration filtration = dataSourceFiltrationPair.rhs;

  final DimFilterHavingSpec havingSpec;
  if (grouping.getHavingFilter() != null) {
    havingSpec = new DimFilterHavingSpec(
        Filtration.create(grouping.getHavingFilter())
                  .optimizeFilterOnly(grouping.getOutputRowSignature())
                  .getDimFilter(),
        true
    );
  } else {
    havingSpec = null;
  }

  final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
  if (sorting != null && sorting.getProjection() != null) {
    postAggregators.addAll(sorting.getProjection().getPostAggregators());
  }

  GroupByQuery query = new GroupByQuery(
      newDataSource,
      filtration.getQuerySegmentSpec(),
      getVirtualColumns(true),
      filtration.getDimFilter(),
      Granularities.ALL,
      grouping.getDimensionSpecs(),
      grouping.getAggregatorFactories(),
      postAggregators,
      havingSpec,
      Optional.ofNullable(sorting).orElse(Sorting.none()).limitSpec(),
      grouping.getSubtotals().toSubtotalsSpec(grouping.getDimensionSpecs()),
      ImmutableSortedMap.copyOf(plannerContext.getQueryContext())
  );

  // We don't apply the timestamp computation optimization yet when the limit is pushed down. Maybe someday.
  if (query.getLimitSpec() instanceof DefaultLimitSpec && query.isApplyLimitPushDown()) {
    return query;
  }

  Map<String, Object> theContext = new HashMap<>();
  Granularity queryGranularity = null;

  // If exactly one dimension is a granular timestamp_floor of __time, push the granularity into the
  // query context; part of this query-plan optimization is handled in GroupByStrategyV2.
  if (!grouping.getDimensions().isEmpty()) {
    for (DimensionExpression dimensionExpression : grouping.getDimensions()) {
      Granularity granularity = Expressions.toQueryGranularity(
          dimensionExpression.getDruidExpression(),
          plannerContext.getExprMacroTable()
      );
      if (granularity == null) {
        continue;
      }
      if (queryGranularity != null) {
        // Grouping on more than one timestamp_floor,
        // e.g. GROUP BY FLOOR(__time TO DAY), FLOOR(__time TO HOUR): give up on the optimization.
        queryGranularity = null;
        break;
      }
      queryGranularity = granularity;
      int timestampDimensionIndexInDimensions = grouping.getDimensions().indexOf(dimensionExpression);
      // These settings only affect the innermost query sent to the downstream compute nodes.
      theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, dimensionExpression.getOutputName());
      theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX, timestampDimensionIndexInDimensions);
      theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY, queryGranularity);
    }
  }

  if (queryGranularity == null) {
    return query;
  }
  return query.withOverriddenContext(theContext);
}
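To make the granularity push-down concrete: for a query like SELECT FLOOR(__time TO DAY), COUNT(*) FROM tbl GROUP BY 1, the single dimension is a timestamp_floor of __time, so the loop attaches the dimension's output name, its index, and the recovered granularity to the context. A hedged sketch of what theContext would hold in that case, assuming the dimension received the hypothetical output name "d0" at index 0 and that druid-processing is on the classpath:

import java.util.HashMap;
import java.util.Map;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.groupby.GroupByQuery;

public class TimestampContextSketch
{
  public static void main(String[] args)
  {
    // Illustrative only: the context entries the loop above would produce.
    Map<String, Object> theContext = new HashMap<>();
    theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD, "d0");
    theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_INDEX, 0);
    theContext.put(GroupByQuery.CTX_TIMESTAMP_RESULT_FIELD_GRANULARITY, Granularities.DAY);
    System.out.println(theContext);
  }
}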
Use of org.apache.druid.sql.calcite.aggregation.DimensionExpression in project druid by druid-io.
The class DruidQuery, method computeDimensions.
/**
* Returns dimensions corresponding to {@code aggregate.getGroupSet()}, in the same order.
*
* @param partialQuery partial query
* @param plannerContext planner context
* @param rowSignature source row signature
* @param virtualColumnRegistry re-usable virtual column references
*
* @return dimensions
*
* @throws CannotBuildQueryException if dimensions cannot be computed
*/
private static List<DimensionExpression> computeDimensions(
    final PartialDruidQuery partialQuery,
    final PlannerContext plannerContext,
    final RowSignature rowSignature,
    final VirtualColumnRegistry virtualColumnRegistry
)
{
  final Aggregate aggregate = Preconditions.checkNotNull(partialQuery.getAggregate());
  final List<DimensionExpression> dimensions = new ArrayList<>();
  final String outputNamePrefix = Calcites.findUnusedPrefixForDigits("d", rowSignature.getColumnNames());
  int outputNameCounter = 0;

  for (int i : aggregate.getGroupSet()) {
    // Dimension might need to create virtual columns. Avoid giving it a name that would lead to colliding columns.
    final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, partialQuery.getSelectProject(), i);
    final DruidExpression druidExpression = Expressions.toDruidExpression(plannerContext, rowSignature, rexNode);
    if (druidExpression == null) {
      throw new CannotBuildQueryException(aggregate, rexNode);
    }

    final RelDataType dataType = rexNode.getType();
    final ColumnType outputType = Calcites.getColumnTypeForRelDataType(dataType);
    if (Types.isNullOr(outputType, ValueType.COMPLEX)) {
      // Can't group on unknown or COMPLEX types.
      plannerContext.setPlanningError("SQL requires a group-by on a column of type %s that is unsupported.", outputType);
      throw new CannotBuildQueryException(aggregate, rexNode);
    }

    final String dimOutputName = outputNamePrefix + outputNameCounter++;
    if (!druidExpression.isSimpleExtraction()) {
      final String virtualColumn =
          virtualColumnRegistry.getOrCreateVirtualColumnForExpression(druidExpression, dataType);
      dimensions.add(DimensionExpression.ofVirtualColumn(virtualColumn, dimOutputName, druidExpression, outputType));
    } else {
      dimensions.add(DimensionExpression.ofSimpleColumn(dimOutputName, druidExpression, outputType));
    }
  }

  return dimensions;
}
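Calcites.findUnusedPrefixForDigits is what keeps the generated names ("d0", "d1", ...) from colliding with real columns: if the source signature already contains a name of the form prefix-plus-digits, a different prefix is chosen. A rough, hypothetical re-implementation of the idea (not Druid's actual code):

import java.util.Set;

public class UnusedPrefixSketch
{
  // Hypothetical: grow the prefix until no existing column name looks like prefix + digits.
  static String findUnusedPrefixForDigits(String basePrefix, Set<String> existingNames)
  {
    String prefix = basePrefix;
    while (collidesWithDigitSuffix(prefix, existingNames)) {
      prefix = "_" + prefix;
    }
    return prefix;
  }

  static boolean collidesWithDigitSuffix(String prefix, Set<String> names)
  {
    for (String name : names) {
      if (name.length() > prefix.length()
          && name.startsWith(prefix)
          && name.substring(prefix.length()).chars().allMatch(Character::isDigit)) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args)
  {
    // "d" collides with the existing column "d0", so a longer prefix is chosen.
    System.out.println(findUnusedPrefixForDigits("d", Set.of("d0", "cnt"))); // prints _d
  }
}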
Use of org.apache.druid.sql.calcite.aggregation.DimensionExpression in project druid by druid-io.
The class Grouping, method applyProject.
/**
* Applies a post-grouping projection.
*
* @see DruidQuery#computeGrouping which uses this
*/
public Grouping applyProject(final PlannerContext plannerContext, final Project project)
{
  final List<DimensionExpression> newDimensions = new ArrayList<>();
  final List<Aggregation> newAggregations = new ArrayList<>(aggregations);
  final Subtotals newSubtotals;

  final Projection postAggregationProjection =
      Projection.postAggregation(project, plannerContext, outputRowSignature, "p");
  postAggregationProjection.getPostAggregators()
                           .forEach(postAggregator -> newAggregations.add(Aggregation.create(postAggregator)));

  // Remove literal dimensions that did not appear in the projection. This is useful for queries
  // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we
  // don't actually want to include a dimension 'dummy'.
  final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getChildExps(), null);
  final int[] newDimIndexes = new int[dimensions.size()];
  boolean droppedDimensions = false;

  for (int i = 0; i < dimensions.size(); i++) {
    final DimensionExpression dimension = dimensions.get(i);
    if (Parser.parse(dimension.getDruidExpression().getExpression(), plannerContext.getExprMacroTable()).isLiteral()
        && !aggregateProjectBits.get(i)) {
      droppedDimensions = true;
      newDimIndexes[i] = -1;
    } else {
      newDimIndexes[i] = newDimensions.size();
      newDimensions.add(dimension);
    }
  }

  // Renumber subtotals, if needed, to account for removed dummy dimensions.
  if (newDimensions.size() != dimensions.size()) {
    final List<IntList> newSubtotalsList = new ArrayList<>();
    for (IntList subtotal : subtotals.getSubtotals()) {
      final IntList newSubtotal = new IntArrayList();
      for (int dimIndex : subtotal) {
        final int newDimIndex = newDimIndexes[dimIndex];
        if (newDimIndex >= 0) {
          newSubtotal.add(newDimIndex);
        }
      }
      newSubtotalsList.add(newSubtotal);
    }
    newSubtotals = new Subtotals(newSubtotalsList);
  } else {
    newSubtotals = subtotals;
  }

  return Grouping.create(
      newDimensions,
      newSubtotals,
      newAggregations,
      havingFilter,
      postAggregationProjection.getOutputRowSignature(),
      droppedDimensions
  );
}
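The renumbering is easiest to see with concrete indexes: if the middle of three dimensions is a dropped literal, newDimIndexes maps old positions to new ones (-1 marking dropped), and each subtotal set is rewritten through that map. A self-contained sketch with hypothetical data (plain lists in place of fastutil's IntList):

import java.util.ArrayList;
import java.util.List;

public class SubtotalRenumberSketch
{
  public static void main(String[] args)
  {
    // Hypothetical: dimension 1 was a literal that the projection never referenced.
    int[] newDimIndexes = {0, -1, 1};
    List<List<Integer>> subtotals = List.of(List.of(0, 1, 2), List.of(2));

    List<List<Integer>> renumbered = new ArrayList<>();
    for (List<Integer> subtotal : subtotals) {
      List<Integer> newSubtotal = new ArrayList<>();
      for (int dimIndex : subtotal) {
        int newDimIndex = newDimIndexes[dimIndex];
        if (newDimIndex >= 0) { // dropped dimensions simply vanish from the subtotal
          newSubtotal.add(newDimIndex);
        }
      }
      renumbered.add(newSubtotal);
    }

    System.out.println(renumbered); // prints [[0, 1], [1]]
  }
}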
Use of org.apache.druid.sql.calcite.aggregation.DimensionExpression in project druid by druid-io.
The class DruidQuery, method toTimeseriesQuery.
/**
* Return this query as a Timeseries query, or null if this query is not compatible with Timeseries.
*
 * @return query or null
*/
@Nullable
private TimeseriesQuery toTimeseriesQuery(final QueryFeatureInspector queryFeatureInspector)
{
  if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TIMESERIES)
      || grouping == null
      || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs())
      || grouping.getHavingFilter() != null) {
    return null;
  }

  if (sorting != null && sorting.getOffsetLimit().hasOffset()) {
    // Timeseries cannot handle offsets.
    return null;
  }

  final Granularity queryGranularity;
  final boolean descending;
  int timeseriesLimit = 0;
  final Map<String, Object> theContext = new HashMap<>();

  if (grouping.getDimensions().isEmpty()) {
    queryGranularity = Granularities.ALL;
    descending = false;
  } else if (grouping.getDimensions().size() == 1) {
    final DimensionExpression dimensionExpression = Iterables.getOnlyElement(grouping.getDimensions());
    queryGranularity = Expressions.toQueryGranularity(
        dimensionExpression.getDruidExpression(),
        plannerContext.getExprMacroTable()
    );

    if (queryGranularity == null) {
      // Timeseries only applies if the single dimension is granular __time.
      return null;
    }

    theContext.put(
        TimeseriesQuery.CTX_TIMESTAMP_RESULT_FIELD,
        Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec().getOutputName()
    );

    if (sorting != null) {
      if (sorting.getOffsetLimit().hasLimit()) {
        final long limit = sorting.getOffsetLimit().getLimit();
        if (limit == 0) {
          // Can't handle a zero limit (the Timeseries query engine would treat it as unlimited).
          return null;
        }
        timeseriesLimit = Ints.checkedCast(limit);
      }

      switch (sorting.getTimeSortKind(dimensionExpression.getOutputName())) {
        case UNORDERED:
        case TIME_ASCENDING:
          descending = false;
          break;
        case TIME_DESCENDING:
          descending = true;
          break;
        default:
          // Sorting on a metric, perhaps. Timeseries cannot handle that.
          return null;
      }
    } else {
      // No limitSpec.
      descending = false;
    }
  } else {
    // More than one dimension: Timeseries cannot handle that.
    return null;
  }

  // Skip empty buckets if the granularity isn't ALL, or if this was originally a groupBy query
  // whose grouping dimensions were dropped in Grouping#applyProject.
  if (!Granularities.ALL.equals(queryGranularity) || grouping.hasGroupingDimensionsDropped()) {
    theContext.put(TimeseriesQuery.SKIP_EMPTY_BUCKETS, true);
  }
  theContext.putAll(plannerContext.getQueryContext());

  final Pair<DataSource, Filtration> dataSourceFiltrationPair =
      getFiltration(dataSource, filter, virtualColumnRegistry);
  final DataSource newDataSource = dataSourceFiltrationPair.lhs;
  final Filtration filtration = dataSourceFiltrationPair.rhs;

  final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
  if (sorting != null && sorting.getProjection() != null) {
    postAggregators.addAll(sorting.getProjection().getPostAggregators());
  }

  return new TimeseriesQuery(
      newDataSource,
      filtration.getQuerySegmentSpec(),
      descending,
      getVirtualColumns(false),
      filtration.getDimFilter(),
      queryGranularity,
      grouping.getAggregatorFactories(),
      postAggregators,
      timeseriesLimit,
      ImmutableSortedMap.copyOf(theContext)
  );
}
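A note on SKIP_EMPTY_BUCKETS: with a non-ALL granularity, a Timeseries engine would normally emit a row for every time bucket in the queried interval, empty ones included, while the groupBy query this plan replaces emits rows only for buckets that contain data; setting the flag keeps the SQL results consistent across the two paths. A minimal sketch of the context assembly for a query like SELECT FLOOR(__time TO HOUR), COUNT(*) FROM tbl GROUP BY 1 (the "d0" name is hypothetical; in the real method, plannerContext.getQueryContext() is merged last, so user-supplied context wins on conflicts):

import java.util.HashMap;
import java.util.Map;
import org.apache.druid.query.timeseries.TimeseriesQuery;

public class TimeseriesContextSketch
{
  public static void main(String[] args)
  {
    Map<String, Object> theContext = new HashMap<>();
    theContext.put(TimeseriesQuery.CTX_TIMESTAMP_RESULT_FIELD, "d0"); // output name of the __time dimension
    theContext.put(TimeseriesQuery.SKIP_EMPTY_BUCKETS, true);         // match groupBy's "no empty rows" behavior
    System.out.println(theContext);
  }
}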