use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.
the class QuantileSqlAggregator method toDruidAggregation.
@Override
public Aggregation toDruidAggregation(final String name, final RowSignature rowSignature, final DruidOperatorTable operatorTable, final PlannerContext plannerContext, final List<Aggregation> existingAggregations, final Project project, final AggregateCall aggregateCall, final DimFilter filter) {
final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
if (rex == null) {
return null;
}
final AggregatorFactory aggregatorFactory;
final String histogramName = String.format("%s:agg", name);
final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
final int resolution;
if (aggregateCall.getArgList().size() >= 3) {
final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
} else {
resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
}
final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
final float lowerLimit = Float.NEGATIVE_INFINITY;
final float upperLimit = Float.POSITIVE_INFINITY;
// Look for existing matching aggregatorFactory.
for (final Aggregation existing : existingAggregations) {
for (AggregatorFactory factory : existing.getAggregatorFactories()) {
final boolean matches = Aggregations.aggregatorMatches(factory, filter, ApproximateHistogramAggregatorFactory.class, new Predicate<ApproximateHistogramAggregatorFactory>() {
@Override
public boolean apply(final ApproximateHistogramAggregatorFactory theFactory) {
return theFactory.getFieldName().equals(rex.getColumn()) && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
}
});
if (matches) {
// Found existing one. Use this.
return Aggregation.create(ImmutableList.<AggregatorFactory>of(), new QuantilePostAggregator(name, factory.getName(), probability));
}
}
}
if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, rex.getColumn(), resolution, numBuckets, lowerLimit, upperLimit);
} else {
aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, rex.getColumn(), resolution, numBuckets, lowerLimit, upperLimit);
}
return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability)).filter(filter);
}
use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.
the class GroupByRules method translateAggregateCall.
/**
* Translate an AggregateCall to Druid equivalents.
*
* @return translated aggregation, or null if translation failed.
*/
private static Aggregation translateAggregateCall(final PlannerContext plannerContext, final RowSignature sourceRowSignature, final Project project, final AggregateCall call, final DruidOperatorTable operatorTable, final List<Aggregation> existingAggregations, final int aggNumber, final boolean approximateCountDistinct) {
final List<DimFilter> filters = Lists.newArrayList();
final List<String> rowOrder = sourceRowSignature.getRowOrder();
final String name = aggOutputName(aggNumber);
final SqlKind kind = call.getAggregation().getKind();
final SqlTypeName outputType = call.getType().getSqlTypeName();
if (call.filterArg >= 0) {
// AGG(xxx) FILTER(WHERE yyy)
if (project == null) {
// We need some kind of projection to support filtered aggregations.
return null;
}
final RexNode expression = project.getChildExps().get(call.filterArg);
final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, expression);
if (filter == null) {
return null;
}
filters.add(filter);
}
if (kind == SqlKind.COUNT && call.getArgList().isEmpty()) {
// COUNT(*)
return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
} else if (kind == SqlKind.COUNT && call.isDistinct()) {
// COUNT(DISTINCT x)
return approximateCountDistinct ? APPROX_COUNT_DISTINCT.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
} else if (kind == SqlKind.COUNT || kind == SqlKind.SUM || kind == SqlKind.SUM0 || kind == SqlKind.MIN || kind == SqlKind.MAX || kind == SqlKind.AVG) {
// Built-in agg, not distinct, not COUNT(*)
boolean forceCount = false;
final FieldOrExpression input;
final int inputField = Iterables.getOnlyElement(call.getArgList());
final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, inputField);
final FieldOrExpression foe = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, rexNode);
if (foe != null) {
input = foe;
} else if (rexNode.getKind() == SqlKind.CASE && ((RexCall) rexNode).getOperands().size() == 3) {
// Possibly a CASE-style filtered aggregation. Styles supported:
// A: SUM(CASE WHEN x = 'foo' THEN cnt END) => operands (x = 'foo', cnt, null)
// B: SUM(CASE WHEN x = 'foo' THEN 1 ELSE 0 END) => operands (x = 'foo', 1, 0)
// C: COUNT(CASE WHEN x = 'foo' THEN 'dummy' END) => operands (x = 'foo', 'dummy', null)
// If the null and non-null args are switched, "flip" is set, which negates the filter.
final RexCall caseCall = (RexCall) rexNode;
final boolean flip = RexLiteral.isNullLiteral(caseCall.getOperands().get(1)) && !RexLiteral.isNullLiteral(caseCall.getOperands().get(2));
final RexNode arg1 = caseCall.getOperands().get(flip ? 2 : 1);
final RexNode arg2 = caseCall.getOperands().get(flip ? 1 : 2);
// Operand 1: Filter
final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, caseCall.getOperands().get(0));
if (filter == null) {
return null;
} else {
filters.add(flip ? new NotDimFilter(filter) : filter);
}
if (call.getAggregation().getKind() == SqlKind.COUNT && arg1 instanceof RexLiteral && !RexLiteral.isNullLiteral(arg1) && RexLiteral.isNullLiteral(arg2)) {
// Case C
forceCount = true;
input = null;
} else if (call.getAggregation().getKind() == SqlKind.SUM && arg1 instanceof RexLiteral && ((Number) RexLiteral.value(arg1)).intValue() == 1 && arg2 instanceof RexLiteral && ((Number) RexLiteral.value(arg2)).intValue() == 0) {
// Case B
forceCount = true;
input = null;
} else if (RexLiteral.isNullLiteral(arg2)) {
// Maybe case A
input = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, arg1);
if (input == null) {
return null;
}
} else {
// Can't translate CASE into a filter.
return null;
}
} else {
// Can't translate operand.
return null;
}
if (!forceCount) {
Preconditions.checkNotNull(input, "WTF?! input was null for non-COUNT aggregation");
}
if (forceCount || kind == SqlKind.COUNT) {
// COUNT(x)
return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
} else {
// Built-in aggregator that is not COUNT.
final Aggregation retVal;
final String fieldName = input.getFieldName();
final String expression = input.getExpression();
final boolean isLong = SqlTypeName.INT_TYPES.contains(outputType) || SqlTypeName.TIMESTAMP == outputType || SqlTypeName.DATE == outputType;
if (kind == SqlKind.SUM || kind == SqlKind.SUM0) {
retVal = isLong ? Aggregation.create(new LongSumAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleSumAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.MIN) {
retVal = isLong ? Aggregation.create(new LongMinAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMinAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.MAX) {
retVal = isLong ? Aggregation.create(new LongMaxAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMaxAggregatorFactory(name, fieldName, expression));
} else if (kind == SqlKind.AVG) {
final String sumName = aggInternalName(aggNumber, "sum");
final String countName = aggInternalName(aggNumber, "count");
final AggregatorFactory sum = isLong ? new LongSumAggregatorFactory(sumName, fieldName, expression) : new DoubleSumAggregatorFactory(sumName, fieldName, expression);
final AggregatorFactory count = new CountAggregatorFactory(countName);
retVal = Aggregation.create(ImmutableList.of(sum, count), new ArithmeticPostAggregator(name, "quotient", ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))));
} else {
// Not reached.
throw new ISE("WTF?! Kind[%s] got into the built-in aggregator path somehow?!", kind);
}
return retVal.filter(makeFilter(filters, sourceRowSignature));
}
} else {
// Not a built-in aggregator, check operator table.
final SqlAggregator sqlAggregator = operatorTable.lookupAggregator(call.getAggregation().getName());
return sqlAggregator != null ? sqlAggregator.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
}
}
use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.
the class GroupByRules method applyPostAggregation.
/**
* Applies a projection to the aggregations of a druidRel, by potentially adding post-aggregators.
*
* @return new rel, or null if the projection cannot be applied
*/
private static DruidRel applyPostAggregation(final DruidRel druidRel, final Project postProject) {
Preconditions.checkState(canApplyPostAggregation(druidRel), "Cannot applyPostAggregation");
final List<String> rowOrder = druidRel.getQueryBuilder().getRowOrder();
final Grouping grouping = druidRel.getQueryBuilder().getGrouping();
final List<Aggregation> newAggregations = Lists.newArrayList(grouping.getAggregations());
final List<PostAggregatorFactory> finalizingPostAggregatorFactories = Lists.newArrayList();
final List<String> newRowOrder = Lists.newArrayList();
// Build list of finalizingPostAggregatorFactories.
final Map<String, Aggregation> aggregationMap = Maps.newHashMap();
for (final Aggregation aggregation : grouping.getAggregations()) {
aggregationMap.put(aggregation.getOutputName(), aggregation);
}
for (final String field : rowOrder) {
final Aggregation aggregation = aggregationMap.get(field);
finalizingPostAggregatorFactories.add(aggregation == null ? null : aggregation.getFinalizingPostAggregatorFactory());
}
// Walk through the postProject expressions.
for (final RexNode projectExpression : postProject.getChildExps()) {
if (projectExpression.isA(SqlKind.INPUT_REF)) {
final RexInputRef ref = (RexInputRef) projectExpression;
final String fieldName = rowOrder.get(ref.getIndex());
newRowOrder.add(fieldName);
finalizingPostAggregatorFactories.add(null);
} else {
// Attempt to convert to PostAggregator.
final String postAggregatorName = aggOutputName(newAggregations.size());
final PostAggregator postAggregator = Expressions.toPostAggregator(postAggregatorName, rowOrder, finalizingPostAggregatorFactories, projectExpression);
if (postAggregator != null) {
newAggregations.add(Aggregation.create(postAggregator));
newRowOrder.add(postAggregator.getName());
finalizingPostAggregatorFactories.add(null);
} else {
return null;
}
}
}
return druidRel.withQueryBuilder(druidRel.getQueryBuilder().withAdjustedGrouping(Grouping.create(grouping.getDimensions(), newAggregations), postProject.getRowType(), newRowOrder));
}
use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.
the class GroupByRules method applyAggregate.
/**
* Applies a filter -> project -> aggregate chain to a druidRel. Do not call this method unless
* {@link #canApplyAggregate(DruidRel, Filter, Project, Aggregate)} returns true.
*
* @return new rel, or null if the chain cannot be applied
*/
private static DruidRel applyAggregate(final DruidRel druidRel, final Filter filter0, final Project project0, final Aggregate aggregate, final DruidOperatorTable operatorTable, final boolean approximateCountDistinct) {
Preconditions.checkState(canApplyAggregate(druidRel, filter0, project0, aggregate), "Cannot applyAggregate.");
final RowSignature sourceRowSignature;
final boolean isNestedQuery = druidRel.getQueryBuilder().getGrouping() != null;
if (isNestedQuery) {
// Nested groupBy; source row signature is the output signature of druidRel.
sourceRowSignature = druidRel.getOutputRowSignature();
} else {
sourceRowSignature = druidRel.getSourceRowSignature();
}
// Filter that should be applied before aggregating.
final DimFilter filter;
if (filter0 != null) {
filter = Expressions.toFilter(operatorTable, druidRel.getPlannerContext(), sourceRowSignature, filter0.getCondition());
if (filter == null) {
// Can't plan this filter.
return null;
}
} else if (druidRel.getQueryBuilder().getFilter() != null && !isNestedQuery) {
// We're going to replace the existing druidRel, so inherit its filter.
filter = druidRel.getQueryBuilder().getFilter();
} else {
filter = null;
}
// Projection that should be applied before aggregating.
final Project project;
if (project0 != null) {
project = project0;
} else if (druidRel.getQueryBuilder().getSelectProjection() != null && !isNestedQuery) {
// We're going to replace the existing druidRel, so inherit its projection.
project = druidRel.getQueryBuilder().getSelectProjection().getProject();
} else {
project = null;
}
final List<DimensionSpec> dimensions = Lists.newArrayList();
final List<Aggregation> aggregations = Lists.newArrayList();
final List<String> rowOrder = Lists.newArrayList();
// Translate groupSet.
final ImmutableBitSet groupSet = aggregate.getGroupSet();
int dimOutputNameCounter = 0;
for (int i : groupSet) {
if (project != null && project.getChildExps().get(i) instanceof RexLiteral) {
// Ignore literals in GROUP BY, so a user can write e.g. "GROUP BY 'dummy'" to group everything into a single
// row. Add dummy rowOrder entry so NULLs come out. This is not strictly correct but it works as long as
// nobody actually expects to see the literal.
rowOrder.add(dimOutputName(dimOutputNameCounter++));
} else {
final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, i);
final RowExtraction rex = Expressions.toRowExtraction(operatorTable, druidRel.getPlannerContext(), sourceRowSignature.getRowOrder(), rexNode);
if (rex == null) {
return null;
}
final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
if (outputType == null) {
throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, rowOrder.get(i));
}
final DimensionSpec dimensionSpec = rex.toDimensionSpec(sourceRowSignature, dimOutputName(dimOutputNameCounter++), outputType);
if (dimensionSpec == null) {
return null;
}
dimensions.add(dimensionSpec);
rowOrder.add(dimensionSpec.getOutputName());
}
}
// Translate aggregates.
for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
final AggregateCall aggCall = aggregate.getAggCallList().get(i);
final Aggregation aggregation = translateAggregateCall(druidRel.getPlannerContext(), sourceRowSignature, project, aggCall, operatorTable, aggregations, i, approximateCountDistinct);
if (aggregation == null) {
return null;
}
aggregations.add(aggregation);
rowOrder.add(aggregation.getOutputName());
}
if (isNestedQuery) {
// Nested groupBy.
return DruidNestedGroupBy.from(druidRel, filter, Grouping.create(dimensions, aggregations), aggregate.getRowType(), rowOrder);
} else {
// groupBy on a base dataSource.
return druidRel.withQueryBuilder(druidRel.getQueryBuilder().withFilter(filter).withGrouping(Grouping.create(dimensions, aggregations), aggregate.getRowType(), rowOrder));
}
}
Aggregations