Search in sources :

Example 1 with Aggregation

use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.

the class QuantileSqlAggregator method toDruidAggregation.

@Override
public Aggregation toDruidAggregation(final String name, final RowSignature rowSignature, final DruidOperatorTable operatorTable, final PlannerContext plannerContext, final List<Aggregation> existingAggregations, final Project project, final AggregateCall aggregateCall, final DimFilter filter) {
    final RowExtraction rex = Expressions.toRowExtraction(operatorTable, plannerContext, rowSignature.getRowOrder(), Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(0)));
    if (rex == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String histogramName = String.format("%s:agg", name);
    final RexNode probabilityArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(1));
    final float probability = ((Number) RexLiteral.value(probabilityArg)).floatValue();
    final int resolution;
    if (aggregateCall.getArgList().size() >= 3) {
        final RexNode resolutionArg = Expressions.fromFieldAccess(rowSignature, project, aggregateCall.getArgList().get(2));
        resolution = ((Number) RexLiteral.value(resolutionArg)).intValue();
    } else {
        resolution = ApproximateHistogram.DEFAULT_HISTOGRAM_SIZE;
    }
    final int numBuckets = ApproximateHistogram.DEFAULT_BUCKET_SIZE;
    final float lowerLimit = Float.NEGATIVE_INFINITY;
    final float upperLimit = Float.POSITIVE_INFINITY;
    // Look for existing matching aggregatorFactory.
    for (final Aggregation existing : existingAggregations) {
        for (AggregatorFactory factory : existing.getAggregatorFactories()) {
            final boolean matches = Aggregations.aggregatorMatches(factory, filter, ApproximateHistogramAggregatorFactory.class, new Predicate<ApproximateHistogramAggregatorFactory>() {

                @Override
                public boolean apply(final ApproximateHistogramAggregatorFactory theFactory) {
                    return theFactory.getFieldName().equals(rex.getColumn()) && theFactory.getResolution() == resolution && theFactory.getNumBuckets() == numBuckets && theFactory.getLowerLimit() == lowerLimit && theFactory.getUpperLimit() == upperLimit;
                }
            });
            if (matches) {
                // Found existing one. Use this.
                return Aggregation.create(ImmutableList.<AggregatorFactory>of(), new QuantilePostAggregator(name, factory.getName(), probability));
            }
        }
    }
    if (rowSignature.getColumnType(rex.getColumn()) == ValueType.COMPLEX) {
        aggregatorFactory = new ApproximateHistogramFoldingAggregatorFactory(histogramName, rex.getColumn(), resolution, numBuckets, lowerLimit, upperLimit);
    } else {
        aggregatorFactory = new ApproximateHistogramAggregatorFactory(histogramName, rex.getColumn(), resolution, numBuckets, lowerLimit, upperLimit);
    }
    return Aggregation.create(ImmutableList.of(aggregatorFactory), new QuantilePostAggregator(name, histogramName, probability)).filter(filter);
}
Also used : ApproximateHistogramFoldingAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) QuantilePostAggregator(io.druid.query.aggregation.histogram.QuantilePostAggregator) RowExtraction(io.druid.sql.calcite.expression.RowExtraction) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) ApproximateHistogramFoldingAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramFoldingAggregatorFactory) ApproximateHistogramAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) Aggregation(io.druid.sql.calcite.aggregation.Aggregation) ApproximateHistogramAggregatorFactory(io.druid.query.aggregation.histogram.ApproximateHistogramAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode)

Example 2 with Aggregation

use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.

the class GroupByRules method translateAggregateCall.

/**
   * Translate an AggregateCall to Druid equivalents.
   *
   * @return translated aggregation, or null if translation failed.
   */
private static Aggregation translateAggregateCall(final PlannerContext plannerContext, final RowSignature sourceRowSignature, final Project project, final AggregateCall call, final DruidOperatorTable operatorTable, final List<Aggregation> existingAggregations, final int aggNumber, final boolean approximateCountDistinct) {
    final List<DimFilter> filters = Lists.newArrayList();
    final List<String> rowOrder = sourceRowSignature.getRowOrder();
    final String name = aggOutputName(aggNumber);
    final SqlKind kind = call.getAggregation().getKind();
    final SqlTypeName outputType = call.getType().getSqlTypeName();
    if (call.filterArg >= 0) {
        // AGG(xxx) FILTER(WHERE yyy)
        if (project == null) {
            // We need some kind of projection to support filtered aggregations.
            return null;
        }
        final RexNode expression = project.getChildExps().get(call.filterArg);
        final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, expression);
        if (filter == null) {
            return null;
        }
        filters.add(filter);
    }
    if (kind == SqlKind.COUNT && call.getArgList().isEmpty()) {
        // COUNT(*)
        return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
    } else if (kind == SqlKind.COUNT && call.isDistinct()) {
        // COUNT(DISTINCT x)
        return approximateCountDistinct ? APPROX_COUNT_DISTINCT.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
    } else if (kind == SqlKind.COUNT || kind == SqlKind.SUM || kind == SqlKind.SUM0 || kind == SqlKind.MIN || kind == SqlKind.MAX || kind == SqlKind.AVG) {
        // Built-in agg, not distinct, not COUNT(*)
        boolean forceCount = false;
        final FieldOrExpression input;
        final int inputField = Iterables.getOnlyElement(call.getArgList());
        final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, inputField);
        final FieldOrExpression foe = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, rexNode);
        if (foe != null) {
            input = foe;
        } else if (rexNode.getKind() == SqlKind.CASE && ((RexCall) rexNode).getOperands().size() == 3) {
            // Possibly a CASE-style filtered aggregation. Styles supported:
            // A: SUM(CASE WHEN x = 'foo' THEN cnt END) => operands (x = 'foo', cnt, null)
            // B: SUM(CASE WHEN x = 'foo' THEN 1 ELSE 0 END) => operands (x = 'foo', 1, 0)
            // C: COUNT(CASE WHEN x = 'foo' THEN 'dummy' END) => operands (x = 'foo', 'dummy', null)
            // If the null and non-null args are switched, "flip" is set, which negates the filter.
            final RexCall caseCall = (RexCall) rexNode;
            final boolean flip = RexLiteral.isNullLiteral(caseCall.getOperands().get(1)) && !RexLiteral.isNullLiteral(caseCall.getOperands().get(2));
            final RexNode arg1 = caseCall.getOperands().get(flip ? 2 : 1);
            final RexNode arg2 = caseCall.getOperands().get(flip ? 1 : 2);
            // Operand 1: Filter
            final DimFilter filter = Expressions.toFilter(operatorTable, plannerContext, sourceRowSignature, caseCall.getOperands().get(0));
            if (filter == null) {
                return null;
            } else {
                filters.add(flip ? new NotDimFilter(filter) : filter);
            }
            if (call.getAggregation().getKind() == SqlKind.COUNT && arg1 instanceof RexLiteral && !RexLiteral.isNullLiteral(arg1) && RexLiteral.isNullLiteral(arg2)) {
                // Case C
                forceCount = true;
                input = null;
            } else if (call.getAggregation().getKind() == SqlKind.SUM && arg1 instanceof RexLiteral && ((Number) RexLiteral.value(arg1)).intValue() == 1 && arg2 instanceof RexLiteral && ((Number) RexLiteral.value(arg2)).intValue() == 0) {
                // Case B
                forceCount = true;
                input = null;
            } else if (RexLiteral.isNullLiteral(arg2)) {
                // Maybe case A
                input = FieldOrExpression.fromRexNode(operatorTable, plannerContext, rowOrder, arg1);
                if (input == null) {
                    return null;
                }
            } else {
                // Can't translate CASE into a filter.
                return null;
            }
        } else {
            // Can't translate operand.
            return null;
        }
        if (!forceCount) {
            Preconditions.checkNotNull(input, "WTF?! input was null for non-COUNT aggregation");
        }
        if (forceCount || kind == SqlKind.COUNT) {
            // COUNT(x)
            return Aggregation.create(new CountAggregatorFactory(name)).filter(makeFilter(filters, sourceRowSignature));
        } else {
            // Built-in aggregator that is not COUNT.
            final Aggregation retVal;
            final String fieldName = input.getFieldName();
            final String expression = input.getExpression();
            final boolean isLong = SqlTypeName.INT_TYPES.contains(outputType) || SqlTypeName.TIMESTAMP == outputType || SqlTypeName.DATE == outputType;
            if (kind == SqlKind.SUM || kind == SqlKind.SUM0) {
                retVal = isLong ? Aggregation.create(new LongSumAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleSumAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.MIN) {
                retVal = isLong ? Aggregation.create(new LongMinAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMinAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.MAX) {
                retVal = isLong ? Aggregation.create(new LongMaxAggregatorFactory(name, fieldName, expression)) : Aggregation.create(new DoubleMaxAggregatorFactory(name, fieldName, expression));
            } else if (kind == SqlKind.AVG) {
                final String sumName = aggInternalName(aggNumber, "sum");
                final String countName = aggInternalName(aggNumber, "count");
                final AggregatorFactory sum = isLong ? new LongSumAggregatorFactory(sumName, fieldName, expression) : new DoubleSumAggregatorFactory(sumName, fieldName, expression);
                final AggregatorFactory count = new CountAggregatorFactory(countName);
                retVal = Aggregation.create(ImmutableList.of(sum, count), new ArithmeticPostAggregator(name, "quotient", ImmutableList.<PostAggregator>of(new FieldAccessPostAggregator(null, sumName), new FieldAccessPostAggregator(null, countName))));
            } else {
                // Not reached.
                throw new ISE("WTF?! Kind[%s] got into the built-in aggregator path somehow?!", kind);
            }
            return retVal.filter(makeFilter(filters, sourceRowSignature));
        }
    } else {
        // Not a built-in aggregator, check operator table.
        final SqlAggregator sqlAggregator = operatorTable.lookupAggregator(call.getAggregation().getName());
        return sqlAggregator != null ? sqlAggregator.toDruidAggregation(name, sourceRowSignature, operatorTable, plannerContext, existingAggregations, project, call, makeFilter(filters, sourceRowSignature)) : null;
    }
}
Also used : RexLiteral(org.apache.calcite.rex.RexLiteral) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) LongMinAggregatorFactory(io.druid.query.aggregation.LongMinAggregatorFactory) RexCall(org.apache.calcite.rex.RexCall) Aggregation(io.druid.sql.calcite.aggregation.Aggregation) ISE(io.druid.java.util.common.ISE) LongMaxAggregatorFactory(io.druid.query.aggregation.LongMaxAggregatorFactory) NotDimFilter(io.druid.query.filter.NotDimFilter) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) SqlKind(org.apache.calcite.sql.SqlKind) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(io.druid.query.aggregation.LongMaxAggregatorFactory) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LongMinAggregatorFactory(io.druid.query.aggregation.LongMinAggregatorFactory) PostAggregatorFactory(io.druid.sql.calcite.aggregation.PostAggregatorFactory) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) SqlAggregator(io.druid.sql.calcite.aggregation.SqlAggregator) ApproxCountDistinctSqlAggregator(io.druid.sql.calcite.aggregation.ApproxCountDistinctSqlAggregator) DimFilter(io.druid.query.filter.DimFilter) NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) RexNode(org.apache.calcite.rex.RexNode)

Example 3 with Aggregation

use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.

the class GroupByRules method applyPostAggregation.

/**
   * Applies a projection to the aggregations of a druidRel, by potentially adding post-aggregators.
   *
   * @return new rel, or null if the projection cannot be applied
   */
private static DruidRel applyPostAggregation(final DruidRel druidRel, final Project postProject) {
    Preconditions.checkState(canApplyPostAggregation(druidRel), "Cannot applyPostAggregation");
    final List<String> rowOrder = druidRel.getQueryBuilder().getRowOrder();
    final Grouping grouping = druidRel.getQueryBuilder().getGrouping();
    final List<Aggregation> newAggregations = Lists.newArrayList(grouping.getAggregations());
    final List<PostAggregatorFactory> finalizingPostAggregatorFactories = Lists.newArrayList();
    final List<String> newRowOrder = Lists.newArrayList();
    // Build list of finalizingPostAggregatorFactories.
    final Map<String, Aggregation> aggregationMap = Maps.newHashMap();
    for (final Aggregation aggregation : grouping.getAggregations()) {
        aggregationMap.put(aggregation.getOutputName(), aggregation);
    }
    for (final String field : rowOrder) {
        final Aggregation aggregation = aggregationMap.get(field);
        finalizingPostAggregatorFactories.add(aggregation == null ? null : aggregation.getFinalizingPostAggregatorFactory());
    }
    // Walk through the postProject expressions.
    for (final RexNode projectExpression : postProject.getChildExps()) {
        if (projectExpression.isA(SqlKind.INPUT_REF)) {
            final RexInputRef ref = (RexInputRef) projectExpression;
            final String fieldName = rowOrder.get(ref.getIndex());
            newRowOrder.add(fieldName);
            finalizingPostAggregatorFactories.add(null);
        } else {
            // Attempt to convert to PostAggregator.
            final String postAggregatorName = aggOutputName(newAggregations.size());
            final PostAggregator postAggregator = Expressions.toPostAggregator(postAggregatorName, rowOrder, finalizingPostAggregatorFactories, projectExpression);
            if (postAggregator != null) {
                newAggregations.add(Aggregation.create(postAggregator));
                newRowOrder.add(postAggregator.getName());
                finalizingPostAggregatorFactories.add(null);
            } else {
                return null;
            }
        }
    }
    return druidRel.withQueryBuilder(druidRel.getQueryBuilder().withAdjustedGrouping(Grouping.create(grouping.getDimensions(), newAggregations), postProject.getRowType(), newRowOrder));
}
Also used : Aggregation(io.druid.sql.calcite.aggregation.Aggregation) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) RexInputRef(org.apache.calcite.rex.RexInputRef) Grouping(io.druid.sql.calcite.rel.Grouping) PostAggregatorFactory(io.druid.sql.calcite.aggregation.PostAggregatorFactory) RexNode(org.apache.calcite.rex.RexNode)

Example 4 with Aggregation

use of io.druid.sql.calcite.aggregation.Aggregation in project druid by druid-io.

the class GroupByRules method applyAggregate.

/**
   * Applies a filter -> project -> aggregate chain to a druidRel. Do not call this method unless
   * {@link #canApplyAggregate(DruidRel, Filter, Project, Aggregate)} returns true.
   *
   * @return new rel, or null if the chain cannot be applied
   */
private static DruidRel applyAggregate(final DruidRel druidRel, final Filter filter0, final Project project0, final Aggregate aggregate, final DruidOperatorTable operatorTable, final boolean approximateCountDistinct) {
    Preconditions.checkState(canApplyAggregate(druidRel, filter0, project0, aggregate), "Cannot applyAggregate.");
    final RowSignature sourceRowSignature;
    final boolean isNestedQuery = druidRel.getQueryBuilder().getGrouping() != null;
    if (isNestedQuery) {
        // Nested groupBy; source row signature is the output signature of druidRel.
        sourceRowSignature = druidRel.getOutputRowSignature();
    } else {
        sourceRowSignature = druidRel.getSourceRowSignature();
    }
    // Filter that should be applied before aggregating.
    final DimFilter filter;
    if (filter0 != null) {
        filter = Expressions.toFilter(operatorTable, druidRel.getPlannerContext(), sourceRowSignature, filter0.getCondition());
        if (filter == null) {
            // Can't plan this filter.
            return null;
        }
    } else if (druidRel.getQueryBuilder().getFilter() != null && !isNestedQuery) {
        // We're going to replace the existing druidRel, so inherit its filter.
        filter = druidRel.getQueryBuilder().getFilter();
    } else {
        filter = null;
    }
    // Projection that should be applied before aggregating.
    final Project project;
    if (project0 != null) {
        project = project0;
    } else if (druidRel.getQueryBuilder().getSelectProjection() != null && !isNestedQuery) {
        // We're going to replace the existing druidRel, so inherit its projection.
        project = druidRel.getQueryBuilder().getSelectProjection().getProject();
    } else {
        project = null;
    }
    final List<DimensionSpec> dimensions = Lists.newArrayList();
    final List<Aggregation> aggregations = Lists.newArrayList();
    final List<String> rowOrder = Lists.newArrayList();
    // Translate groupSet.
    final ImmutableBitSet groupSet = aggregate.getGroupSet();
    int dimOutputNameCounter = 0;
    for (int i : groupSet) {
        if (project != null && project.getChildExps().get(i) instanceof RexLiteral) {
            // Ignore literals in GROUP BY, so a user can write e.g. "GROUP BY 'dummy'" to group everything into a single
            // row. Add dummy rowOrder entry so NULLs come out. This is not strictly correct but it works as long as
            // nobody actually expects to see the literal.
            rowOrder.add(dimOutputName(dimOutputNameCounter++));
        } else {
            final RexNode rexNode = Expressions.fromFieldAccess(sourceRowSignature, project, i);
            final RowExtraction rex = Expressions.toRowExtraction(operatorTable, druidRel.getPlannerContext(), sourceRowSignature.getRowOrder(), rexNode);
            if (rex == null) {
                return null;
            }
            final SqlTypeName sqlTypeName = rexNode.getType().getSqlTypeName();
            final ValueType outputType = Calcites.getValueTypeForSqlTypeName(sqlTypeName);
            if (outputType == null) {
                throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", sqlTypeName, rowOrder.get(i));
            }
            final DimensionSpec dimensionSpec = rex.toDimensionSpec(sourceRowSignature, dimOutputName(dimOutputNameCounter++), outputType);
            if (dimensionSpec == null) {
                return null;
            }
            dimensions.add(dimensionSpec);
            rowOrder.add(dimensionSpec.getOutputName());
        }
    }
    // Translate aggregates.
    for (int i = 0; i < aggregate.getAggCallList().size(); i++) {
        final AggregateCall aggCall = aggregate.getAggCallList().get(i);
        final Aggregation aggregation = translateAggregateCall(druidRel.getPlannerContext(), sourceRowSignature, project, aggCall, operatorTable, aggregations, i, approximateCountDistinct);
        if (aggregation == null) {
            return null;
        }
        aggregations.add(aggregation);
        rowOrder.add(aggregation.getOutputName());
    }
    if (isNestedQuery) {
        // Nested groupBy.
        return DruidNestedGroupBy.from(druidRel, filter, Grouping.create(dimensions, aggregations), aggregate.getRowType(), rowOrder);
    } else {
        // groupBy on a base dataSource.
        return druidRel.withQueryBuilder(druidRel.getQueryBuilder().withFilter(filter).withGrouping(Grouping.create(dimensions, aggregations), aggregate.getRowType(), rowOrder));
    }
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) RexLiteral(org.apache.calcite.rex.RexLiteral) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) ValueType(io.druid.segment.column.ValueType) RowExtraction(io.druid.sql.calcite.expression.RowExtraction) Aggregation(io.druid.sql.calcite.aggregation.Aggregation) AggregateCall(org.apache.calcite.rel.core.AggregateCall) Project(org.apache.calcite.rel.core.Project) ISE(io.druid.java.util.common.ISE) RowSignature(io.druid.sql.calcite.table.RowSignature) DimFilter(io.druid.query.filter.DimFilter) NotDimFilter(io.druid.query.filter.NotDimFilter) AndDimFilter(io.druid.query.filter.AndDimFilter) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Aggregation (io.druid.sql.calcite.aggregation.Aggregation)4 RexNode (org.apache.calcite.rex.RexNode)4 ISE (io.druid.java.util.common.ISE)2 AggregatorFactory (io.druid.query.aggregation.AggregatorFactory)2 PostAggregator (io.druid.query.aggregation.PostAggregator)2 ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator)2 FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator)2 AndDimFilter (io.druid.query.filter.AndDimFilter)2 DimFilter (io.druid.query.filter.DimFilter)2 NotDimFilter (io.druid.query.filter.NotDimFilter)2 PostAggregatorFactory (io.druid.sql.calcite.aggregation.PostAggregatorFactory)2 RowExtraction (io.druid.sql.calcite.expression.RowExtraction)2 RexLiteral (org.apache.calcite.rex.RexLiteral)2 SqlTypeName (org.apache.calcite.sql.type.SqlTypeName)2 CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory)1 DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory)1 DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory)1 DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory)1 LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory)1 LongMinAggregatorFactory (io.druid.query.aggregation.LongMinAggregatorFactory)1