Search in sources :

Example 36 with AggregateCall

use of org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class ConvertCountToDirectScanRule method collectCounts.

/**
 * Collects counts for each aggregation call by using the metadata summary information.
 * Will return an empty result map if it was not able to determine the count for at least one aggregation call.
 * For each aggregate call, determines whether the count can be calculated. Collects counts only for the COUNT function.
 *   1. First, we get the total row count from the metadata summary.
 *   2. For COUNT(*), COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count.
 *   3. For COUNT(nullable column), count = (total row count - column's null count).
 *   4. The count cannot be calculated for partition columns.
 *   5. For columns that are not present in the summary (non-existent columns), the count = 0.
 *
 * @param settings planner options
 * @param metadataSummary metadata summary containing row counts and column counts
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, Metadata_V4.MetadataSummary metadataSummary, Aggregate agg, TableScan scan, Project project) {
    // NOTE(review): closing braces appear to have been dropped from this excerpt;
    // the nesting described in the comments below follows the indentation.

    // Key set of the implicit file columns built from the planner options.
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    // Total row count as reported by the metadata summary.
    final long totalRecordCount = metadataSummary.getTotalRowCount();
    // Insertion-ordered map, so entries come out in aggregate-call order.
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // The rule applies only to the count function (name compared case-insensitively);
        // any other aggregate makes the whole result empty.
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        // Calls accepted by containsStarOrNotNullInput count every row.
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // With a project in between, the argument index refers to a project expression.
                // Only a plain input reference can be mapped back to a scan column.
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                // Replace the project-level index with the referenced input index.
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            // Column name is lower-cased before the implicit-column and metadata lookups.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                // Counts are not derived for partition columns; give up on the whole rule.
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    return ImmutableMap.of();
                // Look up the column's entry in the metadata summary by its schema path.
                Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
                if (columnMetadata == null) {
                    // If the column doesn't exist in the table, row count is set to 0
                    cnt = 0;
                } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                } else {
                    // count of a nullable column = (total row count - column's null count)
                    cnt = totalRecordCount - columnMetadata.totalNullCount;
        } else {
            // Any argument count other than one is not handled here (the star / not-null case was handled above).
            return ImmutableMap.of();
        // Result key: "count" + call position + "$" + the call name, falling back to
        // the call's string form when it has no name.
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    // Hand back an immutable snapshot of the collected counts.
    return ImmutableMap.copyOf(result);
Also used : LinkedHashMap(java.util.LinkedHashMap) AggregateCall(org.apache.calcite.rel.core.AggregateCall) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef)

Example 37 with AggregateCall

use of org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPrelBase method createKeysAndExprs.

protected void createKeysAndExprs() {
    // Builds the grouping-key expressions and the aggregate expressions for this rel.
    // NOTE(review): this excerpt is truncated: closing braces are missing and the
    // code that consumes `ne` / `newAggCall` is not visible here.

    // Field names of the input row and of this rel's own output row.
    final List<String> childFields = getInput().getRowType().getFieldNames();
    final List<String> fields = getRowType().getFieldNames();
    // One key per set bit in groupSet; the same field reference is used for both
    // arguments of the NamedExpression.
    for (int group : BitSets.toIter(groupSet)) {
        FieldReference fr = FieldReference.getWithQuotedRef(childFields.get(group));
        keys.add(new NamedExpression(fr, fr));
    // NOTE(review): the iterable expression of this loop is missing from the excerpt.
    for (Ord<AggregateCall> aggCall : {
        // Position of this aggregate in the output row: offset past the grouping columns.
        int aggExprOrdinal = groupSet.cardinality() + aggCall.i;
        FieldReference ref = FieldReference.getWithQuotedRef(fields.get(aggExprOrdinal));
        // Convert the aggregate call to a LogicalExpression over the input fields.
        LogicalExpression expr = toDrill(aggCall.e, childFields);
        NamedExpression ne = new NamedExpression(expr, ref);
        // In phase 1 of 2, also build the aggregate call that a later phase would apply to
        // this phase's output column (its single argument is aggExprOrdinal).
        if (getOperatorPhase() == OperatorPhase.PHASE_1of2) {
            if (aggCall.e.getAggregation().getName().equals("COUNT")) {
                // If we are doing a COUNT aggregate in Phase1of2, then in Phase2of2 we should SUM the COUNTs,
                // so the second-phase call uses SqlSumCountAggFunction instead of COUNT.
                SqlAggFunction sumAggFun = new SqlSumCountAggFunction(aggCall.e.getType());
                AggregateCall newAggCall = AggregateCall.create(sumAggFun, aggCall.e.isDistinct(), aggCall.e.isApproximate(), Collections.singletonList(aggExprOrdinal), aggCall.e.filterArg, aggCall.e.getType(), aggCall.e.getName());
            } else {
                // Any other aggregate keeps the same aggregation function.
                AggregateCall newAggCall = AggregateCall.create(aggCall.e.getAggregation(), aggCall.e.isDistinct(), aggCall.e.isApproximate(), Collections.singletonList(aggExprOrdinal), aggCall.e.filterArg, aggCall.e.getType(), aggCall.e.getName());
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction)

Example 38 with AggregateCall

use of org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPrelBase method prepareForLateralUnnestPipeline.

public Prel prepareForLateralUnnestPipeline(List<RelNode> children) {
    // Returns a copy of this rel over children.get(0) in which every grouping column
    // index and every aggregate argument index is shifted up by one.
    // NOTE(review): presumably the shift makes room for an extra leading column in the
    // lateral/unnest pipeline; confirm against the caller. Closing braces are
    // missing from this excerpt.
    List<Integer> groupingCols = Lists.newArrayList();
    for (int groupingCol : groupSet.asList()) {
        groupingCols.add(groupingCol + 1);
    ImmutableBitSet groupingSet = ImmutableBitSet.of(groupingCols);
    // Passed to copy() as an empty list; nothing is added to it in the visible code.
    List<ImmutableBitSet> groupingSets = Lists.newArrayList();
    List<AggregateCall> aggregateCalls = Lists.newArrayList();
    for (AggregateCall aggCall : aggCalls) {
        // Rebuild each aggregate call with its argument indexes shifted by one.
        List<Integer> arglist = Lists.newArrayList();
        for (int arg : aggCall.getArgList()) {
            arglist.add(arg + 1);
        // NOTE(review): the trailing argument(s) of this call are missing from the excerpt.
        aggregateCalls.add(AggregateCall.create(aggCall.getAggregation(), aggCall.isDistinct(), aggCall.isApproximate(), arglist, aggCall.filterArg, aggCall.type,;
    return (Prel) copy(traitSet, children.get(0), indicator, groupingSet, groupingSets, aggregateCalls);
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet)

Example 39 with AggregateCall

use of org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPruleBase method create2PhasePlan.

// Decides whether a 2 phase aggregation plan may be created for the given aggregate.
// A 2 phase plan is generated for aggregates such as SUM, MIN, MAX.
// If any of the aggregate functions are not one of the accepted ones, then we
// currently won't generate a 2 phase plan.
// Returns true when a 2 phase plan is allowed, false otherwise.
// NOTE(review): closing braces are missing from this excerpt.
protected boolean create2PhasePlan(RelOptRuleCall call, DrillAggregateRel aggregate) {
    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    // First input of the rel matched at position 0 of the rule call.
    RelNode child = call.rel(0).getInputs().get(0);
    // The input counts as small when its estimated row count is below the slice target.
    boolean smallInput = child.estimateRowCount(child.getCluster().getMetadataQuery()) < settings.getSliceTarget();
    // No 2 phase plan when multi-phase aggregation is disabled, when running in single mode,
    // or when the input is small and 2 phase aggregation is not being forced.
    if (!settings.isMultiPhaseAggEnabled() || settings.isSingleMode() || // Can override a small child - e.g., for testing with a small table
    (smallInput && !settings.isForce2phaseAggr())) {
        return false;
    // Every aggregate function must be one of the accepted names.
    for (AggregateCall aggCall : aggregate.getAggCallList()) {
        String name = aggCall.getAggregation().getName();
        // NOTE(review): four of the compared names are missing from this excerpt; only "$SUM0" survives.
        if (!(name.equals( || name.equals( || name.equals( || name.equals( || name.equals("$SUM0"))) {
            return false;
    return true;
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelNode(org.apache.calcite.rel.RelNode)

Example 40 with AggregateCall

use of org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class ConvertCountToDirectScanPrule method collectCounts.

/**
 * Collects counts for each aggregation call.
 * Will return an empty result map if it was not able to determine the count for at least one aggregation call.
 * For each aggregate call, determines whether the count can be calculated. Collects counts only for the COUNT function.
 * For star, not null expressions and implicit columns, sets the count to the total record number.
 * For other cases, obtains counts from the group scan operator. The count cannot be calculated for partition columns.
 *
 * @param settings planner options
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
    // NOTE(review): closing braces appear to have been dropped from this excerpt;
    // the nesting described in the comments below follows the indentation.

    // Key set of the implicit file columns built from the planner options.
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final GroupScan oldGrpScan = scan.getGroupScan();
    // Total record count taken from the group scan's statistics, cast to long.
    final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
    // Insertion-ordered map, so entries come out in aggregate-call order.
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // The rule applies only to the count function (name compared case-insensitively);
        // any other aggregate makes the whole result empty.
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        // Calls accepted by containsStarOrNotNullInput count every record.
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // With a project in between, the argument index refers to a project expression.
                // Only a plain input reference can be mapped back to a scan column.
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                // Replace the project-level index with the referenced input index.
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            // Column name is lower-cased before the implicit-column and group scan lookups.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                // Counts are not derived for partition columns; give up on the whole rule.
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    return ImmutableMap.of();
                // Ask the group scan for the value count of this column.
                cnt = oldGrpScan.getColumnValueCount(simplePath);
                if (cnt == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
        } else {
            // Any argument count other than one is not handled here (the star / not-null case was handled above).
            return ImmutableMap.of();
        // Result key: "count" + call position + "$" + the call name, falling back to
        // the call's string form when it has no name.
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    // Hand back an immutable snapshot of the collected counts.
    return ImmutableMap.copyOf(result);
Also used : MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef) LinkedHashMap(java.util.LinkedHashMap)


AggregateCall (org.apache.calcite.rel.core.AggregateCall)158 ArrayList (java.util.ArrayList)82 RexNode (org.apache.calcite.rex.RexNode)78 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)57 RelNode (org.apache.calcite.rel.RelNode)54 RexBuilder (org.apache.calcite.rex.RexBuilder)52 RelDataType (org.apache.calcite.rel.type.RelDataType)42 Aggregate (org.apache.calcite.rel.core.Aggregate)37 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)36 RexInputRef (org.apache.calcite.rex.RexInputRef)33 RelBuilder ( HashMap (java.util.HashMap)28 SqlAggFunction (org.apache.calcite.sql.SqlAggFunction)28 List (java.util.List)27 RexLiteral (org.apache.calcite.rex.RexLiteral)23 Pair (org.apache.calcite.util.Pair)20 ImmutableList ( Project (org.apache.calcite.rel.core.Project)17 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)17 LogicalAggregate (org.apache.calcite.rel.logical.LogicalAggregate)16