Search in sources :

Example 36 with AggregateCall

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class ConvertCountToDirectScanRule method collectCounts.

/**
 * Collects counts for each aggregation call by using the metadata summary information.
 * Returns an empty result map if the count could not be determined for at least one aggregation call.
 *
 * For each aggregate call determines whether the count can be calculated. Counts are collected only for
 * the COUNT function.
 *   1. First, the total row count is taken from the metadata summary.
 *   2. For COUNT(*), COUNT(&lt;non null column&gt;) and COUNT(&lt;implicit column&gt;), the count = total row count.
 *   3. For COUNT(nullable column), the count = (total row count - column's null count).
 *   4. The count cannot be calculated for partition columns.
 *   5. For columns that are not present in the summary (non-existent columns), the count = 0.
 *
 * @param settings planner options
 * @param metadataSummary metadata summary containing row counts and column counts
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be {@code null}
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, Metadata_V4.MetadataSummary metadataSummary, Aggregate agg, TableScan scan, Project project) {
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final long totalRecordCount = metadataSummary.getTotalRowCount();
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // rule can be applied only for count function, return empty counts
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        }
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // the aggregate argument points into the project; only a plain input reference
                // can be mapped back to a scan column
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                }
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            }
            // Lower-case with a fixed locale: the default-locale variant maps 'I' to a dotless 'ı'
            // under e.g. Turkish locales, which would break the implicit column and metadata lookups.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(java.util.Locale.ROOT);
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    // count can not be calculated for partition columns
                    return ImmutableMap.of();
                }
                Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
                if (columnMetadata == null) {
                    // If the column doesn't exist in the table, row count is set to 0
                    cnt = 0;
                } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                } else {
                    // count of a nullable column = (total row count - column's null count)
                    cnt = totalRecordCount - columnMetadata.totalNullCount;
                }
            }
        } else {
            // COUNT with zero or several arguments that is not covered above is not supported
            return ImmutableMap.of();
        }
        // the position prefix keeps keys unique even if two calls share the same name
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    }
    return ImmutableMap.copyOf(result);
}
Also used : LinkedHashMap(java.util.LinkedHashMap) AggregateCall(org.apache.calcite.rel.core.AggregateCall) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef)

Example 37 with AggregateCall

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPrelBase method createKeysAndExprs.

/**
 * Fills in the grouping keys and the aggregate expressions of this aggregate. When the operator
 * runs as phase 1 of 2, it additionally records the aggregate calls to be used by phase 2.
 */
protected void createKeysAndExprs() {
    final List<String> inputFieldNames = getInput().getRowType().getFieldNames();
    final List<String> outputFieldNames = getRowType().getFieldNames();

    // Every grouping column is referenced by the name it has in the input row type.
    for (int groupIndex : BitSets.toIter(groupSet)) {
        FieldReference keyRef = FieldReference.getWithQuotedRef(inputFieldNames.get(groupIndex));
        keys.add(new NamedExpression(keyRef, keyRef));
    }

    for (Ord<AggregateCall> indexedCall : Ord.zip(aggCalls)) {
        final AggregateCall call = indexedCall.e;
        // The output field of an aggregate call is looked up after the grouping columns.
        final int outputOrdinal = groupSet.cardinality() + indexedCall.i;
        FieldReference outputRef = FieldReference.getWithQuotedRef(outputFieldNames.get(outputOrdinal));
        aggExprs.add(new NamedExpression(toDrill(call, inputFieldNames), outputRef));

        if (getOperatorPhase() != OperatorPhase.PHASE_1of2) {
            continue;
        }

        // If we are doing a COUNT aggregate in Phase1of2, then in Phase2of2 we should SUM the COUNTs;
        // any other function is carried over unchanged and applied to the phase-1 output column.
        final SqlAggFunction phase2Function;
        if (call.getAggregation().getName().equals("COUNT")) {
            phase2Function = new SqlSumCountAggFunction(call.getType());
        } else {
            phase2Function = call.getAggregation();
        }
        phase2AggCallList.add(AggregateCall.create(phase2Function, call.isDistinct(), call.isApproximate(), Collections.singletonList(outputOrdinal), call.filterArg, call.getType(), call.getName()));
    }
}
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) LogicalExpression(org.apache.drill.common.expression.LogicalExpression) FieldReference(org.apache.drill.common.expression.FieldReference) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) SqlAggFunction(org.apache.calcite.sql.SqlAggFunction)

Example 38 with AggregateCall

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPrelBase method prepareForLateralUnnestPipeline.

/**
 * Builds a copy of this aggregate on top of {@code children.get(0)} in which column 0 is added to
 * the grouping columns and every existing grouping column and aggregate argument index is shifted
 * up by one.
 * NOTE(review): presumably column 0 is the extra column introduced by the lateral/unnest pipeline - confirm.
 *
 * @param children new inputs; only the first one is used
 * @return the rewritten aggregate
 */
@Override
public Prel prepareForLateralUnnestPipeline(List<RelNode> children) {
    // Column 0 becomes a grouping column; original grouping columns move one position to the right.
    final List<Integer> shiftedGroupColumns = Lists.newArrayList();
    shiftedGroupColumns.add(0);
    for (int column : groupSet.asList()) {
        shiftedGroupColumns.add(column + 1);
    }
    final ImmutableBitSet shiftedGroupSet = ImmutableBitSet.of(shiftedGroupColumns);
    final List<ImmutableBitSet> shiftedGroupSets = Lists.newArrayList();
    shiftedGroupSets.add(shiftedGroupSet);

    // Re-create each aggregate call with its argument indexes shifted the same way.
    final List<AggregateCall> shiftedCalls = Lists.newArrayList();
    for (AggregateCall original : aggCalls) {
        final List<Integer> shiftedArgs = Lists.newArrayList();
        for (int argIndex : original.getArgList()) {
            shiftedArgs.add(argIndex + 1);
        }
        AggregateCall shifted = AggregateCall.create(original.getAggregation(), original.isDistinct(), original.isApproximate(), shiftedArgs, original.filterArg, original.type, original.name);
        shiftedCalls.add(shifted);
    }

    final RelNode newInput = children.get(0);
    return (Prel) copy(traitSet, newInput, indicator, shiftedGroupSet, shiftedGroupSets, shiftedCalls);
}
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet)

Example 39 with AggregateCall

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class AggPruleBase method create2PhasePlan.

/**
 * Decides whether a 2 phase aggregation plan should be created.
 * A 2 phase plan is only considered for the aggregates SUM, MIN, MAX, COUNT and $SUM0;
 * if any aggregate function is not one of these, no 2 phase plan is generated.
 *
 * @param call rule call whose first operand's first input is used for the row count estimate
 * @param aggregate aggregate whose calls are checked
 * @return true if a 2 phase plan should be created, false otherwise
 */
protected boolean create2PhasePlan(RelOptRuleCall call, DrillAggregateRel aggregate) {
    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    RelNode child = call.rel(0).getInputs().get(0);
    boolean smallInput = child.estimateRowCount(child.getCluster().getMetadataQuery()) < settings.getSliceTarget();

    if (!settings.isMultiPhaseAggEnabled()) {
        return false;
    }
    if (settings.isSingleMode()) {
        return false;
    }
    // Can override a small child - e.g., for testing with a small table
    if (smallInput && !settings.isForce2phaseAggr()) {
        return false;
    }

    for (AggregateCall aggCall : aggregate.getAggCallList()) {
        String functionName = aggCall.getAggregation().getName();
        boolean supported = functionName.equals(SqlKind.SUM.name())
            || functionName.equals(SqlKind.MIN.name())
            || functionName.equals(SqlKind.MAX.name())
            || functionName.equals(SqlKind.COUNT.name())
            || functionName.equals("$SUM0");
        if (!supported) {
            return false;
        }
    }
    return true;
}
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelNode(org.apache.calcite.rel.RelNode)

Example 40 with AggregateCall

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.

the class ConvertCountToDirectScanPrule method collectCounts.

/**
 * Collects counts for each aggregation call.
 * Returns an empty result map if the count could not be determined for at least one aggregation call.
 *
 * For each aggregate call determines whether the count can be calculated. Counts are collected only for
 * the COUNT function.
 * For star, not null expressions and implicit columns the count is set to the total record number.
 * For other cases the count is obtained from the group scan operator. The count cannot be calculated
 * for partition columns.
 *
 * @param settings planner options
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be {@code null}
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final GroupScan oldGrpScan = scan.getGroupScan();
    final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // rule can be applied only for count function, return empty counts
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        }
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // the aggregate argument points into the project; only a plain input reference
                // can be mapped back to a scan column
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                }
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            }
            // Lower-case with a fixed locale: the default-locale variant maps 'I' to a dotless 'ı'
            // under e.g. Turkish locales, which would break the implicit column and stats lookups.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(java.util.Locale.ROOT);
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    // count can not be calculated for partition columns
                    return ImmutableMap.of();
                }
                cnt = oldGrpScan.getColumnValueCount(simplePath);
                if (cnt == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                }
            }
        } else {
            // COUNT with zero or several arguments that is not covered above is not supported
            return ImmutableMap.of();
        }
        // the position prefix keeps keys unique even if two calls share the same name
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    }
    return ImmutableMap.copyOf(result);
}
Also used : MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

AggregateCall (org.apache.calcite.rel.core.AggregateCall)158 ArrayList (java.util.ArrayList)82 RexNode (org.apache.calcite.rex.RexNode)78 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)57 RelNode (org.apache.calcite.rel.RelNode)54 RexBuilder (org.apache.calcite.rex.RexBuilder)52 RelDataType (org.apache.calcite.rel.type.RelDataType)42 Aggregate (org.apache.calcite.rel.core.Aggregate)37 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)36 RexInputRef (org.apache.calcite.rex.RexInputRef)33 RelBuilder (org.apache.calcite.tools.RelBuilder)29 HashMap (java.util.HashMap)28 SqlAggFunction (org.apache.calcite.sql.SqlAggFunction)28 List (java.util.List)27 RexLiteral (org.apache.calcite.rex.RexLiteral)23 Pair (org.apache.calcite.util.Pair)20 ImmutableList (com.google.common.collect.ImmutableList)19 Project (org.apache.calcite.rel.core.Project)17 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)17 LogicalAggregate (org.apache.calcite.rel.logical.LogicalAggregate)16