use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.
the class ConvertCountToDirectScanRule method collectCounts.
/**
* Collects counts for each aggregation call by using the metadata summary information
* Will return empty result map if was not able to determine count for at least one aggregation call.
*
* For each aggregate call will determine if count can be calculated. Collects counts only for COUNT function.
* 1. First, we get the total row count from the metadata summary.
* 2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count
* 3. For COUNT(nullable column), count = (total row count - column's null count)
* 4. Also count can not be calculated for parition columns.
* 5. For the columns that are not present in the Summary(Non-existent columns), the count = 0
*
* @param settings planner options
* @param metadataSummary metadata summary containing row counts and column counts
* @param agg aggregate relational expression
* @param scan scan relational expression
* @param project project relational expression
* @return result map where key is count column name, value is count value
*/
private Map<String, Long> collectCounts(PlannerSettings settings, Metadata_V4.MetadataSummary metadataSummary, Aggregate agg, TableScan scan, Project project) {
final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
final long totalRecordCount = metadataSummary.getTotalRowCount();
final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
for (int i = 0; i < agg.getAggCallList().size(); i++) {
AggregateCall aggCall = agg.getAggCallList().get(i);
long cnt;
// rule can be applied only for count function, return empty counts
if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
return ImmutableMap.of();
}
if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
cnt = totalRecordCount;
} else if (aggCall.getArgList().size() == 1) {
// count(columnName) ==> Agg ( Scan )) ==> columnValueCount
int index = aggCall.getArgList().get(0);
if (project != null) {
// return count of "col2" in Scan's metadata, if found.
if (!(project.getProjects().get(index) instanceof RexInputRef)) {
// do not apply for all other cases.
return ImmutableMap.of();
}
index = ((RexInputRef) project.getProjects().get(index)).getIndex();
}
String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
// for implicit column count will be the same as total record count
if (implicitColumnsNames.contains(columnName)) {
cnt = totalRecordCount;
} else {
SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
return ImmutableMap.of();
}
Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
if (columnMetadata == null) {
// If the column doesn't exist in the table, row count is set to 0
cnt = 0;
} else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
// if column stats is not available don't apply this rule, return empty counts
return ImmutableMap.of();
} else {
// count of a nullable column = (total row count - column's null count)
cnt = totalRecordCount - columnMetadata.totalNullCount;
}
}
} else {
return ImmutableMap.of();
}
String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
result.put(name, cnt);
}
return ImmutableMap.copyOf(result);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.
the class AggPrelBase method createKeysAndExprs.
protected void createKeysAndExprs() {
final List<String> childFields = getInput().getRowType().getFieldNames();
final List<String> fields = getRowType().getFieldNames();
for (int group : BitSets.toIter(groupSet)) {
FieldReference fr = FieldReference.getWithQuotedRef(childFields.get(group));
keys.add(new NamedExpression(fr, fr));
}
for (Ord<AggregateCall> aggCall : Ord.zip(aggCalls)) {
int aggExprOrdinal = groupSet.cardinality() + aggCall.i;
FieldReference ref = FieldReference.getWithQuotedRef(fields.get(aggExprOrdinal));
LogicalExpression expr = toDrill(aggCall.e, childFields);
NamedExpression ne = new NamedExpression(expr, ref);
aggExprs.add(ne);
if (getOperatorPhase() == OperatorPhase.PHASE_1of2) {
if (aggCall.e.getAggregation().getName().equals("COUNT")) {
// If we are doing a COUNT aggregate in Phase1of2, then in Phase2of2 we should SUM the COUNTs,
SqlAggFunction sumAggFun = new SqlSumCountAggFunction(aggCall.e.getType());
AggregateCall newAggCall = AggregateCall.create(sumAggFun, aggCall.e.isDistinct(), aggCall.e.isApproximate(), Collections.singletonList(aggExprOrdinal), aggCall.e.filterArg, aggCall.e.getType(), aggCall.e.getName());
phase2AggCallList.add(newAggCall);
} else {
AggregateCall newAggCall = AggregateCall.create(aggCall.e.getAggregation(), aggCall.e.isDistinct(), aggCall.e.isApproximate(), Collections.singletonList(aggExprOrdinal), aggCall.e.filterArg, aggCall.e.getType(), aggCall.e.getName());
phase2AggCallList.add(newAggCall);
}
}
}
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.
the class AggPrelBase method prepareForLateralUnnestPipeline.
@Override
public Prel prepareForLateralUnnestPipeline(List<RelNode> children) {
List<Integer> groupingCols = Lists.newArrayList();
groupingCols.add(0);
for (int groupingCol : groupSet.asList()) {
groupingCols.add(groupingCol + 1);
}
ImmutableBitSet groupingSet = ImmutableBitSet.of(groupingCols);
List<ImmutableBitSet> groupingSets = Lists.newArrayList();
groupingSets.add(groupingSet);
List<AggregateCall> aggregateCalls = Lists.newArrayList();
for (AggregateCall aggCall : aggCalls) {
List<Integer> arglist = Lists.newArrayList();
for (int arg : aggCall.getArgList()) {
arglist.add(arg + 1);
}
aggregateCalls.add(AggregateCall.create(aggCall.getAggregation(), aggCall.isDistinct(), aggCall.isApproximate(), arglist, aggCall.filterArg, aggCall.type, aggCall.name));
}
return (Prel) copy(traitSet, children.get(0), indicator, groupingSet, groupingSets, aggregateCalls);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.
the class AggPruleBase method create2PhasePlan.
// Create 2 phase aggr plan for aggregates such as SUM, MIN, MAX
// If any of the aggregate functions are not one of these, then we
// currently won't generate a 2 phase plan.
protected boolean create2PhasePlan(RelOptRuleCall call, DrillAggregateRel aggregate) {
PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
RelNode child = call.rel(0).getInputs().get(0);
boolean smallInput = child.estimateRowCount(child.getCluster().getMetadataQuery()) < settings.getSliceTarget();
if (!settings.isMultiPhaseAggEnabled() || settings.isSingleMode() || // Can override a small child - e.g., for testing with a small table
(smallInput && !settings.isForce2phaseAggr())) {
return false;
}
for (AggregateCall aggCall : aggregate.getAggCallList()) {
String name = aggCall.getAggregation().getName();
if (!(name.equals(SqlKind.SUM.name()) || name.equals(SqlKind.MIN.name()) || name.equals(SqlKind.MAX.name()) || name.equals(SqlKind.COUNT.name()) || name.equals("$SUM0"))) {
return false;
}
}
return true;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.AggregateCall in project drill by apache.
the class ConvertCountToDirectScanPrule method collectCounts.
/**
* Collects counts for each aggregation call.
* Will return empty result map if was not able to determine count for at least one aggregation call,
*
* For each aggregate call will determine if count can be calculated. Collects counts only for COUNT function.
* For star, not null expressions and implicit columns sets count to total record number.
* For other cases obtains counts from group scan operator. Also count can not be calculated for partition columns.
*
* @param agg aggregate relational expression
* @param scan scan relational expression
* @param project project relational expression
* @return result map where key is count column name, value is count value
*/
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
final GroupScan oldGrpScan = scan.getGroupScan();
final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
for (int i = 0; i < agg.getAggCallList().size(); i++) {
AggregateCall aggCall = agg.getAggCallList().get(i);
long cnt;
// rule can be applied only for count function, return empty counts
if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
return ImmutableMap.of();
}
if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
cnt = totalRecordCount;
} else if (aggCall.getArgList().size() == 1) {
// count(columnName) ==> Agg ( Scan )) ==> columnValueCount
int index = aggCall.getArgList().get(0);
if (project != null) {
// return count of "col2" in Scan's metadata, if found.
if (!(project.getProjects().get(index) instanceof RexInputRef)) {
// do not apply for all other cases.
return ImmutableMap.of();
}
index = ((RexInputRef) project.getProjects().get(index)).getIndex();
}
String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase();
// for implicit column count will the same as total record count
if (implicitColumnsNames.contains(columnName)) {
cnt = totalRecordCount;
} else {
SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
return ImmutableMap.of();
}
cnt = oldGrpScan.getColumnValueCount(simplePath);
if (cnt == Statistic.NO_COLUMN_STATS) {
// if column stats is not available don't apply this rule, return empty counts
return ImmutableMap.of();
}
}
} else {
return ImmutableMap.of();
}
String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
result.put(name, cnt);
}
return ImmutableMap.copyOf(result);
}
Aggregations