Search in sources :

Example 76 with Aggregate

use of org.apache.calcite.rel.core.Aggregate in project drill by apache.

the class ConvertCountToDirectScanRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    final Aggregate agg = call.rel(0);
    final TableScan scan = call.rel(call.rels.length - 1);
    final Project project = call.rels.length == 3 ? (Project) call.rel(1) : null;
    // 3) Additional checks are done further below ..
    if (agg.getGroupCount() > 0 || agg.containsDistinctCall()) {
        return;
    }
    DrillTable drillTable = DrillRelOptUtil.getDrillTable(scan);
    if (drillTable == null) {
        logger.debug("Rule does not apply since an eligible drill table instance was not found.");
        return;
    }
    Object selection = drillTable.getSelection();
    if (!(selection instanceof FormatSelection)) {
        logger.debug("Rule does not apply since only Parquet file format is eligible.");
        return;
    }
    PlannerSettings settings = call.getPlanner().getContext().unwrap(PlannerSettings.class);
    // Rule is applicable only if the statistics for row count and null count are available from the metadata,
    FormatSelection formatSelection = (FormatSelection) selection;
    // Rule cannot be applied if the selection had wildcard since the totalrowcount cannot be read from the parent directory
    if (formatSelection.getSelection().hadWildcard()) {
        logger.debug("Rule does not apply when there is a wild card since the COUNT could not be determined from metadata.");
        return;
    }
    Pair<Boolean, Metadata_V4.MetadataSummary> status = checkMetadataForScanStats(settings, drillTable, formatSelection);
    if (!status.getLeft()) {
        logger.debug("Rule does not apply since MetadataSummary metadata was not found.");
        return;
    }
    Metadata_V4.MetadataSummary metadataSummary = status.getRight();
    Map<String, Long> result = collectCounts(settings, metadataSummary, agg, scan, project);
    logger.trace("Calculated the following aggregate counts: {}", result);
    // if counts could not be determined, rule won't be applied
    if (result.isEmpty()) {
        logger.debug("Rule does not apply since one or more COUNTs could not be determined from metadata.");
        return;
    }
    Path summaryFileName = Metadata.getSummaryFileName(formatSelection.getSelection().getSelectionRoot());
    final RelDataType scanRowType = CountToDirectScanUtils.constructDataType(agg, result.keySet());
    final DynamicPojoRecordReader<Long> reader = new DynamicPojoRecordReader<>(CountToDirectScanUtils.buildSchema(scanRowType.getFieldNames()), Collections.singletonList(new ArrayList<>(result.values())));
    final ScanStats scanStats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, scanRowType.getFieldCount());
    final MetadataDirectGroupScan directScan = new MetadataDirectGroupScan(reader, summaryFileName, 1, scanStats, true, false);
    final DrillDirectScanRel newScan = new DrillDirectScanRel(scan.getCluster(), scan.getTraitSet().plus(DrillRel.DRILL_LOGICAL), directScan, scanRowType);
    final DrillProjectRel newProject = new DrillProjectRel(agg.getCluster(), agg.getTraitSet().plus(DrillRel.DRILL_LOGICAL), newScan, CountToDirectScanUtils.prepareFieldExpressions(scanRowType), agg.getRowType());
    call.transformTo(newProject);
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableScan(org.apache.calcite.rel.core.TableScan) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) DynamicPojoRecordReader(org.apache.drill.exec.store.pojo.DynamicPojoRecordReader) ArrayList(java.util.ArrayList) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) RelDataType(org.apache.calcite.rel.type.RelDataType) Project(org.apache.calcite.rel.core.Project) MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) Aggregate(org.apache.calcite.rel.core.Aggregate) ScanStats(org.apache.drill.exec.physical.base.ScanStats)

Example 77 with Aggregate

use of org.apache.calcite.rel.core.Aggregate in project drill by apache.

the class DrillReduceAggregatesRule method reduceAggs.

/*
  private boolean isMatch(AggregateCall call) {
    if (call.getAggregation() instanceof SqlAvgAggFunction) {
      final SqlAvgAggFunction.Subtype subtype =
          ((SqlAvgAggFunction) call.getAggregation()).getSubtype();
      return (subtype == SqlAvgAggFunction.Subtype.AVG);
    }
    return false;
  }
 */
/**
 * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in
 * the aggregates list to.
 *
 * <p>It handles newly generated common subexpressions since this was done
 * at the sql2rel stage.
 */
private void reduceAggs(RelOptRuleCall ruleCall, Aggregate oldAggRel) {
    RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
    List<AggregateCall> oldCalls = oldAggRel.getAggCallList();
    final int nGroups = oldAggRel.getGroupCount();
    List<AggregateCall> newCalls = new ArrayList<>();
    Map<AggregateCall, RexNode> aggCallMapping = new HashMap<>();
    List<RexNode> projList = new ArrayList<>();
    // pass through group key
    for (int i = 0; i < nGroups; ++i) {
        projList.add(rexBuilder.makeInputRef(getFieldType(oldAggRel, i), i));
    }
    // List of input expressions. If a particular aggregate needs more, it
    // will add an expression to the end, and we will create an extra
    // project.
    RelNode input = oldAggRel.getInput();
    List<RexNode> inputExprs = new ArrayList<>();
    for (RelDataTypeField field : input.getRowType().getFieldList()) {
        inputExprs.add(rexBuilder.makeInputRef(field.getType(), inputExprs.size()));
    }
    // create new agg function calls and rest of project list together
    for (AggregateCall oldCall : oldCalls) {
        projList.add(reduceAgg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs));
    }
    final int extraArgCount = inputExprs.size() - input.getRowType().getFieldCount();
    if (extraArgCount > 0) {
        input = relBuilderFactory.create(input.getCluster(), null).push(input).projectNamed(inputExprs, CompositeList.of(input.getRowType().getFieldNames(), Collections.nCopies(extraArgCount, null)), true).build();
    }
    Aggregate newAggRel = newAggregateRel(oldAggRel, input, newCalls);
    RelNode projectRel = relBuilderFactory.create(newAggRel.getCluster(), null).push(newAggRel).projectNamed(projList, oldAggRel.getRowType().getFieldNames(), true).build();
    ruleCall.transformTo(projectRel);
}
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RexBuilder(org.apache.calcite.rex.RexBuilder) Aggregate(org.apache.calcite.rel.core.Aggregate) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Aggregate (org.apache.calcite.rel.core.Aggregate)75 RelNode (org.apache.calcite.rel.RelNode)44 ArrayList (java.util.ArrayList)39 AggregateCall (org.apache.calcite.rel.core.AggregateCall)37 RexNode (org.apache.calcite.rex.RexNode)32 RelBuilder (org.apache.calcite.tools.RelBuilder)27 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)27 LogicalAggregate (org.apache.calcite.rel.logical.LogicalAggregate)23 RexBuilder (org.apache.calcite.rex.RexBuilder)22 Project (org.apache.calcite.rel.core.Project)21 HashMap (java.util.HashMap)17 RexInputRef (org.apache.calcite.rex.RexInputRef)15 Join (org.apache.calcite.rel.core.Join)14 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)14 RelDataType (org.apache.calcite.rel.type.RelDataType)14 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)14 Filter (org.apache.calcite.rel.core.Filter)13 HiveAggregate (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate)13 List (java.util.List)12 ImmutableList (com.google.common.collect.ImmutableList)11