Search in sources :

Example 71 with RexInputRef

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.

the class DrillProjectPushIntoLateralJoinRule method onMatch.

/**
 * Handles a match of a project ({@code call.rel(0)}) sitting on top of a lateral join
 * ({@code call.rel(1)}).
 *
 * The rule does nothing when either row type contains a star column, when the lateral join
 * already has {@code excludeCorrelateColumn} set, or when any input reference used by the
 * project points at the first required (correlation) column of the join.
 * Otherwise the lateral join is re-created with the boolean constructor flag set to {@code true}
 * (NOTE(review): presumably the "exclude correlate column" flag - confirm against
 * DrillLateralJoinRel) and, unless the project is trivial, a new project with re-mapped
 * expressions is placed on top of it.
 *
 * @param call rule call holding the matched project and lateral join
 */
public void onMatch(RelOptRuleCall call) {
    final DrillProjectRel project = call.rel(0);
    final DrillLateralJoinRel lateralJoin = call.rel(1);

    final boolean hasStarColumn = StarColumnHelper.containsStarColumn(project.getRowType())
        || StarColumnHelper.containsStarColumn(lateralJoin.getRowType());
    if (hasStarColumn || lateralJoin.excludeCorrelateColumn) {
        return;
    }

    // Gather every input reference used by the project expressions.
    final DrillRelOptUtil.InputRefVisitor refCollector = new DrillRelOptUtil.InputRefVisitor();
    for (RexNode projectExpr : project.getChildExps()) {
        projectExpr.accept(refCollector);
    }

    // The project still needs the correlation column: nothing can be pushed.
    final int corrColumnIndex = lateralJoin.getRequiredColumns().nextSetBit(0);
    for (RexInputRef ref : refCollector.getInputRefs()) {
        if (corrColumnIndex == ref.getIndex()) {
            return;
        }
    }

    final RelNode leftInput = lateralJoin.getLeft();
    final RelNode rightInput = lateralJoin.getRight();
    final RelNode logicalLeft =
        convert(leftInput, leftInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify());
    final RelNode logicalRight =
        convert(rightInput, rightInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL).simplify());
    final RelTraitSet joinTraits = lateralJoin.getTraitSet().plus(DrillRel.DRILL_LOGICAL);

    final boolean trivialProject = DrillRelOptUtil.isTrivialProject(project, true);
    final RelNode newJoin = new DrillLateralJoinRel(
        lateralJoin.getCluster(),
        joinTraits,
        logicalLeft,
        logicalRight,
        true,
        lateralJoin.getCorrelationId(),
        lateralJoin.getRequiredColumns(),
        lateralJoin.getJoinType());

    final RelNode result;
    if (trivialProject) {
        // A trivial project adds nothing on top of the new join.
        result = newJoin;
    } else {
        // Re-map the project expressions to the join output without the correlation column.
        Map<Integer, Integer> indexMapping = buildMapWithoutCorrColumn(lateralJoin, corrColumnIndex);
        List<RexNode> remappedExprs = DrillRelOptUtil.transformExprs(
            project.getCluster().getRexBuilder(), project.getChildExps(), indexMapping);
        result = new DrillProjectRel(
            project.getCluster(),
            leftInput.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
            newJoin,
            remappedExprs,
            project.getRowType());
    }
    call.transformTo(result);
}
Also used : DrillRelOptUtil(org.apache.drill.exec.planner.common.DrillRelOptUtil) RelTraitSet(org.apache.calcite.plan.RelTraitSet) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 72 with RexInputRef

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.

the class DrillPushRowKeyJoinToScanRule method isRowKeyColumn.

/**
 * Finds whether the given column reference is for the rowkey col (also known as primary-key col).
 * We need to walk down the operators, looking at their references, down to the scan
 * to figure out whether the reference is a rowkey col. Projections can rearrange the
 * incoming columns. We also need to handle HepRelVertex/RelSubset while handling the rels.
 *
 * @param index 0-based position of the column in the row type of {@code rel}
 * @param rel relational expression from which the walk towards the scan starts
 * @return {@code true} only when the walk ends at a {@code DrillScanRel} whose group scan is a
 *         {@code DbGroupScan} that provides a restricted scan and whose field at the resolved
 *         position matches the rowkey name (case-insensitively); {@code false} otherwise
 */
private static boolean isRowKeyColumn(int index, RelNode rel) {
    RelNode curRel = rel;
    int curIndex = index;
    while (curRel != null && !(curRel instanceof DrillScanRel)) {
        // Pass the objects themselves: the logger renders them only when debug is enabled.
        logger.debug("IsRowKeyColumn: Rel={}, RowTypePos={}, RowType={}", curRel, curIndex, curRel.getRowType());
        if (curRel instanceof HepRelVertex) {
            curRel = ((HepRelVertex) curRel).getCurrentRel();
        } else if (curRel instanceof RelSubset) {
            // Prefer the best rel of the subset, fall back to the original one.
            RelSubset subset = (RelSubset) curRel;
            RelNode best = subset.getBest();
            curRel = best != null ? best : subset.getOriginal();
        } else {
            RelNode child = null;
            // For multi-input parent rels, find the 0-based index in the child rel
            // before descending into that child rel.
            for (RelNode input : curRel.getInputs()) {
                int inputFieldCount = input.getRowType().getFieldList().size();
                if (inputFieldCount <= curIndex) {
                    curIndex -= inputFieldCount;
                } else {
                    child = input;
                    break;
                }
            }
            curRel = child;
        }
        // If no exprs are present in the projection the column index remains the same.
        // Otherwise, the column index is the `RexInputRef` index.
        if (curRel instanceof DrillProjectRel) {
            List<RexNode> childExprs = curRel.getChildExps();
            if (childExprs != null && !childExprs.isEmpty()) {
                RexNode expr = childExprs.get(curIndex);
                if (expr instanceof RexInputRef) {
                    curIndex = ((RexInputRef) expr).getIndex();
                } else {
                    // Currently do not support expressions on rowkey col. So if an expr is present,
                    // return false
                    logger.debug("IsRowKeyColumn: ABORT: Primary-key EXPR$={}", expr);
                    return false;
                }
            }
        }
    }
    // The loop exits either with no rel left (null) or at the scan.
    final String columnName = curRel != null ? curRel.getRowType().getFieldNames().get(curIndex) : "??";
    logger.debug("IsRowKeyColumn:Primary-key Col={} ", columnName);
    // Get the primary-key col name from the scan and match with the column being referenced.
    if (curRel instanceof DrillScanRel) {
        DrillScanRel scan = (DrillScanRel) curRel;
        if (scan.getGroupScan() instanceof DbGroupScan) {
            DbGroupScan dbGroupScan = (DbGroupScan) scan.getGroupScan();
            String rowKeyName = dbGroupScan.getRowKeyName();
            DbGroupScan restrictedGroupScan = dbGroupScan.getRestrictedScan(scan.getColumns());
            // Also verify this scan supports restricted groupscans(random seeks)
            if (restrictedGroupScan != null && columnName.equalsIgnoreCase(rowKeyName)) {
                logger.debug("IsRowKeyColumn: FOUND: Rel={}, RowTypePos={}, RowType={}", curRel, curIndex, curRel.getRowType());
                return true;
            }
        }
    }
    logger.debug("IsRowKeyColumn: NOT FOUND");
    return false;
}
Also used : HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) RelNode(org.apache.calcite.rel.RelNode) DbGroupScan(org.apache.drill.exec.physical.base.DbGroupScan) RexInputRef(org.apache.calcite.rex.RexInputRef) RelSubset(org.apache.calcite.plan.volcano.RelSubset) RexNode(org.apache.calcite.rex.RexNode)

Example 73 with RexInputRef

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.

the class ElasticsearchProjectRule method convert.

/**
 * Converts a {@link Project} into a project with this rule's output trait.
 *
 * Returns {@code null} when the finder reports that the input tree already contains an
 * {@code ElasticsearchProject}. When the project expressions reference no input columns
 * at all, or consist solely of plain input references, the project is converted as is.
 * Otherwise an inner project that forwards every input field unchanged is created with the
 * output trait, and the original project is copied on top of it with its own trait set.
 *
 * @param relNode relational expression to convert, expected to be a {@link Project}
 * @return the converted relational expression, or {@code null} if the rule does not apply
 */
@Override
public RelNode convert(RelNode relNode) {
    Project project = (Project) relNode;
    NodeTypeFinder projectFinder = new NodeTypeFinder(ElasticsearchProject.class);
    project.getInput().accept(projectFinder);
    if (projectFinder.containsNode) {
        // Calcite adapter allows only a single Elasticsearch project per tree
        return null;
    }
    RelTraitSet traitSet = project.getTraitSet().replace(out);
    // getProjects() is used consistently instead of the generic getChildExps().
    List<RexNode> projects = project.getProjects();
    // check for literals only without input exprs
    DrillRelOptUtil.InputRefVisitor collectRefs = new DrillRelOptUtil.InputRefVisitor();
    projects.forEach(exp -> exp.accept(collectRefs));
    boolean noInputRefs = collectRefs.getInputRefs().isEmpty();
    boolean allExprsInputRefs = projects.stream().allMatch(rexNode -> rexNode instanceof RexInputRef);
    if (noInputRefs || allExprsInputRefs) {
        return CalciteUtils.createProject(traitSet, convert(project.getInput(), out), projects, project.getRowType());
    }
    // Only this path needs the pass-through projections, so build them here.
    RelDataType rowType = project.getInput().getRowType();
    List<RexNode> innerProjections = new ArrayList<>();
    for (RelDataTypeField relDataTypeField : rowType.getFieldList()) {
        innerProjections.add(project.getCluster().getRexBuilder().makeInputRef(project.getInput(), relDataTypeField.getIndex()));
    }
    Project elasticsearchProject = CalciteUtils.createProject(traitSet, convert(project.getInput(), out), innerProjections, project.getInput().getRowType());
    return project.copy(project.getTraitSet(), elasticsearchProject, projects, project.getRowType());
}
Also used : DrillRelOptUtil(org.apache.drill.exec.planner.common.DrillRelOptUtil) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RelTraitSet(org.apache.calcite.plan.RelTraitSet) Project(org.apache.calcite.rel.core.Project) ElasticsearchProject(org.apache.calcite.adapter.elasticsearch.ElasticsearchProject) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Example 74 with RexInputRef

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.

the class ConvertCountToDirectScanRule method collectCounts.

/**
 * Collects counts for each aggregation call by using the metadata summary information.
 * Will return empty result map if was not able to determine count for at least one aggregation call.
 *
 * For each aggregate call will determine if count can be calculated. Collects counts only for COUNT function.
 *   1. First, we get the total row count from the metadata summary.
 *   2. For COUNT(*) and COUNT(<non null column>) and COUNT(<implicit column>), the count = total row count
 *   3. For COUNT(nullable column), count = (total row count - column's null count)
 *   4. Also count can not be calculated for partition columns.
 *   5. For the columns that are not present in the Summary(Non-existent columns), the count = 0
 *
 * @param settings planner options
 * @param metadataSummary metadata summary containing row counts and column counts
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be {@code null}
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, Metadata_V4.MetadataSummary metadataSummary, Aggregate agg, TableScan scan, Project project) {
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final long totalRecordCount = metadataSummary.getTotalRowCount();
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // rule can be applied only for count function, return empty counts
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        }
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // A project sits between the aggregate and the scan: the argument can be mapped
                // to a scan field only when the project expression is a plain input reference.
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                }
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            }
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (e.g. the Turkish dotless 'i'), so implicit column names still match.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(java.util.Locale.ROOT);
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    // count can not be calculated for partition columns
                    return ImmutableMap.of();
                }
                Metadata_V4.ColumnTypeMetadata_v4 columnMetadata = metadataSummary.getColumnTypeInfo(new Metadata_V4.ColumnTypeMetadata_v4.Key(simplePath));
                if (columnMetadata == null) {
                    // If the column doesn't exist in the table, row count is set to 0
                    cnt = 0;
                } else if (columnMetadata.totalNullCount == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                } else {
                    // count of a nullable column = (total row count - column's null count)
                    cnt = totalRecordCount - columnMetadata.totalNullCount;
                }
            }
        } else {
            // COUNT with any other number of arguments is not handled
            return ImmutableMap.of();
        }
        // The call position is part of the key, so every aggregate call gets its own entry.
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    }
    return ImmutableMap.copyOf(result);
}
Also used : LinkedHashMap(java.util.LinkedHashMap) AggregateCall(org.apache.calcite.rel.core.AggregateCall) Metadata_V4(org.apache.drill.exec.store.parquet.metadata.Metadata_V4) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef)

Example 75 with RexInputRef

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rex.RexInputRef in project drill by apache.

the class ConvertCountToDirectScanPrule method collectCounts.

/**
 * Collects counts for each aggregation call.
 * Will return empty result map if was not able to determine count for at least one aggregation call.
 *
 * For each aggregate call will determine if count can be calculated. Collects counts only for COUNT function.
 * For star, not null expressions and implicit columns sets count to total record number.
 * For other cases obtains counts from group scan operator. Also count can not be calculated for partition columns.
 *
 * @param settings planner options
 * @param agg aggregate relational expression
 * @param scan scan relational expression
 * @param project project relational expression, may be {@code null}
 * @return result map where key is count column name, value is count value
 */
private Map<String, Long> collectCounts(PlannerSettings settings, DrillAggregateRel agg, DrillScanRel scan, DrillProjectRel project) {
    final Set<String> implicitColumnsNames = ColumnExplorer.initImplicitFileColumns(settings.getOptions()).keySet();
    final GroupScan oldGrpScan = scan.getGroupScan();
    final long totalRecordCount = (long) oldGrpScan.getScanStats(settings).getRecordCount();
    final LinkedHashMap<String, Long> result = new LinkedHashMap<>();
    for (int i = 0; i < agg.getAggCallList().size(); i++) {
        AggregateCall aggCall = agg.getAggCallList().get(i);
        long cnt;
        // rule can be applied only for count function, return empty counts
        if (!"count".equalsIgnoreCase(aggCall.getAggregation().getName())) {
            return ImmutableMap.of();
        }
        if (CountToDirectScanUtils.containsStarOrNotNullInput(aggCall, agg)) {
            cnt = totalRecordCount;
        } else if (aggCall.getArgList().size() == 1) {
            // count(columnName) ==> Agg ( Scan )) ==> columnValueCount
            int index = aggCall.getArgList().get(0);
            if (project != null) {
                // A project sits between the aggregate and the scan: the argument can be mapped
                // to a scan field only when the project expression is a plain input reference.
                if (!(project.getProjects().get(index) instanceof RexInputRef)) {
                    // do not apply for all other cases.
                    return ImmutableMap.of();
                }
                index = ((RexInputRef) project.getProjects().get(index)).getIndex();
            }
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (e.g. the Turkish dotless 'i'), so implicit column names still match.
            String columnName = scan.getRowType().getFieldNames().get(index).toLowerCase(java.util.Locale.ROOT);
            // for implicit column count will be the same as total record count
            if (implicitColumnsNames.contains(columnName)) {
                cnt = totalRecordCount;
            } else {
                SchemaPath simplePath = SchemaPath.getSimplePath(columnName);
                if (ColumnExplorer.isPartitionColumn(settings.getOptions(), simplePath)) {
                    // count can not be calculated for partition columns
                    return ImmutableMap.of();
                }
                cnt = oldGrpScan.getColumnValueCount(simplePath);
                if (cnt == Statistic.NO_COLUMN_STATS) {
                    // if column stats is not available don't apply this rule, return empty counts
                    return ImmutableMap.of();
                }
            }
        } else {
            // COUNT with any other number of arguments is not handled
            return ImmutableMap.of();
        }
        // The call position is part of the key, so every aggregate call gets its own entry.
        String name = "count" + i + "$" + (aggCall.getName() == null ? aggCall.toString() : aggCall.getName());
        result.put(name, cnt);
    }
    return ImmutableMap.copyOf(result);
}
Also used : MetadataDirectGroupScan(org.apache.drill.exec.store.direct.MetadataDirectGroupScan) GroupScan(org.apache.drill.exec.physical.base.GroupScan) AggregateCall(org.apache.calcite.rel.core.AggregateCall) SchemaPath(org.apache.drill.common.expression.SchemaPath) RexInputRef(org.apache.calcite.rex.RexInputRef) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

RexInputRef (org.apache.calcite.rex.RexInputRef)241 RexNode (org.apache.calcite.rex.RexNode)200 ArrayList (java.util.ArrayList)105 RelNode (org.apache.calcite.rel.RelNode)85 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)80 RexCall (org.apache.calcite.rex.RexCall)67 RelDataType (org.apache.calcite.rel.type.RelDataType)63 RexBuilder (org.apache.calcite.rex.RexBuilder)54 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)52 HashMap (java.util.HashMap)47 AggregateCall (org.apache.calcite.rel.core.AggregateCall)36 List (java.util.List)35 HashSet (java.util.HashSet)32 Pair (org.apache.calcite.util.Pair)32 RexLiteral (org.apache.calcite.rex.RexLiteral)29 Map (java.util.Map)24 RelOptUtil (org.apache.calcite.plan.RelOptUtil)24 Set (java.util.Set)20 ImmutableList (com.google.common.collect.ImmutableList)19 LinkedHashMap (java.util.LinkedHashMap)19