Search in sources :

Example 1 with RewritablePKFKJoinInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.

the class HiveJoinConstraintsRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    final Project project = call.rel(0);
    final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
    List<RexNode> topProjExprs = project.getProjects();
    Join join = call.rel(1);
    final JoinRelType joinType = join.getJoinType();
    final RelNode leftInput = join.getLeft();
    final RelNode rightInput = join.getRight();
    final RexNode cond = join.getCondition();
    // TODO:https://issues.apache.org/jira/browse/HIVE-23920
    if (joinType == JoinRelType.ANTI) {
        return;
    }
    // 1) If it is an inner, check whether project only uses columns from one side.
    // That side will need to be the FK side.
    // If it is a left outer, left will be the FK side.
    // If it is a right outer, right will be the FK side.
    final RelNode fkInput;
    final RelNode nonFkInput;
    final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null);
    final ImmutableBitSet leftBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
    final ImmutableBitSet rightBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount(), join.getRowType().getFieldCount());
    // These boolean values represent corresponding left, right input which is potential FK
    boolean leftInputPotentialFK = topRefs.intersects(leftBits);
    boolean rightInputPotentialFK = topRefs.intersects(rightBits);
    if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
        // Both inputs are referenced. Before making a decision, try to swap
        // references in join condition if it is an inner join, i.e. if a join
        // condition column is referenced above the join, then we can just
        // reference the column from the other side.
        // For example, given two relations R(a1,a2), S(b1) :
        // SELECT a2, b1 FROM R, S ON R.a1=R.b1 =>
        // SELECT a2, a1 FROM R, S ON R.a1=R.b1
        int joinFieldCount = join.getRowType().getFieldCount();
        Mapping mappingLR = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
        Mapping mappingRL = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
        for (RexNode conj : RelOptUtil.conjunctions(cond)) {
            if (!conj.isA(SqlKind.EQUALS)) {
                continue;
            }
            RexCall eq = (RexCall) conj;
            RexNode op1 = eq.getOperands().get(0);
            RexNode op2 = eq.getOperands().get(1);
            if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
                // Check references
                int ref1 = ((RexInputRef) op1).getIndex();
                int ref2 = ((RexInputRef) op2).getIndex();
                int leftRef = -1;
                int rightRef = -1;
                if (leftBits.get(ref1) && rightBits.get(ref2)) {
                    leftRef = ref1;
                    rightRef = ref2;
                } else if (rightBits.get(ref1) && leftBits.get(ref2)) {
                    leftRef = ref2;
                    rightRef = ref1;
                }
                if (leftRef != -1 && rightRef != -1) {
                    // as it is useless
                    if (mappingLR.getTargetOpt(leftRef) == -1) {
                        mappingLR.set(leftRef, rightRef);
                    }
                    if (mappingRL.getTargetOpt(rightRef) == -1) {
                        mappingRL.set(rightRef, leftRef);
                    }
                }
            }
        }
        if (mappingLR.size() != 0) {
            // First insert missing elements into the mapping as identity mappings
            for (int i = 0; i < joinFieldCount; i++) {
                if (mappingLR.getTargetOpt(i) == -1) {
                    mappingLR.set(i, i);
                }
                if (mappingRL.getTargetOpt(i) == -1) {
                    mappingRL.set(i, i);
                }
            }
            // Then, we start by trying to reference only left side in top projections
            List<RexNode> swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingRL, call.rel(1)))).collect(Collectors.toList());
            rightInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(rightBits);
            if (!rightInputPotentialFK) {
                topProjExprs = swappedTopProjExprs;
            } else {
                // If it did not work, we try to reference only right side in top projections
                swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingLR, call.rel(1)))).collect(Collectors.toList());
                leftInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(leftBits);
                if (!leftInputPotentialFK) {
                    topProjExprs = swappedTopProjExprs;
                }
            }
        }
    } else if (!leftInputPotentialFK && !rightInputPotentialFK) {
        // TODO: There are no references in the project operator above.
        // In this case, we should probably do two passes, one for
        // left as FK and one for right as FK, although it may be expensive.
        // Currently we only assume left as FK
        leftInputPotentialFK = true;
    }
    final Mode mode;
    switch(joinType) {
        case SEMI:
        case INNER:
            // case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
            if (leftInputPotentialFK && rightInputPotentialFK) {
                // and there is nothing to transform
                return;
            }
            fkInput = leftInputPotentialFK ? leftInput : rightInput;
            nonFkInput = leftInputPotentialFK ? rightInput : leftInput;
            mode = Mode.REMOVE;
            break;
        case LEFT:
            fkInput = leftInput;
            nonFkInput = rightInput;
            mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
            break;
        case RIGHT:
            fkInput = rightInput;
            nonFkInput = leftInput;
            mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
            break;
        default:
            // Other type, bail out
            return;
    }
    // 2) Check whether this join can be rewritten or removed
    RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(join, fkInput, nonFkInput, call.getMetadataQuery());
    // 3) If it is the only condition, we can trigger the rewriting
    if (r.rewritable) {
        rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, r.nullableNodes);
    } else {
        // Possibly this could be enhanced to take other join type into consideration.
        if (joinType != JoinRelType.INNER) {
            return;
        }
        // first swap fk and non-fk input and see if we can rewrite them
        RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(join, nonFkInput, fkInput, call.getMetadataQuery());
        if (fkRemoval.rewritable) {
            // we have established that nonFkInput is FK, and fkInput is PK
            // and there is no row filtering on FK side
            // check that FK side join column is distinct (i.e. have a group by)
            ImmutableBitSet fkSideBitSet;
            if (nonFkInput == leftInput) {
                fkSideBitSet = leftBits;
            } else {
                fkSideBitSet = rightBits;
            }
            ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
            for (RexNode conj : RelOptUtil.conjunctions(cond)) {
                if (!conj.isA(SqlKind.EQUALS)) {
                    return;
                }
                RexCall eq = (RexCall) conj;
                RexNode op1 = eq.getOperands().get(0);
                RexNode op2 = eq.getOperands().get(1);
                if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
                    // Check references
                    int ref1 = ((RexInputRef) op1).getIndex();
                    int ref2 = ((RexInputRef) op2).getIndex();
                    int leftRef = -1;
                    int rightRef = -1;
                    if (fkSideBitSet.get(ref1)) {
                        // check that join columns are not nullable
                        if (op1.getType().isNullable()) {
                            return;
                        }
                        fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
                    } else {
                        if (op2.getType().isNullable()) {
                            return;
                        }
                        fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
                    }
                }
            }
            if (!call.getMetadataQuery().areColumnsUnique(nonFkInput, fkJoinColBuilder.build())) {
                return;
            }
            // all conditions are met, therefore we can perform rewrite to remove fk side
            rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, fkRemoval.nullableNodes);
        }
    }
}
Also used : Project(org.apache.calcite.rel.core.Project) Mappings(org.apache.calcite.util.mapping.Mappings) MappingType(org.apache.calcite.util.mapping.MappingType) LoggerFactory(org.slf4j.LoggerFactory) RelOptUtil(org.apache.calcite.plan.RelOptUtil) Join(org.apache.calcite.rel.core.Join) ArrayList(java.util.ArrayList) RexUtil(org.apache.calcite.rex.RexUtil) RexNode(org.apache.calcite.rex.RexNode) Mapping(org.apache.calcite.util.mapping.Mapping) RelBuilderFactory(org.apache.calcite.tools.RelBuilderFactory) HiveRelFactories(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) SqlKind(org.apache.calcite.sql.SqlKind) Logger(org.slf4j.Logger) RexBuilder(org.apache.calcite.rex.RexBuilder) HiveRelOptUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) RelOptRuleCall(org.apache.calcite.plan.RelOptRuleCall) RexInputRef(org.apache.calcite.rex.RexInputRef) RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) RelOptRule(org.apache.calcite.plan.RelOptRule) HiveCalciteUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil) List(java.util.List) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RexCall(org.apache.calcite.rex.RexCall) RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) Join(org.apache.calcite.rel.core.Join) Mapping(org.apache.calcite.util.mapping.Mapping) RexCall(org.apache.calcite.rex.RexCall) Project(org.apache.calcite.rel.core.Project) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelNode(org.apache.calcite.rel.RelNode) RexBuilder(org.apache.calcite.rex.RexBuilder) RexInputRef(org.apache.calcite.rex.RexInputRef) RexPermuteInputsShuttle(org.apache.calcite.rex.RexPermuteInputsShuttle) RexNode(org.apache.calcite.rex.RexNode)

Example 2 with RewritablePKFKJoinInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.

the class HiveFilterProjectTransposeRule method matches.

@Override
public boolean matches(RelOptRuleCall call) {
    final Filter filterRel = call.rel(0);
    // The condition fetched here can reference a udf that is not deterministic, but defined
    // as part of the select list when a view is in play.  But the condition after the pushdown
    // will resolve to using the udf from select list.  The check here for deterministic filters
    // should be based on the resolved expression.  Refer to test case cbo_ppd_non_deterministic.q.
    RexNode condition = RelOptUtil.pushPastProject(filterRel.getCondition(), call.rel(1));
    if (this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(condition)) {
        return false;
    }
    if (call.rels.length > 2) {
        final Join joinRel = call.rel(2);
        RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(joinRel, joinRel.getLeft(), joinRel.getRight(), call.getMetadataQuery());
        if (!joinInfo.rewritable) {
            return false;
        }
    }
    return super.matches(call);
}
Also used : RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) Filter(org.apache.calcite.rel.core.Filter) Join(org.apache.calcite.rel.core.Join) RexNode(org.apache.calcite.rex.RexNode)

Example 3 with RewritablePKFKJoinInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.

the class HiveJoinSwapConstraintsRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    final Join topJoin = call.rel(0);
    final Join bottomJoin = call.rel(1);
    final RexBuilder rexBuilder = topJoin.getCluster().getRexBuilder();
    // 1) Check whether these joins can be swapped.
    if (topJoin.getJoinType().generatesNullsOnLeft() || bottomJoin.getJoinType().generatesNullsOnLeft() || bottomJoin.isSemiJoin()) {
        // Nothing to do
        return;
    }
    // 2) Check whether the bottom is a non-filtering column appending join.
    // - If the top one is a non-filtering column appending join, we do not
    // trigger the optimization, since we do not want to swap this type of
    // joins.
    // - If the bottom one is not a non-filtering column appending join,
    // we cannot trigger the optimization.
    RewritablePKFKJoinInfo topInfo = HiveRelOptUtil.isRewritablePKFKJoin(topJoin, topJoin.getLeft(), topJoin.getRight(), call.getMetadataQuery());
    RewritablePKFKJoinInfo bottomInfo = HiveRelOptUtil.isRewritablePKFKJoin(bottomJoin, bottomJoin.getLeft(), bottomJoin.getRight(), call.getMetadataQuery());
    if (topInfo.rewritable || !bottomInfo.rewritable) {
        // Nothing to do
        return;
    }
    // 3) Rewrite.
    // X is the left child of the join below
    // Y is the right child of the join below
    // Z is the right child of the top join
    int nFieldsX = bottomJoin.getLeft().getRowType().getFieldList().size();
    int nFieldsY = bottomJoin.getRight().getRowType().getFieldList().size();
    int nFieldsZ = topJoin.getRight().getRowType().getFieldList().size();
    int nTotalFields = nFieldsX + nFieldsY + nFieldsZ;
    List<RelDataTypeField> fields = new ArrayList<>();
    // create a list of fields for the full join result; note that
    // we can't simply use the fields because the row-type of a
    // semi-join would only include the left hand side fields
    List<RelDataTypeField> joinFields = topJoin.getRowType().getFieldList();
    for (int i = 0; i < (nFieldsX + nFieldsY); i++) {
        fields.add(joinFields.get(i));
    }
    joinFields = topJoin.getRight().getRowType().getFieldList();
    for (int i = 0; i < nFieldsZ; i++) {
        fields.add(joinFields.get(i));
    }
    // determine which operands below the join are the actual
    // rels that participate in it
    final Set<Integer> leftKeys = HiveCalciteUtil.getInputRefs(topJoin.getCondition());
    leftKeys.removeIf(i -> i >= topJoin.getLeft().getRowType().getFieldCount());
    int nKeysFromX = 0;
    for (int leftKey : leftKeys) {
        if (leftKey < nFieldsX) {
            nKeysFromX++;
        }
    }
    // the keys must all originate from the left
    if (nKeysFromX != leftKeys.size()) {
        // Nothing to do
        return;
    }
    // need to convert the conditions
    // (X, Y, Z) --> (X, Z, Y)
    int[] adjustments = new int[nTotalFields];
    setJoinAdjustments(adjustments, nFieldsX, nFieldsY, nFieldsZ, nFieldsZ, -nFieldsY);
    final RexNode newBottomCondition = topJoin.getCondition().accept(new RelOptUtil.RexInputConverter(rexBuilder, fields, adjustments));
    // create the new joins
    final Join newBottomJoin = topJoin.copy(topJoin.getTraitSet(), newBottomCondition, bottomJoin.getLeft(), topJoin.getRight(), topJoin.getJoinType(), topJoin.isSemiJoinDone());
    final RexNode newTopCondition;
    if (newBottomJoin.isSemiJoin()) {
        newTopCondition = bottomJoin.getCondition();
    } else {
        newTopCondition = bottomJoin.getCondition().accept(new RelOptUtil.RexInputConverter(rexBuilder, fields, adjustments));
    }
    final Join newTopJoin = bottomJoin.copy(bottomJoin.getTraitSet(), newTopCondition, newBottomJoin, bottomJoin.getRight(), bottomJoin.getJoinType(), bottomJoin.isSemiJoinDone());
    if (newBottomJoin.isSemiJoin()) {
        call.transformTo(newTopJoin);
    } else {
        // need to swap the columns to match the original join
        // (X, Y, Z) --> (X, Z, Y)
        List<RexNode> exprs = new ArrayList<>();
        for (int i = 0; i < nFieldsX; i++) {
            exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
        }
        for (int i = nFieldsX + nFieldsZ; i < topJoin.getRowType().getFieldCount(); i++) {
            exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
        }
        for (int i = nFieldsX; i < nFieldsX + nFieldsZ; i++) {
            exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
        }
        call.transformTo(call.builder().push(newTopJoin).project(exprs).build());
    }
}
Also used : RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) HiveRelOptUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) Join(org.apache.calcite.rel.core.Join) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Join (org.apache.calcite.rel.core.Join)3 RexNode (org.apache.calcite.rex.RexNode)3 RewritablePKFKJoinInfo (org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo)3 ArrayList (java.util.ArrayList)2 RelOptUtil (org.apache.calcite.plan.RelOptUtil)2 RexBuilder (org.apache.calcite.rex.RexBuilder)2 HiveRelOptUtil (org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil)2 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 RelOptRule (org.apache.calcite.plan.RelOptRule)1 RelOptRuleCall (org.apache.calcite.plan.RelOptRuleCall)1 RelNode (org.apache.calcite.rel.RelNode)1 Filter (org.apache.calcite.rel.core.Filter)1 JoinRelType (org.apache.calcite.rel.core.JoinRelType)1 Project (org.apache.calcite.rel.core.Project)1 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)1 RexCall (org.apache.calcite.rex.RexCall)1 RexInputRef (org.apache.calcite.rex.RexInputRef)1 RexPermuteInputsShuttle (org.apache.calcite.rex.RexPermuteInputsShuttle)1 RexUtil (org.apache.calcite.rex.RexUtil)1