Search in sources:

Example 46 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveJoinPushTransitivePredicatesRule method onMatch.

/**
 * Places filters built from the join's inferred predicates on top of the
 * join inputs and replaces the join with a copy reading from those filters.
 *
 * <p>The predicates come from the metadata query's pulled-up predicates for
 * the join. The candidates for each side are narrowed by
 * {@code getValidPreds}, which also receives a working copy of the predicates
 * already recorded as pushed for that side. If the resulting conjunctions for
 * both sides are always true, the rule does nothing.
 *
 * @param call the rule call whose operand 0 is the join to rewrite
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Join originalJoin = call.rel(0);
    final RelOptPredicateList inferred = call.getMetadataQuery().getPulledUpPredicates(originalJoin);
    final HiveRulesRegistry rulesRegistry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    assert rulesRegistry != null;
    final RexBuilder rexBuilder = originalJoin.getCluster().getRexBuilder();
    RelNode leftInput = originalJoin.getLeft();
    RelNode rightInput = originalJoin.getRight();

    // Work on copies of the registered sets; they are only written back to
    // the registry (for the new join) once the rewrite actually happens.
    final Set<String> pushedOnLeft = Sets.newHashSet(rulesRegistry.getPushedPredicates(originalJoin, 0));
    final List<RexNode> leftCandidates = getValidPreds(originalJoin.getCluster(), leftInput, pushedOnLeft, inferred.leftInferredPredicates, leftInput.getRowType());
    final Set<String> pushedOnRight = Sets.newHashSet(rulesRegistry.getPushedPredicates(originalJoin, 1));
    final List<RexNode> rightCandidates = getValidPreds(originalJoin.getCluster(), rightInput, pushedOnRight, inferred.rightInferredPredicates, rightInput.getRowType());

    final RexNode leftCondition = RexUtil.composeConjunction(rexBuilder, leftCandidates, false);
    final RexNode rightCondition = RexUtil.composeConjunction(rexBuilder, rightCandidates, false);
    final boolean filterLeft = !leftCondition.isAlwaysTrue();
    final boolean filterRight = !rightCondition.isAlwaysTrue();
    if (!filterLeft && !filterRight) {
        // Nothing new to push on either side.
        return;
    }

    if (filterLeft) {
        final RelNode unfiltered = leftInput;
        leftInput = filterFactory.createFilter(unfiltered, leftCondition.accept(new RexReplacer(unfiltered)), ImmutableSet.of());
        call.getPlanner().onCopy(unfiltered, leftInput);
    }
    if (filterRight) {
        final RelNode unfiltered = rightInput;
        rightInput = filterFactory.createFilter(unfiltered, rightCondition.accept(new RexReplacer(unfiltered)), ImmutableSet.of());
        call.getPlanner().onCopy(unfiltered, rightInput);
    }

    final RelNode rewrittenJoin = originalJoin.copy(originalJoin.getTraitSet(), originalJoin.getCondition(), leftInput, rightInput, originalJoin.getJoinType(), originalJoin.isSemiJoinDone());
    call.getPlanner().onCopy(originalJoin, rewrittenJoin);

    // Carry the pushed-predicate bookkeeping over to the new join node.
    rulesRegistry.getPushedPredicates(rewrittenJoin, 0).addAll(pushedOnLeft);
    rulesRegistry.getPushedPredicates(rewrittenJoin, 1).addAll(pushedOnRight);
    call.transformTo(rewrittenJoin);
}
Also used : RelNode(org.apache.calcite.rel.RelNode) RelOptPredicateList(org.apache.calcite.plan.RelOptPredicateList) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveAntiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Example 47 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveInsertExchange4JoinRule method onMatch.

/**
 * Wraps every input of the matched join in a {@link HiveSortExchange} that is
 * hash-distributed and sorted on that input's equi-join key columns, then
 * replaces the join with a copy that reads from the new exchanges.
 *
 * <p>The rule does nothing when operand 0 is not a {@code HiveMultiJoin},
 * {@code HiveJoin} or {@code Join}, or when any input is already an
 * {@link Exchange}.
 *
 * @param call the rule call whose operand 0 is the join to rewrite
 * @throws RuntimeException wrapping the {@link CalciteSemanticException}
 *         raised while constructing the join predicate info for a plain
 *         {@code Join}
 */
@Override
public void onMatch(RelOptRuleCall call) {
    // Operand 0 is inspected many times; fetch it once.
    final RelNode joinRel = call.rel(0);

    JoinPredicateInfo joinPredInfo;
    if (joinRel instanceof HiveMultiJoin) {
        joinPredInfo = ((HiveMultiJoin) joinRel).getJoinPredicateInfo();
    } else if (joinRel instanceof HiveJoin) {
        joinPredInfo = ((HiveJoin) joinRel).getJoinPredicateInfo();
    } else if (joinRel instanceof Join) {
        try {
            joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo((Join) joinRel);
        } catch (CalciteSemanticException e) {
            throw new RuntimeException(e);
        }
    } else {
        return;
    }

    // Bail out if an exchange has already been inserted under this join.
    for (RelNode child : joinRel.getInputs()) {
        // Inputs are HepRelVertex wrappers under the Hep planner; unwrap them
        // when present, but do not fail with a ClassCastException on any
        // other kind of input node.
        final RelNode current = child instanceof HepRelVertex
            ? ((HepRelVertex) child).getCurrentRel()
            : child;
        if (current instanceof Exchange) {
            return;
        }
    }

    // Get key columns from inputs. Those are the columns on which we will distribute on.
    // It is also the columns we will sort on.
    final int inputCount = joinRel.getInputs().size();
    List<RelNode> newInputs = new ArrayList<RelNode>(inputCount);
    for (int i = 0; i < inputCount; i++) {
        List<Integer> joinKeyPositions = new ArrayList<Integer>();
        ImmutableList.Builder<RexNode> joinExprsBuilder = new ImmutableList.Builder<RexNode>();
        // Digests of the join expressions already collected, used to skip duplicates.
        Set<String> keySet = Sets.newHashSet();
        ImmutableList.Builder<RelFieldCollation> collationListBuilder = new ImmutableList.Builder<RelFieldCollation>();
        for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
            JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j);
            for (RexNode joinExprNode : joinLeafPredInfo.getJoinExprs(i)) {
                if (keySet.add(joinExprNode.toString())) {
                    joinExprsBuilder.add(joinExprNode);
                }
            }
            for (int pos : joinLeafPredInfo.getProjsJoinKeysInChildSchema(i)) {
                // Each key position contributes once to both distribution and collation.
                if (!joinKeyPositions.contains(pos)) {
                    joinKeyPositions.add(pos);
                    collationListBuilder.add(new RelFieldCollation(pos));
                }
            }
        }
        HiveSortExchange exchange = HiveSortExchange.create(joinRel.getInput(i), new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinKeyPositions), new HiveRelCollation(collationListBuilder.build()), joinExprsBuilder.build());
        newInputs.add(exchange);
    }

    RelNode newOp;
    if (joinRel instanceof HiveMultiJoin) {
        HiveMultiJoin multiJoin = (HiveMultiJoin) joinRel;
        newOp = multiJoin.copy(multiJoin.getTraitSet(), newInputs);
    } else if (joinRel instanceof Join) {
        Join join = (Join) joinRel;
        newOp = join.copy(join.getTraitSet(), join.getCondition(), newInputs.get(0), newInputs.get(1), join.getJoinType(), join.isSemiJoinDone());
    } else {
        return;
    }
    call.getPlanner().onCopy(joinRel, newOp);
    call.transformTo(newOp);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Join(org.apache.calcite.rel.core.Join) Exchange(org.apache.calcite.rel.core.Exchange) HiveRelDistribution(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution) JoinLeafPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo) HiveRelCollation(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) RexNode(org.apache.calcite.rex.RexNode)

Example 48 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveFilterProjectTransposeRule method matches.

/**
 * Decides whether the filter/project transposition may fire for this call.
 *
 * <p>The match is rejected when only deterministic filters are allowed and
 * the filter condition, once pushed past the project, is not deterministic.
 * It is also rejected when the call carries a third operand (a join) that is
 * not reported as a rewritable PK-FK join. Otherwise the decision is
 * delegated to the superclass.
 *
 * @param call the rule call; operand 0 is the filter, operand 1 the project
 *             and, when present, operand 2 the join
 * @return {@code true} if the rule should be applied
 */
@Override
public boolean matches(RelOptRuleCall call) {
    final Filter filter = call.rel(0);
    // Determinism must be judged on the condition as it will look after the
    // pushdown: when a view is involved, the filter can refer to a select-list
    // column that is backed by a non-deterministic udf, which only becomes
    // visible once the reference is resolved through the project.
    // See test case cbo_ppd_non_deterministic.q.
    final RexNode resolvedCondition = RelOptUtil.pushPastProject(filter.getCondition(), call.rel(1));
    final boolean nonDeterministicRejected = this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(resolvedCondition);
    if (nonDeterministicRejected) {
        return false;
    }

    final boolean hasJoinOperand = call.rels.length > 2;
    if (hasJoinOperand) {
        final Join join = call.rel(2);
        final RewritablePKFKJoinInfo pkfkInfo = HiveRelOptUtil.isRewritablePKFKJoin(join, join.getLeft(), join.getRight(), call.getMetadataQuery());
        if (!pkfkInfo.rewritable) {
            return false;
        }
    }
    return super.matches(call);
}
Also used : RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) Filter(org.apache.calcite.rel.core.Filter) Join(org.apache.calcite.rel.core.Join) RexNode(org.apache.calcite.rex.RexNode)

Example 49 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveJoinCommuteRule method onMatch.

/**
 * Replaces a permutation project on top of a join by the commuted join when
 * commuting the join makes that project redundant.
 *
 * <p>The rule fires only if the top project is a non-identity permutation,
 * the swap yields a non-identity permutation project on top of the swapped
 * join, and the product of the two permutations is the identity.
 *
 * @param call the rule call; operand 0 is the top project and operand 1 the
 *             join underneath it
 */
public void onMatch(final RelOptRuleCall call) {
    final Project upperProject = call.rel(0);
    final Join originalJoin = call.rel(1);

    // 1. The top project must be a permutation, and not the identity one;
    // otherwise there is nothing for this rule to do.
    final Permutation upperPermutation = upperProject.getPermutation();
    if (upperPermutation == null || upperPermutation.isIdentity()) {
        return;
    }

    // 2. Swap the join inputs.
    final RelNode swapResult = JoinCommuteRule.swap(originalJoin, true);
    if (swapResult == null) {
        return;
    }

    // 3. The swap result is expected to be a project on top of the swapped
    // join; if we got a bare join back, bail out.
    if (swapResult instanceof Join) {
        return;
    }

    // 4. The project produced by the swap must also be a non-identity
    // permutation.
    final Project lowerProject = (Project) swapResult;
    final Permutation lowerPermutation = lowerProject.getPermutation();
    if (lowerPermutation == null || lowerPermutation.isIdentity()) {
        return;
    }

    // 5. Only when both permutations cancel each other out can the swapped
    // join stand in for the original project + join.
    final boolean permutationsCancel = upperPermutation.product(lowerPermutation).isIdentity();
    if (!permutationsCancel) {
        return;
    }

    // 6. Hand the swapped join (the input of the generated project) to the
    // planner as the replacement.
    final Join commutedJoin = (Join) lowerProject.getInput(0);
    call.transformTo(commutedJoin);
}
Also used : Project(org.apache.calcite.rel.core.Project) HiveProject(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject) RelNode(org.apache.calcite.rel.RelNode) Permutation(org.apache.calcite.util.Permutation) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) Join(org.apache.calcite.rel.core.Join)

Example 50 with Join

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.

the class HiveJoinSwapConstraintsRule method onMatch.

/**
 * Swaps two stacked joins so that the join which currently sits on top is
 * evaluated first, directly on the left input of the bottom join.
 *
 * <p>With X and Y the left and right inputs of the bottom join and Z the
 * right input of the top join, the plan {@code (X join Y) join Z} becomes
 * {@code (X join Z) join Y}. The rewrite is skipped when either join
 * generates nulls on its left side, when the bottom join is a semi-join,
 * when the top join is reported as a rewritable PK-FK join, when the bottom
 * join is not, or when the top join condition references columns of Y.
 * Unless the new bottom join is a semi-join, a project is added on top to
 * restore the original column order.
 *
 * @param call the rule call; operand 0 is the top join and operand 1 the
 *             bottom join
 */
@Override
public void onMatch(RelOptRuleCall call) {
    final Join upperJoin = call.rel(0);
    final Join lowerJoin = call.rel(1);
    final RexBuilder builder = upperJoin.getCluster().getRexBuilder();

    // 1) Check whether these joins can be swapped.
    final boolean swappable = !upperJoin.getJoinType().generatesNullsOnLeft()
        && !lowerJoin.getJoinType().generatesNullsOnLeft()
        && !lowerJoin.isSemiJoin();
    if (!swappable) {
        return;
    }

    // 2) The optimization needs the bottom join to be a non-filtering column
    // appending join while the top one is not: we do not want to swap a top
    // join of that kind, and we cannot swap if the bottom join is not one.
    final RewritablePKFKJoinInfo upperInfo = HiveRelOptUtil.isRewritablePKFKJoin(upperJoin, upperJoin.getLeft(), upperJoin.getRight(), call.getMetadataQuery());
    final RewritablePKFKJoinInfo lowerInfo = HiveRelOptUtil.isRewritablePKFKJoin(lowerJoin, lowerJoin.getLeft(), lowerJoin.getRight(), call.getMetadataQuery());
    final boolean eligible = !upperInfo.rewritable && lowerInfo.rewritable;
    if (!eligible) {
        return;
    }

    // 3) Rewrite.
    // X = left input of the bottom join, Y = right input of the bottom join,
    // Z = right input of the top join.
    final int xCount = lowerJoin.getLeft().getRowType().getFieldList().size();
    final int yCount = lowerJoin.getRight().getRowType().getFieldList().size();
    final int zCount = upperJoin.getRight().getRowType().getFieldList().size();
    final int totalCount = xCount + yCount + zCount;

    // Field list of the full (X, Y, Z) result. The top join's own row type
    // cannot be used as is, because for a semi-join it only exposes the
    // left-hand side fields; Z's fields are therefore taken from Z itself.
    final List<RelDataTypeField> allFields = new ArrayList<>();
    allFields.addAll(upperJoin.getRowType().getFieldList().subList(0, xCount + yCount));
    allFields.addAll(upperJoin.getRight().getRowType().getFieldList());

    // Columns of the top join's left input that its condition refers to.
    final Set<Integer> upperLeftRefs = HiveCalciteUtil.getInputRefs(upperJoin.getCondition());
    final int upperLeftWidth = upperJoin.getLeft().getRowType().getFieldCount();
    upperLeftRefs.removeIf(ref -> ref >= upperLeftWidth);
    // Every one of those columns has to come from X; a reference into Y
    // means the top join cannot be moved below.
    for (int ref : upperLeftRefs) {
        if (ref >= xCount) {
            return;
        }
    }

    // Re-target the conditions from the (X, Y, Z) layout to (X, Z, Y).
    final int[] adjustments = new int[totalCount];
    setJoinAdjustments(adjustments, xCount, yCount, zCount, zCount, -yCount);
    final RexNode newLowerCondition = upperJoin.getCondition().accept(new RelOptUtil.RexInputConverter(builder, allFields, adjustments));

    // The former top join becomes the new bottom join over (X, Z).
    final Join newLowerJoin = upperJoin.copy(upperJoin.getTraitSet(), newLowerCondition, lowerJoin.getLeft(), upperJoin.getRight(), upperJoin.getJoinType(), upperJoin.isSemiJoinDone());
    final boolean lowerIsSemi = newLowerJoin.isSemiJoin();

    // The former bottom join condition is kept unchanged when the new bottom
    // join is a semi-join, and is shifted to the new layout otherwise.
    final RexNode newUpperCondition;
    if (lowerIsSemi) {
        newUpperCondition = lowerJoin.getCondition();
    } else {
        newUpperCondition = lowerJoin.getCondition().accept(new RelOptUtil.RexInputConverter(builder, allFields, adjustments));
    }
    final Join newUpperJoin = lowerJoin.copy(lowerJoin.getTraitSet(), newUpperCondition, newLowerJoin, lowerJoin.getRight(), lowerJoin.getJoinType(), lowerJoin.isSemiJoinDone());

    if (lowerIsSemi) {
        call.transformTo(newUpperJoin);
        return;
    }

    // The new top join yields (X, Z, Y); project the columns back into the
    // original (X, Y, Z) order.
    final int outputWidth = upperJoin.getRowType().getFieldCount();
    final List<RexNode> reordered = new ArrayList<>();
    for (int col = 0; col < xCount; col++) {
        // X stays in place
        reordered.add(builder.makeInputRef(newUpperJoin, col));
    }
    for (int col = xCount + zCount; col < outputWidth; col++) {
        // Y now sits after Z
        reordered.add(builder.makeInputRef(newUpperJoin, col));
    }
    for (int col = xCount; col < xCount + zCount; col++) {
        // Z goes last
        reordered.add(builder.makeInputRef(newUpperJoin, col));
    }
    call.transformTo(call.builder().push(newUpperJoin).project(reordered).build());
}
Also used : RewritablePKFKJoinInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo) HiveRelOptUtil(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) Join(org.apache.calcite.rel.core.Join) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexBuilder(org.apache.calcite.rex.RexBuilder) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Join (org.apache.calcite.rel.core.Join): 73, RelNode (org.apache.calcite.rel.RelNode): 45, RexNode (org.apache.calcite.rex.RexNode): 40, ArrayList (java.util.ArrayList): 31, LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin): 25, Project (org.apache.calcite.rel.core.Project): 22, RexBuilder (org.apache.calcite.rex.RexBuilder): 20, ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet): 18, RelBuilder (org.apache.calcite.tools.RelBuilder): 17, HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin): 14, Aggregate (org.apache.calcite.rel.core.Aggregate): 13, Test (org.junit.Test): 13, Filter (org.apache.calcite.rel.core.Filter): 12, RelNode (org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelNode): 11, SemiJoin (org.apache.calcite.rel.core.SemiJoin): 11, RelOptCluster (org.apache.calcite.plan.RelOptCluster): 10, JoinRelType (org.apache.calcite.rel.core.JoinRelType): 9, RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery): 9, Mappings (org.apache.calcite.util.mapping.Mappings): 9, List (java.util.List): 8