Search in sources :

Example 1 with JoinPredicateInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.

the class HiveRelMdSelectivity method computeInnerJoinSelectivity.

private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
    Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate);
    if (!predInfo.getKey()) {
        return new FilterSelectivityEstimator(j).estimateSelectivity(predInfo.getValue());
    }
    RexNode combinedPredicate = predInfo.getValue();
    JoinPredicateInfo jpi;
    try {
        jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, combinedPredicate);
    } catch (CalciteSemanticException e) {
        throw new RuntimeException(e);
    }
    ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap.builder();
    ImmutableMap<Integer, Double> colStatMap;
    int rightOffSet = j.getLeft().getRowType().getFieldCount();
    // Join which are part of join keys
    for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
        colStatMapBuilder.put(ljk, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
    }
    // Join which are part of join keys
    for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
        colStatMapBuilder.put(rjk + rightOffSet, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
    }
    colStatMap = colStatMapBuilder.build();
    // 3. Walk through the Join Condition Building NDV for selectivity
    // NDV of the join can not exceed the cardinality of cross join.
    List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
    int noOfPE = peLst.size();
    double ndvCrossProduct = 1;
    if (noOfPE > 0) {
        ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
        if (j instanceof SemiJoin) {
            ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), ndvCrossProduct);
        } else if (j instanceof HiveJoin) {
            ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) * mq.getRowCount(j.getRight()), ndvCrossProduct);
        } else {
            throw new RuntimeException("Unexpected Join type: " + j.getClass().getName());
        }
    }
    // 4. Join Selectivity = 1/NDV
    return (1 / ndvCrossProduct);
}
Also used : HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) ImmutableMap(com.google.common.collect.ImmutableMap) JoinLeafPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) SemiJoin(org.apache.calcite.rel.core.SemiJoin) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) RexNode(org.apache.calcite.rex.RexNode)

Example 2 with JoinPredicateInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.

the class HiveJoinAddNotNullRule method onMatch.

//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
    final Join join = call.rel(0);
    RelNode lChild = join.getLeft();
    RelNode rChild = join.getRight();
    HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
    assert registry != null;
    if (join.getJoinType() != JoinRelType.INNER) {
        return;
    }
    if (join.getCondition().isAlwaysTrue()) {
        return;
    }
    JoinPredicateInfo joinPredInfo;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
    } catch (CalciteSemanticException e) {
        return;
    }
    Set<Integer> joinLeftKeyPositions = new HashSet<Integer>();
    Set<Integer> joinRightKeyPositions = new HashSet<Integer>();
    for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
        JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(i);
        joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema());
        joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema());
    }
    // Build not null conditions
    final RelOptCluster cluster = join.getCluster();
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
    Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
    final List<RexNode> newLeftConditions = getNotNullConditions(cluster, rexBuilder, join.getLeft(), joinLeftKeyPositions, leftPushedPredicates);
    Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
    final List<RexNode> newRightConditions = getNotNullConditions(cluster, rexBuilder, join.getRight(), joinRightKeyPositions, rightPushedPredicates);
    // Nothing will be added to the expression
    RexNode newLeftPredicate = RexUtil.composeConjunction(rexBuilder, newLeftConditions, false);
    RexNode newRightPredicate = RexUtil.composeConjunction(rexBuilder, newRightConditions, false);
    if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) {
        return;
    }
    if (!newLeftPredicate.isAlwaysTrue()) {
        RelNode curr = lChild;
        lChild = filterFactory.createFilter(lChild, newLeftPredicate);
        call.getPlanner().onCopy(curr, lChild);
    }
    if (!newRightPredicate.isAlwaysTrue()) {
        RelNode curr = rChild;
        rChild = filterFactory.createFilter(rChild, newRightPredicate);
        call.getPlanner().onCopy(curr, rChild);
    }
    Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), lChild, rChild, join.getJoinType(), join.isSemiJoinDone());
    call.getPlanner().onCopy(join, newJoin);
    // Register information about created predicates
    registry.getPushedPredicates(newJoin, 0).addAll(leftPushedPredicates);
    registry.getPushedPredicates(newJoin, 1).addAll(rightPushedPredicates);
    call.transformTo(newJoin);
}
Also used : RelOptCluster(org.apache.calcite.plan.RelOptCluster) HiveSemiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin) Join(org.apache.calcite.rel.core.Join) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) JoinLeafPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexBuilder(org.apache.calcite.rex.RexBuilder) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) HashSet(java.util.HashSet) RexNode(org.apache.calcite.rex.RexNode)

Example 3 with JoinPredicateInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.

the class HiveJoinToMultiJoinRule method isCombinableJoin.

/*
   * Returns true if the join conditions execute over the same keys
   */
private static boolean isCombinableJoin(HiveJoin join, HiveJoin leftChildJoin) throws CalciteSemanticException {
    final JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join, join.getCondition());
    final JoinPredicateInfo leftChildJoinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(leftChildJoin, leftChildJoin.getCondition());
    return isCombinablePredicate(joinPredInfo, leftChildJoinPredInfo, leftChildJoin.getInputs().size());
}
Also used : JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo)

Example 4 with JoinPredicateInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.

the class HiveInsertExchange4JoinRule method onMatch.

@Override
public void onMatch(RelOptRuleCall call) {
    JoinPredicateInfo joinPredInfo;
    if (call.rel(0) instanceof HiveMultiJoin) {
        HiveMultiJoin multiJoin = call.rel(0);
        joinPredInfo = multiJoin.getJoinPredicateInfo();
    } else if (call.rel(0) instanceof HiveJoin) {
        HiveJoin hiveJoin = call.rel(0);
        joinPredInfo = hiveJoin.getJoinPredicateInfo();
    } else if (call.rel(0) instanceof Join) {
        Join join = call.rel(0);
        try {
            joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
        } catch (CalciteSemanticException e) {
            throw new RuntimeException(e);
        }
    } else {
        return;
    }
    for (RelNode child : call.rel(0).getInputs()) {
        if (((HepRelVertex) child).getCurrentRel() instanceof Exchange) {
            return;
        }
    }
    // Get key columns from inputs. Those are the columns on which we will distribute on.
    // It is also the columns we will sort on.
    List<RelNode> newInputs = new ArrayList<RelNode>();
    for (int i = 0; i < call.rel(0).getInputs().size(); i++) {
        List<Integer> joinKeyPositions = new ArrayList<Integer>();
        ImmutableList.Builder<RexNode> joinExprsBuilder = new ImmutableList.Builder<RexNode>();
        Set<String> keySet = Sets.newHashSet();
        ImmutableList.Builder<RelFieldCollation> collationListBuilder = new ImmutableList.Builder<RelFieldCollation>();
        for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
            JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j);
            for (RexNode joinExprNode : joinLeafPredInfo.getJoinExprs(i)) {
                if (keySet.add(joinExprNode.toString())) {
                    joinExprsBuilder.add(joinExprNode);
                }
            }
            for (int pos : joinLeafPredInfo.getProjsJoinKeysInChildSchema(i)) {
                if (!joinKeyPositions.contains(pos)) {
                    joinKeyPositions.add(pos);
                    collationListBuilder.add(new RelFieldCollation(pos));
                }
            }
        }
        HiveSortExchange exchange = HiveSortExchange.create(call.rel(0).getInput(i), new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinKeyPositions), new HiveRelCollation(collationListBuilder.build()), joinExprsBuilder.build());
        newInputs.add(exchange);
    }
    RelNode newOp;
    if (call.rel(0) instanceof HiveMultiJoin) {
        HiveMultiJoin multiJoin = call.rel(0);
        newOp = multiJoin.copy(multiJoin.getTraitSet(), newInputs);
    } else if (call.rel(0) instanceof Join) {
        Join join = call.rel(0);
        newOp = join.copy(join.getTraitSet(), join.getCondition(), newInputs.get(0), newInputs.get(1), join.getJoinType(), join.isSemiJoinDone());
    } else {
        return;
    }
    call.getPlanner().onCopy(call.rel(0), newOp);
    call.transformTo(newOp);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Join(org.apache.calcite.rel.core.Join) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) Exchange(org.apache.calcite.rel.core.Exchange) HiveSortExchange(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange) HiveRelDistribution(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution) JoinLeafPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo) HiveRelCollation(org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelCollation) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) RexNode(org.apache.calcite.rex.RexNode)

Example 5 with JoinPredicateInfo

use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.

the class HiveJoinToMultiJoinRule method mergeJoin.

// This method tries to merge the join with its left child. The left
// child should be a join for this to happen.
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
    // We check whether the join can be combined with any of its children
    final List<RelNode> newInputs = Lists.newArrayList();
    final List<RexNode> newJoinCondition = Lists.newArrayList();
    final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
    final List<JoinRelType> joinTypes = Lists.newArrayList();
    final List<RexNode> joinFilters = Lists.newArrayList();
    // Left child
    if (left instanceof HiveJoin || left instanceof HiveMultiJoin) {
        final RexNode leftCondition;
        final List<Pair<Integer, Integer>> leftJoinInputs;
        final List<JoinRelType> leftJoinTypes;
        final List<RexNode> leftJoinFilters;
        boolean combinable;
        if (left instanceof HiveJoin) {
            HiveJoin hj = (HiveJoin) left;
            leftCondition = hj.getCondition();
            leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
            leftJoinTypes = ImmutableList.of(hj.getJoinType());
            leftJoinFilters = ImmutableList.of(hj.getJoinFilter());
            try {
                combinable = isCombinableJoin(join, hj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-join", e);
                combinable = false;
            }
        } else {
            HiveMultiJoin hmj = (HiveMultiJoin) left;
            leftCondition = hmj.getCondition();
            leftJoinInputs = hmj.getJoinInputs();
            leftJoinTypes = hmj.getJoinTypes();
            leftJoinFilters = hmj.getJoinFilters();
            try {
                combinable = isCombinableJoin(join, hmj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-multijoin", e);
                combinable = false;
            }
        }
        if (combinable) {
            newJoinCondition.add(leftCondition);
            for (int i = 0; i < leftJoinInputs.size(); i++) {
                joinInputs.add(leftJoinInputs.get(i));
                joinTypes.add(leftJoinTypes.get(i));
                joinFilters.add(leftJoinFilters.get(i));
            }
            newInputs.addAll(left.getInputs());
        } else {
            // The join operation in the child is not on the same keys
            return null;
        }
    } else {
        // The left child is not a join or multijoin operator
        return null;
    }
    final int numberLeftInputs = newInputs.size();
    // Right child
    newInputs.add(right);
    // If we cannot combine any of the children, we bail out
    newJoinCondition.add(join.getCondition());
    if (newJoinCondition.size() == 1) {
        return null;
    }
    final List<RelDataTypeField> systemFieldList = ImmutableList.of();
    List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
    List<Integer> filterNulls = new ArrayList<Integer>();
    for (int i = 0; i < newInputs.size(); i++) {
        joinKeyExprs.add(new ArrayList<RexNode>());
    }
    RexNode filters;
    try {
        filters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), joinKeyExprs, filterNulls, null);
    } catch (CalciteSemanticException e) {
        LOG.trace("Failed to merge joins", e);
        return null;
    }
    ImmutableBitSet.Builder keysInInputsBuilder = ImmutableBitSet.builder();
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> partialCondition = joinKeyExprs.get(i);
        if (!partialCondition.isEmpty()) {
            keysInInputsBuilder.set(i);
        }
    }
    // If we cannot merge, we bail out
    ImmutableBitSet keysInInputs = keysInInputsBuilder.build();
    ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
    ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size()));
    if (join.getJoinType() != JoinRelType.INNER && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
        return null;
    }
    // Otherwise, we add to the join specs
    if (join.getJoinType() != JoinRelType.INNER) {
        int leftInput = keysInInputs.nextSetBit(0);
        int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
        joinInputs.add(Pair.of(leftInput, rightInput));
        joinTypes.add(join.getJoinType());
        joinFilters.add(filters);
    } else {
        for (int i : leftReferencedInputs) {
            for (int j : rightReferencedInputs) {
                joinInputs.add(Pair.of(i, j));
                joinTypes.add(join.getJoinType());
                joinFilters.add(filters);
            }
        }
    }
    // We can now create a multijoin operator
    RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, newJoinCondition, false));
    List<RelNode> newInputsArray = Lists.newArrayList(newInputs);
    JoinPredicateInfo joinPredInfo = null;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
    } catch (CalciteSemanticException e) {
        throw new RuntimeException(e);
    }
    // If the number of joins < number of input tables-1, this is not a star join.
    if (joinPredInfo.getEquiJoinPredicateElements().size() < newInputs.size() - 1) {
        return null;
    }
    // Validate that the multi-join is a valid star join before returning it.
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> joinKeys = null;
        for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
            List<RexNode> currJoinKeys = joinPredInfo.getEquiJoinPredicateElements().get(j).getJoinExprs(i);
            if (currJoinKeys.isEmpty()) {
                continue;
            }
            if (joinKeys == null) {
                joinKeys = currJoinKeys;
            } else {
                // Bail out if this is the case.
                if (!joinKeys.containsAll(currJoinKeys) || !currJoinKeys.containsAll(joinKeys)) {
                    return null;
                }
            }
        }
    }
    return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Pair(org.apache.calcite.util.Pair) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

JoinPredicateInfo (org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo)7 RexNode (org.apache.calcite.rex.RexNode)4 CalciteSemanticException (org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException)4 HiveJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin)4 ArrayList (java.util.ArrayList)3 RelNode (org.apache.calcite.rel.RelNode)3 JoinLeafPredicateInfo (org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinLeafPredicateInfo)3 ImmutableList (com.google.common.collect.ImmutableList)2 Join (org.apache.calcite.rel.core.Join)2 RexBuilder (org.apache.calcite.rex.RexBuilder)2 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)2 HiveMultiJoin (org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin)2 ImmutableMap (com.google.common.collect.ImmutableMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 RelOptCluster (org.apache.calcite.plan.RelOptCluster)1 RelFieldCollation (org.apache.calcite.rel.RelFieldCollation)1 Exchange (org.apache.calcite.rel.core.Exchange)1 JoinRelType (org.apache.calcite.rel.core.JoinRelType)1 SemiJoin (org.apache.calcite.rel.core.SemiJoin)1