Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveRelMdSelectivity, method computeInnerJoinSelectivity.
private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
  Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate);
  if (!predInfo.getKey()) {
    return new FilterSelectivityEstimator(j).estimateSelectivity(predInfo.getValue());
  }
  RexNode combinedPredicate = predInfo.getValue();
  JoinPredicateInfo jpi;
  try {
    jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, combinedPredicate);
  } catch (CalciteSemanticException e) {
    throw new RuntimeException(e);
  }
  ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap.builder();
  ImmutableMap<Integer, Double> colStatMap;
  int rightOffSet = j.getLeft().getRowType().getFieldCount();
  // 1. Update the column stats map with NDVs for columns from the left side of the
  // join that are part of the join keys
  for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(ljk, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
  }
  // 2. Update the column stats map with NDVs for columns from the right side of the
  // join that are part of the join keys
  for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(rjk + rightOffSet, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
  }
  colStatMap = colStatMapBuilder.build();
  // 3. Walk through the join condition, building the NDV for selectivity.
  // The NDV of the join cannot exceed the cardinality of the cross join.
  List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
  int noOfPE = peLst.size();
  double ndvCrossProduct = 1;
  if (noOfPE > 0) {
    ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
    if (j instanceof SemiJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), ndvCrossProduct);
    } else if (j instanceof HiveJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) * mq.getRowCount(j.getRight()), ndvCrossProduct);
    } else {
      throw new RuntimeException("Unexpected Join type: " + j.getClass().getName());
    }
  }
  // 4. Join selectivity = 1/NDV
  return (1 / ndvCrossProduct);
}
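For intuition, the math at steps 3 and 4 can be exercised in isolation. The sketch below is a self-contained toy, not the Hive API: it assumes each equi-join predicate contributes max(leftNDV, rightNDV), combines the predicates with the exponential-backoff shape (largest NDV at full power, each subsequent NDV dampened as ndv^(1/2^i)), caps the result at the cross-join cardinality, and returns 1/NDV.

import java.util.Arrays;

/** Minimal standalone sketch of the selectivity math above; not the Hive API. */
public class JoinSelectivitySketch {

  // Combine per-predicate NDVs with exponential backoff:
  // largest NDV at full power, then ndv^(1/2), ndv^(1/4), ...
  static double exponentialBackoff(double... perPredicateNdv) {
    double[] ndvs = perPredicateNdv.clone();
    Arrays.sort(ndvs); // ascending; we walk it from the back
    double ndvCrossProduct = 1;
    for (int i = 0; i < ndvs.length; i++) {
      ndvCrossProduct *= Math.pow(ndvs[ndvs.length - 1 - i], Math.pow(0.5, i));
    }
    return ndvCrossProduct;
  }

  public static void main(String[] args) {
    // Two equi-join predicates whose max(leftNDV, rightNDV) are 1000 and 100:
    double ndv = exponentialBackoff(1000, 100); // 1000 * 100^0.5 = 10000
    // NDV cannot exceed the cross-join cardinality, e.g. 5000 x 2000 rows:
    ndv = Math.min(5000d * 2000d, ndv);
    System.out.println("selectivity = " + (1 / ndv)); // 1.0E-4
  }
}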
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveOpConverter, method translateJoin.
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
  // 0. Additional data structures needed for the join optimization
  // through Hive
  String[] baseSrc = new String[joinRel.getInputs().size()];
  String tabAlias = getHiveDerivedTableAlias();
  // 1. Convert inputs
  OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
  List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = dispatch(joinRel.getInput(i));
    children.add(inputs[i].inputs.get(0));
    baseSrc[i] = inputs[i].tabAlias;
  }
  // 2. Generate tags
  for (int tag = 0; tag < children.size(); tag++) {
    ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
    reduceSinkOp.getConf().setTag(tag);
  }
  // 3. Virtual columns
  Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
  newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
  if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
    int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int i = 1; i < inputs.length; i++) {
      newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
      shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName()
        + " with row type: [" + joinRel.getRowType() + "]");
  }
  // 4. Extract join key expressions from HiveSortExchange
  ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
  for (int i = 0; i < inputs.length; i++) {
    joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions();
  }
  // 5. Extract the rest of the join predicate info, i.e. the join conditions
  // that are not part of the join key and will be added to the filters
  List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }
  List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
  for (int i = 0; i < joinFilters.size(); i++) {
    List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
    if (joinFilters.get(i) != null) {
      for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
        ExprNodeDesc expr = convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
        filterExpressionsForInput.add(expr);
      }
    }
    filterExpressions.add(filterExpressionsForInput);
  }
  // 6. Generate join operator
  JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
  // 7. Return result
  return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
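Step 3 (virtual columns) is the only index arithmetic in this method: each input's virtual-column positions are rebased by the combined output width of the inputs before it. Below is a minimal standalone model of that shift, with made-up field counts and no Hive classes:

import java.util.HashSet;
import java.util.Set;

/** Standalone model of step 3 above: virtual-column indexes of each join input
 *  are rebased by the combined output width of the inputs before it (mirrors
 *  what HiveCalciteUtil.shiftVColsSet does; field counts here are made up). */
public class VColShiftSketch {

  static Set<Integer> shiftVCols(Set<Integer> vcols, int shift) {
    Set<Integer> shifted = new HashSet<>();
    for (int c : vcols) {
      shifted.add(c + shift); // rebase onto the join's combined row type
    }
    return shifted;
  }

  public static void main(String[] args) {
    // Input 0 emits 5 columns, input 1 emits 3; each exposes a vcol at index 1.
    Set<Integer> newVcols = new HashSet<>(Set.of(1)); // input 0: no shift
    newVcols.addAll(shiftVCols(Set.of(1), 5));        // input 1: shifted by 5
    System.out.println(newVcols); // [1, 6]
  }
}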
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveJoinToMultiJoinRule, method onMatch.
//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
  final HiveJoin join = call.rel(0);
  final RelNode left = call.rel(1);
  final RelNode right = call.rel(2);
  // 1. We try to merge this join with its left child
  RelNode multiJoin = mergeJoin(join, left, right);
  if (multiJoin != null) {
    call.transformTo(multiJoin);
    return;
  }
  // 2. If we cannot, we swap the inputs so we can try
  // to merge it with its right child
  RelNode swapped = JoinCommuteRule.swap(join, true);
  assert swapped != null;
  // The result of the swap operation is either
  // i) a Project on top of a join, or
  // ii) the raw join, if the project would be trivial
  final HiveJoin newJoin;
  Project topProject = null;
  if (swapped instanceof HiveJoin) {
    newJoin = (HiveJoin) swapped;
  } else {
    topProject = (Project) swapped;
    newJoin = (HiveJoin) swapped.getInput(0);
  }
  // 3. We try to merge the join with its right child
  multiJoin = mergeJoin(newJoin, right, left);
  if (multiJoin != null) {
    if (topProject != null) {
      multiJoin = projectFactory.createProject(multiJoin, topProject.getChildExps(),
          topProject.getRowType().getFieldNames());
    }
    call.transformTo(multiJoin);
    return;
  }
}
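The reason step 2 may leave a Project on top of the swapped join is column order: a join's row type is its left fields followed by its right fields, so exchanging the inputs permutes every output column. Below is a self-contained sketch of that permutation (illustrative field counts, not Calcite code); the raw-join branch above corresponds to the case where the restoring project would be trivial.

import java.util.Arrays;

/** Standalone sketch of why step 2 above may leave a Project on top of the
 *  swapped join: a join's row type is left fields followed by right fields,
 *  so exchanging the inputs permutes every output column. Field counts are
 *  illustrative; this is not Calcite code. */
public class JoinSwapSketch {

  /** For each output position i of the original join, the index where that
   *  column now sits in the swapped join's row type. */
  static int[] swapPermutation(int leftFieldCount, int rightFieldCount) {
    int[] sourceOf = new int[leftFieldCount + rightFieldCount];
    for (int i = 0; i < leftFieldCount; i++) {
      sourceOf[i] = rightFieldCount + i; // left columns now follow the right ones
    }
    for (int i = 0; i < rightFieldCount; i++) {
      sourceOf[leftFieldCount + i] = i;  // right columns now lead
    }
    return sourceOf;
  }

  public static void main(String[] args) {
    // left input has 2 columns, right input has 3
    System.out.println(Arrays.toString(swapPermutation(2, 3))); // [3, 4, 0, 1, 2]
  }
}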
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveSortJoinReduceRule, method matches.
//~ Methods ----------------------------------------------------------------
@Override
public boolean matches(RelOptRuleCall call) {
  final HiveSortLimit sortLimit = call.rel(0);
  final HiveJoin join = call.rel(1);
  // If the sort does not contain a limit operation, or the limit is 0, we bail out
  if (!HiveCalciteUtil.limitRelNode(sortLimit) || RexLiteral.intValue(sortLimit.fetch) == 0) {
    return false;
  }
  // 1) If the join is not a left or right outer join, we bail out
  // 2) If any sort column is not part of the input to which the
  //    sort is pushed, we bail out
  RelNode reducedInput;
  if (join.getJoinType() == JoinRelType.LEFT) {
    reducedInput = join.getLeft();
    if (sortLimit.getCollation() != RelCollations.EMPTY) {
      for (RelFieldCollation relFieldCollation : sortLimit.getCollation().getFieldCollations()) {
        if (relFieldCollation.getFieldIndex() >= join.getLeft().getRowType().getFieldCount()) {
          return false;
        }
      }
    }
  } else if (join.getJoinType() == JoinRelType.RIGHT) {
    reducedInput = join.getRight();
    if (sortLimit.getCollation() != RelCollations.EMPTY) {
      for (RelFieldCollation relFieldCollation : sortLimit.getCollation().getFieldCollations()) {
        if (relFieldCollation.getFieldIndex() < join.getLeft().getRowType().getFieldCount()) {
          return false;
        }
      }
    }
  } else {
    return false;
  }
  // Finally, if we would not reduce the input size, we bail out
  final int offset = sortLimit.offset == null ? 0 : RexLiteral.intValue(sortLimit.offset);
  if (offset + RexLiteral.intValue(sortLimit.fetch) >= RelMetadataQuery.instance().getRowCount(reducedInput)) {
    return false;
  }
  return true;
}
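The two collation loops reduce to a single question: do all sort columns come from the side the limit would be pushed to? Field indexes below the left input's field count belong to the left input; the rest belong to the right. A standalone sketch of the LEFT-outer case, with illustrative values:

/** Standalone sketch of the collation check above for the LEFT-outer case:
 *  the limit may only be pushed to the left input if every sort column has a
 *  field index below the left input's field count (values illustrative). */
public class SortSideCheckSketch {

  static boolean allFromLeft(int[] sortFieldIndexes, int leftFieldCount) {
    for (int idx : sortFieldIndexes) {
      if (idx >= leftFieldCount) {
        return false; // sort column comes from the right input: bail out
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // join row type: left input fields 0..3, right input fields 4..6
    System.out.println(allFromLeft(new int[] {0, 2}, 4)); // true: push to left
    System.out.println(allFromLeft(new int[] {0, 5}, 4)); // false: mixed sides
  }
}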
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveSortJoinReduceRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final HiveSortLimit sortLimit = call.rel(0);
  final HiveJoin join = call.rel(1);
  RelNode inputLeft = join.getLeft();
  RelNode inputRight = join.getRight();
  // We create a new sort operator on the corresponding input
  if (join.getJoinType() == JoinRelType.LEFT) {
    inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft, sortLimit.getCollation(),
        sortLimit.offset, sortLimit.fetch);
    ((HiveSortLimit) inputLeft).setRuleCreated(true);
  } else {
    // Adjust right collation
    final RelCollation rightCollation = RelCollationTraitDef.INSTANCE.canonize(
        RelCollations.shift(sortLimit.getCollation(), -join.getLeft().getRowType().getFieldCount()));
    inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight,
        rightCollation, sortLimit.offset, sortLimit.fetch);
    ((HiveSortLimit) inputRight).setRuleCreated(true);
  }
  // We copy the join and the top sort operator
  RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft, inputRight,
      join.getJoinType(), join.isSemiJoinDone());
  result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(),
      sortLimit.offset, sortLimit.fetch);
  call.transformTo(result);
}
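The RelCollations.shift call is what rebases the sort keys when the limit is pushed to the right input: field indexes expressed over the join's row type are translated by subtracting the left input's field count. A standalone model of that translation (illustrative numbers, not the Calcite implementation):

import java.util.Arrays;

/** Standalone model of the collation shift above: when the limit is pushed to
 *  the right input, sort-field indexes expressed over the join's row type are
 *  rebased by subtracting the left input's field count (the effect of
 *  RelCollations.shift with a negative offset; numbers illustrative). */
public class CollationShiftSketch {

  static int[] shift(int[] sortFieldIndexes, int offset) {
    int[] shifted = new int[sortFieldIndexes.length];
    for (int i = 0; i < sortFieldIndexes.length; i++) {
      shifted[i] = sortFieldIndexes[i] + offset;
    }
    return shifted;
  }

  public static void main(String[] args) {
    // join row type: left fields 0..3, right fields 4..6; sort on fields 4 and 6
    System.out.println(Arrays.toString(shift(new int[] {4, 6}, -4))); // [0, 2]
  }
}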