Search in sources :

Example 1 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class MapJoinProcessor method getBigTableCandidates.

/**
 * Computes the table positions that are allowed to act as the big table of a
 * map join.
 *
 * <p>The join conditions are visited in array order and handled by type:
 * <ul>
 *   <li><b>Full outer join</b>: an empty set is returned immediately, i.e. no
 *       position qualifies and the join cannot become a map join.</li>
 *   <li><b>Left outer / left semi join</b>: when no candidate has been
 *       collected yet, the left position becomes a candidate; otherwise the
 *       existing candidates are left untouched.</li>
 *   <li><b>Right outer join</b>: all candidates collected so far are dropped
 *       and replaced by the right position. Every other position seen up to
 *       and including this condition is recorded as excluded.</li>
 *   <li><b>Inner join</b>: only considered when no outer join has been seen
 *       yet, or when the most recently seen outer join was a right outer
 *       join. Each side is then added unless it is in the excluded set
 *       recorded by the last right outer join.</li>
 * </ul>
 * Any other join type only contributes its positions to the "seen" set.
 *
 * @param condns the join conditions, scanned from first to last
 * @return the positions that may be used as the big table; empty if none
 */
public static Set<Integer> getBigTableCandidates(JoinCondDesc[] condns) {
    Set<Integer> candidates = new HashSet<>();
    Set<Integer> positionsSoFar = new HashSet<>();
    // Positions that sat to the left of the most recent right outer join.
    Set<Integer> excludedByRightOuter = new HashSet<>();
    boolean outerJoinSeen = false;
    // True while the most recently seen outer join is a right outer join.
    boolean lastOuterWasRight = false;

    for (JoinCondDesc condn : condns) {
        final int leftPos = condn.getLeft();
        final int rightPos = condn.getRight();
        positionsSoFar.add(leftPos);
        positionsSoFar.add(rightPos);

        switch (condn.getType()) {
            case JoinDesc.FULL_OUTER_JOIN:
                // Nothing can be the big table once a full outer join shows up.
                return new HashSet<>();

            case JoinDesc.LEFT_OUTER_JOIN:
            case JoinDesc.LEFT_SEMI_JOIN:
                outerJoinSeen = true;
                lastOuterWasRight = false;
                if (candidates.isEmpty()) {
                    candidates.add(leftPos);
                }
                break;

            case JoinDesc.RIGHT_OUTER_JOIN:
                outerJoinSeen = true;
                lastOuterWasRight = true;
                // Everything seen so far except the right side is now excluded.
                excludedByRightOuter.clear();
                excludedByRightOuter.addAll(positionsSoFar);
                excludedByRightOuter.remove(rightPos);
                // The right side of a right outer join replaces all earlier candidates.
                candidates.clear();
                candidates.add(rightPos);
                break;

            case JoinDesc.INNER_JOIN:
                if (outerJoinSeen && !lastOuterWasRight) {
                    // A left-type outer join is in effect; candidates stay as they are.
                    break;
                }
                if (!excludedByRightOuter.contains(leftPos)) {
                    candidates.add(leftPos);
                }
                if (!excludedByRightOuter.contains(rightPos)) {
                    candidates.add(rightPos);
                }
                break;

            default:
                // Other join types do not affect the candidate set.
                break;
        }
    }
    return candidates;
}
Also used : JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) HashSet(java.util.HashSet)

Example 2 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class ConvertJoinMapJoin method checkAndConvertSMBJoin.

/**
 * Checks whether the given join can be converted to an SMB join and, if so,
 * performs the conversion; otherwise falls back to a reduce-side join.
 *
 * <p>The conversion is skipped (with fallback) when
 * {@code HIVE_AUTO_SORTMERGE_JOIN} is disabled, or when
 * {@code HIVE_AUTO_SORTMERGE_JOIN_REDUCE} is disabled and the join has two or
 * more reduce sinks according to its operator traits.
 *
 * @param context optimizer context supplying the configuration and parse context
 * @param joinOp the join operator to examine
 * @param tezBucketJoinProcCtx bucket-join context passed to the SMB check; its
 *        bucket count is used for the conversion
 * @param maxSize size limit handed to {@code fallbackToReduceSideJoin}
 * @return {@code null} on every path except when no big-table candidate
 *         exists, in which case {@code Boolean.FALSE} is returned
 * @throws SemanticException if the configured big-table selector class cannot
 *         be loaded; the {@link ClassNotFoundException} is attached as cause
 */
@SuppressWarnings("unchecked")
private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx, final long maxSize) throws SemanticException {
    // SMB conversion is disabled by configuration, or it is disabled for
    // reduce-side inputs and this join has at least two reduce sinks.
    if (!(HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) || ((!HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN_REDUCE)) && joinOp.getOpTraits().getNumReduceSinks() >= 2)) {
        fallbackToReduceSideJoin(joinOp, context, maxSize);
        return null;
    }
    Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
    try {
        String selector = HiveConf.getVar(context.parseContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
        bigTableMatcherClass = JavaUtils.loadClass(selector);
    } catch (ClassNotFoundException e) {
        // Keep the original exception as the cause so the stack trace of the
        // failed class lookup is not lost.
        throw new SemanticException(e.getMessage(), e);
    }
    BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
    JoinDesc joinDesc = joinOp.getConf();
    JoinCondDesc[] joinCondns = joinDesc.getConds();
    Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
    if (joinCandidates.isEmpty()) {
        // No position may act as the big table, so this join can never be a
        // map-join of any type. So return false.
        // NOTE(review): this path returns Boolean.FALSE while every other path
        // returns null and no fallback is invoked here - confirm callers do
        // not rely on the difference before changing it.
        return false;
    }
    int mapJoinConversionPos = bigTableMatcher.getBigTablePosition(context.parseContext, joinOp, joinCandidates);
    if (mapJoinConversionPos < 0) {
        // The selector could not pick a big table position (original note:
        // "contains aliases from sub-query").
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context, maxSize);
        return null;
    }
    if (checkConvertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
        convertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx.getNumBuckets(), true);
    } else {
        // we are just converting to a common merge join operator. The shuffle
        // join in map-reduce case.
        fallbackToReduceSideJoin(joinOp, context, maxSize);
    }
    return null;
}
Also used : MapJoinDesc(org.apache.hadoop.hive.ql.plan.MapJoinDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) CommonMergeJoinDesc(org.apache.hadoop.hive.ql.plan.CommonMergeJoinDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 3 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class MapJoinProcessor method getBigTableCandidates.

/**
 * Computes the table positions that are allowed to act as the big table of a
 * map join, optionally permitting a lone full outer join.
 *
 * <p>If there is exactly one condition and it is a full outer join, both of
 * its sides are returned when {@code isSupportFullOuter} is set, and an empty
 * set otherwise. In every other case the conditions are scanned in order:
 * <ul>
 *   <li>a full outer join yields an empty set immediately;</li>
 *   <li>a left outer, left semi or anti join contributes its left position,
 *       but only if no candidate exists yet;</li>
 *   <li>a right outer join replaces all candidates with its right position
 *       and marks every other position seen so far as excluded;</li>
 *   <li>an inner join adds each non-excluded side, provided no outer join has
 *       been seen or the last outer join seen was a right outer join.</li>
 * </ul>
 *
 * @param condns the join conditions, scanned from first to last
 * @param isSupportFullOuter whether a single full outer join condition may be
 *        converted
 * @return the positions that may be used as the big table; empty if none
 */
public static Set<Integer> getBigTableCandidates(JoinCondDesc[] condns, boolean isSupportFullOuter) {
    Set<Integer> candidates = new HashSet<>();

    // FULL OUTER MapJoin must be a single condition.
    if (condns.length == 1 && condns[0].getType() == JoinDesc.FULL_OUTER_JOIN) {
        if (isSupportFullOuter) {
            candidates.add(condns[0].getLeft());
            candidates.add(condns[0].getRight());
        }
        // Still empty here when full outer conversion is not supported.
        return candidates;
    }

    Set<Integer> positionsSoFar = new HashSet<>();
    // Positions that sat to the left of the most recent right outer join.
    Set<Integer> excludedByRightOuter = new HashSet<>();
    boolean outerJoinSeen = false;
    // True while the most recently seen outer join is a right outer join.
    boolean lastOuterWasRight = false;

    for (JoinCondDesc condn : condns) {
        final int type = condn.getType();
        if (type == JoinDesc.FULL_OUTER_JOIN) {
            // A full outer join mixed with other conditions cannot be converted.
            return new HashSet<>();
        }

        final int leftPos = condn.getLeft();
        final int rightPos = condn.getRight();
        positionsSoFar.add(leftPos);
        positionsSoFar.add(rightPos);

        final boolean leftAnchored = type == JoinDesc.LEFT_OUTER_JOIN
            || type == JoinDesc.LEFT_SEMI_JOIN
            || type == JoinDesc.ANTI_JOIN;
        if (leftAnchored) {
            outerJoinSeen = true;
            lastOuterWasRight = false;
            if (candidates.isEmpty()) {
                candidates.add(leftPos);
            }
            continue;
        }

        if (type == JoinDesc.RIGHT_OUTER_JOIN) {
            outerJoinSeen = true;
            lastOuterWasRight = true;
            // Everything seen so far except the right side is now excluded.
            excludedByRightOuter.clear();
            excludedByRightOuter.addAll(positionsSoFar);
            excludedByRightOuter.remove(rightPos);
            candidates.clear();
            candidates.add(rightPos);
            continue;
        }

        // Only inner joins can add candidates from here on, and only while no
        // left-type outer join is the most recent outer join.
        if (type != JoinDesc.INNER_JOIN || (outerJoinSeen && !lastOuterWasRight)) {
            continue;
        }
        if (!excludedByRightOuter.contains(leftPos)) {
            candidates.add(leftPos);
        }
        if (!excludedByRightOuter.contains(rightPos)) {
            candidates.add(rightPos);
        }
    }
    return candidates;
}
Also used : JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc) HashSet(java.util.HashSet)

Example 4 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class TopNKeyPushdownProcessor method pushDownThroughJoin.

/**
 * Pushes a TopNKey operator through its parent join when the join shape
 * allows it.
 *
 * <p>Nothing happens unless every join condition equals the first one. When
 * they all match, a left outer join is handled by
 * {@code pushdownThroughLeftOuterJoin}, and an inner join flagged as a PK-FK
 * join is handled by {@code pushdownInnerJoin}. Any other join type, including
 * right and full outer joins, is left untouched.
 *
 * @param topNKey the TopNKey operator whose first parent is the join
 * @throws SemanticException declared for the delegated push-down steps
 */
private void pushDownThroughJoin(TopNKeyOperator topNKey) throws SemanticException {
    CommonJoinOperator<? extends JoinDesc> joinOperator = (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
    JoinDesc joinDesc = joinOperator.getConf();
    JoinCondDesc[] conditions = joinDesc.getConds();
    JoinCondDesc reference = conditions[0];

    // Bail out as soon as one condition differs from the first.
    for (int i = 0; i < conditions.length; i++) {
        if (!reference.equals(conditions[i])) {
            return;
        }
    }

    switch (reference.getType()) {
        case JoinDesc.LEFT_OUTER_JOIN:
            pushdownThroughLeftOuterJoin(topNKey);
            break;
        case JoinDesc.INNER_JOIN:
            if (joinDesc.isPkFkJoin()) {
                pushdownInnerJoin(topNKey, joinDesc.getFkJoinTableIndex(), joinDesc.isNonFkSideIsFiltered());
            }
            break;
        default:
            // Unsupported join type: no push-down.
            break;
    }
}
Also used : CommonJoinOperator(org.apache.hadoop.hive.ql.exec.CommonJoinOperator) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)

Example 5 with JoinCondDesc

use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in project hive by apache.

the class PredicateTransitivePropagate method getTargets.

/**
 * Calculates, for each alias of the join, the aliases that its filters may be
 * propagated to.
 *
 * <p>Propagation directions per join type: both ways (L&lt;-&gt;R) for inner
 * and left semi joins, L-&gt;R for left outer and anti joins, R-&gt;L for
 * right outer joins, and none for full outer joins.
 *
 * @param join the join operator whose conditions are inspected
 * @return an array indexed by alias position; each entry holds the targets
 *         reported by {@code Vectors.traverse} for that position
 */
public static int[][] getTargets(CommonJoinOperator<JoinDesc> join) {
    JoinCondDesc[] conds = join.getConf().getConds();
    // presumably n join conditions connect n + 1 aliases - confirm against callers
    final int aliasCount = conds.length + 1;
    Vectors edges = new Vectors(aliasCount);

    for (JoinCondDesc cond : conds) {
        final int left = cond.getLeft();
        final int right = cond.getRight();
        final int type = cond.getType();

        final boolean bothWays = type == JoinDesc.INNER_JOIN || type == JoinDesc.LEFT_SEMI_JOIN;
        final boolean leftToRight = bothWays || type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.ANTI_JOIN;
        final boolean rightToLeft = bothWays || type == JoinDesc.RIGHT_OUTER_JOIN;

        // Full outer joins (and any other type) set neither flag and add no edge.
        if (leftToRight) {
            edges.add(left, right);
        }
        if (rightToLeft) {
            edges.add(right, left);
        }
    }

    int[][] targets = new int[aliasCount][];
    int pos = 0;
    while (pos < aliasCount) {
        // find all targets recursively
        targets[pos] = edges.traverse(pos);
        pos++;
    }
    return targets;
}
Also used : JoinCondDesc(org.apache.hadoop.hive.ql.plan.JoinCondDesc)

Aggregations

JoinCondDesc (org.apache.hadoop.hive.ql.plan.JoinCondDesc)22 JoinDesc (org.apache.hadoop.hive.ql.plan.JoinDesc)12 MapJoinDesc (org.apache.hadoop.hive.ql.plan.MapJoinDesc)11 ArrayList (java.util.ArrayList)10 List (java.util.List)9 HashMap (java.util.HashMap)8 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)8 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)8 JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator)7 Operator (org.apache.hadoop.hive.ql.exec.Operator)7 ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator)7 HashSet (java.util.HashSet)6 LinkedHashMap (java.util.LinkedHashMap)5 ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo)5 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)5 ReduceSinkDesc (org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)5 Set (java.util.Set)4 RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema)4 SMBMapJoinOperator (org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator)4 SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator)4