Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in the Apache Hive project.
Class MapJoinProcessor, method getBigTableCandidates.
/**
 * Collects the table positions that may serve as the big table of the join.
 * Only positions contained in the returned set can be used as the big table.
 *
 * <p>The join conditions are processed from left to right, and the positions
 * of every processed condition are recorded as "seen":
 * <ul>
 * <li>Full outer join: an empty set is returned immediately, because no
 * position can be the big table and the join cannot become a mapjoin.</li>
 * <li>Left outer join or left semi join: if no candidate has been collected
 * yet, the left side becomes a candidate; otherwise the existing candidates
 * are kept as they are.</li>
 * <li>Right outer join: the candidates collected so far are discarded and the
 * right side becomes the only candidate. Every other position seen so far is
 * remembered as excluded.</li>
 * <li>Inner join: taken into account only when no outer (or left semi) join
 * has been processed yet, or when the most recent one was a right outer join.
 * Each side is then added unless the last right outer join excluded it.</li>
 * <li>Any other join type only has its positions recorded.</li>
 * </ul>
 *
 * @param condns the join conditions, in left-to-right order
 * @return set of big table candidates; empty if a full outer join is present
 */
public static Set<Integer> getBigTableCandidates(JoinCondDesc[] condns) {
  Set<Integer> candidates = new HashSet<Integer>();
  Set<Integer> positionsSoFar = new HashSet<Integer>();
  // Positions excluded by the most recent right outer join.
  Set<Integer> excludedByRightOuter = new HashSet<Integer>();
  boolean outerJoinSeen = false;
  // Was the most recently processed outer join a right outer join?
  boolean latestOuterIsRight = false;

  for (JoinCondDesc condn : condns) {
    final int type = condn.getType();
    final int leftPos = condn.getLeft();
    final int rightPos = condn.getRight();
    positionsSoFar.add(leftPos);
    positionsSoFar.add(rightPos);

    if (type == JoinDesc.FULL_OUTER_JOIN) {
      // Empty set - cannot convert.
      return new HashSet<Integer>();
    }

    if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.LEFT_SEMI_JOIN) {
      outerJoinSeen = true;
      latestOuterIsRight = false;
      if (candidates.isEmpty()) {
        candidates.add(leftPos);
      }
      continue;
    }

    if (type == JoinDesc.RIGHT_OUTER_JOIN) {
      outerJoinSeen = true;
      latestOuterIsRight = true;
      // Everything seen so far except the right side is excluded.
      excludedByRightOuter.clear();
      excludedByRightOuter.addAll(positionsSoFar);
      excludedByRightOuter.remove(rightPos);
      // The right side of a right outer join replaces all earlier candidates.
      candidates.clear();
      candidates.add(rightPos);
      continue;
    }

    if (type != JoinDesc.INNER_JOIN) {
      continue;
    }
    if (outerJoinSeen && !latestOuterIsRight) {
      // An inner join following a left outer / left semi join adds nothing.
      continue;
    }
    // Add each side unless it was on the excluded side of the last right outer join.
    if (!excludedByRightOuter.contains(leftPos)) {
      candidates.add(leftPos);
    }
    if (!excludedByRightOuter.contains(rightPos)) {
      candidates.add(rightPos);
    }
  }
  return candidates;
}
Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in the Apache Hive project.
Class ConvertJoinMapJoin, method checkAndConvertSMBJoin.
/**
 * Checks whether the given join can be converted to a sort-merge-bucket (SMB)
 * join and converts it if so; otherwise falls back to a reduce-side join.
 *
 * @param context optimizer context supplying the configuration and parse context
 * @param joinOp the join operator to convert
 * @param tezBucketJoinProcCtx bucket join context; its bucket count is used for the conversion
 * @param maxSize passed through to {@code fallbackToReduceSideJoin}
 * @return {@code null} on every path except when no big table candidate
 *         exists, in which case {@code Boolean.FALSE} is returned
 * @throws SemanticException if the configured big table selector class cannot be loaded
 */
@SuppressWarnings("unchecked")
private Object checkAndConvertSMBJoin(OptimizeTezProcContext context, JoinOperator joinOp, TezBucketJoinProcCtx tezBucketJoinProcCtx, final long maxSize) throws SemanticException {
  // Check if we can convert to SMB join: auto sort-merge join must be enabled,
  // and when the join has two or more reduce sinks the reduce variant of the
  // setting must be enabled as well. Otherwise use a reduce-side join.
  if (!(HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) || ((!HiveConf.getBoolVar(context.conf, ConfVars.HIVE_AUTO_SORTMERGE_JOIN_REDUCE)) && joinOp.getOpTraits().getNumReduceSinks() >= 2)) {
    fallbackToReduceSideJoin(joinOp, context, maxSize);
    return null;
  }
  // Load the configured big table selector implementation by class name.
  Class<? extends BigTableSelectorForAutoSMJ> bigTableMatcherClass = null;
  try {
    String selector = HiveConf.getVar(context.parseContext.getConf(), HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR);
    bigTableMatcherClass = JavaUtils.loadClass(selector);
  } catch (ClassNotFoundException e) {
    // Keep the original exception as the cause so its stack trace is not lost.
    throw new SemanticException(e.getMessage(), e);
  }
  BigTableSelectorForAutoSMJ bigTableMatcher = ReflectionUtils.newInstance(bigTableMatcherClass, null);
  JoinDesc joinDesc = joinOp.getConf();
  JoinCondDesc[] joinCondns = joinDesc.getConds();
  Set<Integer> joinCandidates = MapJoinProcessor.getBigTableCandidates(joinCondns);
  if (joinCandidates.isEmpty()) {
    // No position can act as the big table, so this can never be a map-join
    // of any type. So return false.
    // NOTE(review): unlike the other exits, this path returns Boolean.FALSE
    // and does not call fallbackToReduceSideJoin - confirm this is intended.
    return false;
  }
  int mapJoinConversionPos = bigTableMatcher.getBigTablePosition(context.parseContext, joinOp, joinCandidates);
  if (mapJoinConversionPos < 0) {
    // The selector found no usable big table position (original note:
    // contains aliases from sub-query).
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    fallbackToReduceSideJoin(joinOp, context, maxSize);
    return null;
  }
  if (checkConvertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx)) {
    convertJoinSMBJoin(joinOp, context, mapJoinConversionPos, tezBucketJoinProcCtx.getNumBuckets(), true);
  } else {
    // we are just converting to a common merge join operator. The shuffle
    // join in map-reduce case.
    fallbackToReduceSideJoin(joinOp, context, maxSize);
  }
  return null;
}
Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in the Apache Hive project.
Class MapJoinProcessor, method getBigTableCandidates.
/**
 * Collects the table positions that may serve as the big table of the join.
 *
 * <p>A join made of exactly one full outer condition is handled up front:
 * both sides are candidates when {@code isSupportFullOuter} is set, otherwise
 * the result is empty. In every other case the conditions are processed from
 * left to right, recording the positions of each processed condition:
 * <ul>
 * <li>Full outer join: an empty set is returned immediately.</li>
 * <li>Left outer, left semi or anti join: the left side becomes a candidate
 * if none has been collected yet; otherwise the candidates are kept.</li>
 * <li>Right outer join: earlier candidates are discarded and the right side
 * becomes the only candidate; every other position seen so far is remembered
 * as excluded.</li>
 * <li>Inner join: considered only when none of the join types above has been
 * processed yet, or when the most recent of them was a right outer join.
 * Each side is then added unless the last right outer join excluded it.</li>
 * </ul>
 *
 * @param condns the join conditions, in left-to-right order
 * @param isSupportFullOuter whether a single-condition full outer join may
 *        yield candidates
 * @return set of big table candidates, possibly empty
 */
public static Set<Integer> getBigTableCandidates(JoinCondDesc[] condns, boolean isSupportFullOuter) {
  Set<Integer> result = new HashSet<Integer>();

  // FULL OUTER MapJoin must be a single condition.
  if (condns.length == 1 && condns[0].getType() == JoinDesc.FULL_OUTER_JOIN) {
    if (isSupportFullOuter) {
      result.add(condns[0].getLeft());
      result.add(condns[0].getRight());
    }
    return result;
  }

  Set<Integer> visited = new HashSet<Integer>();
  // Positions excluded by the most recent right outer join.
  Set<Integer> barred = new HashSet<Integer>();
  boolean sawOuter = false;
  // Was the most recently processed outer join a right outer join?
  boolean rightOuterIsLatest = false;

  for (JoinCondDesc cond : condns) {
    final int kind = cond.getType();
    if (kind == JoinDesc.FULL_OUTER_JOIN) {
      // A full outer join among several conditions rules out every position.
      return new HashSet<Integer>();
    }

    final int lhs = cond.getLeft();
    final int rhs = cond.getRight();
    visited.add(lhs);
    visited.add(rhs);

    if (kind == JoinDesc.LEFT_OUTER_JOIN || kind == JoinDesc.LEFT_SEMI_JOIN || kind == JoinDesc.ANTI_JOIN) {
      sawOuter = true;
      rightOuterIsLatest = false;
      if (result.isEmpty()) {
        result.add(lhs);
      }
    } else if (kind == JoinDesc.RIGHT_OUTER_JOIN) {
      sawOuter = true;
      rightOuterIsLatest = true;
      // Everything seen so far except the right side is excluded.
      barred.clear();
      barred.addAll(visited);
      barred.remove(rhs);
      result.clear();
      result.add(rhs);
    } else if (kind == JoinDesc.INNER_JOIN && (!sawOuter || rightOuterIsLatest)) {
      // Add each side unless it was on the excluded side of the last right outer join.
      if (!barred.contains(lhs)) {
        result.add(lhs);
      }
      if (!barred.contains(rhs)) {
        result.add(rhs);
      }
    }
  }
  return result;
}
Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in the Apache Hive project.
Class TopNKeyPushdownProcessor, method pushDownThroughJoin.
// Pushes the TopNKey operator through its parent join when every join
// condition equals the first one and that condition is either a Left Outer
// Join or an Inner Join that the join descriptor reports as a PK-FK join.
// For any other join nothing is done.
// Right and Full Outer Join support will be added in a follow up patch.
private void pushDownThroughJoin(TopNKeyOperator topNKey) throws SemanticException {
  CommonJoinOperator<? extends JoinDesc> joinOp = (CommonJoinOperator<? extends JoinDesc>) topNKey.getParentOperators().get(0);
  JoinDesc desc = joinOp.getConf();
  JoinCondDesc[] conds = desc.getConds();
  JoinCondDesc reference = conds[0];

  // Give up as soon as one condition differs from the first.
  for (int i = 0; i < conds.length; i++) {
    if (!reference.equals(conds[i])) {
      return;
    }
  }

  if (reference.getType() == JoinDesc.LEFT_OUTER_JOIN) {
    pushdownThroughLeftOuterJoin(topNKey);
    return;
  }
  if (reference.getType() == JoinDesc.INNER_JOIN && desc.isPkFkJoin()) {
    pushdownInnerJoin(topNKey, desc.getFkJoinTableIndex(), desc.isNonFkSideIsFiltered());
  }
}
Use of org.apache.hadoop.hive.ql.plan.JoinCondDesc in the Apache Hive project.
Class PredicateTransitivePropagate, method getTargets.
// Calculates the filter propagation directions for each alias.
// Directions registered per join condition:
//   inner / left semi join  : L<->R
//   left outer / anti join  : L->R
//   right outer join        : R->L
//   full outer join         : none
// The result holds one entry per alias position (number of conditions + 1),
// each being whatever Vectors.traverse returns for that position.
public static int[][] getTargets(CommonJoinOperator<JoinDesc> join) {
  JoinCondDesc[] joinConds = join.getConf().getConds();
  int aliasCount = joinConds.length + 1;
  Vectors directions = new Vectors(aliasCount);

  for (JoinCondDesc joinCond : joinConds) {
    int lhs = joinCond.getLeft();
    int rhs = joinCond.getRight();
    int type = joinCond.getType();

    if (type == JoinDesc.INNER_JOIN || type == JoinDesc.LEFT_SEMI_JOIN) {
      directions.add(lhs, rhs);
      directions.add(rhs, lhs);
    } else if (type == JoinDesc.LEFT_OUTER_JOIN || type == JoinDesc.ANTI_JOIN) {
      directions.add(lhs, rhs);
    } else if (type == JoinDesc.RIGHT_OUTER_JOIN) {
      directions.add(rhs, lhs);
    }
    // A full outer join (or any other type) registers no direction.
  }

  int[][] targets = new int[aliasCount][];
  int position = 0;
  while (position < aliasCount) {
    // find all targets recursively
    targets[position] = directions.traverse(position);
    position++;
  }
  return targets;
}
Aggregations