Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveRelMdSelectivity, method computeInnerJoinSelectivity.
private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
  Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate);
  if (!predInfo.getKey()) {
    return new FilterSelectivityEstimator(j).estimateSelectivity(predInfo.getValue());
  }
  RexNode combinedPredicate = predInfo.getValue();
  JoinPredicateInfo jpi;
  try {
    jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, combinedPredicate);
  } catch (CalciteSemanticException e) {
    throw new RuntimeException(e);
  }
  ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap.builder();
  ImmutableMap<Integer, Double> colStatMap;
  int rightOffSet = j.getLeft().getRowType().getFieldCount();
  // 1. Update the column stats map with NDVs for columns from the left side of the
  // join that are part of the join keys
  for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(ljk, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
  }
  // 2. Update the column stats map with NDVs for columns from the right side of the
  // join that are part of the join keys
  for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
    colStatMapBuilder.put(rjk + rightOffSet, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
  }
  colStatMap = colStatMapBuilder.build();
  // 3. Walk through the join condition, building the NDV for selectivity.
  // The NDV of the join cannot exceed the cardinality of the cross join.
  List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
  int noOfPE = peLst.size();
  double ndvCrossProduct = 1;
  if (noOfPE > 0) {
    ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
    if (j instanceof SemiJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), ndvCrossProduct);
    } else if (j instanceof HiveJoin) {
      ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) * mq.getRowCount(j.getRight()), ndvCrossProduct);
    } else {
      throw new RuntimeException("Unexpected Join type: " + j.getClass().getName());
    }
  }
  // 4. Join selectivity = 1/NDV
  return (1 / ndvCrossProduct);
}
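For intuition, the math at steps 3 and 4 can be exercised in isolation. The sketch below is a self-contained toy, not the Hive API: it assumes each equi-join predicate contributes max(leftNDV, rightNDV), combines the predicates with the exponential-backoff shape (largest NDV at full power, each subsequent NDV dampened as ndv^(1/2^i)), caps the result at the cross-join cardinality, and returns 1/NDV.

import java.util.Arrays;

/** Minimal standalone sketch of the selectivity math above; not the Hive API. */
public class JoinSelectivitySketch {

  // Combine per-predicate NDVs with exponential backoff:
  // largest NDV at full power, then ndv^(1/2), ndv^(1/4), ...
  static double exponentialBackoff(double... perPredicateNdv) {
    double[] ndvs = perPredicateNdv.clone();
    Arrays.sort(ndvs); // ascending; we walk it from the back
    double ndvCrossProduct = 1;
    for (int i = 0; i < ndvs.length; i++) {
      ndvCrossProduct *= Math.pow(ndvs[ndvs.length - 1 - i], Math.pow(0.5, i));
    }
    return ndvCrossProduct;
  }

  public static void main(String[] args) {
    // Two equi-join predicates whose max(leftNDV, rightNDV) are 1000 and 100:
    double ndv = exponentialBackoff(1000, 100); // 1000 * 100^0.5 = 10000
    // NDV cannot exceed the cross-join cardinality, e.g. 5000 x 2000 rows:
    ndv = Math.min(5000d * 2000d, ndv);
    System.out.println("selectivity = " + (1 / ndv)); // 1.0E-4
  }
}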
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveOpConverter, method translateJoin.
private OpAttr translateJoin(RelNode joinRel) throws SemanticException {
  // 0. Additional data structures needed for the join optimization
  // through Hive
  String[] baseSrc = new String[joinRel.getInputs().size()];
  String tabAlias = getHiveDerivedTableAlias();
  // 1. Convert inputs
  OpAttr[] inputs = new OpAttr[joinRel.getInputs().size()];
  List<Operator<?>> children = new ArrayList<Operator<?>>(joinRel.getInputs().size());
  for (int i = 0; i < inputs.length; i++) {
    inputs[i] = dispatch(joinRel.getInput(i));
    children.add(inputs[i].inputs.get(0));
    baseSrc[i] = inputs[i].tabAlias;
  }
  // 2. Generate tags
  for (int tag = 0; tag < children.size(); tag++) {
    ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator) children.get(tag);
    reduceSinkOp.getConf().setTag(tag);
  }
  // 3. Virtual columns
  Set<Integer> newVcolsInCalcite = new HashSet<Integer>();
  newVcolsInCalcite.addAll(inputs[0].vcolsInCalcite);
  if (joinRel instanceof HiveMultiJoin || !(joinRel instanceof SemiJoin)) {
    int shift = inputs[0].inputs.get(0).getSchema().getSignature().size();
    for (int i = 1; i < inputs.length; i++) {
      newVcolsInCalcite.addAll(HiveCalciteUtil.shiftVColsSet(inputs[i].vcolsInCalcite, shift));
      shift += inputs[i].inputs.get(0).getSchema().getSignature().size();
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + joinRel.getId() + ":" + joinRel.getRelTypeName()
        + " with row type: [" + joinRel.getRowType() + "]");
  }
  // 4. Extract join key expressions from HiveSortExchange
  ExprNodeDesc[][] joinExpressions = new ExprNodeDesc[inputs.length][];
  for (int i = 0; i < inputs.length; i++) {
    joinExpressions[i] = ((HiveSortExchange) joinRel.getInput(i)).getJoinExpressions();
  }
  // 5. Extract the rest of the join predicate info, i.e. the join conditions
  // that are not part of the join key and will be added to the filters
  List<RexNode> joinFilters;
  if (joinRel instanceof HiveJoin) {
    joinFilters = ImmutableList.of(((HiveJoin) joinRel).getJoinFilter());
  } else if (joinRel instanceof HiveMultiJoin) {
    joinFilters = ((HiveMultiJoin) joinRel).getJoinFilters();
  } else if (joinRel instanceof HiveSemiJoin) {
    joinFilters = ImmutableList.of(((HiveSemiJoin) joinRel).getJoinFilter());
  } else {
    throw new SemanticException("Can't handle join type: " + joinRel.getClass().getName());
  }
  List<List<ExprNodeDesc>> filterExpressions = Lists.newArrayList();
  for (int i = 0; i < joinFilters.size(); i++) {
    List<ExprNodeDesc> filterExpressionsForInput = new ArrayList<ExprNodeDesc>();
    if (joinFilters.get(i) != null) {
      for (RexNode conj : RelOptUtil.conjunctions(joinFilters.get(i))) {
        ExprNodeDesc expr = convertToExprNode(conj, joinRel, null, newVcolsInCalcite);
        filterExpressionsForInput.add(expr);
      }
    }
    filterExpressions.add(filterExpressionsForInput);
  }
  // 6. Generate join operator
  JoinOperator joinOp = genJoin(joinRel, joinExpressions, filterExpressions, children, baseSrc, tabAlias);
  // 7. Return result
  return new OpAttr(tabAlias, newVcolsInCalcite, joinOp);
}
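Step 3 (virtual columns) is the only index arithmetic in this method: each input's virtual-column positions are rebased by the combined output width of the inputs before it. Below is a minimal standalone model of that shift, with made-up field counts and no Hive classes:

import java.util.HashSet;
import java.util.Set;

/** Standalone model of step 3 above: virtual-column indexes of each join input
 *  are rebased by the combined output width of the inputs before it (mirrors
 *  what HiveCalciteUtil.shiftVColsSet does; field counts here are made up). */
public class VColShiftSketch {

  static Set<Integer> shiftVCols(Set<Integer> vcols, int shift) {
    Set<Integer> shifted = new HashSet<>();
    for (int c : vcols) {
      shifted.add(c + shift); // rebase onto the join's combined row type
    }
    return shifted;
  }

  public static void main(String[] args) {
    // Input 0 emits 5 columns, input 1 emits 3; each exposes a vcol at index 1.
    Set<Integer> newVcols = new HashSet<>(Set.of(1)); // input 0: no shift
    newVcols.addAll(shiftVCols(Set.of(1), 5));        // input 1: shifted by 5
    System.out.println(newVcols); // [1, 6]
  }
}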
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveJoinToMultiJoinRule, method onMatch.
//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
  final HiveJoin join = call.rel(0);
  final RelNode left = call.rel(1);
  final RelNode right = call.rel(2);
  // 1. We try to merge this join with its left child
  RelNode multiJoin = mergeJoin(join, left, right);
  if (multiJoin != null) {
    call.transformTo(multiJoin);
    return;
  }
  // 2. If we cannot, we swap the inputs so we can try
  // to merge it with its right child
  RelNode swapped = JoinCommuteRule.swap(join, true);
  assert swapped != null;
  // The result of the swap operation is either
  // i) a Project on top of a join, or
  // ii) the raw join, if the project would be trivial
  final HiveJoin newJoin;
  Project topProject = null;
  if (swapped instanceof HiveJoin) {
    newJoin = (HiveJoin) swapped;
  } else {
    topProject = (Project) swapped;
    newJoin = (HiveJoin) swapped.getInput(0);
  }
  // 3. We try to merge the join with its right child
  multiJoin = mergeJoin(newJoin, right, left);
  if (multiJoin != null) {
    if (topProject != null) {
      multiJoin = projectFactory.createProject(multiJoin, topProject.getChildExps(),
          topProject.getRowType().getFieldNames());
    }
    call.transformTo(multiJoin);
    return;
  }
}
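The reason step 2 may leave a Project on top of the swapped join is column order: a join's row type is its left fields followed by its right fields, so exchanging the inputs permutes every output column. Below is a self-contained sketch of that permutation (illustrative field counts, not Calcite code); the raw-join branch above corresponds to the case where the restoring project would be trivial.

import java.util.Arrays;

/** Standalone sketch of why step 2 above may leave a Project on top of the
 *  swapped join: a join's row type is left fields followed by right fields,
 *  so exchanging the inputs permutes every output column. Field counts are
 *  illustrative; this is not Calcite code. */
public class JoinSwapSketch {

  /** For each output position i of the original join, the index where that
   *  column now sits in the swapped join's row type. */
  static int[] swapPermutation(int leftFieldCount, int rightFieldCount) {
    int[] sourceOf = new int[leftFieldCount + rightFieldCount];
    for (int i = 0; i < leftFieldCount; i++) {
      sourceOf[i] = rightFieldCount + i; // left columns now follow the right ones
    }
    for (int i = 0; i < rightFieldCount; i++) {
      sourceOf[leftFieldCount + i] = i;  // right columns now lead
    }
    return sourceOf;
  }

  public static void main(String[] args) {
    // left input has 2 columns, right input has 3
    System.out.println(Arrays.toString(swapPermutation(2, 3))); // [3, 4, 0, 1, 2]
  }
}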
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveSortJoinReduceRule, method matches.
//~ Methods ----------------------------------------------------------------
@Override
public boolean matches(RelOptRuleCall call) {
  final HiveSortLimit sortLimit = call.rel(0);
  final HiveJoin join = call.rel(1);
  // If the sort does not contain a limit operation, or the limit is 0, we bail out
  if (!HiveCalciteUtil.limitRelNode(sortLimit) || RexLiteral.intValue(sortLimit.fetch) == 0) {
    return false;
  }
  // 1) If the join is not a left or right outer join, we bail out
  // 2) If any sort column is not part of the input to which the
  //    sort is pushed, we bail out
  RelNode reducedInput;
  if (join.getJoinType() == JoinRelType.LEFT) {
    reducedInput = join.getLeft();
    if (sortLimit.getCollation() != RelCollations.EMPTY) {
      for (RelFieldCollation relFieldCollation : sortLimit.getCollation().getFieldCollations()) {
        if (relFieldCollation.getFieldIndex() >= join.getLeft().getRowType().getFieldCount()) {
          return false;
        }
      }
    }
  } else if (join.getJoinType() == JoinRelType.RIGHT) {
    reducedInput = join.getRight();
    if (sortLimit.getCollation() != RelCollations.EMPTY) {
      for (RelFieldCollation relFieldCollation : sortLimit.getCollation().getFieldCollations()) {
        if (relFieldCollation.getFieldIndex() < join.getLeft().getRowType().getFieldCount()) {
          return false;
        }
      }
    }
  } else {
    return false;
  }
  // Finally, if we would not reduce the input size, we bail out
  final int offset = sortLimit.offset == null ? 0 : RexLiteral.intValue(sortLimit.offset);
  if (offset + RexLiteral.intValue(sortLimit.fetch) >= RelMetadataQuery.instance().getRowCount(reducedInput)) {
    return false;
  }
  return true;
}
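The two collation loops reduce to a single question: do all sort columns come from the side the limit would be pushed to? Field indexes below the left input's field count belong to the left input; the rest belong to the right. A standalone sketch of the LEFT-outer case, with illustrative values:

/** Standalone sketch of the collation check above for the LEFT-outer case:
 *  the limit may only be pushed to the left input if every sort column has a
 *  field index below the left input's field count (values illustrative). */
public class SortSideCheckSketch {

  static boolean allFromLeft(int[] sortFieldIndexes, int leftFieldCount) {
    for (int idx : sortFieldIndexes) {
      if (idx >= leftFieldCount) {
        return false; // sort column comes from the right input: bail out
      }
    }
    return true;
  }

  public static void main(String[] args) {
    // join row type: left input fields 0..3, right input fields 4..6
    System.out.println(allFromLeft(new int[] {0, 2}, 4)); // true: push to left
    System.out.println(allFromLeft(new int[] {0, 5}, 4)); // false: mixed sides
  }
}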
Use of org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin in project hive by apache.
The class HiveSortJoinReduceRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final HiveSortLimit sortLimit = call.rel(0);
  final HiveJoin join = call.rel(1);
  RelNode inputLeft = join.getLeft();
  RelNode inputRight = join.getRight();
  // We create a new sort operator on the corresponding input
  if (join.getJoinType() == JoinRelType.LEFT) {
    inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft, sortLimit.getCollation(),
        sortLimit.offset, sortLimit.fetch);
    ((HiveSortLimit) inputLeft).setRuleCreated(true);
  } else {
    // Adjust right collation
    final RelCollation rightCollation = RelCollationTraitDef.INSTANCE.canonize(
        RelCollations.shift(sortLimit.getCollation(), -join.getLeft().getRowType().getFieldCount()));
    inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight,
        rightCollation, sortLimit.offset, sortLimit.fetch);
    ((HiveSortLimit) inputRight).setRuleCreated(true);
  }
  // We copy the join and the top sort operator
  RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft, inputRight,
      join.getJoinType(), join.isSemiJoinDone());
  result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(),
      sortLimit.offset, sortLimit.fetch);
  call.transformTo(result);
}
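The RelCollations.shift call is what rebases the sort keys when the limit is pushed to the right input: field indexes expressed over the join's row type are translated by subtracting the left input's field count. A standalone model of that translation (illustrative numbers, not the Calcite implementation):

import java.util.Arrays;

/** Standalone model of the collation shift above: when the limit is pushed to
 *  the right input, sort-field indexes expressed over the join's row type are
 *  rebased by subtracting the left input's field count (the effect of
 *  RelCollations.shift with a negative offset; numbers illustrative). */
public class CollationShiftSketch {

  static int[] shift(int[] sortFieldIndexes, int offset) {
    int[] shifted = new int[sortFieldIndexes.length];
    for (int i = 0; i < sortFieldIndexes.length; i++) {
      shifted[i] = sortFieldIndexes[i] + offset;
    }
    return shifted;
  }

  public static void main(String[] args) {
    // join row type: left fields 0..3, right fields 4..6; sort on fields 4 and 6
    System.out.println(Arrays.toString(shift(new int[] {4, 6}, -4))); // [0, 2]
  }
}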