use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.
the class HiveRelMdSelectivity method computeInnerJoinSelectivity.
private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode predicate) {
Pair<Boolean, RexNode> predInfo = getCombinedPredicateForJoin(j, predicate);
if (!predInfo.getKey()) {
return new FilterSelectivityEstimator(j).estimateSelectivity(predInfo.getValue());
}
RexNode combinedPredicate = predInfo.getValue();
JoinPredicateInfo jpi;
try {
jpi = JoinPredicateInfo.constructJoinPredicateInfo(j, combinedPredicate);
} catch (CalciteSemanticException e) {
throw new RuntimeException(e);
}
ImmutableMap.Builder<Integer, Double> colStatMapBuilder = ImmutableMap.builder();
ImmutableMap<Integer, Double> colStatMap;
int rightOffSet = j.getLeft().getRowType().getFieldCount();
// Join which are part of join keys
for (Integer ljk : jpi.getProjsFromLeftPartOfJoinKeysInChildSchema()) {
colStatMapBuilder.put(ljk, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getLeft(), mq, ljk));
}
// Join which are part of join keys
for (Integer rjk : jpi.getProjsFromRightPartOfJoinKeysInChildSchema()) {
colStatMapBuilder.put(rjk + rightOffSet, HiveRelMdDistinctRowCount.getDistinctRowCount(j.getRight(), mq, rjk));
}
colStatMap = colStatMapBuilder.build();
// 3. Walk through the Join Condition Building NDV for selectivity
// NDV of the join can not exceed the cardinality of cross join.
List<JoinLeafPredicateInfo> peLst = jpi.getEquiJoinPredicateElements();
int noOfPE = peLst.size();
double ndvCrossProduct = 1;
if (noOfPE > 0) {
ndvCrossProduct = exponentialBackoff(peLst, colStatMap);
if (j instanceof SemiJoin) {
ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()), ndvCrossProduct);
} else if (j instanceof HiveJoin) {
ndvCrossProduct = Math.min(mq.getRowCount(j.getLeft()) * mq.getRowCount(j.getRight()), ndvCrossProduct);
} else {
throw new RuntimeException("Unexpected Join type: " + j.getClass().getName());
}
}
// 4. Join Selectivity = 1/NDV
return (1 / ndvCrossProduct);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.
the class HiveJoinAddNotNullRule method onMatch.
//~ Methods ----------------------------------------------------------------
@Override
public void onMatch(RelOptRuleCall call) {
final Join join = call.rel(0);
RelNode lChild = join.getLeft();
RelNode rChild = join.getRight();
HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class);
assert registry != null;
if (join.getJoinType() != JoinRelType.INNER) {
return;
}
if (join.getCondition().isAlwaysTrue()) {
return;
}
JoinPredicateInfo joinPredInfo;
try {
joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
} catch (CalciteSemanticException e) {
return;
}
Set<Integer> joinLeftKeyPositions = new HashSet<Integer>();
Set<Integer> joinRightKeyPositions = new HashSet<Integer>();
for (int i = 0; i < joinPredInfo.getEquiJoinPredicateElements().size(); i++) {
JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(i);
joinLeftKeyPositions.addAll(joinLeafPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema());
joinRightKeyPositions.addAll(joinLeafPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema());
}
// Build not null conditions
final RelOptCluster cluster = join.getCluster();
final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
final List<RexNode> newLeftConditions = getNotNullConditions(cluster, rexBuilder, join.getLeft(), joinLeftKeyPositions, leftPushedPredicates);
Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
final List<RexNode> newRightConditions = getNotNullConditions(cluster, rexBuilder, join.getRight(), joinRightKeyPositions, rightPushedPredicates);
// Nothing will be added to the expression
RexNode newLeftPredicate = RexUtil.composeConjunction(rexBuilder, newLeftConditions, false);
RexNode newRightPredicate = RexUtil.composeConjunction(rexBuilder, newRightConditions, false);
if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) {
return;
}
if (!newLeftPredicate.isAlwaysTrue()) {
RelNode curr = lChild;
lChild = filterFactory.createFilter(lChild, newLeftPredicate);
call.getPlanner().onCopy(curr, lChild);
}
if (!newRightPredicate.isAlwaysTrue()) {
RelNode curr = rChild;
rChild = filterFactory.createFilter(rChild, newRightPredicate);
call.getPlanner().onCopy(curr, rChild);
}
Join newJoin = join.copy(join.getTraitSet(), join.getCondition(), lChild, rChild, join.getJoinType(), join.isSemiJoinDone());
call.getPlanner().onCopy(join, newJoin);
// Register information about created predicates
registry.getPushedPredicates(newJoin, 0).addAll(leftPushedPredicates);
registry.getPushedPredicates(newJoin, 1).addAll(rightPushedPredicates);
call.transformTo(newJoin);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.
the class HiveJoinToMultiJoinRule method isCombinableJoin.
/*
* Returns true if the join conditions execute over the same keys
*/
private static boolean isCombinableJoin(HiveJoin join, HiveJoin leftChildJoin) throws CalciteSemanticException {
final JoinPredicateInfo joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join, join.getCondition());
final JoinPredicateInfo leftChildJoinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(leftChildJoin, leftChildJoin.getCondition());
return isCombinablePredicate(joinPredInfo, leftChildJoinPredInfo, leftChildJoin.getInputs().size());
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.
the class HiveInsertExchange4JoinRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
JoinPredicateInfo joinPredInfo;
if (call.rel(0) instanceof HiveMultiJoin) {
HiveMultiJoin multiJoin = call.rel(0);
joinPredInfo = multiJoin.getJoinPredicateInfo();
} else if (call.rel(0) instanceof HiveJoin) {
HiveJoin hiveJoin = call.rel(0);
joinPredInfo = hiveJoin.getJoinPredicateInfo();
} else if (call.rel(0) instanceof Join) {
Join join = call.rel(0);
try {
joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(join);
} catch (CalciteSemanticException e) {
throw new RuntimeException(e);
}
} else {
return;
}
for (RelNode child : call.rel(0).getInputs()) {
if (((HepRelVertex) child).getCurrentRel() instanceof Exchange) {
return;
}
}
// Get key columns from inputs. Those are the columns on which we will distribute on.
// It is also the columns we will sort on.
List<RelNode> newInputs = new ArrayList<RelNode>();
for (int i = 0; i < call.rel(0).getInputs().size(); i++) {
List<Integer> joinKeyPositions = new ArrayList<Integer>();
ImmutableList.Builder<RexNode> joinExprsBuilder = new ImmutableList.Builder<RexNode>();
Set<String> keySet = Sets.newHashSet();
ImmutableList.Builder<RelFieldCollation> collationListBuilder = new ImmutableList.Builder<RelFieldCollation>();
for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
JoinLeafPredicateInfo joinLeafPredInfo = joinPredInfo.getEquiJoinPredicateElements().get(j);
for (RexNode joinExprNode : joinLeafPredInfo.getJoinExprs(i)) {
if (keySet.add(joinExprNode.toString())) {
joinExprsBuilder.add(joinExprNode);
}
}
for (int pos : joinLeafPredInfo.getProjsJoinKeysInChildSchema(i)) {
if (!joinKeyPositions.contains(pos)) {
joinKeyPositions.add(pos);
collationListBuilder.add(new RelFieldCollation(pos));
}
}
}
HiveSortExchange exchange = HiveSortExchange.create(call.rel(0).getInput(i), new HiveRelDistribution(RelDistribution.Type.HASH_DISTRIBUTED, joinKeyPositions), new HiveRelCollation(collationListBuilder.build()), joinExprsBuilder.build());
newInputs.add(exchange);
}
RelNode newOp;
if (call.rel(0) instanceof HiveMultiJoin) {
HiveMultiJoin multiJoin = call.rel(0);
newOp = multiJoin.copy(multiJoin.getTraitSet(), newInputs);
} else if (call.rel(0) instanceof Join) {
Join join = call.rel(0);
newOp = join.copy(join.getTraitSet(), join.getCondition(), newInputs.get(0), newInputs.get(1), join.getJoinType(), join.isSemiJoinDone());
} else {
return;
}
call.getPlanner().onCopy(call.rel(0), newOp);
call.transformTo(newOp);
}
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo in project hive by apache.
the class HiveJoinToMultiJoinRule method mergeJoin.
// This method tries to merge the join with its left child. The left
// child should be a join for this to happen.
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
// We check whether the join can be combined with any of its children
final List<RelNode> newInputs = Lists.newArrayList();
final List<RexNode> newJoinCondition = Lists.newArrayList();
final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
final List<JoinRelType> joinTypes = Lists.newArrayList();
final List<RexNode> joinFilters = Lists.newArrayList();
// Left child
if (left instanceof HiveJoin || left instanceof HiveMultiJoin) {
final RexNode leftCondition;
final List<Pair<Integer, Integer>> leftJoinInputs;
final List<JoinRelType> leftJoinTypes;
final List<RexNode> leftJoinFilters;
boolean combinable;
if (left instanceof HiveJoin) {
HiveJoin hj = (HiveJoin) left;
leftCondition = hj.getCondition();
leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
leftJoinTypes = ImmutableList.of(hj.getJoinType());
leftJoinFilters = ImmutableList.of(hj.getJoinFilter());
try {
combinable = isCombinableJoin(join, hj);
} catch (CalciteSemanticException e) {
LOG.trace("Failed to merge join-join", e);
combinable = false;
}
} else {
HiveMultiJoin hmj = (HiveMultiJoin) left;
leftCondition = hmj.getCondition();
leftJoinInputs = hmj.getJoinInputs();
leftJoinTypes = hmj.getJoinTypes();
leftJoinFilters = hmj.getJoinFilters();
try {
combinable = isCombinableJoin(join, hmj);
} catch (CalciteSemanticException e) {
LOG.trace("Failed to merge join-multijoin", e);
combinable = false;
}
}
if (combinable) {
newJoinCondition.add(leftCondition);
for (int i = 0; i < leftJoinInputs.size(); i++) {
joinInputs.add(leftJoinInputs.get(i));
joinTypes.add(leftJoinTypes.get(i));
joinFilters.add(leftJoinFilters.get(i));
}
newInputs.addAll(left.getInputs());
} else {
// The join operation in the child is not on the same keys
return null;
}
} else {
// The left child is not a join or multijoin operator
return null;
}
final int numberLeftInputs = newInputs.size();
// Right child
newInputs.add(right);
// If we cannot combine any of the children, we bail out
newJoinCondition.add(join.getCondition());
if (newJoinCondition.size() == 1) {
return null;
}
final List<RelDataTypeField> systemFieldList = ImmutableList.of();
List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
List<Integer> filterNulls = new ArrayList<Integer>();
for (int i = 0; i < newInputs.size(); i++) {
joinKeyExprs.add(new ArrayList<RexNode>());
}
RexNode filters;
try {
filters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), joinKeyExprs, filterNulls, null);
} catch (CalciteSemanticException e) {
LOG.trace("Failed to merge joins", e);
return null;
}
ImmutableBitSet.Builder keysInInputsBuilder = ImmutableBitSet.builder();
for (int i = 0; i < newInputs.size(); i++) {
List<RexNode> partialCondition = joinKeyExprs.get(i);
if (!partialCondition.isEmpty()) {
keysInInputsBuilder.set(i);
}
}
// If we cannot merge, we bail out
ImmutableBitSet keysInInputs = keysInInputsBuilder.build();
ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size()));
if (join.getJoinType() != JoinRelType.INNER && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
return null;
}
// Otherwise, we add to the join specs
if (join.getJoinType() != JoinRelType.INNER) {
int leftInput = keysInInputs.nextSetBit(0);
int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
joinInputs.add(Pair.of(leftInput, rightInput));
joinTypes.add(join.getJoinType());
joinFilters.add(filters);
} else {
for (int i : leftReferencedInputs) {
for (int j : rightReferencedInputs) {
joinInputs.add(Pair.of(i, j));
joinTypes.add(join.getJoinType());
joinFilters.add(filters);
}
}
}
// We can now create a multijoin operator
RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, newJoinCondition, false));
List<RelNode> newInputsArray = Lists.newArrayList(newInputs);
JoinPredicateInfo joinPredInfo = null;
try {
joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
} catch (CalciteSemanticException e) {
throw new RuntimeException(e);
}
// If the number of joins < number of input tables-1, this is not a star join.
if (joinPredInfo.getEquiJoinPredicateElements().size() < newInputs.size() - 1) {
return null;
}
// Validate that the multi-join is a valid star join before returning it.
for (int i = 0; i < newInputs.size(); i++) {
List<RexNode> joinKeys = null;
for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
List<RexNode> currJoinKeys = joinPredInfo.getEquiJoinPredicateElements().get(j).getJoinExprs(i);
if (currJoinKeys.isEmpty()) {
continue;
}
if (joinKeys == null) {
joinKeys = currJoinKeys;
} else {
// Bail out if this is the case.
if (!joinKeys.containsAll(currJoinKeys) || !currJoinKeys.containsAll(joinKeys)) {
return null;
}
}
}
}
return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
Aggregations