use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.
Class HiveJoinConstraintsRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
final Project project = call.rel(0);
final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
List<RexNode> topProjExprs = project.getProjects();
Join join = call.rel(1);
final JoinRelType joinType = join.getJoinType();
final RelNode leftInput = join.getLeft();
final RelNode rightInput = join.getRight();
final RexNode cond = join.getCondition();
// TODO:https://issues.apache.org/jira/browse/HIVE-23920
if (joinType == JoinRelType.ANTI) {
return;
}
// 1) If it is an inner, check whether project only uses columns from one side.
// That side will need to be the FK side.
// If it is a left outer, left will be the FK side.
// If it is a right outer, right will be the FK side.
final RelNode fkInput;
final RelNode nonFkInput;
final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null);
final ImmutableBitSet leftBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
final ImmutableBitSet rightBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount(), join.getRowType().getFieldCount());
// These booleans indicate whether the left and right inputs, respectively, are potential FK sides
boolean leftInputPotentialFK = topRefs.intersects(leftBits);
boolean rightInputPotentialFK = topRefs.intersects(rightBits);
if (leftInputPotentialFK && rightInputPotentialFK && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
// Both inputs are referenced. Before making a decision, try to swap
// references in join condition if it is an inner join, i.e. if a join
// condition column is referenced above the join, then we can just
// reference the column from the other side.
// For example, given two relations R(a1,a2), S(b1):
// SELECT a2, b1 FROM R, S ON R.a1=S.b1 =>
// SELECT a2, a1 FROM R, S ON R.a1=S.b1
int joinFieldCount = join.getRowType().getFieldCount();
Mapping mappingLR = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
Mapping mappingRL = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
for (RexNode conj : RelOptUtil.conjunctions(cond)) {
if (!conj.isA(SqlKind.EQUALS)) {
continue;
}
RexCall eq = (RexCall) conj;
RexNode op1 = eq.getOperands().get(0);
RexNode op2 = eq.getOperands().get(1);
if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
// Check references
int ref1 = ((RexInputRef) op1).getIndex();
int ref2 = ((RexInputRef) op2).getIndex();
int leftRef = -1;
int rightRef = -1;
if (leftBits.get(ref1) && rightBits.get(ref2)) {
leftRef = ref1;
rightRef = ref2;
} else if (rightBits.get(ref1) && leftBits.get(ref2)) {
leftRef = ref2;
rightRef = ref1;
}
if (leftRef != -1 && rightRef != -1) {
// only add a mapping for this column pair if one does not exist yet; overriding an existing entry is useless
if (mappingLR.getTargetOpt(leftRef) == -1) {
mappingLR.set(leftRef, rightRef);
}
if (mappingRL.getTargetOpt(rightRef) == -1) {
mappingRL.set(rightRef, leftRef);
}
}
}
}
if (mappingLR.size() != 0) {
// First insert missing elements into the mapping as identity mappings
for (int i = 0; i < joinFieldCount; i++) {
if (mappingLR.getTargetOpt(i) == -1) {
mappingLR.set(i, i);
}
if (mappingRL.getTargetOpt(i) == -1) {
mappingRL.set(i, i);
}
}
// Then, we start by trying to reference only the left side in the top projections
List<RexNode> swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingRL, call.rel(1)))).collect(Collectors.toList());
rightInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(rightBits);
if (!rightInputPotentialFK) {
topProjExprs = swappedTopProjExprs;
} else {
// If that did not work, we try to reference only the right side in the top projections
swappedTopProjExprs = topProjExprs.stream().map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingLR, call.rel(1)))).collect(Collectors.toList());
leftInputPotentialFK = RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(leftBits);
if (!leftInputPotentialFK) {
topProjExprs = swappedTopProjExprs;
}
}
}
} else if (!leftInputPotentialFK && !rightInputPotentialFK) {
// TODO: There are no references in the project operator above.
// In this case, we should probably do two passes, one for
// left as FK and one for right as FK, although it may be expensive.
// Currently we only assume left as FK
leftInputPotentialFK = true;
}
final Mode mode;
switch(joinType) {
case SEMI:
case INNER:
// case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
if (leftInputPotentialFK && rightInputPotentialFK) {
// Both inputs are still referenced above the join, hence there is nothing to transform
return;
}
fkInput = leftInputPotentialFK ? leftInput : rightInput;
nonFkInput = leftInputPotentialFK ? rightInput : leftInput;
mode = Mode.REMOVE;
break;
case LEFT:
fkInput = leftInput;
nonFkInput = rightInput;
mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
break;
case RIGHT:
fkInput = rightInput;
nonFkInput = leftInput;
mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
break;
default:
// Other type, bail out
return;
}
// 2) Check whether this join can be rewritten or removed
RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(join, fkInput, nonFkInput, call.getMetadataQuery());
// 3) If it is the only condition, we can trigger the rewriting
if (r.rewritable) {
rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, r.nullableNodes);
} else {
// Possibly this could be enhanced to take other join types into consideration.
if (joinType != JoinRelType.INNER) {
return;
}
// first swap fk and non-fk input and see if we can rewrite them
RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(join, nonFkInput, fkInput, call.getMetadataQuery());
if (fkRemoval.rewritable) {
// we have established that nonFkInput is FK, and fkInput is PK
// and there is no row filtering on FK side
// check that the FK side join column is distinct (i.e. it has a group by)
ImmutableBitSet fkSideBitSet;
if (nonFkInput == leftInput) {
fkSideBitSet = leftBits;
} else {
fkSideBitSet = rightBits;
}
ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
for (RexNode conj : RelOptUtil.conjunctions(cond)) {
if (!conj.isA(SqlKind.EQUALS)) {
return;
}
RexCall eq = (RexCall) conj;
RexNode op1 = eq.getOperands().get(0);
RexNode op2 = eq.getOperands().get(1);
if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
// Check references
int ref1 = ((RexInputRef) op1).getIndex();
int ref2 = ((RexInputRef) op2).getIndex();
int leftRef = -1;
int rightRef = -1;
if (fkSideBitSet.get(ref1)) {
// check that join columns are not nullable
if (op1.getType().isNullable()) {
return;
}
fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
} else {
if (op2.getType().isNullable()) {
return;
}
fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
}
}
}
if (!call.getMetadataQuery().areColumnsUnique(nonFkInput, fkJoinColBuilder.build())) {
return;
}
// all conditions are met, therefore we can perform rewrite to remove fk side
rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, fkRemoval.nullableNodes);
}
}
}
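The rule above removes or transforms a join once HiveRelOptUtil.isRewritablePKFKJoin confirms a non-filtering PK-FK relationship between its inputs. The following is a minimal sketch of driving such a rule through Calcite's heuristic planner; it assumes HiveJoinConstraintsRule exposes a static INSTANCE field (the usual convention for Hive's Calcite rules) and takes an already-built RelNode plan, so both the field name and the surrounding wiring are illustrative rather than the actual Hive driver code.

import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgram;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinConstraintsRule;

public final class PkFkJoinSimplifier {
  private PkFkJoinSimplifier() {
  }

  /** Applies only the constraints-based join simplification to the given plan. */
  public static RelNode simplify(RelNode relRoot) {
    HepProgram program = new HepProgramBuilder()
        .addRuleInstance(HiveJoinConstraintsRule.INSTANCE) // assumed static INSTANCE field
        .build();
    HepPlanner planner = new HepPlanner(program);
    planner.setRoot(relRoot);
    // If the Project/Join pattern matches and the PK-FK check succeeds,
    // the returned plan has the non-FK side of the join removed or transformed.
    return planner.findBestExp();
  }
}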
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.
Class HiveFilterProjectTransposeRule, method matches.
@Override
public boolean matches(RelOptRuleCall call) {
final Filter filterRel = call.rel(0);
// The condition fetched here can reference a UDF that is not deterministic but is defined
// as part of the select list when a view is in play. The condition after the pushdown,
// however, will resolve to using the UDF from the select list, so the check here for
// deterministic filters should be based on the resolved expression. Refer to test case cbo_ppd_non_deterministic.q.
RexNode condition = RelOptUtil.pushPastProject(filterRel.getCondition(), call.rel(1));
if (this.onlyDeterministic && !HiveCalciteUtil.isDeterministic(condition)) {
return false;
}
if (call.rels.length > 2) {
final Join joinRel = call.rel(2);
RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(joinRel, joinRel.getLeft(), joinRel.getRight(), call.getMetadataQuery());
if (!joinInfo.rewritable) {
return false;
}
}
return super.matches(call);
}
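The comment above is the key subtlety: the filter condition must be rewritten in terms of the project's input expressions before deciding whether it is deterministic, otherwise a filter on a view column that aliases something like RAND() would look deterministic and be pushed below the project. A minimal sketch of that resolution step, factored into a hypothetical helper (FilterTransposeCheck and filterCanTranspose are illustrative names, not part of the rule), could look as follows.

import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rex.RexNode;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;

public final class FilterTransposeCheck {
  private FilterTransposeCheck() {
  }

  /**
   * Hypothetical helper mirroring the determinism check in matches():
   * resolve the filter condition against the project before inspecting it.
   */
  public static boolean filterCanTranspose(Filter filter, Project project) {
    // E.g. a filter "$1 > 0.5" over Project[$0 AS a, RAND() AS r] becomes
    // "RAND() > 0.5" after the rewrite, which is correctly rejected.
    RexNode resolved = RelOptUtil.pushPastProject(filter.getCondition(), project);
    return HiveCalciteUtil.isDeterministic(resolved);
  }
}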
use of org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo in project hive by apache.
Class HiveJoinSwapConstraintsRule, method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
final Join topJoin = call.rel(0);
final Join bottomJoin = call.rel(1);
final RexBuilder rexBuilder = topJoin.getCluster().getRexBuilder();
// 1) Check whether these joins can be swapped.
if (topJoin.getJoinType().generatesNullsOnLeft() || bottomJoin.getJoinType().generatesNullsOnLeft() || bottomJoin.isSemiJoin()) {
// Nothing to do
return;
}
// 2) Check whether the bottom is a non-filtering column appending join.
// - If the top one is a non-filtering column appending join, we do not
// trigger the optimization, since we do not want to swap this type of
// joins.
// - If the bottom one is not a non-filtering column appending join,
// we cannot trigger the optimization.
RewritablePKFKJoinInfo topInfo = HiveRelOptUtil.isRewritablePKFKJoin(topJoin, topJoin.getLeft(), topJoin.getRight(), call.getMetadataQuery());
RewritablePKFKJoinInfo bottomInfo = HiveRelOptUtil.isRewritablePKFKJoin(bottomJoin, bottomJoin.getLeft(), bottomJoin.getRight(), call.getMetadataQuery());
if (topInfo.rewritable || !bottomInfo.rewritable) {
// Nothing to do
return;
}
// 3) Rewrite.
// X is the left child of the join below
// Y is the right child of the join below
// Z is the right child of the top join
int nFieldsX = bottomJoin.getLeft().getRowType().getFieldList().size();
int nFieldsY = bottomJoin.getRight().getRowType().getFieldList().size();
int nFieldsZ = topJoin.getRight().getRowType().getFieldList().size();
int nTotalFields = nFieldsX + nFieldsY + nFieldsZ;
List<RelDataTypeField> fields = new ArrayList<>();
// create a list of fields for the full join result; note that we can't simply use the
// join's own field list because the row type of a semi-join would only include the
// left-hand side fields
List<RelDataTypeField> joinFields = topJoin.getRowType().getFieldList();
for (int i = 0; i < (nFieldsX + nFieldsY); i++) {
fields.add(joinFields.get(i));
}
joinFields = topJoin.getRight().getRowType().getFieldList();
for (int i = 0; i < nFieldsZ; i++) {
fields.add(joinFields.get(i));
}
// determine which operands below the join are the actual
// rels that participate in it
final Set<Integer> leftKeys = HiveCalciteUtil.getInputRefs(topJoin.getCondition());
leftKeys.removeIf(i -> i >= topJoin.getLeft().getRowType().getFieldCount());
int nKeysFromX = 0;
for (int leftKey : leftKeys) {
if (leftKey < nFieldsX) {
nKeysFromX++;
}
}
// the keys must all originate from the left
if (nKeysFromX != leftKeys.size()) {
// Nothing to do
return;
}
// need to convert the conditions
// (X, Y, Z) --> (X, Z, Y)
int[] adjustments = new int[nTotalFields];
setJoinAdjustments(adjustments, nFieldsX, nFieldsY, nFieldsZ, nFieldsZ, -nFieldsY);
final RexNode newBottomCondition = topJoin.getCondition().accept(new RelOptUtil.RexInputConverter(rexBuilder, fields, adjustments));
// create the new joins
final Join newBottomJoin = topJoin.copy(topJoin.getTraitSet(), newBottomCondition, bottomJoin.getLeft(), topJoin.getRight(), topJoin.getJoinType(), topJoin.isSemiJoinDone());
final RexNode newTopCondition;
if (newBottomJoin.isSemiJoin()) {
newTopCondition = bottomJoin.getCondition();
} else {
newTopCondition = bottomJoin.getCondition().accept(new RelOptUtil.RexInputConverter(rexBuilder, fields, adjustments));
}
final Join newTopJoin = bottomJoin.copy(bottomJoin.getTraitSet(), newTopCondition, newBottomJoin, bottomJoin.getRight(), bottomJoin.getJoinType(), bottomJoin.isSemiJoinDone());
if (newBottomJoin.isSemiJoin()) {
call.transformTo(newTopJoin);
} else {
// need to swap the columns to match the original join
// (X, Y, Z) --> (X, Z, Y)
List<RexNode> exprs = new ArrayList<>();
for (int i = 0; i < nFieldsX; i++) {
exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
}
for (int i = nFieldsX + nFieldsZ; i < topJoin.getRowType().getFieldCount(); i++) {
exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
}
for (int i = nFieldsX; i < nFieldsX + nFieldsZ; i++) {
exprs.add(rexBuilder.makeInputRef(newTopJoin, i));
}
call.transformTo(call.builder().push(newTopJoin).project(exprs).build());
}
}
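The index arithmetic behind the (X, Y, Z) -> (X, Z, Y) swap is hidden in the adjustments array passed to RelOptUtil.RexInputConverter: X columns keep their index, Y columns are shifted right by the width of Z, and Z columns are shifted left by the width of Y. The hypothetical helper below (JoinSwapAdjustments is not part of the rule) only makes those shift amounts explicit, matching the setJoinAdjustments call above.

final class JoinSwapAdjustments {
  private JoinSwapAdjustments() {
  }

  /** Builds the input-ref adjustments for the (X, Y, Z) -> (X, Z, Y) column swap. */
  static int[] swapAdjustments(int nFieldsX, int nFieldsY, int nFieldsZ) {
    int[] adjustments = new int[nFieldsX + nFieldsY + nFieldsZ];
    for (int i = 0; i < nFieldsX; i++) {
      adjustments[i] = 0;            // X columns stay in place
    }
    for (int i = nFieldsX; i < nFieldsX + nFieldsY; i++) {
      adjustments[i] = nFieldsZ;     // Y columns move after Z
    }
    for (int i = nFieldsX + nFieldsY; i < adjustments.length; i++) {
      adjustments[i] = -nFieldsY;    // Z columns move before Y
    }
    return adjustments;
  }
}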