use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveRemoveSqCountCheck method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final Join topJoin = call.rel(0);
  final Join join = call.rel(2);
  final Aggregate aggregate = call.rel(6);
  // In the presence of grouping sets we cannot remove sq_count_check
  if (aggregate.indicator) {
    return;
  }
  if (isAggregateWithoutGbyKeys(aggregate) || isAggWithConstantGbyKeys(aggregate, call)) {
    // New join: join.getLeft() joined with topJoin.getRight()
    RelNode newJoin = HiveJoin.getJoin(topJoin.getCluster(), join.getLeft(), topJoin.getRight(),
        topJoin.getCondition(), topJoin.getJoinType());
    call.transformTo(newJoin);
  }
}
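The operand indexes used above (call.rel(0), call.rel(2), call.rel(6)) are positions in the operand tree declared by the rule's constructor: call.rel(i) returns the i-th matched operand, counting the root first and then its children depth-first. The rule's actual operand tree is not shown in this snippet, so the following is a minimal, hypothetical sketch of the convention, using plain org.apache.calcite packages instead of the Beam-vendored prefix and an invented operand tree.
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.Project;

// Hypothetical rule; the operand tree is an illustration only, not Hive's.
public class ExampleOperandOrderRule extends RelOptRule {
  public ExampleOperandOrderRule() {
    super(
        operand(Join.class,                          // call.rel(0)
            operand(RelNode.class, any()),           // call.rel(1)
            operand(Project.class,                   // call.rel(2)
                operand(Aggregate.class, any()))));  // call.rel(3)
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    final Join topJoin = call.rel(0);         // root operand
    final Project project = call.rel(2);      // second child of the root
    final Aggregate aggregate = call.rel(3);  // child of the Project operand
    // ... build a replacement and hand it to call.transformTo(...) ...
  }
}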
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveJoinConstraintsRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  final Project project = call.rel(0);
  final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
  List<RexNode> topProjExprs = project.getProjects();
  Join join = call.rel(1);
  final JoinRelType joinType = join.getJoinType();
  final RelNode leftInput = join.getLeft();
  final RelNode rightInput = join.getRight();
  final RexNode cond = join.getCondition();
  // TODO: https://issues.apache.org/jira/browse/HIVE-23920
  if (joinType == JoinRelType.ANTI) {
    return;
  }
  // 1) If it is an inner join, check whether the project only uses columns from one side.
  // That side will need to be the FK side.
  // If it is a left outer join, left will be the FK side.
  // If it is a right outer join, right will be the FK side.
  final RelNode fkInput;
  final RelNode nonFkInput;
  final ImmutableBitSet topRefs = RelOptUtil.InputFinder.bits(topProjExprs, null);
  final ImmutableBitSet leftBits = ImmutableBitSet.range(leftInput.getRowType().getFieldCount());
  final ImmutableBitSet rightBits = ImmutableBitSet.range(
      leftInput.getRowType().getFieldCount(), join.getRowType().getFieldCount());
  // These booleans indicate whether the left and right inputs, respectively, are potential FK sides
  boolean leftInputPotentialFK = topRefs.intersects(leftBits);
  boolean rightInputPotentialFK = topRefs.intersects(rightBits);
  if (leftInputPotentialFK && rightInputPotentialFK
      && (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
    // Both inputs are referenced. Before making a decision, try to swap
    // references in the join condition if it is an inner join, i.e. if a join
    // condition column is referenced above the join, then we can just
    // reference the column from the other side.
    // For example, given two relations R(a1,a2), S(b1):
    // SELECT a2, b1 FROM R JOIN S ON R.a1=S.b1 =>
    // SELECT a2, a1 FROM R JOIN S ON R.a1=S.b1
    int joinFieldCount = join.getRowType().getFieldCount();
    Mapping mappingLR = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
    Mapping mappingRL = Mappings.create(MappingType.PARTIAL_FUNCTION, joinFieldCount, joinFieldCount);
    for (RexNode conj : RelOptUtil.conjunctions(cond)) {
      if (!conj.isA(SqlKind.EQUALS)) {
        continue;
      }
      RexCall eq = (RexCall) conj;
      RexNode op1 = eq.getOperands().get(0);
      RexNode op2 = eq.getOperands().get(1);
      if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
        // Check references
        int ref1 = ((RexInputRef) op1).getIndex();
        int ref2 = ((RexInputRef) op2).getIndex();
        int leftRef = -1;
        int rightRef = -1;
        if (leftBits.get(ref1) && rightBits.get(ref2)) {
          leftRef = ref1;
          rightRef = ref2;
        } else if (rightBits.get(ref1) && leftBits.get(ref2)) {
          leftRef = ref2;
          rightRef = ref1;
        }
        if (leftRef != -1 && rightRef != -1) {
          // Only keep the first equivalence recorded for a column; adding another
          // as it is useless
          if (mappingLR.getTargetOpt(leftRef) == -1) {
            mappingLR.set(leftRef, rightRef);
          }
          if (mappingRL.getTargetOpt(rightRef) == -1) {
            mappingRL.set(rightRef, leftRef);
          }
        }
      }
    }
    if (mappingLR.size() != 0) {
      // First insert missing elements into the mapping as identity mappings
      for (int i = 0; i < joinFieldCount; i++) {
        if (mappingLR.getTargetOpt(i) == -1) {
          mappingLR.set(i, i);
        }
        if (mappingRL.getTargetOpt(i) == -1) {
          mappingRL.set(i, i);
        }
      }
      // Then we start by trying to reference only the left side in the top projections
      List<RexNode> swappedTopProjExprs = topProjExprs.stream()
          .map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingRL, call.rel(1))))
          .collect(Collectors.toList());
      rightInputPotentialFK =
          RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(rightBits);
      if (!rightInputPotentialFK) {
        topProjExprs = swappedTopProjExprs;
      } else {
        // If that did not work, we try to reference only the right side in the top projections
        swappedTopProjExprs = topProjExprs.stream()
            .map(projExpr -> projExpr.accept(new RexPermuteInputsShuttle(mappingLR, call.rel(1))))
            .collect(Collectors.toList());
        leftInputPotentialFK =
            RelOptUtil.InputFinder.bits(swappedTopProjExprs, null).intersects(leftBits);
        if (!leftInputPotentialFK) {
          topProjExprs = swappedTopProjExprs;
        }
      }
    }
  } else if (!leftInputPotentialFK && !rightInputPotentialFK) {
    // TODO: There are no references in the project operator above.
    // In this case, we should probably do two passes, one for
    // left as FK and one for right as FK, although it may be expensive.
    // Currently we only assume left as FK
    leftInputPotentialFK = true;
  }
  final Mode mode;
  switch (joinType) {
  case SEMI:
  case INNER:
    // case ANTI: // TODO: https://issues.apache.org/jira/browse/HIVE-23920
    if (leftInputPotentialFK && rightInputPotentialFK) {
      // Still referencing columns from both sides, so we cannot pick an FK side
      // and there is nothing to transform
      return;
    }
    fkInput = leftInputPotentialFK ? leftInput : rightInput;
    nonFkInput = leftInputPotentialFK ? rightInput : leftInput;
    mode = Mode.REMOVE;
    break;
  case LEFT:
    fkInput = leftInput;
    nonFkInput = rightInput;
    mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
    break;
  case RIGHT:
    fkInput = rightInput;
    nonFkInput = leftInput;
    mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : Mode.TRANSFORM;
    break;
  default:
    // Other join type, bail out
    return;
  }
  // 2) Check whether this join can be rewritten or removed
  RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(
      join, fkInput, nonFkInput, call.getMetadataQuery());
  // 3) If it is rewritable, we can trigger the rewriting
  if (r.rewritable) {
    rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, r.nullableNodes);
  } else {
    // Possibly this could be enhanced to take other join types into consideration.
    if (joinType != JoinRelType.INNER) {
      return;
    }
    // First swap the FK and non-FK inputs and see if we can rewrite them
    RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(
        join, nonFkInput, fkInput, call.getMetadataQuery());
    if (fkRemoval.rewritable) {
      // We have established that nonFkInput is FK, and fkInput is PK,
      // and there is no row filtering on the FK side.
      // Check that the FK-side join columns are distinct (i.e. there is a group by).
      ImmutableBitSet fkSideBitSet;
      if (nonFkInput == leftInput) {
        fkSideBitSet = leftBits;
      } else {
        fkSideBitSet = rightBits;
      }
      ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
      for (RexNode conj : RelOptUtil.conjunctions(cond)) {
        if (!conj.isA(SqlKind.EQUALS)) {
          return;
        }
        RexCall eq = (RexCall) conj;
        RexNode op1 = eq.getOperands().get(0);
        RexNode op2 = eq.getOperands().get(1);
        if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
          // Check references
          int ref1 = ((RexInputRef) op1).getIndex();
          int ref2 = ((RexInputRef) op2).getIndex();
          int leftRef = -1;
          int rightRef = -1;
          if (fkSideBitSet.get(ref1)) {
            // Check that the join columns are not nullable
            if (op1.getType().isNullable()) {
              return;
            }
            fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
          } else {
            if (op2.getType().isNullable()) {
              return;
            }
            fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
          }
        }
      }
      if (!call.getMetadataQuery().areColumnsUnique(nonFkInput, fkJoinColBuilder.build())) {
        return;
      }
      // All conditions are met, therefore we can perform the rewrite to remove the FK side
      rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, fkRemoval.nullableNodes);
    }
  }
}
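Most of the decisions above hinge on one piece of bookkeeping: in the join's output row type, fields 0..leftFieldCount-1 come from the left input and the remaining fields from the right input, so membership in leftBits or rightBits tells which side a RexInputRef (from the condition or the top projections) refers to, and indexOf rebases a column to that input's own numbering. A standalone sketch of that arithmetic, with made-up field counts and the plain (non-vendored) Calcite import:
import org.apache.calcite.util.ImmutableBitSet;

public class JoinColumnSides {
  public static void main(String[] args) {
    int leftFieldCount = 3;   // e.g. left input R(a1, a2, a3)
    int totalFieldCount = 5;  // join row type: R's fields followed by S(b1, b2)
    ImmutableBitSet leftBits = ImmutableBitSet.range(leftFieldCount);
    ImmutableBitSet rightBits = ImmutableBitSet.range(leftFieldCount, totalFieldCount);
    int ref = 4;              // an input ref as it would appear in the join condition
    String side = leftBits.get(ref) ? "left" : rightBits.get(ref) ? "right" : "none";
    System.out.println("column " + ref + " comes from the " + side + " input");
    // indexOf rebases the join-level ordinal to the right input's own numbering (here: 1)
    System.out.println("position within the right input: " + rightBits.indexOf(ref));
  }
}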
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.
the class JDBCFilterJoinRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  Filter filter = call.rel(0);
  Join join = call.rel(1);
  super.perform(call, filter, join);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.
the class JDBCExtractJoinFilterRule method matches.
// ~ Methods ----------------------------------------------------------------
@Override
public boolean matches(RelOptRuleCall call) {
  final Join join = call.rel(0);
  final HiveJdbcConverter conv1 = call.rel(1);
  final HiveJdbcConverter conv2 = call.rel(2);
  if (!conv1.getJdbcDialect().equals(conv2.getJdbcDialect())) {
    return false;
  }
  return JDBCAbstractSplitFilterRule.canSplitFilter(join.getCondition(), conv1.getJdbcDialect());
}
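matches() is evaluated before onMatch() fires, so it is the cheap place to veto an application; here the rule refuses to fire unless both HiveJdbcConverter inputs report the same JDBC dialect and the join condition is splittable. A hypothetical skeleton of that guard pattern (class name and condition are invented, plain Calcite packages assumed):
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.core.Join;

// Hypothetical skeleton: matches() performs a lightweight precondition check,
// onMatch() runs the actual (possibly expensive) rewrite only for calls that passed it.
public class GuardedJoinRule extends RelOptRule {
  public GuardedJoinRule() {
    super(operand(Join.class, any()));
  }

  @Override
  public boolean matches(RelOptRuleCall call) {
    final Join join = call.rel(0);
    // Cheap structural veto; a real rule would check dialects, condition shape, etc.
    return !join.getCondition().isAlwaysTrue();
  }

  @Override
  public void onMatch(RelOptRuleCall call) {
    // The transformation itself would go here; it only runs when matches() returned true.
  }
}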
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.core.Join in project hive by apache.
the class HiveAggregateJoinTransposeRule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  try {
    final Aggregate aggregate = call.rel(0);
    final Join join = call.rel(1);
    final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
    final RelBuilder relBuilder = call.builder();
    // If any aggregate call is not splittable or has a filter, bail out
    for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
      if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class) == null) {
        return;
      }
      if (aggregateCall.filterArg >= 0) {
        return;
      }
    }
    // If it is not an inner join, we do not push the aggregate operator
    if (join.getJoinType() != JoinRelType.INNER) {
      return;
    }
    if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) {
      return;
    }
    boolean groupingUnique = isGroupingUnique(join, aggregate.getGroupSet());
    if (!groupingUnique && !costBased) {
      // There is no need to check further - the transformation may not happen
      return;
    }
    // Do the columns used by the join appear in the output of the aggregate?
    final ImmutableBitSet aggregateColumns = aggregate.getGroupSet();
    final RelMetadataQuery mq = call.getMetadataQuery();
    final ImmutableBitSet keyColumns = keyColumns(aggregateColumns,
        mq.getPulledUpPredicates(join).pulledUpPredicates);
    final ImmutableBitSet joinColumns = RelOptUtil.InputFinder.bits(join.getCondition());
    final boolean allColumnsInAggregate = keyColumns.contains(joinColumns);
    final ImmutableBitSet belowAggregateColumns = aggregateColumns.union(joinColumns);
    // Split join condition
    final List<Integer> leftKeys = Lists.newArrayList();
    final List<Integer> rightKeys = Lists.newArrayList();
    final List<Boolean> filterNulls = Lists.newArrayList();
    RexNode nonEquiConj = RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(),
        join.getCondition(), leftKeys, rightKeys, filterNulls);
    // If it contains non-equi join conditions, we bail out
    if (!nonEquiConj.isAlwaysTrue()) {
      return;
    }
    // Push each aggregate function down to each side that contains all of its
    // arguments. Note that COUNT(*), because it has no arguments, can go to
    // both sides.
    final Map<Integer, Integer> map = new HashMap<>();
    final List<Side> sides = new ArrayList<>();
    int uniqueCount = 0;
    int offset = 0;
    int belowOffset = 0;
    for (int s = 0; s < 2; s++) {
      final Side side = new Side();
      final RelNode joinInput = join.getInput(s);
      int fieldCount = joinInput.getRowType().getFieldCount();
      final ImmutableBitSet fieldSet = ImmutableBitSet.range(offset, offset + fieldCount);
      final ImmutableBitSet belowAggregateKeyNotShifted = belowAggregateColumns.intersect(fieldSet);
      for (Ord<Integer> c : Ord.zip(belowAggregateKeyNotShifted)) {
        map.put(c.e, belowOffset + c.i);
      }
      final ImmutableBitSet belowAggregateKey = belowAggregateKeyNotShifted.shift(-offset);
      final boolean unique;
      if (!allowFunctions) {
        assert aggregate.getAggCallList().isEmpty();
        // If there are no functions, it doesn't matter as much whether we
        // aggregate the inputs before the join, because there will not be
        // any functions experiencing a cartesian product effect.
        //
        // But finding out whether the input is already unique requires a call
        // to areColumnsUnique that currently (until [CALCITE-1048] "Make
        // metadata more robust" is fixed) places a heavy load on
        // the metadata system.
        //
        // So we choose to imagine that the input is already unique, which is
        // untrue but harmless.
        //
        unique = true;
      } else {
        final Boolean unique0 = mq.areColumnsUnique(joinInput, belowAggregateKey, true);
        unique = unique0 != null && unique0;
      }
      if (unique) {
        ++uniqueCount;
        relBuilder.push(joinInput);
        relBuilder.project(belowAggregateKey.asList().stream()
            .map(relBuilder::field).collect(Collectors.toList()));
        side.newInput = relBuilder.build();
      } else {
        List<AggregateCall> belowAggCalls = new ArrayList<>();
        final SqlSplittableAggFunction.Registry<AggregateCall> belowAggCallRegistry =
            registry(belowAggCalls);
        final Mappings.TargetMapping mapping = s == 0
            ? Mappings.createIdentity(fieldCount)
            : Mappings.createShiftMapping(fieldCount + offset, 0, offset, fieldCount);
        for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
          final SqlAggFunction aggregation = aggCall.e.getAggregation();
          final SqlSplittableAggFunction splitter =
              Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
          final AggregateCall call1;
          if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) {
            call1 = splitter.split(aggCall.e, mapping);
          } else {
            call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e);
          }
          if (call1 != null) {
            side.split.put(aggCall.i,
                belowAggregateKey.cardinality() + belowAggCallRegistry.register(call1));
          }
        }
        side.newInput = relBuilder.push(joinInput)
            .aggregate(relBuilder.groupKey(belowAggregateKey, null), belowAggCalls)
            .build();
      }
      offset += fieldCount;
      belowOffset += side.newInput.getRowType().getFieldCount();
      sides.add(side);
    }
    if (uniqueCount == 2) {
      // Both inputs to the join are unique, so there is nothing to be gained here.
      // In fact, this aggregate + join may be the result of a previous
      // invocation of this rule; if we continue we might loop forever.
      return;
    }
    // Update condition
    final Mapping mapping = (Mapping) Mappings.target(map::get,
        join.getRowType().getFieldCount(), belowOffset);
    final RexNode newCondition = RexUtil.apply(mapping, join.getCondition());
    // Create new join
    relBuilder.push(sides.get(0).newInput)
        .push(sides.get(1).newInput)
        .join(join.getJoinType(), newCondition);
    // Aggregate above to sum up the sub-totals
    final List<AggregateCall> newAggCalls = new ArrayList<>();
    final int groupIndicatorCount = aggregate.getGroupCount() + aggregate.getIndicatorCount();
    final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount();
    final List<RexNode> projects =
        new ArrayList<>(rexBuilder.identityProjects(relBuilder.peek().getRowType()));
    for (Ord<AggregateCall> aggCall : Ord.zip(aggregate.getAggCallList())) {
      final SqlAggFunction aggregation = aggCall.e.getAggregation();
      final SqlSplittableAggFunction splitter =
          Preconditions.checkNotNull(aggregation.unwrap(SqlSplittableAggFunction.class));
      final Integer leftSubTotal = sides.get(0).split.get(aggCall.i);
      final Integer rightSubTotal = sides.get(1).split.get(aggCall.i);
      newAggCalls.add(splitter.topSplit(rexBuilder, registry(projects), groupIndicatorCount,
          relBuilder.peek().getRowType(), aggCall.e,
          leftSubTotal == null ? -1 : leftSubTotal,
          rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth));
    }
    relBuilder.project(projects);
    boolean aggConvertedToProjects = false;
    if (allColumnsInAggregate) {
      // let's see if we can convert aggregate into projects
      List<RexNode> projects2 = new ArrayList<>();
      for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) {
        projects2.add(relBuilder.field(key));
      }
      for (AggregateCall newAggCall : newAggCalls) {
        final SqlSplittableAggFunction splitter =
            newAggCall.getAggregation().unwrap(SqlSplittableAggFunction.class);
        if (splitter != null) {
          final RelDataType rowType = relBuilder.peek().getRowType();
          projects2.add(splitter.singleton(rexBuilder, rowType, newAggCall));
        }
      }
      if (projects2.size() == aggregate.getGroupSet().cardinality() + newAggCalls.size()) {
        // We successfully converted agg calls into projects.
        relBuilder.project(projects2);
        aggConvertedToProjects = true;
      }
    }
    if (!aggConvertedToProjects) {
      relBuilder.aggregate(
          relBuilder.groupKey(Mappings.apply(mapping, aggregate.getGroupSet()),
              Mappings.apply2(mapping, aggregate.getGroupSets())),
          newAggCalls);
    }
    RelNode r = relBuilder.build();
    boolean transform = false;
    if (uniqueBased && aggConvertedToProjects) {
      transform = groupingUnique;
    }
    if (!transform && costBased) {
      RelOptCost afterCost = mq.getCumulativeCost(r);
      RelOptCost beforeCost = mq.getCumulativeCost(aggregate);
      transform = afterCost.isLt(beforeCost);
    }
    if (transform) {
      call.transformTo(r);
    }
  } catch (Exception e) {
    if (noColsMissingStats.get() > 0) {
      LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO");
      noColsMissingStats.set(0);
    } else {
      throw e;
    }
  }
}
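The "Update condition" step above turns the HashMap collected in the per-side loop (old join ordinal to ordinal in the new, possibly pre-aggregated inputs) into a Mappings.TargetMapping and applies it to the join condition with RexUtil.apply. A tiny standalone sketch of that idiom, with made-up field counts and the plain (non-vendored) Calcite import:
import java.util.HashMap;
import java.util.Map;
import org.apache.calcite.util.mapping.Mappings;

public class TargetMappingDemo {
  public static void main(String[] args) {
    // Suppose the old join exposed 4 columns and only columns 0 and 3 survive,
    // as columns 0 and 1 of the new join.
    Map<Integer, Integer> map = new HashMap<>();
    map.put(0, 0);
    map.put(3, 1);
    Mappings.TargetMapping mapping = Mappings.target(map::get, 4, 2);
    System.out.println(mapping.getTargetOpt(0));  // 0
    System.out.println(mapping.getTargetOpt(3));  // 1
    System.out.println(mapping.getTargetOpt(2));  // -1: this column has no counterpart
    // In the rule above, RexUtil.apply(mapping, joinCondition) then rewrites the
    // input refs of the old condition to point at the new inputs.
  }
}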