use of org.apache.calcite.util.Pair in project hive by apache.
the class HiveRelMdSelectivity method getCombinedPredicateForJoin.
/**
 * Chooses the predicate to use for a join that has an additional predicate
 * applied on top of it.
 *
 * <p>The join condition is subtracted from {@code additionalPredicate} using
 * {@code RelMdUtil.minusPreds}. If anything is left over, that remainder is
 * returned; otherwise the join condition itself is returned.
 *
 * @param j join whose condition is subtracted from the additional predicate
 * @param additionalPredicate predicate to compare against the join condition
 * @return {@code (false, minusPred)} when {@code minusPreds} yields a non-null
 *         remainder; {@code (true, joinCond)} otherwise, i.e. when the
 *         predicate is the join condition
 */
private Pair<Boolean, RexNode> getCombinedPredicateForJoin(Join j, RexNode additionalPredicate) {
  RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate, j.getCondition());
  if (minusPred != null) {
    // Only the remainder is reported; the join condition is not combined with it here.
    return new Pair<Boolean, RexNode>(false, minusPred);
  }
  return new Pair<Boolean, RexNode>(true, j.getCondition());
}
use of org.apache.calcite.util.Pair in project hive by apache.
the class HiveJoinToMultiJoinRule method mergeJoin.
/**
 * Tries to merge {@code join} with its left child into a single {@link HiveMultiJoin}.
 * The left child must itself be a {@link HiveJoin} or a {@link HiveMultiJoin} for this
 * to happen.
 *
 * <p>Returns {@code null} (no merge) when any of the following holds:
 * <ul>
 * <li>{@code left} is neither a {@code HiveJoin} nor a {@code HiveMultiJoin};</li>
 * <li>{@code isCombinableJoin} returns false or throws {@code CalciteSemanticException};</li>
 * <li>{@code HiveRelOptUtil.splitHiveJoinCondition} throws {@code CalciteSemanticException};</li>
 * <li>{@code join} is not an inner join and its keys reference more than one input on
 * either side;</li>
 * <li>the combined condition has fewer equi-join predicate elements than inputs - 1;</li>
 * <li>for some input, two equi-join predicate elements use different sets of join keys.</li>
 * </ul>
 *
 * @param join the join to merge with its left child
 * @param left candidate left child; its inputs become the first inputs of the result
 * @param right right child; appended as the last input of the result
 * @return the merged multi-join, or {@code null} if the merge is not performed
 * @throws RuntimeException wrapping the {@code CalciteSemanticException} raised while
 * constructing the join predicate info for the merged condition
 */
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
// We check whether the join can be combined with any of its children
final List<RelNode> newInputs = Lists.newArrayList();
final List<RexNode> newJoinCondition = Lists.newArrayList();
// Parallel lists: entry i describes one join of the resulting multi-join
// (pair of input indexes, join type, filter).
final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
final List<JoinRelType> joinTypes = Lists.newArrayList();
final List<RexNode> joinFilters = Lists.newArrayList();
// Left child
if (left instanceof HiveJoin || left instanceof HiveMultiJoin) {
final RexNode leftCondition;
final List<Pair<Integer, Integer>> leftJoinInputs;
final List<JoinRelType> leftJoinTypes;
final List<RexNode> leftJoinFilters;
boolean combinable;
if (left instanceof HiveJoin) {
// A plain binary join is described as a single join between its inputs 0 and 1.
HiveJoin hj = (HiveJoin) left;
leftCondition = hj.getCondition();
leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
leftJoinTypes = ImmutableList.of(hj.getJoinType());
leftJoinFilters = ImmutableList.of(hj.getJoinFilter());
try {
combinable = isCombinableJoin(join, hj);
} catch (CalciteSemanticException e) {
// Best effort: a failure to analyze the joins just means we do not merge.
LOG.trace("Failed to merge join-join", e);
combinable = false;
}
} else {
// An existing multi-join already carries its own join specs; reuse them as-is.
HiveMultiJoin hmj = (HiveMultiJoin) left;
leftCondition = hmj.getCondition();
leftJoinInputs = hmj.getJoinInputs();
leftJoinTypes = hmj.getJoinTypes();
leftJoinFilters = hmj.getJoinFilters();
try {
combinable = isCombinableJoin(join, hmj);
} catch (CalciteSemanticException e) {
// Best effort: a failure to analyze the joins just means we do not merge.
LOG.trace("Failed to merge join-multijoin", e);
combinable = false;
}
}
if (combinable) {
// Carry over the left child's condition, join specs and inputs.
newJoinCondition.add(leftCondition);
for (int i = 0; i < leftJoinInputs.size(); i++) {
joinInputs.add(leftJoinInputs.get(i));
joinTypes.add(leftJoinTypes.get(i));
joinFilters.add(leftJoinFilters.get(i));
}
newInputs.addAll(left.getInputs());
} else {
// The join operation in the child is not on the same keys
return null;
}
} else {
// The left child is not a join or multijoin operator
return null;
}
// Inputs with index < numberLeftInputs come from the left child; the right
// child is placed after them.
final int numberLeftInputs = newInputs.size();
// Right child
newInputs.add(right);
// If we cannot combine any of the children, we bail out
// NOTE(review): every path reaching this point has already added leftCondition,
// so the size is 2 here and this check looks unreachable - confirm before removing.
newJoinCondition.add(join.getCondition());
if (newJoinCondition.size() == 1) {
return null;
}
final List<RelDataTypeField> systemFieldList = ImmutableList.of();
// One (initially empty) list of join key expressions per input, handed to
// splitHiveJoinCondition together with the condition of the join being merged.
List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
List<Integer> filterNulls = new ArrayList<Integer>();
for (int i = 0; i < newInputs.size(); i++) {
joinKeyExprs.add(new ArrayList<RexNode>());
}
RexNode filters;
try {
filters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), joinKeyExprs, filterNulls, null);
} catch (CalciteSemanticException e) {
LOG.trace("Failed to merge joins", e);
return null;
}
// Record which inputs ended up with at least one join key expression.
ImmutableBitSet.Builder keysInInputsBuilder = ImmutableBitSet.builder();
for (int i = 0; i < newInputs.size(); i++) {
List<RexNode> partialCondition = joinKeyExprs.get(i);
if (!partialCondition.isEmpty()) {
keysInInputsBuilder.set(i);
}
}
// If we cannot merge, we bail out
ImmutableBitSet keysInInputs = keysInInputsBuilder.build();
ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size()));
// A non-inner join is only merged when its keys touch at most one input per side.
if (join.getJoinType() != JoinRelType.INNER && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
return null;
}
// Otherwise, we add to the join specs
if (join.getJoinType() != JoinRelType.INNER) {
// Single join spec: first input with keys at or after index 0, paired with the
// first input with keys at or after numberLeftInputs.
int leftInput = keysInInputs.nextSetBit(0);
int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
joinInputs.add(Pair.of(leftInput, rightInput));
joinTypes.add(join.getJoinType());
joinFilters.add(filters);
} else {
// Inner join: one join spec for every (left input, right input) pair with keys.
for (int i : leftReferencedInputs) {
for (int j : rightReferencedInputs) {
joinInputs.add(Pair.of(i, j));
joinTypes.add(join.getJoinType());
joinFilters.add(filters);
}
}
}
// We can now create a multijoin operator
RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, newJoinCondition, false));
List<RelNode> newInputsArray = Lists.newArrayList(newInputs);
JoinPredicateInfo joinPredInfo = null;
try {
joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
} catch (CalciteSemanticException e) {
// Unlike the earlier failures, this one is not treated as "cannot merge"; it is rethrown unchecked.
throw new RuntimeException(e);
}
// If the number of joins < number of input tables-1, this is not a star join.
if (joinPredInfo.getEquiJoinPredicateElements().size() < newInputs.size() - 1) {
return null;
}
// Validate that the multi-join is a valid star join before returning it.
// For each input, every equi-join predicate element that has keys for that input
// must use the same set of key expressions as the first such element.
for (int i = 0; i < newInputs.size(); i++) {
List<RexNode> joinKeys = null;
for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
List<RexNode> currJoinKeys = joinPredInfo.getEquiJoinPredicateElements().get(j).getJoinExprs(i);
if (currJoinKeys.isEmpty()) {
continue;
}
if (joinKeys == null) {
joinKeys = currJoinKeys;
} else {
// Bail out if this is the case.
// (i.e. the two key lists do not contain exactly the same expressions)
if (!joinKeys.containsAll(currJoinKeys) || !currJoinKeys.containsAll(joinKeys)) {
return null;
}
}
}
}
return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
use of org.apache.calcite.util.Pair in project hive by apache.
the class HiveExpandDistinctAggregatesRule method createSelectDistinct.
/**
 * Builds a 'select distinct' relational expression over the input of the
 * given aggregate. The result projects the aggregate's group columns followed
 * by the requested argument columns, and nothing else.
 *
 * <p>For example, given
 *
 * <blockquote>
 * <pre>select f0, count(distinct f1), count(distinct f2)
 * from t group by f0</pre>
 * </blockquote>
 *
 * and the arglist
 *
 * <blockquote>{2}</blockquote>
 *
 * returns
 *
 * <blockquote>
 * <pre>select distinct f0, f2 from t</pre>
 * </blockquote>
 *
 * <p>The <code>sourceOf</code> map is filled with one entry per projected
 * column, keyed by the column's ordinal in the aggregate's input and mapped to
 * its position in the projection; in this case sourceOf.get(0) = 0, and
 * sourceOf.get(2) = 1. An argument whose ordinal already has a non-null entry
 * in the map is not projected a second time.</p>
 *
 * @param aggregate Aggregate relational expression
 * @param argList Ordinals of columns to make distinct
 * @param sourceOf Out parameter, is populated with a map of where each
 *                 projected input column ended up
 * @return Copy of the aggregate over the new projection, grouping on every
 *         projected column and with no aggregate calls
 */
private static Aggregate createSelectDistinct(Aggregate aggregate, List<Integer> argList, Map<Integer, Integer> sourceOf) {
  final RelNode input = aggregate.getInput();
  final List<RelDataTypeField> inputFields = input.getRowType().getFieldList();
  final List<Pair<RexNode, String>> projections = new ArrayList<>();

  // Group columns come first, in group-set order.
  for (int groupOrdinal : aggregate.getGroupSet()) {
    sourceOf.put(groupOrdinal, projections.size());
    projections.add(RexInputRef.of2(groupOrdinal, inputFields));
  }

  // Then the distinct arguments that have not been projected yet.
  for (Integer argOrdinal : argList) {
    if (sourceOf.get(argOrdinal) == null) {
      sourceOf.put(argOrdinal, projections.size());
      projections.add(RexInputRef.of2(argOrdinal, inputFields));
    }
  }

  final List<RexNode> projectedExprs = Pair.left(projections);
  final List<String> projectedNames = Pair.right(projections);
  final RelNode distinctInput = projFactory.createProject(input, Collections.emptyList(), projectedExprs, projectedNames);

  // Get the distinct values of the GROUP BY fields and the arguments
  // to the agg functions.
  final ImmutableBitSet everyProjectedColumn = ImmutableBitSet.range(projections.size());
  return aggregate.copy(aggregate.getTraitSet(), distinctInput, false, everyProjectedColumn, null, ImmutableList.<AggregateCall>of());
}
use of org.apache.calcite.util.Pair in project hive by apache.
the class PartitionPrune method extractPartitionPredicates.
/**
 * Splits a predicate into two pieces: the expressions that only contain
 * partition columns (usable for Partition Pruning), and the predicates that
 * are left over.
 *
 * <p>The pruning piece is computed first, by visiting {@code predicate} with
 * an {@code ExtractPartPruningPredicate}; the remaining piece is then computed
 * by visiting the same predicate with an {@code ExtractRemainingPredicate}
 * that is given the pruning piece.
 *
 * @param cluster cluster passed to both extracting visitors
 * @param hiveTable table passed to the partition-pruning visitor
 * @param predicate predicate to break apart
 * @return a Pair of expressions, each of which may be null. The 1st predicate
 *         is expressions that only contain partition columns; the 2nd
 *         predicate contains the remaining predicates.
 */
public static Pair<RexNode, RexNode> extractPartitionPredicates(RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
  final ExtractPartPruningPredicate pruningExtractor = new ExtractPartPruningPredicate(cluster, hiveTable);
  final RexNode prunablePart = predicate.accept(pruningExtractor);

  final ExtractRemainingPredicate remainderExtractor = new ExtractRemainingPredicate(cluster, prunablePart);
  final RexNode leftoverPart = predicate.accept(remainderExtractor);

  return new Pair<>(prunablePart, leftoverPart);
}
use of org.apache.calcite.util.Pair in project hive by apache.
the class HiveRelFieldTrimmer method generateGroupSetIfCardinalitySame.
// Given a group set, this tries to find out whether the cardinality of the grouping columns could
// have changed. If it could not, and the group set consists of keys (unique + not null OR pk), the
// rest of the grouping columns can safely be removed, provided they are not used further up.
// Returns the original group set minus the columns found to be removable.
private ImmutableBitSet generateGroupSetIfCardinalitySame(final Aggregate aggregate, final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) {
  RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
  RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();

  // backtracked table ref -> (backtracked column index, group-by key) pairs
  Map<RexTableInputRef.RelTableRef, List<Pair<Integer, Integer>>> lineageByTable = new HashMap<>();
  // table ref -> group-by keys that are candidates for removal
  Map<RexTableInputRef.RelTableRef, List<Integer>> candidatesByTable = new HashMap<>();

  for (int groupKey : originalGroupSet) {
    RexNode keyRef = rexBuilder.makeInputRef(aggregate.getInput(), groupKey);
    Set<RexNode> lineage = mq.getExpressionLineage(aggregate.getInput(), keyRef);
    if (lineage == null || lineage.size() != 1) {
      // only keys with exactly one lineage expression are considered
      continue;
    }
    RexNode origin = lineage.iterator().next();
    if (origin instanceof RexTableInputRef) {
      RexTableInputRef columnRef = (RexTableInputRef) origin;
      lineageByTable
          .computeIfAbsent(columnRef.getTableRef(), ref -> new ArrayList<>())
          .add(Pair.of(columnRef.getIndex(), groupKey));
    } else if (RexUtil.isDeterministic(origin)) {
      // even though we weren't able to backtrack this key, it could still be a candidate for
      // removal if the rest of the columns contain pk/unique
      Set<RexTableInputRef.RelTableRef> referencedTables = RexUtil.gatherTableReferences(Lists.newArrayList(origin));
      if (referencedTables.size() == 1) {
        candidatesByTable
            .computeIfAbsent(referencedTables.iterator().next(), ref -> new ArrayList<>())
            .add(groupKey);
      }
    }
  }

  // we want to delete all columns in the original GB set except the key
  ImmutableBitSet.Builder removable = ImmutableBitSet.builder();
  for (Map.Entry<RexTableInputRef.RelTableRef, List<Pair<Integer, Integer>>> tableEntry : lineageByTable.entrySet()) {
    RexTableInputRef.RelTableRef tableRef = tableEntry.getKey();
    RelOptHiveTable hiveTable = (RelOptHiveTable) tableRef.getTable();
    List<Pair<Integer, Integer>> keyColumnPairs = tableEntry.getValue();

    ImmutableBitSet.Builder backtrackedBuilder = ImmutableBitSet.builder();
    for (Pair<Integer, Integer> keyColumnPair : keyColumnPairs) {
      backtrackedBuilder.set(keyColumnPair.left);
    }
    ImmutableBitSet backtrackedColumns = backtrackedBuilder.build();

    // pick the first non-nullable key fully contained in the backtracked grouping columns
    ImmutableBitSet coveredKey = null;
    for (ImmutableBitSet tableKey : hiveTable.getNonNullableKeys()) {
      if (backtrackedColumns.contains(tableKey)) {
        coveredKey = tableKey;
        break;
      }
    }
    if (coveredKey == null || coveredKey.isEmpty()) {
      continue;
    }

    // we have established that this gb set contains keys and it is safe to remove the rest of
    // the columns
    for (Pair<Integer, Integer> keyColumnPair : keyColumnPairs) {
      Integer tableColumn = keyColumnPair.left;
      Integer groupColumn = keyColumnPair.right;
      if (fieldsUsed.get(groupColumn) || coveredKey.get(tableColumn)) {
        // still used further up, or part of the key itself: keep it
        continue;
      }
      removable.set(groupColumn);
    }

    // remove candidate keys if possible
    List<Integer> tableCandidates = candidatesByTable.get(tableRef);
    if (tableCandidates != null) {
      for (Integer candidate : tableCandidates) {
        if (!fieldsUsed.get(candidate)) {
          removable.set(candidate);
        }
      }
    }
  }

  ImmutableBitSet newGroupSet = originalGroupSet.except(removable.build());
  assert (!newGroupSet.isEmpty());
  return newGroupSet;
}
Aggregations