Search in sources :

Example 71 with Pair

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.Pair in project hive by apache.

the class HiveRelMdSelectivity method getCombinedPredicateForJoin.

/**
 * @param j
 * @param additionalPredicate
 * @return if predicate is the join condition return (true, joinCond)
 * else return (false, minusPred)
 */
private Pair<Boolean, RexNode> getCombinedPredicateForJoin(Join j, RexNode additionalPredicate) {
    RexNode minusPred = RelMdUtil.minusPreds(j.getCluster().getRexBuilder(), additionalPredicate, j.getCondition());
    if (minusPred != null) {
        List<RexNode> minusList = new ArrayList<RexNode>();
        minusList.add(j.getCondition());
        minusList.add(minusPred);
        return new Pair<Boolean, RexNode>(false, minusPred);
    }
    return new Pair<Boolean, RexNode>(true, j.getCondition());
}
Also used : ArrayList(java.util.ArrayList) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 72 with Pair

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.Pair in project hive by apache.

the class HiveJoinToMultiJoinRule method mergeJoin.

// This method tries to merge the join with its left child. The left
// child should be a join for this to happen.
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();
    // We check whether the join can be combined with any of its children
    final List<RelNode> newInputs = Lists.newArrayList();
    final List<RexNode> newJoinCondition = Lists.newArrayList();
    final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
    final List<JoinRelType> joinTypes = Lists.newArrayList();
    final List<RexNode> joinFilters = Lists.newArrayList();
    // Left child
    if (left instanceof HiveJoin || left instanceof HiveMultiJoin) {
        final RexNode leftCondition;
        final List<Pair<Integer, Integer>> leftJoinInputs;
        final List<JoinRelType> leftJoinTypes;
        final List<RexNode> leftJoinFilters;
        boolean combinable;
        if (left instanceof HiveJoin) {
            HiveJoin hj = (HiveJoin) left;
            leftCondition = hj.getCondition();
            leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
            leftJoinTypes = ImmutableList.of(hj.getJoinType());
            leftJoinFilters = ImmutableList.of(hj.getJoinFilter());
            try {
                combinable = isCombinableJoin(join, hj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-join", e);
                combinable = false;
            }
        } else {
            HiveMultiJoin hmj = (HiveMultiJoin) left;
            leftCondition = hmj.getCondition();
            leftJoinInputs = hmj.getJoinInputs();
            leftJoinTypes = hmj.getJoinTypes();
            leftJoinFilters = hmj.getJoinFilters();
            try {
                combinable = isCombinableJoin(join, hmj);
            } catch (CalciteSemanticException e) {
                LOG.trace("Failed to merge join-multijoin", e);
                combinable = false;
            }
        }
        if (combinable) {
            newJoinCondition.add(leftCondition);
            for (int i = 0; i < leftJoinInputs.size(); i++) {
                joinInputs.add(leftJoinInputs.get(i));
                joinTypes.add(leftJoinTypes.get(i));
                joinFilters.add(leftJoinFilters.get(i));
            }
            newInputs.addAll(left.getInputs());
        } else {
            // The join operation in the child is not on the same keys
            return null;
        }
    } else {
        // The left child is not a join or multijoin operator
        return null;
    }
    final int numberLeftInputs = newInputs.size();
    // Right child
    newInputs.add(right);
    // If we cannot combine any of the children, we bail out
    newJoinCondition.add(join.getCondition());
    if (newJoinCondition.size() == 1) {
        return null;
    }
    final List<RelDataTypeField> systemFieldList = ImmutableList.of();
    List<List<RexNode>> joinKeyExprs = new ArrayList<List<RexNode>>();
    List<Integer> filterNulls = new ArrayList<Integer>();
    for (int i = 0; i < newInputs.size(); i++) {
        joinKeyExprs.add(new ArrayList<RexNode>());
    }
    RexNode filters;
    try {
        filters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, newInputs, join.getCondition(), joinKeyExprs, filterNulls, null);
    } catch (CalciteSemanticException e) {
        LOG.trace("Failed to merge joins", e);
        return null;
    }
    ImmutableBitSet.Builder keysInInputsBuilder = ImmutableBitSet.builder();
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> partialCondition = joinKeyExprs.get(i);
        if (!partialCondition.isEmpty()) {
            keysInInputsBuilder.set(i);
        }
    }
    // If we cannot merge, we bail out
    ImmutableBitSet keysInInputs = keysInInputsBuilder.build();
    ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
    ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, newInputs.size()));
    if (join.getJoinType() != JoinRelType.INNER && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
        return null;
    }
    // Otherwise, we add to the join specs
    if (join.getJoinType() != JoinRelType.INNER) {
        int leftInput = keysInInputs.nextSetBit(0);
        int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
        joinInputs.add(Pair.of(leftInput, rightInput));
        joinTypes.add(join.getJoinType());
        joinFilters.add(filters);
    } else {
        for (int i : leftReferencedInputs) {
            for (int j : rightReferencedInputs) {
                joinInputs.add(Pair.of(i, j));
                joinTypes.add(join.getJoinType());
                joinFilters.add(filters);
            }
        }
    }
    // We can now create a multijoin operator
    RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, newJoinCondition, false));
    List<RelNode> newInputsArray = Lists.newArrayList(newInputs);
    JoinPredicateInfo joinPredInfo = null;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
    } catch (CalciteSemanticException e) {
        throw new RuntimeException(e);
    }
    // If the number of joins < number of input tables-1, this is not a star join.
    if (joinPredInfo.getEquiJoinPredicateElements().size() < newInputs.size() - 1) {
        return null;
    }
    // Validate that the multi-join is a valid star join before returning it.
    for (int i = 0; i < newInputs.size(); i++) {
        List<RexNode> joinKeys = null;
        for (int j = 0; j < joinPredInfo.getEquiJoinPredicateElements().size(); j++) {
            List<RexNode> currJoinKeys = joinPredInfo.getEquiJoinPredicateElements().get(j).getJoinExprs(i);
            if (currJoinKeys.isEmpty()) {
                continue;
            }
            if (joinKeys == null) {
                joinKeys = currJoinKeys;
            } else {
                // Bail out if this is the case.
                if (!joinKeys.containsAll(currJoinKeys) || !currJoinKeys.containsAll(joinKeys)) {
                    return null;
                }
            }
        }
    }
    return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Pair(org.apache.calcite.util.Pair) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexNode(org.apache.calcite.rex.RexNode)

Example 73 with Pair

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.Pair in project hive by apache.

the class HiveExpandDistinctAggregatesRule method createSelectDistinct.

/**
 * Given an {@link org.apache.calcite.rel.logical.LogicalAggregate}
 * and the ordinals of the arguments to a
 * particular call to an aggregate function, creates a 'select distinct'
 * relational expression which projects the group columns and those
 * arguments but nothing else.
 *
 * <p>For example, given
 *
 * <blockquote>
 * <pre>select f0, count(distinct f1), count(distinct f2)
 * from t group by f0</pre>
 * </blockquote>
 *
 * and the arglist
 *
 * <blockquote>{2}</blockquote>
 *
 * returns
 *
 * <blockquote>
 * <pre>select distinct f0, f2 from t</pre>
 * </blockquote>
 *
 * '
 *
 * <p>The <code>sourceOf</code> map is populated with the source of each
 * column; in this case sourceOf.get(0) = 0, and sourceOf.get(1) = 2.</p>
 *
 * @param aggregate Aggregate relational expression
 * @param argList   Ordinals of columns to make distinct
 * @param sourceOf  Out parameter, is populated with a map of where each
 *                  output field came from
 * @return Aggregate relational expression which projects the required
 * columns
 */
private static Aggregate createSelectDistinct(Aggregate aggregate, List<Integer> argList, Map<Integer, Integer> sourceOf) {
    final List<Pair<RexNode, String>> projects = new ArrayList<Pair<RexNode, String>>();
    final RelNode child = aggregate.getInput();
    final List<RelDataTypeField> childFields = child.getRowType().getFieldList();
    for (int i : aggregate.getGroupSet()) {
        sourceOf.put(i, projects.size());
        projects.add(RexInputRef.of2(i, childFields));
    }
    for (Integer arg : argList) {
        if (sourceOf.get(arg) != null) {
            continue;
        }
        sourceOf.put(arg, projects.size());
        projects.add(RexInputRef.of2(arg, childFields));
    }
    final RelNode project = projFactory.createProject(child, Collections.emptyList(), Pair.left(projects), Pair.right(projects));
    // to the agg functions.
    return aggregate.copy(aggregate.getTraitSet(), project, false, ImmutableBitSet.range(projects.size()), null, ImmutableList.<AggregateCall>of());
}
Also used : RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) Pair(org.apache.calcite.util.Pair) RexNode(org.apache.calcite.rex.RexNode)

Example 74 with Pair

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.Pair in project hive by apache.

the class PartitionPrune method extractPartitionPredicates.

/**
 * Breaks the predicate into 2 pieces. The first piece is the expressions that
 * only contain partition columns and can be used for Partition Pruning; the
 * second piece is the predicates that are left.
 *
 * @param cluster
 * @param hiveTable
 * @param predicate
 * @return a Pair of expressions, each of which maybe null. The 1st predicate
 *         is expressions that only contain partition columns; the 2nd
 *         predicate contains the remaining predicates.
 */
public static Pair<RexNode, RexNode> extractPartitionPredicates(RelOptCluster cluster, RelOptHiveTable hiveTable, RexNode predicate) {
    RexNode partitionPruningPred = predicate.accept(new ExtractPartPruningPredicate(cluster, hiveTable));
    RexNode remainingPred = predicate.accept(new ExtractRemainingPredicate(cluster, partitionPruningPred));
    return new Pair<RexNode, RexNode>(partitionPruningPred, remainingPred);
}
Also used : RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 75 with Pair

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.Pair in project hive by apache.

the class HiveRelFieldTrimmer method generateGroupSetIfCardinalitySame.

// Given a groupset this tries to find out if the cardinality of the grouping columns could have changed
// because if not and it consist of keys (unique + not null OR pk), we can safely remove rest of the columns
// if those are columns are not being used further up
private ImmutableBitSet generateGroupSetIfCardinalitySame(final Aggregate aggregate, final ImmutableBitSet originalGroupSet, final ImmutableBitSet fieldsUsed) {
    RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
    RelMetadataQuery mq = aggregate.getCluster().getMetadataQuery();
    // map from backtracked table ref to list of gb keys and list of corresponding backtracked columns
    Map<RexTableInputRef.RelTableRef, List<Pair<Integer, Integer>>> mapGBKeysLineage = new HashMap<>();
    // map from table ref to list of columns (from gb keys) which are candidate to be removed
    Map<RexTableInputRef.RelTableRef, List<Integer>> candidateKeys = new HashMap<>();
    for (int key : originalGroupSet) {
        RexNode inputRef = rexBuilder.makeInputRef(aggregate.getInput(), key);
        Set<RexNode> exprLineage = mq.getExpressionLineage(aggregate.getInput(), inputRef);
        if (exprLineage != null && exprLineage.size() == 1) {
            RexNode expr = exprLineage.iterator().next();
            if (expr instanceof RexTableInputRef) {
                RexTableInputRef tblRef = (RexTableInputRef) expr;
                if (mapGBKeysLineage.containsKey(tblRef.getTableRef())) {
                    mapGBKeysLineage.get(tblRef.getTableRef()).add(Pair.of(tblRef.getIndex(), key));
                } else {
                    List<Pair<Integer, Integer>> newList = new ArrayList<>();
                    newList.add(Pair.of(tblRef.getIndex(), key));
                    mapGBKeysLineage.put(tblRef.getTableRef(), newList);
                }
            } else if (RexUtil.isDeterministic(expr)) {
                // even though we weren't able to backtrack this key it could still be candidate for removal
                // if rest of the columns contain pk/unique
                Set<RexTableInputRef.RelTableRef> tableRefs = RexUtil.gatherTableReferences(Lists.newArrayList(expr));
                if (tableRefs.size() == 1) {
                    RexTableInputRef.RelTableRef tblRef = tableRefs.iterator().next();
                    if (candidateKeys.containsKey(tblRef)) {
                        List<Integer> candidateGBKeys = candidateKeys.get(tblRef);
                        candidateGBKeys.add(key);
                    } else {
                        List<Integer> candidateGBKeys = new ArrayList<>();
                        candidateGBKeys.add(key);
                        candidateKeys.put(tblRef, candidateGBKeys);
                    }
                }
            }
        }
    }
    // we want to delete all columns in original GB set except the key
    ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
    for (Map.Entry<RexTableInputRef.RelTableRef, List<Pair<Integer, Integer>>> entry : mapGBKeysLineage.entrySet()) {
        RelOptHiveTable tbl = (RelOptHiveTable) entry.getKey().getTable();
        List<Pair<Integer, Integer>> gbKeyCols = entry.getValue();
        ImmutableBitSet.Builder btBuilder = ImmutableBitSet.builder();
        gbKeyCols.forEach(pair -> btBuilder.set(pair.left));
        ImmutableBitSet backtrackedGBSet = btBuilder.build();
        List<ImmutableBitSet> allKeys = tbl.getNonNullableKeys();
        ImmutableBitSet currentKey = null;
        for (ImmutableBitSet key : allKeys) {
            if (backtrackedGBSet.contains(key)) {
                // only if grouping sets consist of keys
                currentKey = key;
                break;
            }
        }
        if (currentKey == null || currentKey.isEmpty()) {
            continue;
        }
        // we have established that this gb set contains keys and it is safe to remove rest of the columns
        for (Pair<Integer, Integer> gbKeyColPair : gbKeyCols) {
            Integer backtrackedCol = gbKeyColPair.left;
            Integer orgCol = gbKeyColPair.right;
            if (!fieldsUsed.get(orgCol) && !currentKey.get(backtrackedCol)) {
                // this could could be removed
                builder.set(orgCol);
            }
        }
        // remove candidate keys if possible
        if (candidateKeys.containsKey(entry.getKey())) {
            List<Integer> candidateGbKeys = candidateKeys.get(entry.getKey());
            for (Integer keyToRemove : candidateGbKeys) {
                if (!fieldsUsed.get(keyToRemove)) {
                    builder.set(keyToRemove);
                }
            }
        }
    }
    ImmutableBitSet keysToRemove = builder.build();
    ImmutableBitSet newGroupSet = originalGroupSet.except(keysToRemove);
    assert (!newGroupSet.isEmpty());
    return newGroupSet;
}
Also used : RelMetadataQuery(org.apache.calcite.rel.metadata.RelMetadataQuery) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) Set(java.util.Set) LinkedHashSet(java.util.LinkedHashSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RexBuilder(org.apache.calcite.rex.RexBuilder) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IntPair(org.apache.calcite.util.mapping.IntPair) Pair(org.apache.calcite.util.Pair) RexTableInputRef(org.apache.calcite.rex.RexTableInputRef) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) Map(java.util.Map) HashMap(java.util.HashMap) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Pair (org.apache.calcite.util.Pair)112 RexNode (org.apache.calcite.rex.RexNode)72 ArrayList (java.util.ArrayList)70 RelNode (org.apache.calcite.rel.RelNode)59 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)55 RexInputRef (org.apache.calcite.rex.RexInputRef)29 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)29 HashMap (java.util.HashMap)26 RexBuilder (org.apache.calcite.rex.RexBuilder)23 Map (java.util.Map)21 AggregateCall (org.apache.calcite.rel.core.AggregateCall)20 List (java.util.List)19 RelDataType (org.apache.calcite.rel.type.RelDataType)19 ImmutableList (com.google.common.collect.ImmutableList)18 JoinRelType (org.apache.calcite.rel.core.JoinRelType)16 TreeMap (java.util.TreeMap)14 RelDataTypeFactory (org.apache.calcite.rel.type.RelDataTypeFactory)13 RelBuilder (org.apache.calcite.tools.RelBuilder)13 ImmutableMap (com.google.common.collect.ImmutableMap)12 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)12