Search in sources :

Example 21 with Pair

Use of org.apache.calcite.util.Pair in the Apache Hive project.

In class Vectorizer, method validateAggregationDesc.

/**
 * Validates one aggregation for vectorization.
 *
 * <p>On failure an issue is recorded through {@code setExpressionIssue} or
 * {@code setOperatorIssue} and {@code (false, false)} is returned.
 *
 * @param aggDesc        aggregation descriptor to validate
 * @param processingMode GROUP BY processing mode; only {@code MERGE_PARTIAL}
 *                       adds an extra restriction here
 * @param hasKeys        whether the GROUP BY has keys
 * @return a pair whose left value tells whether the aggregation passed
 *         validation and whose right value tells whether the aggregate's
 *         output category is {@code PRIMITIVE}
 */
private Pair<Boolean, Boolean> validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, boolean hasKeys) {
    final String aggName = aggDesc.getGenericUDAFName().toLowerCase();

    // The function itself has to be on the supported list.
    if (!supportedAggregationUdfs.contains(aggName)) {
        setExpressionIssue("Aggregation Function", "UDF " + aggName + " not supported");
        return Pair.of(false, false);
    }

    // When parameters are present they have to pass expression validation.
    final boolean parametersValid = aggDesc.getParameters() == null
        || validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + aggName + " parameter");
    if (!parametersValid) {
        return Pair.of(false, false);
    }

    // Ask a validation-only vectorization context for the vectorized aggregate.
    final VectorizationContext validationContext = new ValidatorVectorizationContext(hiveConf);
    final VectorAggregateExpression aggExpr;
    try {
        aggExpr = validationContext.getAggregatorExpression(aggDesc);
    } catch (Exception e) {
        // Any failure to build the vectorized aggregate is reported as an expression issue.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Vectorization of aggregation should have succeeded ", e);
        }
        setExpressionIssue("Aggregation Function", "Vectorization of aggreation should have succeeded " + e);
        return Pair.of(false, false);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Aggregation " + aggDesc.getExprString() + " --> " + " vector expression " + aggExpr.toString());
    }

    final boolean primitiveOutput = aggregationOutputCategory(aggExpr) == ObjectInspector.Category.PRIMITIVE;

    // MERGE_PARTIAL with keys is rejected unless the aggregate output is primitive.
    final boolean keyedMergePartial = processingMode == ProcessingMode.MERGE_PARTIAL && hasKeys;
    if (keyedMergePartial && !primitiveOutput) {
        setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types");
        return Pair.of(false, false);
    }
    return Pair.of(true, primitiveOutput);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) VectorAggregateExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression) VectorizationContext(org.apache.hadoop.hive.ql.exec.vector.VectorizationContext) UDFToString(org.apache.hadoop.hive.ql.udf.UDFToString) UDFToBoolean(org.apache.hadoop.hive.ql.udf.UDFToBoolean) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Pair(org.apache.calcite.util.Pair) ImmutablePair(org.apache.commons.lang3.tuple.ImmutablePair)

Example 22 with Pair

Use of org.apache.calcite.util.Pair in the Apache Hive project.

In class HiveCalciteUtil, method projectNonColumnEquiConditions.

/**
 * Moves equi-join keys that are not plain column references into extra
 * projected columns on top of the two join inputs, and rebuilds the join
 * condition in terms of input references.
 *
 * <p>Key pairs in which both sides are already {@link RexInputRef}s are left
 * where they are. Every other key pair is appended as a new trailing column
 * to the left and right input respectively. When at least one such column is
 * added, {@code inputRels[0]} and {@code inputRels[1]} are replaced with the
 * new projections; otherwise {@code inputRels} is left unchanged.
 *
 * @param factory
 *          project factory used to create the new projections
 * @param inputRels
 *          the two join inputs (left at index 0, right at index 1); updated
 *          in place when projections are added
 * @param leftJoinKeys
 *          join key expressions for the left side
 * @param rightJoinKeys
 *          join key expressions for the right side, positionally matched
 *          with {@code leftJoinKeys}
 * @param systemColCount
 *          number of system columns preceding the left input's fields in the
 *          join row
 * @param leftKeys
 *          out parameter; receives the key positions relative to the
 *          (possibly new) left input
 * @param rightKeys
 *          out parameter; receives the key positions relative to the
 *          (possibly new) right input
 * @return the conjunction of equality conditions over the join row, or
 *         {@code null} when there are no join keys
 */
public static RexNode projectNonColumnEquiConditions(ProjectFactory factory, RelNode[] inputRels, List<RexNode> leftJoinKeys, List<RexNode> rightJoinKeys, int systemColCount, List<Integer> leftKeys, List<Integer> rightKeys) {
    RelNode leftInput = inputRels[0];
    RelNode rightInput = inputRels[1];
    final RexBuilder rexBuilder = leftInput.getCluster().getRexBuilder();
    final int leftFieldCount = leftInput.getRowType().getFieldCount();
    final int rightFieldCount = rightInput.getRowType().getFieldCount();

    // Start each projection as an identity projection of its input.
    final List<RexNode> leftProjects = new ArrayList<RexNode>();
    final List<String> leftProjectNames = new ArrayList<String>();
    final List<RelDataTypeField> leftFields = leftInput.getRowType().getFieldList();
    for (int col = 0; col < leftFieldCount; col++) {
        final RelDataTypeField field = leftFields.get(col);
        leftProjects.add(rexBuilder.makeInputRef(field.getType(), col));
        leftProjectNames.add(field.getName());
    }
    final List<RexNode> rightProjects = new ArrayList<RexNode>();
    final List<String> rightProjectNames = new ArrayList<String>();
    final List<RelDataTypeField> rightFields = rightInput.getRowType().getFieldList();
    for (int col = 0; col < rightFieldCount; col++) {
        final RelDataTypeField field = rightFields.get(col);
        rightProjects.add(rexBuilder.makeInputRef(field.getType(), col));
        rightProjectNames.add(field.getName());
    }

    // Split the key pairs: column-to-column pairs are remembered together with
    // their position; everything else becomes a new (unnamed) projected column.
    final List<Pair<Integer, Integer>> columnPairs = new ArrayList<Pair<Integer, Integer>>();
    final ImmutableBitSet.Builder columnPairPositions = ImmutableBitSet.builder();
    final int keyPairCount = leftJoinKeys.size();
    int addedKeyCount = 0;
    for (int k = 0; k < keyPairCount; k++) {
        final RexNode lhs = leftJoinKeys.get(k);
        final RexNode rhs = rightJoinKeys.get(k);
        final boolean bothColumns = lhs instanceof RexInputRef && rhs instanceof RexInputRef;
        if (!bothColumns) {
            leftProjects.add(lhs);
            leftProjectNames.add(null);
            rightProjects.add(rhs);
            rightProjectNames.add(null);
            addedKeyCount++;
            continue;
        }
        columnPairs.add(Pair.of(((RexInputRef) lhs).getIndex(), ((RexInputRef) rhs).getIndex()));
        columnPairPositions.set(k);
    }

    // Conditions for the column-to-column pairs. The right side is shifted past
    // the system columns, the original left fields and the added left keys.
    RexNode joinCondition = null;
    final ImmutableBitSet columnPairPos = columnPairPositions.build();
    final int rightFieldsStart = systemColCount + leftFieldCount + addedKeyCount;
    for (int n = 0; n < columnPairs.size(); n++) {
        final Pair<Integer, Integer> columns = columnPairs.get(n);
        final int keyPos = columnPairPos.nth(n);
        final RexNode lhs = leftJoinKeys.get(keyPos);
        final RexNode rhs = rightJoinKeys.get(keyPos);
        leftKeys.add(columns.left);
        rightKeys.add(columns.right);
        final RexNode equality = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
            rexBuilder.makeInputRef(lhs.getType(), systemColCount + columns.left),
            rexBuilder.makeInputRef(rhs.getType(), rightFieldsStart + columns.right));
        joinCondition = (joinCondition == null)
            ? equality
            : rexBuilder.makeCall(SqlStdOperatorTable.AND, joinCondition, equality);
    }

    // Nothing had to be projected: the inputs stay as they are.
    if (addedKeyCount == 0) {
        return joinCondition;
    }

    // Conditions for the added keys, which trail the original fields of each input.
    final int addedLeftStart = systemColCount + leftFieldCount;
    final int addedRightStart = systemColCount + leftFieldCount + rightFieldCount + addedKeyCount;
    for (int n = 0; n < addedKeyCount; n++) {
        leftKeys.add(leftFieldCount + n);
        rightKeys.add(rightFieldCount + n);
        final RexNode equality = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
            rexBuilder.makeInputRef(leftProjects.get(leftFieldCount + n).getType(), addedLeftStart + n),
            rexBuilder.makeInputRef(rightProjects.get(rightFieldCount + n).getType(), addedRightStart + n));
        joinCondition = (joinCondition == null)
            ? equality
            : rexBuilder.makeCall(SqlStdOperatorTable.AND, joinCondition, equality);
    }

    // New key columns exist here, so wrap both inputs in projections that
    // expose them and hand the new inputs back through the array.
    leftInput = factory.createProject(leftInput, leftProjects, SqlValidatorUtil.uniquify(leftProjectNames));
    rightInput = factory.createProject(rightInput, rightProjects, SqlValidatorUtil.uniquify(rightProjectNames));
    inputRels[0] = leftInput;
    inputRels[1] = rightInput;
    return joinCondition;
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) RexBuilder(org.apache.calcite.rex.RexBuilder) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 23 with Pair

Use of org.apache.calcite.util.Pair in the Apache Hive project.

In class HiveExpandDistinctAggregatesRule, method createSelectDistinct.

/**
 * Builds a 'select distinct' over the input of an aggregate: an aggregate
 * with no aggregate calls that groups by the original group columns plus
 * the argument columns of one aggregate function call.
 *
 * <p>For example, given
 *
 * <blockquote>
 * <pre>select f0, count(distinct f1), count(distinct f2)
 * from t group by f0</pre>
 * </blockquote>
 *
 * and the argument list
 *
 * <blockquote>{2}</blockquote>
 *
 * the result is
 *
 * <blockquote>
 * <pre>select distinct f0, f2 from t</pre>
 * </blockquote>
 *
 * <p>{@code sourceOf} is filled with one entry per projected column, keyed
 * by the column's ordinal in the aggregate's input and mapped to its
 * position in the projection; in the example above (f0 at ordinal 0, f2 at
 * ordinal 2) that gives sourceOf.get(0) = 0 and sourceOf.get(2) = 1.
 * Entries for group columns are always written; an argument column is only
 * added when {@code sourceOf} has no non-null entry for it yet.</p>
 *
 * @param aggregate Aggregate relational expression
 * @param argList   Ordinals of the columns to make distinct
 * @param sourceOf  Out parameter, populated with input ordinal to
 *                  projected position
 * @return Aggregate relational expression that groups by every projected
 * column and computes no aggregate calls
 */
private static Aggregate createSelectDistinct(Aggregate aggregate, List<Integer> argList, Map<Integer, Integer> sourceOf) {
    final RelNode input = aggregate.getInput();
    final List<RelDataTypeField> inputFields = input.getRowType().getFieldList();
    final List<Pair<RexNode, String>> projections = new ArrayList<Pair<RexNode, String>>();

    // Group columns come first, in group-set order.
    for (int groupOrdinal : aggregate.getGroupSet()) {
        sourceOf.put(groupOrdinal, projections.size());
        projections.add(RexInputRef.of2(groupOrdinal, inputFields));
    }

    // Then the argument columns that are not projected already.
    for (Integer argOrdinal : argList) {
        if (sourceOf.get(argOrdinal) == null) {
            sourceOf.put(argOrdinal, projections.size());
            projections.add(RexInputRef.of2(argOrdinal, inputFields));
        }
    }

    final RelNode projected = projFactory.createProject(input, Pair.left(projections), Pair.right(projections));

    // Grouping by every projected column with an empty list of aggregate calls
    // yields the distinct combinations of the group columns and the arguments.
    final ImmutableBitSet allProjectedColumns = ImmutableBitSet.range(projections.size());
    return aggregate.copy(aggregate.getTraitSet(), projected, false, allProjectedColumns, null, ImmutableList.<AggregateCall>of());
}
Also used : RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) HiveRelNode(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 24 with Pair

Use of org.apache.calcite.util.Pair in the Apache Hive project.

In class HiveSubQRemoveRelBuilder, method join.

/**
 * Creates a {@link org.apache.calcite.rel.core.Join} with correlating
 * variables.
 *
 * <p>The two topmost frames of the builder stack are consumed (right input
 * on top, left input beneath it) and a frame for the combined relation is
 * pushed back. When {@code variablesSet} holds exactly one correlation id a
 * correlate is built through {@code correlateFactory}; in every other case a
 * join is built through {@code joinFactory}.
 *
 * @param joinType     type of join to create
 * @param condition    join condition
 * @param variablesSet correlation variables set by the left input
 * @return this builder
 * @throws IllegalArgumentException if the single correlation variable is
 *         used by the left input
 */
public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, Set<CorrelationId> variablesSet) {
    Frame rightFrame = stack.pop();
    final Frame leftFrame = stack.pop();
    final boolean singleCorrelation = variablesSet.size() == 1;
    // Condition applied as a filter on top of the result; TRUE unless deferred below.
    RexNode deferredCondition = literal(true);
    final RelNode joined;
    if (!singleCorrelation) {
        joined = joinFactory.createJoin(leftFrame.rel, rightFrame.rel, condition, variablesSet, joinType, false);
    } else {
        final CorrelationId correlationId = Iterables.getOnlyElement(variablesSet);
        final ImmutableBitSet requiredColumns = RelOptUtil.correlationColumns(correlationId, rightFrame.rel);
        final boolean leftIsFree = RelOptUtil.notContainsCorrelation(leftFrame.rel, correlationId, Litmus.IGNORE);
        if (!leftIsFree) {
            throw new IllegalArgumentException("variable " + correlationId + " must not be used by left input to correlation");
        }
        switch (joinType) {
            case LEFT:
                // A correlate has no ON clause, so for LEFT the predicate is
                // applied to the right input before correlating: push the right
                // frame back, filter it, and take the filtered frame instead.
                stack.push(rightFrame);
                filter(condition.accept(new Shifter(leftFrame.rel, correlationId, rightFrame.rel)));
                rightFrame = stack.pop();
                break;
            default:
                // For the other join types the predicate can be applied afterwards.
                deferredCondition = condition;
                break;
        }
        joined = correlateFactory.createCorrelate(leftFrame.rel, rightFrame.rel, correlationId, requiredColumns, SemiJoinType.of(joinType));
    }

    // The combined frame lists the left fields followed by the right fields.
    final List<Pair<String, RelDataType>> combinedFields = new ArrayList<>();
    combinedFields.addAll(leftFrame.right);
    combinedFields.addAll(rightFrame.right);
    stack.push(new Frame(joined, ImmutableList.copyOf(combinedFields)));
    filter(deferredCondition);
    return this;
}
Also used : RelNode(org.apache.calcite.rel.RelNode) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) CorrelationId(org.apache.calcite.rel.core.CorrelationId) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 25 with Pair

Use of org.apache.calcite.util.Pair in the Apache Hive project.

In class HiveJoinToMultiJoinRule, method mergeJoin.

// Tries to merge the given join with its left child into one HiveMultiJoin
// whose inputs are the left child's inputs followed by the right child.
// The left child has to be a HiveJoin or a HiveMultiJoin. Returns null
// whenever the merge is not possible.
private static RelNode mergeJoin(HiveJoin join, RelNode left, RelNode right) {
    final RexBuilder rexBuilder = join.getCluster().getRexBuilder();

    // The left child is not a join or multijoin operator: nothing to merge.
    final boolean leftIsPlainJoin = left instanceof HiveJoin;
    if (!leftIsPlainJoin && !(left instanceof HiveMultiJoin)) {
        return null;
    }

    // Describe the left child uniformly, whether it is a join or a multijoin.
    final RexNode leftCondition;
    final List<Pair<Integer, Integer>> leftJoinInputs;
    final List<JoinRelType> leftJoinTypes;
    final List<RexNode> leftJoinFilters;
    boolean canCombine;
    if (leftIsPlainJoin) {
        final HiveJoin leftJoin = (HiveJoin) left;
        leftCondition = leftJoin.getCondition();
        leftJoinInputs = ImmutableList.of(Pair.of(0, 1));
        leftJoinTypes = ImmutableList.of(leftJoin.getJoinType());
        leftJoinFilters = ImmutableList.of(leftJoin.getJoinFilter());
        try {
            canCombine = isCombinableJoin(join, leftJoin);
        } catch (CalciteSemanticException e) {
            LOG.trace("Failed to merge join-join", e);
            canCombine = false;
        }
    } else {
        final HiveMultiJoin leftMultiJoin = (HiveMultiJoin) left;
        leftCondition = leftMultiJoin.getCondition();
        leftJoinInputs = leftMultiJoin.getJoinInputs();
        leftJoinTypes = leftMultiJoin.getJoinTypes();
        leftJoinFilters = leftMultiJoin.getJoinFilters();
        try {
            canCombine = isCombinableJoin(join, leftMultiJoin);
        } catch (CalciteSemanticException e) {
            LOG.trace("Failed to merge join-multijoin", e);
            canCombine = false;
        }
    }
    // The join operation in the child is not on the same keys.
    if (!canCombine) {
        return null;
    }

    // Seed the merged join specification with everything from the left child.
    final List<RelNode> mergedInputs = Lists.newArrayList();
    final List<RexNode> conditions = Lists.newArrayList();
    final List<Pair<Integer, Integer>> joinInputs = Lists.newArrayList();
    final List<JoinRelType> joinTypes = Lists.newArrayList();
    final List<RexNode> joinFilters = Lists.newArrayList();
    conditions.add(leftCondition);
    for (int idx = 0; idx < leftJoinInputs.size(); idx++) {
        joinInputs.add(leftJoinInputs.get(idx));
        joinTypes.add(leftJoinTypes.get(idx));
        joinFilters.add(leftJoinFilters.get(idx));
    }
    mergedInputs.addAll(left.getInputs());
    final int numberLeftInputs = mergedInputs.size();

    // The right child becomes the last input.
    mergedInputs.add(right);
    conditions.add(join.getCondition());
    // Bail out when only a single condition was collected.
    if (conditions.size() == 1) {
        return null;
    }

    // Split this join's condition into per-input key expressions plus the
    // remaining filter.
    final List<RelDataTypeField> systemFieldList = ImmutableList.of();
    final List<List<RexNode>> keyExprsPerInput = new ArrayList<List<RexNode>>();
    final List<Integer> filterNulls = new ArrayList<Integer>();
    for (int idx = 0; idx < mergedInputs.size(); idx++) {
        keyExprsPerInput.add(new ArrayList<RexNode>());
    }
    final RexNode remainingFilters;
    try {
        remainingFilters = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, mergedInputs, join.getCondition(), keyExprsPerInput, filterNulls, null);
    } catch (CalciteSemanticException e) {
        LOG.trace("Failed to merge joins", e);
        return null;
    }

    // Record which inputs contribute at least one key expression.
    final ImmutableBitSet.Builder inputsWithKeys = ImmutableBitSet.builder();
    for (int idx = 0; idx < mergedInputs.size(); idx++) {
        if (!keyExprsPerInput.get(idx).isEmpty()) {
            inputsWithKeys.set(idx);
        }
    }
    final ImmutableBitSet keysInInputs = inputsWithKeys.build();
    final ImmutableBitSet leftReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs));
    final ImmutableBitSet rightReferencedInputs = keysInInputs.intersect(ImmutableBitSet.range(numberLeftInputs, mergedInputs.size()));

    // A non-inner join may reference at most one input on each side;
    // otherwise we cannot merge.
    final JoinRelType joinType = join.getJoinType();
    final boolean innerJoin = joinType == JoinRelType.INNER;
    if (!innerJoin && (leftReferencedInputs.cardinality() > 1 || rightReferencedInputs.cardinality() > 1)) {
        return null;
    }

    // Add this join to the specification: every left/right combination of
    // referenced inputs for an inner join, a single pair otherwise.
    if (innerJoin) {
        for (int leftInput : leftReferencedInputs) {
            for (int rightInput : rightReferencedInputs) {
                joinInputs.add(Pair.of(leftInput, rightInput));
                joinTypes.add(joinType);
                joinFilters.add(remainingFilters);
            }
        }
    } else {
        final int leftInput = keysInInputs.nextSetBit(0);
        final int rightInput = keysInInputs.nextSetBit(numberLeftInputs);
        joinInputs.add(Pair.of(leftInput, rightInput));
        joinTypes.add(joinType);
        joinFilters.add(remainingFilters);
    }

    // Build the pieces of the multijoin operator.
    final RexNode newCondition = RexUtil.flatten(rexBuilder, RexUtil.composeConjunction(rexBuilder, conditions, false));
    final List<RelNode> newInputsArray = Lists.newArrayList(mergedInputs);
    final JoinPredicateInfo joinPredInfo;
    try {
        joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(newInputsArray, systemFieldList, newCondition);
    } catch (CalciteSemanticException e) {
        throw new RuntimeException(e);
    }

    // If the number of joins < number of input tables-1, this is not a star join.
    final int equiPredicateCount = joinPredInfo.getEquiJoinPredicateElements().size();
    if (equiPredicateCount < mergedInputs.size() - 1) {
        return null;
    }

    // Validate that the multi-join is a valid star join before returning it:
    // for each input, every equi-join predicate with key expressions on that
    // input must use the same set of key expressions as the first such predicate.
    for (int input = 0; input < mergedInputs.size(); input++) {
        List<RexNode> referenceKeys = null;
        for (int pred = 0; pred < joinPredInfo.getEquiJoinPredicateElements().size(); pred++) {
            final List<RexNode> predicateKeys = joinPredInfo.getEquiJoinPredicateElements().get(pred).getJoinExprs(input);
            if (predicateKeys.isEmpty()) {
                continue;
            }
            if (referenceKeys == null) {
                referenceKeys = predicateKeys;
                continue;
            }
            final boolean sameKeys = referenceKeys.containsAll(predicateKeys) && predicateKeys.containsAll(referenceKeys);
            if (!sameKeys) {
                return null;
            }
        }
    }
    return new HiveMultiJoin(join.getCluster(), newInputsArray, newCondition, join.getRowType(), joinInputs, joinTypes, joinFilters, joinPredInfo);
}
Also used : HiveMultiJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) HiveJoin(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin) RexBuilder(org.apache.calcite.rex.RexBuilder) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) CalciteSemanticException(org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException) Pair(org.apache.calcite.util.Pair) JoinRelType(org.apache.calcite.rel.core.JoinRelType) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RelNode(org.apache.calcite.rel.RelNode) JoinPredicateInfo(org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil.JoinPredicateInfo) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

Pair (org.apache.calcite.util.Pair)26 RexNode (org.apache.calcite.rex.RexNode)22 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)17 RelNode (org.apache.calcite.rel.RelNode)16 ArrayList (java.util.ArrayList)14 ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)9 HashMap (java.util.HashMap)8 RexInputRef (org.apache.calcite.rex.RexInputRef)8 RexBuilder (org.apache.calcite.rex.RexBuilder)7 AggregateCall (org.apache.calcite.rel.core.AggregateCall)6 JoinRelType (org.apache.calcite.rel.core.JoinRelType)5 RelBuilder (org.apache.calcite.tools.RelBuilder)5 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 ImmutableSortedMap (com.google.common.collect.ImmutableSortedMap)4 List (java.util.List)4 Map (java.util.Map)4 NavigableMap (java.util.NavigableMap)4 SortedMap (java.util.SortedMap)4 TreeMap (java.util.TreeMap)4