Search in sources :

Example 96 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveAggregateProjectMergeRule method apply.

/**
 * Tries to merge {@code project} into {@code aggregate} so that the aggregate reads directly
 * from the project's input.
 *
 * <p>The merge only succeeds when every group key, every aggregate-call argument and every
 * aggregate filter argument is projected as a plain {@link RexInputRef}. If the translated
 * group keys are not in the same order as the new group set, or contain duplicates, a project
 * is placed on top of the new aggregate to restore the original column layout.
 *
 * @param aggregate aggregate sitting on top of {@code project}
 * @param project project whose expressions are inspected
 * @return the merged expression, or {@code null} if some referenced project expression is not
 *         an input reference
 */
public static RelNode apply(HiveAggregate aggregate, HiveProject project) {
    // Group keys rewritten against the project's input, in group-set iteration order.
    final List<Integer> translatedKeys = Lists.newArrayList();
    final Map<Integer, Integer> keyMapping = new HashMap<>();
    for (int groupKey : aggregate.getGroupSet()) {
        final RexInputRef keyRef = asInputRef(project, groupKey);
        if (keyRef == null) {
            // Cannot handle "GROUP BY expression"
            return null;
        }
        final int inputIndex = keyRef.getIndex();
        translatedKeys.add(inputIndex);
        keyMapping.put(groupKey, inputIndex);
    }

    final ImmutableBitSet mergedGroupSet = aggregate.getGroupSet().permute(keyMapping);
    // Grouping sets are only carried over when the aggregate uses indicator columns.
    final ImmutableList<ImmutableBitSet> mergedGroupingSets = aggregate.indicator
        ? ImmutableBitSet.ORDERING.immutableSortedCopy(ImmutableBitSet.permute(aggregate.getGroupSets(), keyMapping))
        : null;

    final ImmutableList.Builder<AggregateCall> mergedCalls = ImmutableList.builder();
    for (AggregateCall call : aggregate.getAggCallList()) {
        final ImmutableList.Builder<Integer> translatedArgs = ImmutableList.builder();
        for (int arg : call.getArgList()) {
            final RexInputRef argRef = asInputRef(project, arg);
            if (argRef == null) {
                // Cannot handle "AGG(expression)"
                return null;
            }
            translatedArgs.add(argRef.getIndex());
        }
        // -1 means "no filter", mirroring the convention of AggregateCall.filterArg.
        int translatedFilter = -1;
        if (call.filterArg >= 0) {
            final RexInputRef filterRef = asInputRef(project, call.filterArg);
            if (filterRef == null) {
                return null;
            }
            translatedFilter = filterRef.getIndex();
        }
        mergedCalls.add(call.copy(translatedArgs.build(), translatedFilter));
    }

    final Aggregate merged = aggregate.copy(aggregate.getTraitSet(), project.getInput(), aggregate.indicator, mergedGroupSet, mergedGroupingSets, mergedCalls.build());
    RelNode result = merged;
    if (translatedKeys.equals(mergedGroupSet.asList())) {
        // Keys already line up with the new group set: no re-ordering project required.
        return result;
    }

    // The group set is not in the same order or contains duplicates: add a project that
    // puts the columns back where the original aggregate had them.
    final List<Integer> positions = Lists.newArrayList();
    for (int translatedKey : translatedKeys) {
        positions.add(mergedGroupSet.indexOf(translatedKey));
    }
    if (aggregate.indicator) {
        for (int translatedKey : translatedKeys) {
            positions.add(aggregate.getGroupCount() + mergedGroupSet.indexOf(translatedKey));
        }
    }
    // Everything after the group (and indicator) columns keeps its position.
    for (int field = merged.getGroupCount() + merged.getIndicatorCount(); field < merged.getRowType().getFieldCount(); field++) {
        positions.add(field);
    }
    result = HiveRelOptUtil.createProject(HiveRelFactories.HIVE_BUILDER.create(aggregate.getCluster(), null), result, positions);
    return result;
}

/** Returns the project expression at {@code ordinal} if it is an input reference, else {@code null}. */
private static RexInputRef asInputRef(HiveProject project, int ordinal) {
    final RexNode expr = project.getProjects().get(ordinal);
    return expr instanceof RexInputRef ? (RexInputRef) expr : null;
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelNode(org.apache.calcite.rel.RelNode) RexInputRef(org.apache.calcite.rex.RexInputRef) Aggregate(org.apache.calcite.rel.core.Aggregate) HiveAggregate(org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate) RexNode(org.apache.calcite.rex.RexNode)

Example 97 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveFilterJoinRule method filterRefersToBothSidesOfJoin.

/**
 * Tells whether {@code filter} references fields coming from both inputs of {@code j}.
 *
 * <p>Field ordinals below the field count of input 0 are treated as belonging to the left
 * side; the remaining ordinals, up to the join's own field count, as belonging to the right.
 *
 * @param filter predicate whose input references are inspected
 * @param j join the predicate is evaluated against
 * @return {@code true} if the predicate references at least one left field and at least one
 *         right field
 */
private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) {
    final int joinFieldCount = j.getRowType().getFieldCount();
    final int leftFieldCount = j.getInput(0).getRowType().getFieldCount();
    // The ranges are built directly: seeding them from an empty bit set and unioning adds nothing.
    final ImmutableBitSet leftFields = ImmutableBitSet.range(0, leftFieldCount);
    final ImmutableBitSet rightFields = ImmutableBitSet.range(leftFieldCount, joinFieldCount);
    final ImmutableBitSet referencedFields = InputFinder.bits(filter);
    return leftFields.intersects(referencedFields) && rightFields.intersects(referencedFields);
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) BitSet(java.util.BitSet)

Example 98 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveSubQRemoveRelBuilder method join.

/**
 * Creates a {@link org.apache.calcite.rel.core.Join} with correlating
 * variables.
 *
 * <p>The two topmost frames are popped from the builder stack (right first, then left). When
 * {@code variablesSet} holds exactly one correlation id a correlate is created; otherwise a
 * regular join carrying {@code condition} is created. The combined frame is pushed back and
 * any condition that was deferred is applied as a filter on top of it.
 *
 * @param joinType type of join to create
 * @param condition join condition
 * @param variablesSet correlation variables set by the left input and used by the right
 * @return this builder
 * @throws IllegalArgumentException if the single correlation variable is used by the left input
 */
public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, Set<CorrelationId> variablesSet) {
    Frame rightFrame = stack.pop();
    final Frame leftFrame = stack.pop();
    final RelNode joined;
    final boolean useCorrelate = variablesSet.size() == 1;
    // Filter applied above the join; stays TRUE unless the condition is deferred below.
    RexNode deferredCondition = literal(true);
    if (!useCorrelate) {
        joined = joinFactory.createJoin(leftFrame.rel, rightFrame.rel, condition, variablesSet, joinType, false);
    } else {
        final CorrelationId correlationId = Iterables.getOnlyElement(variablesSet);
        final ImmutableBitSet requiredColumns = RelOptUtil.correlationColumns(correlationId, rightFrame.rel);
        if (!RelOptUtil.notContainsCorrelation(leftFrame.rel, correlationId, Litmus.IGNORE)) {
            throw new IllegalArgumentException("variable " + correlationId + " must not be used by left input to correlation");
        }
        switch(joinType) {
            case LEFT:
                // Correlate does not have an ON clause, so for a LEFT correlate the
                // predicate has to be evaluated on the right input before correlating.
                stack.push(rightFrame);
                filter(condition.accept(new Shifter(leftFrame.rel, correlationId, rightFrame.rel)));
                rightFrame = stack.pop();
                break;
            default:
                // For the other join types (e.g. INNER) the predicate can be applied afterwards.
                deferredCondition = condition;
                break;
        }
        joined = correlateFactory.createCorrelate(leftFrame.rel, rightFrame.rel, correlationId, requiredColumns, SemiJoinType.of(joinType));
    }
    // The new frame exposes the left fields followed by the right fields.
    final List<Pair<String, RelDataType>> fieldPairs = new ArrayList<>(leftFrame.right);
    fieldPairs.addAll(rightFrame.right);
    stack.push(new Frame(joined, ImmutableList.copyOf(fieldPairs)));
    filter(deferredCondition);
    return this;
}
Also used : RelNode(org.apache.calcite.rel.RelNode) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) CorrelationId(org.apache.calcite.rel.core.CorrelationId) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 99 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.

the class HiveOpConverter method visit.

/**
 * Translates a {@link HiveSortLimit} into Hive operators.
 *
 * <p>The input is translated first. If the collation is non-empty, a reduce sink with a
 * single reducer plus a backtracking select is generated for the sort. If a fetch is present,
 * a limit operator is then chained on top of whatever was produced so far.
 *
 * @param sortRel sort/limit expression to translate
 * @return the input's {@code OpAttr} cloned around the resulting operator
 * @throws SemanticException if the expression sorts without a fetch and
 *         {@code StrictChecks.checkNoLimit} reports an error
 */
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
    final OpAttr inputAttr = dispatch(sortRel.getInput());
    if (LOG.isDebugEnabled()) {
        LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
        final String shape;
        if (sortRel.getCollation() == RelCollations.EMPTY) {
            shape = " consists of limit";
        } else if (sortRel.fetch == null) {
            shape = " consists of sort";
        } else {
            shape = " consists of sort+limit";
        }
        LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + shape);
    }
    final Operator<?> inputOp = inputAttr.inputs.get(0);
    Operator<?> resultOp = inputOp;

    // 1. If we need to sort tuples based on the value of some of their columns
    if (sortRel.getCollation() != RelCollations.EMPTY) {
        // In strict mode, in the presence of order by, limit must be specified.
        if (sortRel.fetch == null) {
            final String strictError = StrictChecks.checkNoLimit(hiveConf);
            if (strictError != null) {
                throw new SemanticException(strictError);
            }
        }

        // 1.a. Extract order for each column from collation
        // Generate sortCols and order
        final ImmutableBitSet.Builder sortPositions = ImmutableBitSet.builder();
        final ImmutableBitSet.Builder sortOutputPositions = ImmutableBitSet.builder();
        final Map<Integer, RexNode> refToCall = sortRel.getInputRefToCallMap();
        final List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
        final StringBuilder order = new StringBuilder();
        final StringBuilder nullOrder = new StringBuilder();
        for (RelFieldCollation collation : sortRel.getCollation().getFieldCollations()) {
            final int fieldPos = collation.getFieldIndex();
            final ColumnInfo colInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(fieldPos));
            sortCols.add(new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()));

            // "-" marks a descending key, "+" anything else.
            final boolean descending = collation.getDirection() == RelFieldCollation.Direction.DESCENDING;
            order.append(descending ? "-" : "+");

            // "a" = nulls first, "z" = nulls last; with no explicit null direction,
            // descending keys get "z" and the others get "a".
            final String nullMarker;
            if (collation.nullDirection == RelFieldCollation.NullDirection.FIRST) {
                nullMarker = "a";
            } else if (collation.nullDirection == RelFieldCollation.NullDirection.LAST) {
                nullMarker = "z";
            } else {
                nullMarker = descending ? "z" : "a";
            }
            nullOrder.append(nullMarker);

            if (refToCall != null) {
                final boolean hasNoCall = refToCall.get(fieldPos) == null;
                sortPositions.set(fieldPos);
                if (hasNoCall) {
                    sortOutputPositions.set(fieldPos);
                }
            }
        }

        // Use only 1 reducer for order by
        final int numReducers = 1;

        // We keep only the columns that are part of the final output: a column is kept when
        // it is flagged in both position sets or in neither of them.
        final List<String> keepColumns = new ArrayList<String>();
        final ImmutableBitSet sortColsPos = sortPositions.build();
        final ImmutableBitSet sortOutputColsPos = sortOutputPositions.build();
        final ArrayList<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
        for (int pos = 0; pos < inputSchema.size(); pos++) {
            if (sortColsPos.get(pos) == sortOutputColsPos.get(pos)) {
                keepColumns.add(inputSchema.get(pos).getInternalName());
            }
        }

        // 1.b. Generate reduce sink and project operator
        resultOp = genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns);
    }

    // 2. If we need to generate limit
    if (sortRel.fetch != null) {
        final int limit = RexLiteral.intValue(sortRel.fetch);
        final int offset = (sortRel.offset != null) ? RexLiteral.intValue(sortRel.offset) : 0;
        final LimitDesc limitDesc = new LimitDesc(offset, limit);
        final ArrayList<ColumnInfo> limitColumns = createColInfos(resultOp);
        resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(limitColumns), resultOp);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
        }
    }

    // 3. Return result
    return inputAttr.clone(resultOp);
}
Also used : RowSchema(org.apache.hadoop.hive.ql.exec.RowSchema) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) ColumnInfo(org.apache.hadoop.hive.ql.exec.ColumnInfo) LimitDesc(org.apache.hadoop.hive.ql.plan.LimitDesc) RelFieldCollation(org.apache.calcite.rel.RelFieldCollation) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) RexNode(org.apache.calcite.rex.RexNode)

Example 100 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class FlinkAggregateExpandDistinctAggregatesRule method doRewrite.

/**
 * Converts all distinct aggregate calls to a given set of arguments.
 *
 * <p>This method is called several times, once for each set of arguments.
 * Each time it is called, it generates a JOIN to a new SELECT DISTINCT
 * relational expression, and modifies the set of top-level calls.
 *
 * <p>Nothing is returned: the rewritten expression is left on
 * {@code relBuilder} and {@code refs} is updated in place.
 *
 * @param relBuilder Builder that holds the input relational expression and
 *                   receives the rewritten one
 * @param aggregate  Original aggregate
 * @param n          Ordinal of this in a join. {@code relBuilder} contains the
 *                   input relational expression (either the original
 *                   aggregate, or the output from the previous call to this
 *                   method). {@code n} is 0 if we're converting the
 *                   first distinct aggregate in a query with no non-distinct
 *                   aggregates
 * @param argList    Arguments to the distinct aggregate function
 * @param filterArg  Argument that filters input to aggregate function, or -1
 * @param refs       Array of expressions which will be projected by the
 *                   result of this rule. Those relating to this arg list will
 *                   be modified
 */
private void doRewrite(RelBuilder relBuilder, Aggregate aggregate, int n, List<Integer> argList, int filterArg, List<RexInputRef> refs) {
    final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
    // Fields of the expression currently on top of the builder. Left null when
    // n == 0; it is only dereferenced on paths where n != 0.
    final List<RelDataTypeField> leftFields;
    if (n == 0) {
        leftFields = null;
    } else {
        leftFields = relBuilder.peek().getRowType().getFieldList();
    }
    // LogicalAggregate(
    //     child,
    //     {COUNT(DISTINCT 1), SUM(DISTINCT 1), SUM(2)})
    //
    // becomes
    //
    // LogicalAggregate(
    //     LogicalJoin(
    //         child,
    //         LogicalAggregate(child, < all columns > {}),
    //         INNER,
    //         <f2 = f5>))
    //
    // E.g.
    //   SELECT deptno, SUM(DISTINCT sal), COUNT(DISTINCT gender), MAX(age)
    //   FROM Emps
    //   GROUP BY deptno
    //
    // becomes
    //
    //   SELECT e.deptno, adsal.sum_sal, adgender.count_gender, e.max_age
    //   FROM (
    //     SELECT deptno, MAX(age) as max_age
    //     FROM Emps GROUP BY deptno) AS e
    //   JOIN (
    //     SELECT deptno, COUNT(gender) AS count_gender FROM (
    //       SELECT DISTINCT deptno, gender FROM Emps) AS dgender
    //     GROUP BY deptno) AS adgender
    //     ON e.deptno = adgender.deptno
    //   JOIN (
    //     SELECT deptno, SUM(sal) AS sum_sal FROM (
    //       SELECT DISTINCT deptno, sal FROM Emps) AS dsal
    //     GROUP BY deptno) AS adsal
    //   ON e.deptno = adsal.deptno
    //   GROUP BY e.deptno
    //
    // Note that if a query contains no non-distinct aggregates, then the
    // very first join/group by is omitted.  In the example above, if
    // MAX(age) is removed, then the sub-select of "e" is not needed, and
    // instead the two other group by's are joined to one another.

    // Project the columns of the GROUP BY plus the arguments
    // to the agg function. sourceOf is handed over empty and read back below
    // to re-map argument ordinals, so createSelectDistinct is expected to fill it.
    final Map<Integer, Integer> sourceOf = new HashMap<>();
    createSelectDistinct(relBuilder, aggregate, argList, filterArg, sourceOf);
    // Now compute the aggregate functions on top of the distinct dataset.
    // Each distinct agg becomes a non-distinct call to the corresponding
    // field from the right; for example,
    //   "COUNT(DISTINCT e.sal)"
    // becomes
    //   "COUNT(distinct_e.sal)".
    final List<AggregateCall> aggCallList = new ArrayList<>();
    final List<AggregateCall> aggCalls = aggregate.getAggCallList();
    final int groupAndIndicatorCount = aggregate.getGroupCount() + aggregate.getIndicatorCount();
    // i is the slot in refs of the agg call being visited. It starts one before
    // the first agg-call slot because it is incremented at the top of each
    // iteration, including iterations that are skipped with "continue".
    int i = groupAndIndicatorCount - 1;
    for (AggregateCall aggCall : aggCalls) {
        ++i;
        // Ignore agg calls which are not distinct or whose arguments differ
        // from argList, e.g. COUNT(DISTINCT gender) or SUM(sal) when the
        // arguments being rewritten are {sal}.
        if (!aggCall.isDistinct()) {
            continue;
        }
        if (!aggCall.getArgList().equals(argList)) {
            continue;
        }
        // Re-map arguments.
        final int argCount = aggCall.getArgList().size();
        final List<Integer> newArgs = new ArrayList<>(argCount);
        for (int j = 0; j < argCount; j++) {
            final Integer arg = aggCall.getArgList().get(j);
            newArgs.add(sourceOf.get(arg));
        }
        // NOTE(review): the lookup is unboxed to int, so a filter argument missing
        // from sourceOf would fail with a NullPointerException here.
        final int newFilterArg = aggCall.filterArg >= 0 ? sourceOf.get(aggCall.filterArg) : -1;
        // Same aggregation, type and name as the original call, but no longer distinct.
        final AggregateCall newAggCall = AggregateCall.create(aggCall.getAggregation(), false, newArgs, newFilterArg, aggCall.getType(), aggCall.getName());
        // The slot must not have been filled already.
        assert refs.get(i) == null;
        // Point the slot at the new call's position: after the group/indicator
        // columns, and additionally after the left fields when there is a left side.
        if (n == 0) {
            refs.set(i, new RexInputRef(groupAndIndicatorCount + aggCallList.size(), newAggCall.getType()));
        } else {
            refs.set(i, new RexInputRef(leftFields.size() + groupAndIndicatorCount + aggCallList.size(), newAggCall.getType()));
        }
        aggCallList.add(newAggCall);
    }
    // Renumber the group keys to 0, 1, 2, ... in group-set iteration order.
    final Map<Integer, Integer> map = new HashMap<>();
    for (Integer key : aggregate.getGroupSet()) {
        map.put(key, map.size());
    }
    final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map);
    assert newGroupSet.equals(ImmutableBitSet.range(aggregate.getGroupSet().cardinality()));
    // Grouping sets are only carried over when the aggregate uses indicator columns.
    ImmutableList<ImmutableBitSet> newGroupingSets = null;
    if (aggregate.indicator) {
        newGroupingSets = ImmutableBitSet.ORDERING.immutableSortedCopy(ImmutableBitSet.permute(aggregate.getGroupSets(), map));
    }
    // relBuilder.build() supplies the new aggregate's input; presumably this is the
    // SELECT DISTINCT left behind by createSelectDistinct - TODO confirm.
    relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), aggregate.indicator, newGroupSet, newGroupingSets, aggCallList));
    // If there's no left child yet, no need to create the join
    if (n == 0) {
        return;
    }
    // Create the join condition. It is of the form
    //  'left.f0 = right.f0 and left.f1 = right.f1 and ...'
    // where {f0, f1, ...} are the GROUP BY fields.
    final List<RelDataTypeField> distinctFields = relBuilder.peek().getRowType().getFieldList();
    final List<RexNode> conditions = Lists.newArrayList();
    for (i = 0; i < groupAndIndicatorCount; ++i) {
        // null values form its own group
        // use "is not distinct from" so that the join condition
        // allows null values to match.
        conditions.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, RexInputRef.of(i, leftFields), new RexInputRef(leftFields.size() + i, distinctFields.get(i).getType())));
    }
    // Join in the new 'select distinct' relation.
    relBuilder.join(JoinRelType.INNER, conditions);
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexBuilder(org.apache.calcite.rex.RexBuilder) RexInputRef(org.apache.calcite.rex.RexInputRef) RexNode(org.apache.calcite.rex.RexNode)

Aggregations

ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)208 RexNode (org.apache.calcite.rex.RexNode)127 RelNode (org.apache.calcite.rel.RelNode)110 ArrayList (java.util.ArrayList)101 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)66 RexBuilder (org.apache.calcite.rex.RexBuilder)60 AggregateCall (org.apache.calcite.rel.core.AggregateCall)55 RexInputRef (org.apache.calcite.rex.RexInputRef)45 RelDataType (org.apache.calcite.rel.type.RelDataType)39 HashMap (java.util.HashMap)36 RelBuilder (org.apache.calcite.tools.RelBuilder)36 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)30 Mapping (org.apache.calcite.util.mapping.Mapping)30 Pair (org.apache.calcite.util.Pair)29 Aggregate (org.apache.calcite.rel.core.Aggregate)27 ImmutableList (com.google.common.collect.ImmutableList)23 LinkedHashSet (java.util.LinkedHashSet)23 List (java.util.List)22 HashSet (java.util.HashSet)20 RelOptUtil (org.apache.calcite.plan.RelOptUtil)18