Search in sources :

Example 61 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class HiveParserCalcitePlanner method genGBRelNode.

/**
 * Builds the Calcite plan for a GROUP BY: a {@code LogicalProject} over {@code srcRel} that
 * exposes the grouping keys followed by the aggregate parameters, topped by a
 * {@code LogicalAggregate}.
 *
 * <p>The projection lists the converted group-by expressions first (positions
 * {@code 0..gbExprs.size()-1}), then every aggregate parameter whose string form has not been
 * seen before. Expressions are de-duplicated by {@code RexNode.toString()}.
 *
 * @param gbExprs group-by key expressions, in key order
 * @param aggInfos aggregate functions to evaluate; their parameters are appended to the
 *     projection
 * @param groupSets encoded grouping sets; {@code null} or empty means a plain GROUP BY
 * @param srcRel input relation the expressions are resolved against
 * @return the {@code LogicalAggregate} created on top of the generated projection
 * @throws SemanticException declared by this method; NOTE(review): presumably raised by the
 *     expression / aggregate-call conversion helpers - confirm
 */
private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfos, List<Integer> groupSets, RelNode srcRel) throws SemanticException {
    // Column-name -> position mapping registered for srcRel; handed to the expression converter.
    Map<String, Integer> colNameToPos = relToHiveColNameCalcitePosMap.get(srcRel);
    HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(), colNameToPos, 0, false, funcConverter);
    final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty();
    // Expressions of the projection that feeds the aggregate (keys first, then agg params).
    final List<RexNode> gbInputRexNodes = new ArrayList<>();
    // String form of each projected expression -> its position in gbInputRexNodes.
    final HashMap<String, Integer> inputRexNodeToIndex = new HashMap<>();
    final List<Integer> gbKeyIndices = new ArrayList<>();
    int inputIndex = 0;
    for (ExprNodeDesc key : gbExprs) {
        // also convert null literal here to support grouping by NULLs
        RexNode keyRex = convertNullLiteral(converter.convert(key)).accept(funcConverter);
        gbInputRexNodes.add(keyRex);
        gbKeyIndices.add(inputIndex);
        // If two keys share the same string form, the later position overwrites the earlier one.
        inputRexNodeToIndex.put(keyRex.toString(), inputIndex);
        inputIndex++;
    }
    // Every key occupies its own leading position, so the group set is bits 0..#keys-1.
    final ImmutableBitSet groupSet = ImmutableBitSet.of(gbKeyIndices);
    // Grouping sets: we need to transform them into ImmutableBitSet objects for Calcite
    List<ImmutableBitSet> transformedGroupSets = null;
    if (hasGroupSets) {
        // A HashSet drops duplicate grouping sets.
        Set<ImmutableBitSet> set = new HashSet<>(groupSets.size());
        for (int val : groupSets) {
            // NOTE(review): convert(...) is not visible here; presumably it decodes val as a
            // bit mask over the key positions - confirm.
            set.add(convert(val, groupSet.cardinality()));
        }
        // Calcite expects the grouping sets sorted and without duplicates
        transformedGroupSets = new ArrayList<>(set);
        transformedGroupSets.sort(ImmutableBitSet.COMPARATOR);
    }
    // add Agg parameters to inputs
    for (AggInfo aggInfo : aggInfos) {
        for (ExprNodeDesc expr : aggInfo.getAggParams()) {
            RexNode paramRex = converter.convert(expr).accept(funcConverter);
            // argIndex is only used to detect whether the expression is already projected.
            Integer argIndex = inputRexNodeToIndex.get(paramRex.toString());
            if (argIndex == null) {
                argIndex = gbInputRexNodes.size();
                inputRexNodeToIndex.put(paramRex.toString(), argIndex);
                gbInputRexNodes.add(paramRex);
            }
        }
    }
    // create the actual input before creating agg calls so that the calls can properly infer
    // return type
    RelNode gbInputRel = LogicalProject.create(srcRel, Collections.emptyList(), gbInputRexNodes, (List<String>) null);
    List<AggregateCall> aggregateCalls = new ArrayList<>();
    for (AggInfo aggInfo : aggInfos) {
        aggregateCalls.add(HiveParserUtils.toAggCall(aggInfo, converter, inputRexNodeToIndex, groupSet.cardinality(), gbInputRel, cluster, funcConverter));
    }
    // GROUPING__ID is a virtual col in Hive, so we use Flink's function
    if (hasGroupSets) {
        // Create GroupingID column; it is appended after all user aggregate calls and takes the
        // key positions as its arguments.
        AggregateCall aggCall = AggregateCall.create(SqlStdOperatorTable.GROUPING_ID, false, false, false, gbKeyIndices, -1, RelCollations.EMPTY, groupSet.cardinality(), gbInputRel, null, null);
        aggregateCalls.add(aggCall);
    }
    if (gbInputRexNodes.isEmpty()) {
        // This will happen for count(*), in such cases we arbitrarily pick
        // first element from srcRel
        // NOTE(review): gbInputRel was already created from gbInputRexNodes above and the list
        // is not read again in this method, so this add only matters if LogicalProject.create
        // keeps a live reference to the list - confirm, or move this check before the
        // projection is created.
        gbInputRexNodes.add(cluster.getRexBuilder().makeInputRef(srcRel, 0));
    }
    return LogicalAggregate.create(gbInputRel, groupSet, transformedGroupSets, aggregateCalls);
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) HiveParserBaseSemanticAnalyzer.getHiveAggInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.getHiveAggInfo) AggInfo(org.apache.flink.table.planner.delegation.hive.copy.HiveParserBaseSemanticAnalyzer.AggInfo) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelNode(org.apache.calcite.rel.RelNode) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) RexNode(org.apache.calcite.rex.RexNode) HashSet(java.util.HashSet)

Example 62 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class FlinkAggregateJoinTransposeRule method toRegularAggregate.

/**
 * Rewrites an aggregate that uses AUXILIARY_GROUP calls into a regular aggregate.
 *
 * <p>If the aggregate has no AUXILIARY_GROUP, it is returned as-is and the projection side of
 * the pair is {@code null}. Otherwise the pair holds a copy of the aggregate grouped on the
 * full group set with only the regular aggregate calls, plus one {@link RexInputRef} per
 * output field of the original aggregate that maps it onto the new aggregate's output.
 *
 * @param aggregate aggregate to normalize
 * @return the (possibly new) aggregate and the projection restoring the original column order
 */
private Pair<Aggregate, List<RexNode>> toRegularAggregate(Aggregate aggregate) {
    Tuple2<int[], Seq<AggregateCall>> split = AggregateUtil.checkAndSplitAggCalls(aggregate);
    final int[] auxGroup = split._1;
    final Seq<AggregateCall> regularAggCalls = split._2;

    // Nothing to rewrite when there is no auxiliary group.
    if (auxGroup.length == 0) {
        return new Pair<>(aggregate, null);
    }

    final int[] fullGroupSet = AggregateUtil.checkAndGetFullGroupSet(aggregate);
    final ImmutableBitSet newGroupSet = ImmutableBitSet.of(fullGroupSet);
    final List<AggregateCall> aggCalls = JavaConverters.seqAsJavaListConverter(regularAggCalls).asJava();
    final Aggregate newAgg = aggregate.copy(aggregate.getTraitSet(), aggregate.getInput(), aggregate.indicator, newGroupSet, com.google.common.collect.ImmutableList.of(newGroupSet), aggCalls);

    final List<RelDataTypeField> originalFields = aggregate.getRowType().getFieldList();
    final int originalFieldCount = originalFields.size();
    final int groupFieldCount = fullGroupSet.length;
    final List<RexNode> projectAfterAgg = new ArrayList<>();

    // Group columns: the bit set orders columns by ordinal, so each original group column is
    // looked up at its position inside the new group set.
    for (int pos = 0; pos < groupFieldCount; ++pos) {
        final int positionInNewAgg = newGroupSet.indexOf(fullGroupSet[pos]);
        projectAfterAgg.add(new RexInputRef(positionInNewAgg, originalFields.get(pos).getType()));
    }
    // Remaining columns (the aggregate call results) keep their positions.
    for (int pos = groupFieldCount; pos < originalFieldCount; ++pos) {
        projectAfterAgg.add(new RexInputRef(pos, originalFields.get(pos).getType()));
    }

    Preconditions.checkArgument(projectAfterAgg.size() == originalFieldCount);
    return new Pair<>(newAgg, projectAfterAgg);
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexInputRef(org.apache.calcite.rex.RexInputRef) Aggregate(org.apache.calcite.rel.core.Aggregate) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Seq(scala.collection.Seq) RexNode(org.apache.calcite.rex.RexNode) Pair(org.apache.calcite.util.Pair)

Example 63 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class FlinkAggregateJoinTransposeRule method keyColumns.

/**
 * Extends a set of columns with the columns that the given predicates declare equivalent to
 * them.
 *
 * <p>The equivalences are collected from every predicate by {@code populateEquivalences}. For
 * each input column that has an entry in that map, the associated bit set is merged into the
 * result. Columns without an entry contribute nothing beyond themselves.
 *
 * @param aggregateColumns columns to start from
 * @param predicates constraints from which column equivalences are derived
 * @return {@code aggregateColumns} united with the equivalents of each of its columns
 */
private static ImmutableBitSet keyColumns(ImmutableBitSet aggregateColumns, com.google.common.collect.ImmutableList<RexNode> predicates) {
    final SortedMap<Integer, BitSet> equivalence = new TreeMap<>();
    for (RexNode predicate : predicates) {
        populateEquivalences(equivalence, predicate);
    }

    ImmutableBitSet expanded = aggregateColumns;
    for (Integer column : aggregateColumns) {
        final BitSet equivalents = equivalence.get(column);
        if (equivalents == null) {
            // No predicate mentions this column.
            continue;
        }
        expanded = expanded.union(equivalents);
    }
    return expanded;
}
Also used : ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) BitSet(java.util.BitSet) TreeMap(java.util.TreeMap) RexNode(org.apache.calcite.rex.RexNode)

Example 64 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class FlinkAggregateExpandDistinctAggregatesRule method convertSingletonDistinct.

/**
 * Rewrites an aggregate with exactly one distinct aggregate call and one or more non-distinct
 * calls into two stacked aggregates.
 *
 * <p>Reference example:
 *
 * <pre>{@code
 * SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
 * FROM emp
 * GROUP BY deptno
 * }</pre>
 *
 * becomes
 *
 * <pre>{@code
 * SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
 * FROM (
 *   SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
 *   FROM EMP
 *   GROUP BY deptno, sal)   // Aggregate B (bottom)
 * GROUP BY deptno           // Aggregate A (top)
 * }</pre>
 *
 * @param relBuilder builder that receives the input, then aggregate B, then aggregate A
 * @param aggregate original aggregate
 * @param argLists arguments and filters of the distinct aggregate function; must contain
 *     exactly one entry (only its size is checked here)
 * @return {@code relBuilder}, with aggregate A pushed on top
 */
private RelBuilder convertSingletonDistinct(RelBuilder relBuilder, Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
    // This rewrite assumes a single distinct function.
    Preconditions.checkArgument(argLists.size() == 1);

    relBuilder.push(aggregate.getInput());
    final List<AggregateCall> originalAggCalls = aggregate.getAggCallList();
    final ImmutableBitSet originalGroupSet = aggregate.getGroupSet();

    // Group keys of aggregate B: the original keys plus the argument column(s) of the distinct
    // call. A sorted set is used so positions in B's output follow column order.
    final SortedSet<Integer> bottomGroupSet = new TreeSet<>(originalGroupSet.asList());
    for (AggregateCall candidate : originalAggCalls) {
        if (!candidate.isDistinct()) {
            continue;
        }
        bottomGroupSet.addAll(candidate.getArgList());
        // Only one distinct call exists, so stop at the first one.
        break;
    }
    final ImmutableBitSet bottomGroupBits = ImmutableBitSet.of(bottomGroupSet);

    // Aggregate B carries over the non-distinct calls; the distinct call is represented by
    // the extra group keys instead.
    final List<AggregateCall> bottomAggregateCalls = new ArrayList<>();
    for (AggregateCall original : originalAggCalls) {
        if (original.isDistinct()) {
            continue;
        }
        bottomAggregateCalls.add(AggregateCall.create(original.getAggregation(), false, original.isApproximate(), false, original.getArgList(), -1, RelCollations.EMPTY, bottomGroupBits.cardinality(), relBuilder.peek(), null, original.name));
    }
    relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), bottomGroupBits, null, bottomAggregateCalls));

    // Aggregate A finishes the work on top of B, with arguments remapped to B's output.
    final List<AggregateCall> topAggregateCalls = new ArrayList<>();
    int nonDistinctSeen = 0;
    for (AggregateCall original : originalAggCalls) {
        if (original.isDistinct()) {
            // The distinct argument is now a group key of B; its output position is the number
            // of B's group keys that sort before it.
            final List<Integer> remappedArgs = new ArrayList<>();
            for (int arg : original.getArgList()) {
                remappedArgs.add(bottomGroupSet.headSet(arg).size());
            }
            topAggregateCalls.add(AggregateCall.create(original.getAggregation(), false, original.isApproximate(), false, remappedArgs, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), original.getType(), original.name));
            continue;
        }

        // Non-distinct results of B follow B's group keys, in call order.
        final List<Integer> newArgs = com.google.common.collect.Lists.newArrayList(bottomGroupSet.size() + nonDistinctSeen);
        if (original.getAggregation().getKind() == SqlKind.COUNT) {
            // Partial counts produced by B must be summed by A.
            topAggregateCalls.add(AggregateCall.create(new SqlSumEmptyIsZeroAggFunction(), false, original.isApproximate(), false, newArgs, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), original.getType(), original.getName()));
        } else {
            // Any other function is applied again unchanged.
            topAggregateCalls.add(AggregateCall.create(original.getAggregation(), false, original.isApproximate(), false, newArgs, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), original.getType(), original.name));
        }
        nonDistinctSeen++;
    }

    // Group keys of A: the positions within B's key list that belong to the original group
    // set, i.e. B's keys minus the distinct call's input.
    final Set<Integer> topGroupSet = new HashSet<>();
    int positionInBottom = 0;
    for (int bottomGroup : bottomGroupSet) {
        if (originalGroupSet.get(bottomGroup)) {
            topGroupSet.add(positionInBottom);
        }
        positionInBottom++;
    }
    relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), ImmutableBitSet.of(topGroupSet), null, topAggregateCalls));
    return relBuilder;
}
Also used : AggregateCall(org.apache.calcite.rel.core.AggregateCall) SqlSumEmptyIsZeroAggFunction(org.apache.calcite.sql.fun.SqlSumEmptyIsZeroAggFunction) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 65 with ImmutableBitSet

use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.

the class FlinkAggregateExpandDistinctAggregatesRule method onMatch.

// ~ Methods ----------------------------------------------------------------
/**
 * Expands the distinct aggregate calls of the matched aggregate.
 *
 * <p>Strategies are tried in this order:
 *
 * <ol>
 *   <li>no accurate distinct call, or more than one grouping set: do nothing;
 *   <li>a single argument list and no non-distinct call other than those that ignore
 *       DISTINCT, on a simple group: {@code convertMonopole};
 *   <li>{@code useGroupingSets} is set: {@code rewriteUsingGroupingSets};
 *   <li>exactly one distinct call, no filters, and at least one non-distinct call, all of
 *       them COUNT/SUM/SUM0/MIN/MAX: {@code convertSingletonDistinct};
 *   <li>otherwise: aggregate the non-distinct calls once and apply {@code doRewrite} per
 *       distinct argument list, then project back to the original field names.
 * </ol>
 *
 * @param call rule call whose operand 0 is the aggregate to rewrite
 * @throws TableException if the aggregate contains both accurate and approximate distinct calls
 */
public void onMatch(RelOptRuleCall call) {
    final Aggregate aggregate = call.rel(0);
    final List<AggregateCall> aggCalls = aggregate.getAggCallList();
    if (!AggregateUtil.containsAccurateDistinctCall(aggCalls)) {
        return;
    }
    // Mixing approximate distinct calls with accurate distinct calls is not supported.
    if (AggregateUtil.containsApproximateDistinctCall(aggCalls)) {
        throw new TableException("There are both Distinct AggCall and Approximate Distinct AggCall in one sql statement, " + "it is not supported yet.\nPlease choose one of them.");
    }
    // An aggregate with several grouping sets is left alone here; it is expected to be
    // simplified by DecomposeGroupingSetsRule before this rule expands its distinct aggregates.
    if (aggregate.getGroupSets().size() > 1) {
        return;
    }

    // Number of aggregate calls without distinct.
    int nonDistinctAggCallCount = 0;
    // Same, but not counting calls whose function ignores DISTINCT.
    int nonDistinctAggCallExcludingIgnoredCount = 0;
    int filterCount = 0;
    int unsupportedNonDistinctAggCallCount = 0;
    // Distinct (argument list, filter) pairs; a LinkedHashSet keeps the order deterministic.
    final Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
    for (AggregateCall aggCall : aggCalls) {
        if (aggCall.filterArg >= 0) {
            ++filterCount;
        }
        if (aggCall.isDistinct()) {
            argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
            continue;
        }
        ++nonDistinctAggCallCount;
        // Only COUNT/SUM/SUM0/MIN/MAX are supported by the "single" distinct optimization.
        switch (aggCall.getAggregation().getKind()) {
            case COUNT:
            case SUM:
            case SUM0:
            case MIN:
            case MAX:
                break;
            default:
                ++unsupportedNonDistinctAggCallCount;
        }
        if (aggCall.getAggregation().getDistinctOptionality() == Optionality.IGNORED) {
            // The function ignores DISTINCT, so it is treated like a distinct call.
            argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
        } else {
            ++nonDistinctAggCallExcludingIgnoredCount;
        }
    }
    final int distinctAggCallCount = aggCalls.size() - nonDistinctAggCallCount;
    Preconditions.checkState(!argLists.isEmpty(), "containsDistinctCall lied");

    // Every call shares one argument list (calls that ignore DISTINCT included) and the
    // grouping is simple: use the single-argument-list form.
    final boolean singleArgListOnly = nonDistinctAggCallExcludingIgnoredCount == 0 && argLists.size() == 1 && aggregate.getGroupType() == Group.SIMPLE;
    if (singleArgListOnly) {
        final Pair<List<Integer>, Integer> onlyArgs = com.google.common.collect.Iterables.getOnlyElement(argLists);
        final RelBuilder monopoleBuilder = call.builder();
        convertMonopole(monopoleBuilder, aggregate, onlyArgs.left, onlyArgs.right);
        call.transformTo(monopoleBuilder.build());
        return;
    }

    if (useGroupingSets) {
        rewriteUsingGroupingSets(call, aggregate);
        return;
    }

    // One distinct aggregate, no filter, and one or more non-distinct aggregates that are all
    // sum/min/max/count: multi-phase aggregates can be generated.
    final boolean singletonDistinct = distinctAggCallCount == 1
            && filterCount == 0
            && unsupportedNonDistinctAggCallCount == 0
            && nonDistinctAggCallCount > 0;
    if (singletonDistinct) {
        final RelBuilder singletonBuilder = call.builder();
        convertSingletonDistinct(singletonBuilder, aggregate, argLists);
        call.transformTo(singletonBuilder.build());
        return;
    }

    // Expressions that will yield the final result. The group columns start out as
    // references to fields of the aggregate's row type.
    final List<RelDataTypeField> aggFields = aggregate.getRowType().getFieldList();
    final List<RexInputRef> refs = new ArrayList<>();
    final List<String> fieldNames = aggregate.getRowType().getFieldNames();
    final ImmutableBitSet groupSet = aggregate.getGroupSet();
    final int groupCount = aggregate.getGroupCount();
    for (int groupIdx = 0; groupIdx < groupCount; groupIdx++) {
        refs.add(RexInputRef.of(groupIdx, aggFields));
    }

    // Collect the non-distinct aggregates; distinct calls get a null placeholder in refs.
    final List<AggregateCall> newAggCallList = new ArrayList<>();
    for (int callIdx = 0; callIdx < aggCalls.size(); callIdx++) {
        final AggregateCall aggCall = aggCalls.get(callIdx);
        if (aggCall.isDistinct()) {
            refs.add(null);
        } else {
            refs.add(new RexInputRef(groupCount + newAggCallList.size(), aggFields.get(groupCount + callIdx).getType()));
            newAggCallList.add(aggCall);
        }
    }

    // When there are no non-distinct aggregates (regardless of group-bys) the extra
    // aggregate is not generated.
    final RelBuilder relBuilder = call.builder();
    relBuilder.push(aggregate.getInput());
    int n = 0;
    if (!newAggCallList.isEmpty()) {
        final RelBuilder.GroupKey groupKey = relBuilder.groupKey(groupSet, aggregate.getGroupSets());
        relBuilder.aggregate(groupKey, newAggCallList);
        ++n;
    }

    // Apply doRewrite once per distinct (argument list, filter) pair.
    for (Pair<List<Integer>, Integer> argList : argLists) {
        doRewrite(relBuilder, aggregate, n, argList.left, argList.right, refs);
        n++;
    }

    relBuilder.project(refs, fieldNames);
    call.transformTo(relBuilder.build());
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TableException(org.apache.flink.table.api.TableException) RelBuilder(org.apache.calcite.tools.RelBuilder) ImmutableBitSet(org.apache.calcite.util.ImmutableBitSet) ArrayList(java.util.ArrayList) SqlKind(org.apache.calcite.sql.SqlKind) AggregateCall(org.apache.calcite.rel.core.AggregateCall) RelDataTypeField(org.apache.calcite.rel.type.RelDataTypeField) RexInputRef(org.apache.calcite.rex.RexInputRef) ArrayList(java.util.ArrayList) ImmutableIntList(org.apache.calcite.util.ImmutableIntList) List(java.util.List) Aggregate(org.apache.calcite.rel.core.Aggregate) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Pair(org.apache.calcite.util.Pair)

Aggregations

ImmutableBitSet (org.apache.calcite.util.ImmutableBitSet)208 RexNode (org.apache.calcite.rex.RexNode)127 RelNode (org.apache.calcite.rel.RelNode)110 ArrayList (java.util.ArrayList)101 RelDataTypeField (org.apache.calcite.rel.type.RelDataTypeField)66 RexBuilder (org.apache.calcite.rex.RexBuilder)60 AggregateCall (org.apache.calcite.rel.core.AggregateCall)55 RexInputRef (org.apache.calcite.rex.RexInputRef)45 RelDataType (org.apache.calcite.rel.type.RelDataType)39 HashMap (java.util.HashMap)36 RelBuilder (org.apache.calcite.tools.RelBuilder)36 RelMetadataQuery (org.apache.calcite.rel.metadata.RelMetadataQuery)30 Mapping (org.apache.calcite.util.mapping.Mapping)30 Pair (org.apache.calcite.util.Pair)29 Aggregate (org.apache.calcite.rel.core.Aggregate)27 ImmutableList (com.google.common.collect.ImmutableList)23 LinkedHashSet (java.util.LinkedHashSet)23 List (java.util.List)22 HashSet (java.util.HashSet)20 RelOptUtil (org.apache.calcite.plan.RelOptUtil)18