use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class HiveParserCalcitePlanner method genGBRelNode.
private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfos, List<Integer> groupSets, RelNode srcRel) throws SemanticException {
Map<String, Integer> colNameToPos = relToHiveColNameCalcitePosMap.get(srcRel);
HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(), colNameToPos, 0, false, funcConverter);
final boolean hasGroupSets = groupSets != null && !groupSets.isEmpty();
final List<RexNode> gbInputRexNodes = new ArrayList<>();
final HashMap<String, Integer> inputRexNodeToIndex = new HashMap<>();
final List<Integer> gbKeyIndices = new ArrayList<>();
int inputIndex = 0;
for (ExprNodeDesc key : gbExprs) {
// also convert null literal here to support grouping by NULLs
RexNode keyRex = convertNullLiteral(converter.convert(key)).accept(funcConverter);
gbInputRexNodes.add(keyRex);
gbKeyIndices.add(inputIndex);
inputRexNodeToIndex.put(keyRex.toString(), inputIndex);
inputIndex++;
}
final ImmutableBitSet groupSet = ImmutableBitSet.of(gbKeyIndices);
// Grouping sets: we need to transform them into ImmutableBitSet objects for Calcite
List<ImmutableBitSet> transformedGroupSets = null;
if (hasGroupSets) {
Set<ImmutableBitSet> set = new HashSet<>(groupSets.size());
for (int val : groupSets) {
set.add(convert(val, groupSet.cardinality()));
}
// Calcite expects the grouping sets sorted and without duplicates
transformedGroupSets = new ArrayList<>(set);
transformedGroupSets.sort(ImmutableBitSet.COMPARATOR);
}
// add Agg parameters to inputs
for (AggInfo aggInfo : aggInfos) {
for (ExprNodeDesc expr : aggInfo.getAggParams()) {
RexNode paramRex = converter.convert(expr).accept(funcConverter);
Integer argIndex = inputRexNodeToIndex.get(paramRex.toString());
if (argIndex == null) {
argIndex = gbInputRexNodes.size();
inputRexNodeToIndex.put(paramRex.toString(), argIndex);
gbInputRexNodes.add(paramRex);
}
}
}
// create the actual input before creating agg calls so that the calls can properly infer
// return type
RelNode gbInputRel = LogicalProject.create(srcRel, Collections.emptyList(), gbInputRexNodes, (List<String>) null);
List<AggregateCall> aggregateCalls = new ArrayList<>();
for (AggInfo aggInfo : aggInfos) {
aggregateCalls.add(HiveParserUtils.toAggCall(aggInfo, converter, inputRexNodeToIndex, groupSet.cardinality(), gbInputRel, cluster, funcConverter));
}
// GROUPING__ID is a virtual col in Hive, so we use Flink's function
if (hasGroupSets) {
// Create GroupingID column
AggregateCall aggCall = AggregateCall.create(SqlStdOperatorTable.GROUPING_ID, false, false, false, gbKeyIndices, -1, RelCollations.EMPTY, groupSet.cardinality(), gbInputRel, null, null);
aggregateCalls.add(aggCall);
}
if (gbInputRexNodes.isEmpty()) {
// This will happen for count(*), in such cases we arbitrarily pick
// first element from srcRel
gbInputRexNodes.add(cluster.getRexBuilder().makeInputRef(srcRel, 0));
}
return LogicalAggregate.create(gbInputRel, groupSet, transformedGroupSets, aggregateCalls);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class FlinkAggregateJoinTransposeRule method toRegularAggregate.
/**
* Convert aggregate with AUXILIARY_GROUP to regular aggregate. Return original aggregate and
* null project if the given aggregate does not contain AUXILIARY_GROUP, else new aggregate
* without AUXILIARY_GROUP and a project to permute output columns if needed.
*/
private Pair<Aggregate, List<RexNode>> toRegularAggregate(Aggregate aggregate) {
Tuple2<int[], Seq<AggregateCall>> auxGroupAndRegularAggCalls = AggregateUtil.checkAndSplitAggCalls(aggregate);
final int[] auxGroup = auxGroupAndRegularAggCalls._1;
final Seq<AggregateCall> regularAggCalls = auxGroupAndRegularAggCalls._2;
if (auxGroup.length != 0) {
int[] fullGroupSet = AggregateUtil.checkAndGetFullGroupSet(aggregate);
ImmutableBitSet newGroupSet = ImmutableBitSet.of(fullGroupSet);
List<AggregateCall> aggCalls = JavaConverters.seqAsJavaListConverter(regularAggCalls).asJava();
final Aggregate newAgg = aggregate.copy(aggregate.getTraitSet(), aggregate.getInput(), aggregate.indicator, newGroupSet, com.google.common.collect.ImmutableList.of(newGroupSet), aggCalls);
final List<RelDataTypeField> aggFields = aggregate.getRowType().getFieldList();
final List<RexNode> projectAfterAgg = new ArrayList<>();
for (int i = 0; i < fullGroupSet.length; ++i) {
int group = fullGroupSet[i];
int index = newGroupSet.indexOf(group);
projectAfterAgg.add(new RexInputRef(index, aggFields.get(i).getType()));
}
int fieldCntOfAgg = aggFields.size();
for (int i = fullGroupSet.length; i < fieldCntOfAgg; ++i) {
projectAfterAgg.add(new RexInputRef(i, aggFields.get(i).getType()));
}
Preconditions.checkArgument(projectAfterAgg.size() == fieldCntOfAgg);
return new Pair<>(newAgg, projectAfterAgg);
} else {
return new Pair<>(aggregate, null);
}
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class FlinkAggregateJoinTransposeRule method keyColumns.
/**
* Computes the closure of a set of columns according to a given list of constraints. Each 'x =
* y' constraint causes bit y to be set if bit x is set, and vice versa.
*/
private static ImmutableBitSet keyColumns(ImmutableBitSet aggregateColumns, com.google.common.collect.ImmutableList<RexNode> predicates) {
SortedMap<Integer, BitSet> equivalence = new TreeMap<>();
for (RexNode predicate : predicates) {
populateEquivalences(equivalence, predicate);
}
ImmutableBitSet keyColumns = aggregateColumns;
for (Integer aggregateColumn : aggregateColumns) {
final BitSet bitSet = equivalence.get(aggregateColumn);
if (bitSet != null) {
keyColumns = keyColumns.union(bitSet);
}
}
return keyColumns;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method convertSingletonDistinct.
/**
* Converts an aggregate with one distinct aggregate and one or more non-distinct aggregates to
* multi-phase aggregates (see reference example below).
*
* @param relBuilder Contains the input relational expression
* @param aggregate Original aggregate
* @param argLists Arguments and filters to the distinct aggregate function
*/
private RelBuilder convertSingletonDistinct(RelBuilder relBuilder, Aggregate aggregate, Set<Pair<List<Integer>, Integer>> argLists) {
// In this case, we are assuming that there is a single distinct function.
// So make sure that argLists is of size one.
Preconditions.checkArgument(argLists.size() == 1);
// For example,
// SELECT deptno, COUNT(*), SUM(bonus), MIN(DISTINCT sal)
// FROM emp
// GROUP BY deptno
//
// becomes
//
// SELECT deptno, SUM(cnt), SUM(bonus), MIN(sal)
// FROM (
// SELECT deptno, COUNT(*) as cnt, SUM(bonus), sal
// FROM EMP
// GROUP BY deptno, sal) // Aggregate B
// GROUP BY deptno // Aggregate A
relBuilder.push(aggregate.getInput());
final List<AggregateCall> originalAggCalls = aggregate.getAggCallList();
final ImmutableBitSet originalGroupSet = aggregate.getGroupSet();
// Add the distinct aggregate column(s) to the group-by columns,
// if not already a part of the group-by
final SortedSet<Integer> bottomGroupSet = new TreeSet<>();
bottomGroupSet.addAll(aggregate.getGroupSet().asList());
for (AggregateCall aggCall : originalAggCalls) {
if (aggCall.isDistinct()) {
bottomGroupSet.addAll(aggCall.getArgList());
// since we only have single distinct call
break;
}
}
// Generate the intermediate aggregate B, the one on the bottom that converts
// a distinct call to group by call.
// Bottom aggregate is the same as the original aggregate, except that
// the bottom aggregate has converted the DISTINCT aggregate to a group by clause.
final List<AggregateCall> bottomAggregateCalls = new ArrayList<>();
for (AggregateCall aggCall : originalAggCalls) {
// as-is all the non-distinct aggregates
if (!aggCall.isDistinct()) {
final AggregateCall newCall = AggregateCall.create(aggCall.getAggregation(), false, aggCall.isApproximate(), false, aggCall.getArgList(), -1, RelCollations.EMPTY, ImmutableBitSet.of(bottomGroupSet).cardinality(), relBuilder.peek(), null, aggCall.name);
bottomAggregateCalls.add(newCall);
}
}
// Generate the aggregate B (see the reference example above)
relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), ImmutableBitSet.of(bottomGroupSet), null, bottomAggregateCalls));
// Add aggregate A (see the reference example above), the top aggregate
// to handle the rest of the aggregation that the bottom aggregate hasn't handled
final List<AggregateCall> topAggregateCalls = com.google.common.collect.Lists.newArrayList();
// Use the remapped arguments for the (non)distinct aggregate calls
int nonDistinctAggCallProcessedSoFar = 0;
for (AggregateCall aggCall : originalAggCalls) {
final AggregateCall newCall;
if (aggCall.isDistinct()) {
List<Integer> newArgList = new ArrayList<>();
for (int arg : aggCall.getArgList()) {
newArgList.add(bottomGroupSet.headSet(arg).size());
}
newCall = AggregateCall.create(aggCall.getAggregation(), false, aggCall.isApproximate(), false, newArgList, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.name);
} else {
// If aggregate B had a COUNT aggregate call the corresponding aggregate at
// aggregate A must be SUM. For other aggregates, it remains the same.
final List<Integer> newArgs = com.google.common.collect.Lists.newArrayList(bottomGroupSet.size() + nonDistinctAggCallProcessedSoFar);
if (aggCall.getAggregation().getKind() == SqlKind.COUNT) {
newCall = AggregateCall.create(new SqlSumEmptyIsZeroAggFunction(), false, aggCall.isApproximate(), false, newArgs, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.getName());
} else {
newCall = AggregateCall.create(aggCall.getAggregation(), false, aggCall.isApproximate(), false, newArgs, -1, RelCollations.EMPTY, originalGroupSet.cardinality(), relBuilder.peek(), aggCall.getType(), aggCall.name);
}
nonDistinctAggCallProcessedSoFar++;
}
topAggregateCalls.add(newCall);
}
// Populate the group-by keys with the remapped arguments for aggregate A
// The top groupset is basically an identity (first X fields of aggregate B's
// output), minus the distinct aggCall's input.
final Set<Integer> topGroupSet = new HashSet<>();
int groupSetToAdd = 0;
for (int bottomGroup : bottomGroupSet) {
if (originalGroupSet.get(bottomGroup)) {
topGroupSet.add(groupSetToAdd);
}
groupSetToAdd++;
}
relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), ImmutableBitSet.of(topGroupSet), null, topAggregateCalls));
return relBuilder;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
final Aggregate aggregate = call.rel(0);
if (!AggregateUtil.containsAccurateDistinctCall(aggregate.getAggCallList())) {
return;
}
// accurate distinct call.
if (AggregateUtil.containsApproximateDistinctCall(aggregate.getAggCallList())) {
throw new TableException("There are both Distinct AggCall and Approximate Distinct AggCall in one sql statement, " + "it is not supported yet.\nPlease choose one of them.");
}
// by DecomposeGroupingSetsRule. Then this rule expands it's distinct aggregates.
if (aggregate.getGroupSets().size() > 1) {
return;
}
// Find all of the agg expressions. We use a LinkedHashSet to ensure determinism.
// Find all aggregate calls without distinct
int nonDistinctAggCallCount = 0;
// Find all aggregate calls without distinct but ignore MAX, MIN, BIT_AND, BIT_OR
int nonDistinctAggCallExcludingIgnoredCount = 0;
int filterCount = 0;
int unsupportedNonDistinctAggCallCount = 0;
final Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
for (AggregateCall aggCall : aggregate.getAggCallList()) {
if (aggCall.filterArg >= 0) {
++filterCount;
}
if (!aggCall.isDistinct()) {
++nonDistinctAggCallCount;
final SqlKind aggCallKind = aggCall.getAggregation().getKind();
// We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
switch(aggCallKind) {
case COUNT:
case SUM:
case SUM0:
case MIN:
case MAX:
break;
default:
++unsupportedNonDistinctAggCallCount;
}
if (aggCall.getAggregation().getDistinctOptionality() == Optionality.IGNORED) {
argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
} else {
++nonDistinctAggCallExcludingIgnoredCount;
}
} else {
argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
}
}
final int distinctAggCallCount = aggregate.getAggCallList().size() - nonDistinctAggCallCount;
Preconditions.checkState(argLists.size() > 0, "containsDistinctCall lied");
// we can still use this promotion.
if (nonDistinctAggCallExcludingIgnoredCount == 0 && argLists.size() == 1 && aggregate.getGroupType() == Group.SIMPLE) {
final Pair<List<Integer>, Integer> pair = com.google.common.collect.Iterables.getOnlyElement(argLists);
final RelBuilder relBuilder = call.builder();
convertMonopole(relBuilder, aggregate, pair.left, pair.right);
call.transformTo(relBuilder.build());
return;
}
if (useGroupingSets) {
rewriteUsingGroupingSets(call, aggregate);
return;
}
// we can generate multi-phase aggregates
if (// one distinct aggregate
distinctAggCallCount == 1 && // no filter
filterCount == 0 && unsupportedNonDistinctAggCallCount == // sum/min/max/count in non-distinct aggregate
0 && nonDistinctAggCallCount > 0) {
// one or more non-distinct aggregates
final RelBuilder relBuilder = call.builder();
convertSingletonDistinct(relBuilder, aggregate, argLists);
call.transformTo(relBuilder.build());
return;
}
// Create a list of the expressions which will yield the final result.
// Initially, the expressions point to the input field.
final List<RelDataTypeField> aggFields = aggregate.getRowType().getFieldList();
final List<RexInputRef> refs = new ArrayList<>();
final List<String> fieldNames = aggregate.getRowType().getFieldNames();
final ImmutableBitSet groupSet = aggregate.getGroupSet();
final int groupCount = aggregate.getGroupCount();
for (int i : Util.range(groupCount)) {
refs.add(RexInputRef.of(i, aggFields));
}
// Aggregate the original relation, including any non-distinct aggregates.
final List<AggregateCall> newAggCallList = new ArrayList<>();
int i = -1;
for (AggregateCall aggCall : aggregate.getAggCallList()) {
++i;
if (aggCall.isDistinct()) {
refs.add(null);
continue;
}
refs.add(new RexInputRef(groupCount + newAggCallList.size(), aggFields.get(groupCount + i).getType()));
newAggCallList.add(aggCall);
}
// In the case where there are no non-distinct aggregates (regardless of
// whether there are group bys), there's no need to generate the
// extra aggregate and join.
final RelBuilder relBuilder = call.builder();
relBuilder.push(aggregate.getInput());
int n = 0;
if (!newAggCallList.isEmpty()) {
final RelBuilder.GroupKey groupKey = relBuilder.groupKey(groupSet, aggregate.getGroupSets());
relBuilder.aggregate(groupKey, newAggCallList);
++n;
}
// set of operands.
for (Pair<List<Integer>, Integer> argList : argLists) {
doRewrite(relBuilder, aggregate, n++, argList.left, argList.right, refs);
}
relBuilder.project(refs, fieldNames);
call.transformTo(relBuilder.build());
}
Aggregations