use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveAggregateProjectMergeRule method apply.
public static RelNode apply(HiveAggregate aggregate, HiveProject project) {
final List<Integer> newKeys = Lists.newArrayList();
final Map<Integer, Integer> map = new HashMap<>();
for (int key : aggregate.getGroupSet()) {
final RexNode rex = project.getProjects().get(key);
if (rex instanceof RexInputRef) {
final int newKey = ((RexInputRef) rex).getIndex();
newKeys.add(newKey);
map.put(key, newKey);
} else {
// Cannot handle "GROUP BY expression"
return null;
}
}
final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map);
ImmutableList<ImmutableBitSet> newGroupingSets = null;
if (aggregate.indicator) {
newGroupingSets = ImmutableBitSet.ORDERING.immutableSortedCopy(ImmutableBitSet.permute(aggregate.getGroupSets(), map));
}
final ImmutableList.Builder<AggregateCall> aggCalls = ImmutableList.builder();
for (AggregateCall aggregateCall : aggregate.getAggCallList()) {
final ImmutableList.Builder<Integer> newArgs = ImmutableList.builder();
for (int arg : aggregateCall.getArgList()) {
final RexNode rex = project.getProjects().get(arg);
if (rex instanceof RexInputRef) {
newArgs.add(((RexInputRef) rex).getIndex());
} else {
// Cannot handle "AGG(expression)"
return null;
}
}
final int newFilterArg;
if (aggregateCall.filterArg >= 0) {
final RexNode rex = project.getProjects().get(aggregateCall.filterArg);
if (!(rex instanceof RexInputRef)) {
return null;
}
newFilterArg = ((RexInputRef) rex).getIndex();
} else {
newFilterArg = -1;
}
aggCalls.add(aggregateCall.copy(newArgs.build(), newFilterArg));
}
final Aggregate newAggregate = aggregate.copy(aggregate.getTraitSet(), project.getInput(), aggregate.indicator, newGroupSet, newGroupingSets, aggCalls.build());
// Add a project if the group set is not in the same order or
// contains duplicates.
RelNode rel = newAggregate;
if (!newKeys.equals(newGroupSet.asList())) {
final List<Integer> posList = Lists.newArrayList();
for (int newKey : newKeys) {
posList.add(newGroupSet.indexOf(newKey));
}
if (aggregate.indicator) {
for (int newKey : newKeys) {
posList.add(aggregate.getGroupCount() + newGroupSet.indexOf(newKey));
}
}
for (int i = newAggregate.getGroupCount() + newAggregate.getIndicatorCount(); i < newAggregate.getRowType().getFieldCount(); i++) {
posList.add(i);
}
rel = HiveRelOptUtil.createProject(HiveRelFactories.HIVE_BUILDER.create(aggregate.getCluster(), null), rel, posList);
}
return rel;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveFilterJoinRule method filterRefersToBothSidesOfJoin.
private boolean filterRefersToBothSidesOfJoin(RexNode filter, Join j) {
boolean refersToBothSides = false;
int joinNoOfProjects = j.getRowType().getFieldCount();
ImmutableBitSet filterProjs = ImmutableBitSet.FROM_BIT_SET.apply(new BitSet(joinNoOfProjects));
ImmutableBitSet allLeftProjs = filterProjs.union(ImmutableBitSet.range(0, j.getInput(0).getRowType().getFieldCount()));
ImmutableBitSet allRightProjs = filterProjs.union(ImmutableBitSet.range(j.getInput(0).getRowType().getFieldCount(), joinNoOfProjects));
filterProjs = filterProjs.union(InputFinder.bits(filter));
if (allLeftProjs.intersects(filterProjs) && allRightProjs.intersects(filterProjs))
refersToBothSides = true;
return refersToBothSides;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveSubQRemoveRelBuilder method join.
/** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
* variables. */
public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, Set<CorrelationId> variablesSet) {
Frame right = stack.pop();
final Frame left = stack.pop();
final RelNode join;
final boolean correlate = variablesSet.size() == 1;
RexNode postCondition = literal(true);
if (correlate) {
final CorrelationId id = Iterables.getOnlyElement(variablesSet);
final ImmutableBitSet requiredColumns = RelOptUtil.correlationColumns(id, right.rel);
if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) {
throw new IllegalArgumentException("variable " + id + " must not be used by left input to correlation");
}
switch(joinType) {
case LEFT:
// Correlate does not have an ON clause.
// For a LEFT correlate, predicate must be evaluated first.
// For INNER, we can defer.
stack.push(right);
filter(condition.accept(new Shifter(left.rel, id, right.rel)));
right = stack.pop();
break;
default:
postCondition = condition;
}
join = correlateFactory.createCorrelate(left.rel, right.rel, id, requiredColumns, SemiJoinType.of(joinType));
} else {
join = joinFactory.createJoin(left.rel, right.rel, condition, variablesSet, joinType, false);
}
final List<Pair<String, RelDataType>> pairs = new ArrayList<>();
pairs.addAll(left.right);
pairs.addAll(right.right);
stack.push(new Frame(join, ImmutableList.copyOf(pairs)));
filter(postCondition);
return this;
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project hive by apache.
the class HiveOpConverter method visit.
OpAttr visit(HiveSortLimit sortRel) throws SemanticException {
OpAttr inputOpAf = dispatch(sortRel.getInput());
if (LOG.isDebugEnabled()) {
LOG.debug("Translating operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " with row type: [" + sortRel.getRowType() + "]");
if (sortRel.getCollation() == RelCollations.EMPTY) {
LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of limit");
} else if (sortRel.fetch == null) {
LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort");
} else {
LOG.debug("Operator rel#" + sortRel.getId() + ":" + sortRel.getRelTypeName() + " consists of sort+limit");
}
}
Operator<?> inputOp = inputOpAf.inputs.get(0);
Operator<?> resultOp = inputOpAf.inputs.get(0);
// of their columns
if (sortRel.getCollation() != RelCollations.EMPTY) {
// In strict mode, in the presence of order by, limit must be specified.
if (sortRel.fetch == null) {
String error = StrictChecks.checkNoLimit(hiveConf);
if (error != null)
throw new SemanticException(error);
}
// 1.a. Extract order for each column from collation
// Generate sortCols and order
ImmutableBitSet.Builder sortColsPosBuilder = ImmutableBitSet.builder();
ImmutableBitSet.Builder sortOutputColsPosBuilder = ImmutableBitSet.builder();
Map<Integer, RexNode> obRefToCallMap = sortRel.getInputRefToCallMap();
List<ExprNodeDesc> sortCols = new ArrayList<ExprNodeDesc>();
StringBuilder order = new StringBuilder();
StringBuilder nullOrder = new StringBuilder();
for (RelFieldCollation sortInfo : sortRel.getCollation().getFieldCollations()) {
int sortColumnPos = sortInfo.getFieldIndex();
ColumnInfo columnInfo = new ColumnInfo(inputOp.getSchema().getSignature().get(sortColumnPos));
ExprNodeColumnDesc sortColumn = new ExprNodeColumnDesc(columnInfo.getType(), columnInfo.getInternalName(), columnInfo.getTabAlias(), columnInfo.getIsVirtualCol());
sortCols.add(sortColumn);
if (sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING) {
order.append("-");
} else {
order.append("+");
}
if (sortInfo.nullDirection == RelFieldCollation.NullDirection.FIRST) {
nullOrder.append("a");
} else if (sortInfo.nullDirection == RelFieldCollation.NullDirection.LAST) {
nullOrder.append("z");
} else {
// Default
nullOrder.append(sortInfo.getDirection() == RelFieldCollation.Direction.DESCENDING ? "z" : "a");
}
if (obRefToCallMap != null) {
RexNode obExpr = obRefToCallMap.get(sortColumnPos);
sortColsPosBuilder.set(sortColumnPos);
if (obExpr == null) {
sortOutputColsPosBuilder.set(sortColumnPos);
}
}
}
// Use only 1 reducer for order by
int numReducers = 1;
// We keep the columns only the columns that are part of the final output
List<String> keepColumns = new ArrayList<String>();
final ImmutableBitSet sortColsPos = sortColsPosBuilder.build();
final ImmutableBitSet sortOutputColsPos = sortOutputColsPosBuilder.build();
final ArrayList<ColumnInfo> inputSchema = inputOp.getSchema().getSignature();
for (int pos = 0; pos < inputSchema.size(); pos++) {
if ((sortColsPos.get(pos) && sortOutputColsPos.get(pos)) || (!sortColsPos.get(pos) && !sortOutputColsPos.get(pos))) {
keepColumns.add(inputSchema.get(pos).getInternalName());
}
}
// 1.b. Generate reduce sink and project operator
resultOp = genReduceSinkAndBacktrackSelect(resultOp, sortCols.toArray(new ExprNodeDesc[sortCols.size()]), 0, new ArrayList<ExprNodeDesc>(), order.toString(), nullOrder.toString(), numReducers, Operation.NOT_ACID, hiveConf, keepColumns);
}
// 2. If we need to generate limit
if (sortRel.fetch != null) {
int limit = RexLiteral.intValue(sortRel.fetch);
int offset = sortRel.offset == null ? 0 : RexLiteral.intValue(sortRel.offset);
LimitDesc limitDesc = new LimitDesc(offset, limit);
ArrayList<ColumnInfo> cinfoLst = createColInfos(resultOp);
resultOp = OperatorFactory.getAndMakeChild(limitDesc, new RowSchema(cinfoLst), resultOp);
if (LOG.isDebugEnabled()) {
LOG.debug("Generated " + resultOp + " with row schema: [" + resultOp.getSchema() + "]");
}
}
// 3. Return result
return inputOpAf.clone(resultOp);
}
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.util.ImmutableBitSet in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method doRewrite.
/**
* Converts all distinct aggregate calls to a given set of arguments.
*
* <p>This method is called several times, one for each set of arguments.
* Each time it is called, it generates a JOIN to a new SELECT DISTINCT
* relational expression, and modifies the set of top-level calls.
*
* @param aggregate Original aggregate
* @param n Ordinal of this in a join. {@code relBuilder} contains the
* input relational expression (either the original
* aggregate, the output from the previous call to this
* method. {@code n} is 0 if we're converting the
* first distinct aggregate in a query with no non-distinct
* aggregates)
* @param argList Arguments to the distinct aggregate function
* @param filterArg Argument that filters input to aggregate function, or -1
* @param refs Array of expressions which will be the projected by the
* result of this rule. Those relating to this arg list will
* be modified @return Relational expression
*/
private void doRewrite(RelBuilder relBuilder, Aggregate aggregate, int n, List<Integer> argList, int filterArg, List<RexInputRef> refs) {
final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
final List<RelDataTypeField> leftFields;
if (n == 0) {
leftFields = null;
} else {
leftFields = relBuilder.peek().getRowType().getFieldList();
}
// LogicalAggregate(
// child,
// {COUNT(DISTINCT 1), SUM(DISTINCT 1), SUM(2)})
//
// becomes
//
// LogicalAggregate(
// LogicalJoin(
// child,
// LogicalAggregate(child, < all columns > {}),
// INNER,
// <f2 = f5>))
//
// E.g.
// SELECT deptno, SUM(DISTINCT sal), COUNT(DISTINCT gender), MAX(age)
// FROM Emps
// GROUP BY deptno
//
// becomes
//
// SELECT e.deptno, adsal.sum_sal, adgender.count_gender, e.max_age
// FROM (
// SELECT deptno, MAX(age) as max_age
// FROM Emps GROUP BY deptno) AS e
// JOIN (
// SELECT deptno, COUNT(gender) AS count_gender FROM (
// SELECT DISTINCT deptno, gender FROM Emps) AS dgender
// GROUP BY deptno) AS adgender
// ON e.deptno = adgender.deptno
// JOIN (
// SELECT deptno, SUM(sal) AS sum_sal FROM (
// SELECT DISTINCT deptno, sal FROM Emps) AS dsal
// GROUP BY deptno) AS adsal
// ON e.deptno = adsal.deptno
// GROUP BY e.deptno
//
// Note that if a query contains no non-distinct aggregates, then the
// very first join/group by is omitted. In the example above, if
// MAX(age) is removed, then the sub-select of "e" is not needed, and
// instead the two other group by's are joined to one another.
// Project the columns of the GROUP BY plus the arguments
// to the agg function.
final Map<Integer, Integer> sourceOf = new HashMap<>();
createSelectDistinct(relBuilder, aggregate, argList, filterArg, sourceOf);
// Now compute the aggregate functions on top of the distinct dataset.
// Each distinct agg becomes a non-distinct call to the corresponding
// field from the right; for example,
// "COUNT(DISTINCT e.sal)"
// becomes
// "COUNT(distinct_e.sal)".
final List<AggregateCall> aggCallList = new ArrayList<>();
final List<AggregateCall> aggCalls = aggregate.getAggCallList();
final int groupAndIndicatorCount = aggregate.getGroupCount() + aggregate.getIndicatorCount();
int i = groupAndIndicatorCount - 1;
for (AggregateCall aggCall : aggCalls) {
++i;
// COUNT(DISTINCT gender) or SUM(sal).
if (!aggCall.isDistinct()) {
continue;
}
if (!aggCall.getArgList().equals(argList)) {
continue;
}
// Re-map arguments.
final int argCount = aggCall.getArgList().size();
final List<Integer> newArgs = new ArrayList<>(argCount);
for (int j = 0; j < argCount; j++) {
final Integer arg = aggCall.getArgList().get(j);
newArgs.add(sourceOf.get(arg));
}
final int newFilterArg = aggCall.filterArg >= 0 ? sourceOf.get(aggCall.filterArg) : -1;
final AggregateCall newAggCall = AggregateCall.create(aggCall.getAggregation(), false, newArgs, newFilterArg, aggCall.getType(), aggCall.getName());
assert refs.get(i) == null;
if (n == 0) {
refs.set(i, new RexInputRef(groupAndIndicatorCount + aggCallList.size(), newAggCall.getType()));
} else {
refs.set(i, new RexInputRef(leftFields.size() + groupAndIndicatorCount + aggCallList.size(), newAggCall.getType()));
}
aggCallList.add(newAggCall);
}
final Map<Integer, Integer> map = new HashMap<>();
for (Integer key : aggregate.getGroupSet()) {
map.put(key, map.size());
}
final ImmutableBitSet newGroupSet = aggregate.getGroupSet().permute(map);
assert newGroupSet.equals(ImmutableBitSet.range(aggregate.getGroupSet().cardinality()));
ImmutableList<ImmutableBitSet> newGroupingSets = null;
if (aggregate.indicator) {
newGroupingSets = ImmutableBitSet.ORDERING.immutableSortedCopy(ImmutableBitSet.permute(aggregate.getGroupSets(), map));
}
relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), aggregate.indicator, newGroupSet, newGroupingSets, aggCallList));
// If there's no left child yet, no need to create the join
if (n == 0) {
return;
}
// Create the join condition. It is of the form
// 'left.f0 = right.f0 and left.f1 = right.f1 and ...'
// where {f0, f1, ...} are the GROUP BY fields.
final List<RelDataTypeField> distinctFields = relBuilder.peek().getRowType().getFieldList();
final List<RexNode> conditions = Lists.newArrayList();
for (i = 0; i < groupAndIndicatorCount; ++i) {
// null values form its own group
// use "is not distinct from" so that the join condition
// allows null values to match.
conditions.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, RexInputRef.of(i, leftFields), new RexInputRef(leftFields.size() + i, distinctFields.get(i).getType())));
}
// Join in the new 'select distinct' relation.
relBuilder.join(JoinRelType.INNER, conditions);
}
Aggregations