use of org.apache.calcite.util.Pair in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method onMatch.
// ~ Methods ----------------------------------------------------------------
public void onMatch(RelOptRuleCall call) {
final Aggregate aggregate = call.rel(0);
if (!AggregateUtil.containsAccurateDistinctCall(aggregate.getAggCallList())) {
return;
}
// Accurate and approximate distinct calls cannot be mixed in one statement; reject such plans.
if (AggregateUtil.containsApproximateDistinctCall(aggregate.getAggCallList())) {
throw new TableException("There are both Distinct AggCall and Approximate Distinct AggCall in one sql statement, " + "it is not supported yet.\nPlease choose one of them.");
}
// An aggregate with multiple grouping sets is first decomposed into simple aggregates
// by DecomposeGroupingSetsRule; then this rule expands its distinct aggregates.
if (aggregate.getGroupSets().size() > 1) {
return;
}
// Find all of the agg expressions. We use a LinkedHashSet to ensure determinism.
// Find all aggregate calls without distinct
int nonDistinctAggCallCount = 0;
// Find all aggregate calls without distinct but ignore MAX, MIN, BIT_AND, BIT_OR
int nonDistinctAggCallExcludingIgnoredCount = 0;
int filterCount = 0;
int unsupportedNonDistinctAggCallCount = 0;
final Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
for (AggregateCall aggCall : aggregate.getAggCallList()) {
if (aggCall.filterArg >= 0) {
++filterCount;
}
if (!aggCall.isDistinct()) {
++nonDistinctAggCallCount;
final SqlKind aggCallKind = aggCall.getAggregation().getKind();
// We only support COUNT/SUM/MIN/MAX for the "single" count distinct optimization
switch(aggCallKind) {
case COUNT:
case SUM:
case SUM0:
case MIN:
case MAX:
break;
default:
++unsupportedNonDistinctAggCallCount;
}
if (aggCall.getAggregation().getDistinctOptionality() == Optionality.IGNORED) {
argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
} else {
++nonDistinctAggCallExcludingIgnoredCount;
}
} else {
argLists.add(Pair.of(aggCall.getArgList(), aggCall.filterArg));
}
}
final int distinctAggCallCount = aggregate.getAggCallList().size() - nonDistinctAggCallCount;
Preconditions.checkState(argLists.size() > 0, "containsDistinctCall lied");
// If every distinct call shares one (arguments, filter) combination, every non-distinct
// call ignores distinctness anyway (e.g. MIN/MAX), and grouping is simple, a single
// rewrite handles them all, so we can still use this promotion.
if (nonDistinctAggCallExcludingIgnoredCount == 0 && argLists.size() == 1 && aggregate.getGroupType() == Group.SIMPLE) {
final Pair<List<Integer>, Integer> pair = com.google.common.collect.Iterables.getOnlyElement(argLists);
final RelBuilder relBuilder = call.builder();
convertMonopole(relBuilder, aggregate, pair.left, pair.right);
call.transformTo(relBuilder.build());
return;
}
if (useGroupingSets) {
rewriteUsingGroupingSets(call, aggregate);
return;
}
// If there is exactly one distinct aggregate and one or more non-distinct aggregates,
// we can generate multi-phase aggregates.
if (distinctAggCallCount == 1 // one distinct aggregate
        && filterCount == 0 // no filter
        && unsupportedNonDistinctAggCallCount == 0 // sum/min/max/count in non-distinct aggregate
        && nonDistinctAggCallCount > 0) { // one or more non-distinct aggregates
final RelBuilder relBuilder = call.builder();
convertSingletonDistinct(relBuilder, aggregate, argLists);
call.transformTo(relBuilder.build());
return;
}
// Create a list of the expressions which will yield the final result.
// Initially, the expressions point to the input field.
final List<RelDataTypeField> aggFields = aggregate.getRowType().getFieldList();
final List<RexInputRef> refs = new ArrayList<>();
final List<String> fieldNames = aggregate.getRowType().getFieldNames();
final ImmutableBitSet groupSet = aggregate.getGroupSet();
final int groupCount = aggregate.getGroupCount();
for (int i : Util.range(groupCount)) {
refs.add(RexInputRef.of(i, aggFields));
}
// Aggregate the original relation, including any non-distinct aggregates.
final List<AggregateCall> newAggCallList = new ArrayList<>();
int i = -1;
for (AggregateCall aggCall : aggregate.getAggCallList()) {
++i;
if (aggCall.isDistinct()) {
refs.add(null);
continue;
}
refs.add(new RexInputRef(groupCount + newAggCallList.size(), aggFields.get(groupCount + i).getType()));
newAggCallList.add(aggCall);
}
// In the case where there are no non-distinct aggregates (regardless of
// whether there are group bys), there's no need to generate the
// extra aggregate and join.
final RelBuilder relBuilder = call.builder();
relBuilder.push(aggregate.getInput());
int n = 0;
if (!newAggCallList.isEmpty()) {
final RelBuilder.GroupKey groupKey = relBuilder.groupKey(groupSet, aggregate.getGroupSets());
relBuilder.aggregate(groupKey, newAggCallList);
++n;
}
// For each set of operands, add an extra aggregate and join, and rewrite all
// distinct calls that use that set of operands.
for (Pair<List<Integer>, Integer> argList : argLists) {
doRewrite(relBuilder, aggregate, n++, argList.left, argList.right, refs);
}
relBuilder.project(refs, fieldNames);
call.transformTo(relBuilder.build());
}
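The rule above relies on Pair's value-based equals and hashCode to deduplicate distinct aggregate calls that share the same argument list and filter. A minimal standalone sketch of that idea, assuming only org.apache.calcite.util.Pair on the classpath; the class name and the column/filter indexes are made up for illustration:
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.apache.calcite.util.Pair;

public class DistinctArgListSketch {
    public static void main(String[] args) {
        // Each key models (argument column indexes, filter argument) of a distinct agg call.
        Set<Pair<List<Integer>, Integer>> argLists = new LinkedHashSet<>();
        argLists.add(Pair.of(Arrays.asList(2), -1)); // e.g. COUNT(DISTINCT c)
        argLists.add(Pair.of(Arrays.asList(2), -1)); // e.g. SUM(DISTINCT c): same key, deduplicated
        argLists.add(Pair.of(Arrays.asList(2), 5));  // same args but filtered: a separate key
        // Pair's equals/hashCode compare both sides, so only the unique (args, filter)
        // combinations remain, and LinkedHashSet keeps their first-seen order.
        System.out.println(argLists.size()); // prints 2
    }
}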
use of org.apache.calcite.util.Pair in project flink by apache.
the class FlinkRelMdCollation method values.
/**
* Helper method to determine a {@link org.apache.calcite.rel.core.Values}'s collation.
*
* <p>We actually under-report the collations. A Values with 0 or 1 rows - an edge case, but
* legitimate and very common - is ordered by every permutation of every subset of the columns.
*
* <p>So, our algorithm aims to:
*
* <ul>
* <li>produce at most N collations (where N is the number of columns);
* <li>make each collation as long as possible;
* <li>do not repeat combinations already emitted - if we've emitted {@code (a, b)} do not
* later emit {@code (b, a)};
* <li>probe the actual values and make sure that each collation is consistent with the data
* </ul>
*
* <p>So, for an empty Values with 4 columns, we would emit {@code (a, b, c, d), (b, c, d), (c,
* d), (d)}.
*/
public static List<RelCollation> values(RelMetadataQuery mq, RelDataType rowType, com.google.common.collect.ImmutableList<com.google.common.collect.ImmutableList<RexLiteral>> tuples) {
// for future use
Util.discard(mq);
final List<RelCollation> list = new ArrayList<>();
final int n = rowType.getFieldCount();
final List<Pair<RelFieldCollation, com.google.common.collect.Ordering<List<RexLiteral>>>> pairs = new ArrayList<>();
outer: for (int i = 0; i < n; i++) {
pairs.clear();
for (int j = i; j < n; j++) {
final RelFieldCollation fieldCollation = new RelFieldCollation(j);
com.google.common.collect.Ordering<List<RexLiteral>> comparator = comparator(fieldCollation);
com.google.common.collect.Ordering<List<RexLiteral>> ordering;
if (pairs.isEmpty()) {
ordering = comparator;
} else {
ordering = Util.last(pairs).right.compound(comparator);
}
pairs.add(Pair.of(fieldCollation, ordering));
if (!ordering.isOrdered(tuples)) {
if (j == i) {
continue outer;
}
pairs.remove(pairs.size() - 1);
}
}
if (!pairs.isEmpty()) {
list.add(RelCollations.of(Pair.left(pairs)));
}
}
return list;
}
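The last step above keeps only the field-collation side of each (collation, ordering) pair via Pair.left. A small sketch of that projection in isolation, again assuming only org.apache.calcite.util.Pair; the class name and sample data are illustrative:
import java.util.Arrays;
import java.util.List;
import org.apache.calcite.util.Pair;

public class PairProjectionSketch {
    public static void main(String[] args) {
        List<Pair<String, Integer>> pairs =
                Arrays.asList(Pair.of("a", 0), Pair.of("b", 1), Pair.of("c", 2));
        // Pair.left and Pair.right project one side of a list of pairs, which is how
        // RelCollations.of(Pair.left(pairs)) extracts the field collations above.
        List<String> lefts = Pair.left(pairs);    // [a, b, c]
        List<Integer> rights = Pair.right(pairs); // [0, 1, 2]
        System.out.println(lefts + " " + rights);
    }
}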
use of org.apache.calcite.util.Pair in project flink by apache.
the class SqlValidatorImpl method validatePivot.
public void validatePivot(SqlPivot pivot) {
final PivotScope scope = (PivotScope) getJoinScope(pivot);
final PivotNamespace ns = getNamespace(pivot).unwrap(PivotNamespace.class);
assert ns.rowType == null;
// Given
// query PIVOT (agg1 AS a, agg2 AS b, ...
// FOR (axis1, ..., axisN)
// IN ((v11, ..., v1N) AS label1,
// (v21, ..., v2N) AS label2, ...))
// the type is
// k1, ... kN, a_label1, b_label1, ..., a_label2, b_label2, ...
// where k1, ... kN are columns that are not referenced as an argument to
// an aggregate or as an axis.
// Aggregates, e.g. "PIVOT (sum(x) AS sum_x, count(*) AS c)"
final List<Pair<String, RelDataType>> aggNames = new ArrayList<>();
pivot.forEachAgg((alias, call) -> {
call.validate(this, scope);
final RelDataType type = deriveType(scope, call);
aggNames.add(Pair.of(alias, type));
if (!(call instanceof SqlCall) || !(((SqlCall) call).getOperator() instanceof SqlAggFunction)) {
throw newValidationError(call, RESOURCE.pivotAggMalformed());
}
});
// Axes, e.g. "FOR (JOB, DEPTNO)"
final List<RelDataType> axisTypes = new ArrayList<>();
final List<SqlIdentifier> axisIdentifiers = new ArrayList<>();
for (SqlNode axis : pivot.axisList) {
SqlIdentifier identifier = (SqlIdentifier) axis;
identifier.validate(this, scope);
final RelDataType type = deriveType(scope, identifier);
axisTypes.add(type);
axisIdentifiers.add(identifier);
}
// Columns that have been seen as arguments to aggregates or as axes
// do not appear in the output.
final Set<String> columnNames = pivot.usedColumnNames();
final RelDataTypeFactory.Builder typeBuilder = typeFactory.builder();
scope.getChild().getRowType().getFieldList().forEach(field -> {
if (!columnNames.contains(field.getName())) {
typeBuilder.add(field);
}
});
// Values, e.g. "IN (('CLERK', 10) AS c10, ('MANAGER', 20) AS m20)"
pivot.forEachNameValues((alias, nodeList) -> {
if (nodeList.size() != axisTypes.size()) {
throw newValidationError(nodeList, RESOURCE.pivotValueArityMismatch(nodeList.size(), axisTypes.size()));
}
final SqlOperandTypeChecker typeChecker = OperandTypes.COMPARABLE_UNORDERED_COMPARABLE_UNORDERED;
Pair.forEach(axisIdentifiers, nodeList, (identifier, subNode) -> {
subNode.validate(this, scope);
typeChecker.checkOperandTypes(new SqlCallBinding(this, scope, SqlStdOperatorTable.EQUALS.createCall(subNode.getParserPosition(), identifier, subNode)), true);
});
Pair.forEach(aggNames, (aggAlias, aggType) -> typeBuilder.add(aggAlias == null ? alias : alias + "_" + aggAlias, aggType));
});
final RelDataType rowType = typeBuilder.build();
ns.setType(rowType);
}
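validatePivot uses both Pair.forEach overloads: one zips two parallel lists (the axis identifiers against the values in each IN entry), the other walks a list of pairs (the aggregate aliases and types). A minimal sketch of the two forms, assuming the same Pair.forEach overloads the snippet itself calls; the class name and sample data are placeholders:
import java.util.Arrays;
import java.util.List;
import org.apache.calcite.util.Pair;

public class PairForEachSketch {
    public static void main(String[] args) {
        // Zip-style iteration over two parallel lists, as with (axisIdentifiers, nodeList) above.
        List<String> axes = Arrays.asList("JOB", "DEPTNO");
        List<Object> values = Arrays.asList("CLERK", 10);
        Pair.forEach(axes, values, (axis, value) -> System.out.println(axis + " = " + value));

        // Iteration over a list of pairs, as with the (alias, type) entries in aggNames above.
        List<Pair<String, String>> aggNames =
                Arrays.asList(Pair.of("sum_x", "DECIMAL"), Pair.of("c", "BIGINT"));
        Pair.forEach(aggNames, (alias, type) -> System.out.println(alias + " " + type));
    }
}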
use of org.apache.calcite.util.Pair in project flink by apache.
the class RelDecorrelator method aggregateCorrelatorOutput.
/**
* Pulls a {@link Project} above a {@link Correlate} from its RHS input. Enforces nullability
* for join output.
*
* @param correlate Correlate
* @param project the original project as the RHS input of the join
* @param isCount Positions which are calls to the <code>COUNT</code> aggregation function
* @return the subtree with the new Project at the root
*/
private RelNode aggregateCorrelatorOutput(Correlate correlate, Project project, Set<Integer> isCount) {
final RelNode left = correlate.getLeft();
final JoinRelType joinType = correlate.getJoinType();
// now create the new project
final List<Pair<RexNode, String>> newProjects = new ArrayList<>();
// Project everything from the LHS and then those from the original
// project
final List<RelDataTypeField> leftInputFields = left.getRowType().getFieldList();
for (int i = 0; i < leftInputFields.size(); i++) {
newProjects.add(RexInputRef.of2(i, leftInputFields));
}
// Mark where each projected expression comes from, so that the types become nullable
// for the original projections, which now come out of the nullable side of the
// outer join.
boolean projectPulledAboveLeftCorrelator = joinType.generatesNullsOnRight();
for (Pair<RexNode, String> pair : project.getNamedProjects()) {
RexNode newProjExpr = removeCorrelationExpr(pair.left, projectPulledAboveLeftCorrelator, isCount);
newProjects.add(Pair.of(newProjExpr, pair.right));
}
return relBuilder.push(correlate).projectNamed(Pair.left(newProjects), Pair.right(newProjects), true).build();
}
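The final relBuilder call splits the accumulated (expression, name) pairs back into two parallel lists with Pair.left and Pair.right. A self-contained sketch of that round trip using Pair.zip, with plain strings standing in for RexNode expressions; the class name and sample values are illustrative:
import java.util.Arrays;
import java.util.List;
import org.apache.calcite.util.Pair;

public class NamedProjectsSketch {
    public static void main(String[] args) {
        List<String> exprs = Arrays.asList("$0", "$1", "CAST($2):BIGINT");
        List<String> names = Arrays.asList("deptno", "name", "c");
        // Pair.zip combines two parallel lists into (expr, name) pairs ...
        List<Pair<String, String>> namedProjects = Pair.zip(exprs, names);
        // ... and Pair.left / Pair.right split them apart again, mirroring
        // projectNamed(Pair.left(newProjects), Pair.right(newProjects), true) above.
        System.out.println(Pair.left(namedProjects));  // [$0, $1, CAST($2):BIGINT]
        System.out.println(Pair.right(namedProjects)); // [deptno, name, c]
    }
}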
use of org.apache.calcite.util.Pair in project beam by apache.
the class IOPushDownRuleTest method testIsProjectRenameOnlyProgram.
@Test
public void testIsProjectRenameOnlyProgram() {
List<Pair<Pair<String, Boolean>, Boolean>> tests =
        ImmutableList.of(
                // Selecting fields in a different order is only allowed with project push-down.
                Pair.of(Pair.of("select unused2, name, id from TEST", true), true),
                Pair.of(Pair.of("select unused2, name, id from TEST", false), false),
                Pair.of(Pair.of("select id from TEST", false), true),
                Pair.of(Pair.of("select * from TEST", false), true),
                Pair.of(Pair.of("select id, name from TEST", false), true),
                Pair.of(Pair.of("select id+10 from TEST", false), false),
                // Note that we only care about projects.
                Pair.of(Pair.of("select id from TEST where name='one'", false), true));
for (Pair<Pair<String, Boolean>, Boolean> test : tests) {
String sqlQuery = test.left.left;
boolean projectPushDownSupported = test.left.right;
boolean expectedAnswer = test.right;
BeamRelNode basicRel = sqlEnv.parseQuery(sqlQuery);
assertThat(basicRel, instanceOf(Calc.class));
Calc calc = (Calc) basicRel;
assertThat(test.toString(), BeamIOPushDownRule.INSTANCE.isProjectRenameOnlyProgram(calc.getProgram(), projectPushDownSupported), equalTo(expectedAnswer));
}
}
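The nested Pair<Pair<String, Boolean>, Boolean> fixture bundles a query and a push-down flag with the expected result, unpacked through Pair's public left and right fields. A tiny sketch of that access pattern with an illustrative entry:
import org.apache.calcite.util.Pair;

public class NestedPairSketch {
    public static void main(String[] args) {
        // ((sql, projectPushDownSupported), expectedAnswer), mirroring the test fixture above.
        Pair<Pair<String, Boolean>, Boolean> test =
                Pair.of(Pair.of("select id from TEST", false), true);
        String sqlQuery = test.left.left;
        boolean projectPushDownSupported = test.left.right;
        boolean expectedAnswer = test.right;
        System.out.println(sqlQuery + " -> " + projectPushDownSupported + ", " + expectedAnswer);
    }
}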