use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollation in project drill by apache.
the class MetadataAggPrule method onMatch.
@Override
public void onMatch(RelOptRuleCall call) {
  MetadataAggRel aggregate = call.rel(0);
  RelNode input = aggregate.getInput();

  int groupByExprsSize = aggregate.getContext().groupByExpressions().size();

  List<RelFieldCollation> collations = new ArrayList<>();
  List<String> names = new ArrayList<>();
  for (int i = 0; i < groupByExprsSize; i++) {
    collations.add(new RelFieldCollation(i + 1));
    SchemaPath fieldPath = getArgumentReference(aggregate.getContext().groupByExpressions().get(i));
    names.add(fieldPath.getRootSegmentPath());
  }

  RelCollation collation = new NamedRelCollation(collations, names);

  RelTraitSet traits;

  if (aggregate.getContext().groupByExpressions().isEmpty()) {
    DrillDistributionTrait singleDist = DrillDistributionTrait.SINGLETON;
    RelTraitSet singleDistTrait =
        call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL).plus(singleDist);

    createTransformRequest(call, aggregate, input, singleDistTrait);
  } else {
    // hash distribute on all grouping keys
    DrillDistributionTrait distOnAllKeys =
        new DrillDistributionTrait(DrillDistributionTrait.DistributionType.HASH_DISTRIBUTED,
            ImmutableList.copyOf(getDistributionFields(aggregate.getContext().groupByExpressions())));

    PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
    boolean smallInput =
        input.estimateRowCount(input.getCluster().getMetadataQuery()) < settings.getSliceTarget();

    // produce a sort locally before the aggregation for large inputs
    if (aggregate.getContext().createNewAggregations() && !smallInput) {
      traits = call.getPlanner().emptyTraitSet().plus(Prel.DRILL_PHYSICAL);
      RelNode convertedInput = convert(input, traits);

      new TwoPhaseMetadataAggSubsetTransformer(call, collation, distOnAllKeys)
          .go(aggregate, convertedInput);
    } else {
      // TODO: DRILL-7433 - replace DrillDistributionTrait.SINGLETON with distOnAllKeys
      //  when parallelization for MetadataHandler is implemented
      traits = call.getPlanner().emptyTraitSet()
          .plus(Prel.DRILL_PHYSICAL)
          .plus(collation)
          .plus(DrillDistributionTrait.SINGLETON);
      createTransformRequest(call, aggregate, input, traits);
    }
  }
}
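The rule stays single-phase for an empty group-by or a small input and otherwise sorts locally before a two-phase aggregation. Below is a minimal, self-contained sketch of how the grouping keys become a collation, using plain Calcite's RelCollations.of in place of Drill's NamedRelCollation; the class name and key names are illustrative, and the field indexes are shifted by one exactly as in the loop above:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;

public class GroupKeyCollationSketch {
  public static void main(String[] args) {
    // hypothetical group-by key names standing in for getArgumentReference(...) results
    List<String> names = Arrays.asList("`dir0`", "`columnName`");

    List<RelFieldCollation> collations = new ArrayList<>();
    for (int i = 0; i < names.size(); i++) {
      // mirrors the rule above: collation indexes start at i + 1, not i
      collations.add(new RelFieldCollation(i + 1));
    }

    RelCollation collation = RelCollations.of(collations);
    System.out.println(collation + " for keys " + names); // prints the shifted indexes, e.g. [1, 2]
  }
}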
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollation in project flink by apache.
the class HiveParserCalcitePlanner method genLimitLogicalPlan.
private Sort genLimitLogicalPlan(HiveParserQB qb, RelNode srcRel) throws SemanticException {
  Sort sortRel = null;
  HiveParserQBParseInfo qbp = qb.getParseInfo();
  AbstractMap.SimpleEntry<Integer, Integer> entry =
      qbp.getDestToLimit().get(qbp.getClauseNames().iterator().next());
  int offset = (entry == null) ? 0 : entry.getKey();
  Integer fetch = (entry == null) ? null : entry.getValue();

  if (fetch != null) {
    RexNode offsetRex = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(offset));
    RexNode fetchRex = cluster.getRexBuilder().makeExactLiteral(BigDecimal.valueOf(fetch));
    RelTraitSet traitSet = cluster.traitSet();
    RelCollation canonizedCollation = traitSet.canonize(RelCollations.EMPTY);
    sortRel = LogicalSort.create(srcRel, canonizedCollation, offsetRex, fetchRex);

    HiveParserRowResolver outputRR = new HiveParserRowResolver();
    if (!HiveParserRowResolver.add(outputRR, relToRowResolver.get(srcRel))) {
      throw new SemanticException(
          "Duplicates detected when adding columns to RR: see previous message");
    }
    Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
    relToRowResolver.put(sortRel, outputRR);
    relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
  }

  return sortRel;
}
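Note that a LogicalSort is created only when a fetch value exists; it carries an empty collation, so it expresses pure LIMIT/OFFSET with no ordering. A tiny sketch of the offset/fetch extraction, with an illustrative entry standing in for what qbp.getDestToLimit() would hold for LIMIT 10 OFFSET 5 (the key holds the offset and the value the fetch, as in the method above):

import java.util.AbstractMap;

public class LimitEntrySketch {
  public static void main(String[] args) {
    // parser-side representation of "LIMIT 10 OFFSET 5": key = offset, value = fetch
    AbstractMap.SimpleEntry<Integer, Integer> entry = new AbstractMap.SimpleEntry<>(5, 10);

    int offset = entry.getKey();
    Integer fetch = entry.getValue();

    System.out.println("offset=" + offset + ", fetch=" + fetch); // offset=5, fetch=10
  }
}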
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollation in project flink by apache.
the class HiveParserCalcitePlanner method genOBLogicalPlan.
private Pair<Sort, RelNode> genOBLogicalPlan(HiveParserQB qb, RelNode srcRel, boolean outermostOB)
    throws SemanticException {
  Sort sortRel = null;
  RelNode originalOBInput = null;
  HiveParserQBParseInfo qbp = qb.getParseInfo();
  String dest = qbp.getClauseNames().iterator().next();
  HiveParserASTNode obAST = qbp.getOrderByForClause(dest);

  if (obAST != null) {
    // 1. OB Expr sanity test
    // in strict mode, in the presence of order by, limit must be specified
    Integer limit = qb.getParseInfo().getDestLimit(dest);
    if (limit == null) {
      String mapRedMode = semanticAnalyzer.getConf().getVar(HiveConf.ConfVars.HIVEMAPREDMODE);
      boolean banLargeQuery =
          Boolean.parseBoolean(
              semanticAnalyzer.getConf().get("hive.strict.checks.large.query", "false"));
      if ("strict".equalsIgnoreCase(mapRedMode) || banLargeQuery) {
        throw new SemanticException(generateErrorMessage(obAST, "Order by-s without limit"));
      }
    }

    // 2. Walk through OB exprs and extract field collations and additional
    // virtual columns needed
    final List<RexNode> virtualCols = new ArrayList<>();
    final List<RelFieldCollation> fieldCollations = new ArrayList<>();
    int fieldIndex;

    List<Node> obASTExprLst = obAST.getChildren();
    HiveParserASTNode obASTExpr;
    HiveParserASTNode nullOrderASTExpr;
    List<Pair<HiveParserASTNode, TypeInfo>> vcASTAndType = new ArrayList<>();
    HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
    HiveParserRowResolver outputRR = new HiveParserRowResolver();

    HiveParserRexNodeConverter converter =
        new HiveParserRexNodeConverter(
            cluster,
            srcRel.getRowType(),
            relToHiveColNameCalcitePosMap.get(srcRel),
            0,
            false,
            funcConverter);
    int numSrcFields = srcRel.getRowType().getFieldCount();

    for (Node node : obASTExprLst) {
      // 2.1 Convert AST Expr to ExprNode
      obASTExpr = (HiveParserASTNode) node;
      nullOrderASTExpr = (HiveParserASTNode) obASTExpr.getChild(0);
      HiveParserASTNode ref = (HiveParserASTNode) nullOrderASTExpr.getChild(0);
      Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc =
          semanticAnalyzer.genAllExprNodeDesc(ref, inputRR);
      ExprNodeDesc obExprNodeDesc = astToExprNodeDesc.get(ref);
      if (obExprNodeDesc == null) {
        throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
      }

      // 2.2 Convert ExprNode to RexNode
      RexNode rexNode = converter.convert(obExprNodeDesc).accept(funcConverter);

      // 2.3 Determine the index of the OB expr in the child schema; Calcite cannot
      // take compound exprs in OB without them being
      // present in the child (& hence we add a child Project Rel)
      if (rexNode instanceof RexInputRef) {
        fieldIndex = ((RexInputRef) rexNode).getIndex();
      } else {
        fieldIndex = numSrcFields + virtualCols.size();
        virtualCols.add(rexNode);
        vcASTAndType.add(new Pair<>(ref, obExprNodeDesc.getTypeInfo()));
      }

      // 2.4 Determine the Direction of order by
      RelFieldCollation.Direction direction = RelFieldCollation.Direction.DESCENDING;
      if (obASTExpr.getType() == HiveASTParser.TOK_TABSORTCOLNAMEASC) {
        direction = RelFieldCollation.Direction.ASCENDING;
      }
      RelFieldCollation.NullDirection nullOrder;
      if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_FIRST) {
        nullOrder = RelFieldCollation.NullDirection.FIRST;
      } else if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_LAST) {
        nullOrder = RelFieldCollation.NullDirection.LAST;
      } else {
        throw new SemanticException(
            "Unexpected null ordering option: " + nullOrderASTExpr.getType());
      }

      // 2.5 Add to field collations
      fieldCollations.add(new RelFieldCollation(fieldIndex, direction, nullOrder));
    }

    // 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
    // for top constraining Sel
    RelNode obInputRel = srcRel;
    if (!virtualCols.isEmpty()) {
      List<RexNode> originalInputRefs =
          srcRel.getRowType().getFieldList().stream()
              .map(input -> new RexInputRef(input.getIndex(), input.getType()))
              .collect(Collectors.toList());
      HiveParserRowResolver obSyntheticProjectRR = new HiveParserRowResolver();
      if (!HiveParserRowResolver.add(obSyntheticProjectRR, inputRR)) {
        throw new SemanticException(
            "Duplicates detected when adding columns to RR: see previous message");
      }
      int vcolPos = inputRR.getRowSchema().getSignature().size();
      for (Pair<HiveParserASTNode, TypeInfo> astTypePair : vcASTAndType) {
        obSyntheticProjectRR.putExpression(
            astTypePair.getKey(),
            new ColumnInfo(getColumnInternalName(vcolPos), astTypePair.getValue(), null, false));
        vcolPos++;
      }
      obInputRel =
          genSelectRelNode(
              CompositeList.of(originalInputRefs, virtualCols), obSyntheticProjectRR, srcRel);

      if (outermostOB) {
        if (!HiveParserRowResolver.add(outputRR, inputRR)) {
          throw new SemanticException(
              "Duplicates detected when adding columns to RR: see previous message");
        }
      } else {
        if (!HiveParserRowResolver.add(outputRR, obSyntheticProjectRR)) {
          throw new SemanticException(
              "Duplicates detected when adding columns to RR: see previous message");
        }
      }
      originalOBInput = srcRel;
    } else {
      if (!HiveParserRowResolver.add(outputRR, inputRR)) {
        throw new SemanticException(
            "Duplicates detected when adding columns to RR: see previous message");
      }
    }

    // 4. Construct SortRel
    RelTraitSet traitSet = cluster.traitSet();
    RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
    sortRel = LogicalSort.create(obInputRel, canonizedCollation, null, null);

    // 5. Update the maps
    Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
    relToRowResolver.put(sortRel, outputRR);
    relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
  }

  return new Pair<>(sortRel, originalOBInput);
}
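Each ORDER BY item therefore contributes one RelFieldCollation carrying its field index, direction, and explicit null ordering, and the list is canonized into the sort's RelCollation. A minimal sketch of the collation produced for a hypothetical ORDER BY col0 DESC NULLS LAST, col2 ASC NULLS FIRST, assuming both keys are direct input refs (the class name and field indexes are illustrative):

import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;

public class OrderByCollationSketch {
  public static void main(String[] args) {
    // "col0 DESC NULLS LAST" -> field 0, descending, nulls last
    RelFieldCollation first = new RelFieldCollation(
        0, RelFieldCollation.Direction.DESCENDING, RelFieldCollation.NullDirection.LAST);
    // "col2 ASC NULLS FIRST" -> field 2, ascending, nulls first
    RelFieldCollation second = new RelFieldCollation(
        2, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.FIRST);

    RelCollation collation = RelCollations.of(first, second);
    System.out.println(collation.getFieldCollations());
  }
}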
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollation in project flink by apache.
the class FlinkRelMdCollation method project.
/**
 * Helper method to determine a {@link Project}'s collation.
 */
public static List<RelCollation> project(
    RelMetadataQuery mq, RelNode input, List<? extends RexNode> projects) {
  final SortedSet<RelCollation> collations = new TreeSet<>();
  final List<RelCollation> inputCollations = mq.collations(input);
  if (inputCollations == null || inputCollations.isEmpty()) {
    return com.google.common.collect.ImmutableList.of();
  }
  final com.google.common.collect.Multimap<Integer, Integer> targets =
      com.google.common.collect.LinkedListMultimap.create();
  final Map<Integer, SqlMonotonicity> targetsWithMonotonicity = new HashMap<>();
  for (Ord<RexNode> project : Ord.<RexNode>zip(projects)) {
    if (project.e instanceof RexInputRef) {
      targets.put(((RexInputRef) project.e).getIndex(), project.i);
    } else if (project.e instanceof RexCall) {
      final RexCall call = (RexCall) project.e;
      final RexCallBinding binding =
          RexCallBinding.create(input.getCluster().getTypeFactory(), call, inputCollations);
      targetsWithMonotonicity.put(project.i, call.getOperator().getMonotonicity(binding));
    }
  }
  final List<RelFieldCollation> fieldCollations = new ArrayList<>();
  loop:
  for (RelCollation ic : inputCollations) {
    if (ic.getFieldCollations().isEmpty()) {
      continue;
    }
    fieldCollations.clear();
    for (RelFieldCollation ifc : ic.getFieldCollations()) {
      final Collection<Integer> integers = targets.get(ifc.getFieldIndex());
      if (integers.isEmpty()) {
        // cannot do this collation
        continue loop;
      }
      fieldCollations.add(ifc.withFieldIndex(integers.iterator().next()));
    }
    assert !fieldCollations.isEmpty();
    collations.add(RelCollations.of(fieldCollations));
  }
  final List<RelFieldCollation> fieldCollationsForRexCalls = new ArrayList<>();
  for (Map.Entry<Integer, SqlMonotonicity> entry : targetsWithMonotonicity.entrySet()) {
    final SqlMonotonicity value = entry.getValue();
    switch (value) {
      case NOT_MONOTONIC:
      case CONSTANT:
        break;
      default:
        fieldCollationsForRexCalls.add(
            new RelFieldCollation(entry.getKey(), RelFieldCollation.Direction.of(value)));
        break;
    }
  }
  if (!fieldCollationsForRexCalls.isEmpty()) {
    collations.add(RelCollations.of(fieldCollationsForRexCalls));
  }
  return com.google.common.collect.ImmutableList.copyOf(collations);
}
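The core of the method is the remapping loop: an input collation survives the projection only if every one of its sort keys is still present in the output, in which case each field index is rewritten to its output position. A self-contained sketch of that step, with a plain HashMap standing in for Guava's Multimap and an illustrative mapping (input is sorted by fields 2 then 0; the projection "SELECT c, a" puts them at positions 0 and 1):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.calcite.rel.RelCollation;
import org.apache.calcite.rel.RelCollations;
import org.apache.calcite.rel.RelFieldCollation;

public class ProjectCollationSketch {
  public static void main(String[] args) {
    // input is sorted by input field 2, then input field 0
    RelCollation inputCollation =
        RelCollations.of(new RelFieldCollation(2), new RelFieldCollation(0));

    // projection maps input field 2 -> output 0, input field 0 -> output 1
    Map<Integer, Integer> targets = new HashMap<>();
    targets.put(2, 0);
    targets.put(0, 1);

    List<RelFieldCollation> remapped = new ArrayList<>();
    for (RelFieldCollation ifc : inputCollation.getFieldCollations()) {
      Integer target = targets.get(ifc.getFieldIndex());
      if (target == null) {
        // a sort key was projected away: the collation cannot be propagated
        remapped.clear();
        break;
      }
      remapped.add(ifc.withFieldIndex(target));
    }

    System.out.println(RelCollations.of(remapped)); // prints [0, 1]
  }
}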
use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.rel.RelCollation in project flink by apache.
the class FlinkRelMdCollation method values.
/**
 * Helper method to determine a {@link org.apache.calcite.rel.core.Values}'s collation.
 *
 * <p>We actually under-report the collations. A Values with 0 or 1 rows - an edge case, but
 * legitimate and very common - is ordered by every permutation of every subset of the columns.
 *
 * <p>So, our algorithm aims to:
 *
 * <ul>
 *   <li>produce at most N collations (where N is the number of columns);
 *   <li>make each collation as long as possible;
 *   <li>do not repeat combinations already emitted - if we've emitted {@code (a, b)} do not
 *       later emit {@code (b, a)};
 *   <li>probe the actual values and make sure that each collation is consistent with the data
 * </ul>
 *
 * <p>So, for an empty Values with 4 columns, we would emit {@code (a, b, c, d), (b, c, d),
 * (c, d), (d)}.
 */
public static List<RelCollation> values(
    RelMetadataQuery mq,
    RelDataType rowType,
    com.google.common.collect.ImmutableList<com.google.common.collect.ImmutableList<RexLiteral>> tuples) {
  // for future use
  Util.discard(mq);
  final List<RelCollation> list = new ArrayList<>();
  final int n = rowType.getFieldCount();
  final List<Pair<RelFieldCollation, com.google.common.collect.Ordering<List<RexLiteral>>>> pairs =
      new ArrayList<>();
  outer:
  for (int i = 0; i < n; i++) {
    pairs.clear();
    for (int j = i; j < n; j++) {
      final RelFieldCollation fieldCollation = new RelFieldCollation(j);
      com.google.common.collect.Ordering<List<RexLiteral>> comparator = comparator(fieldCollation);
      com.google.common.collect.Ordering<List<RexLiteral>> ordering;
      if (pairs.isEmpty()) {
        ordering = comparator;
      } else {
        ordering = Util.last(pairs).right.compound(comparator);
      }
      pairs.add(Pair.of(fieldCollation, ordering));
      if (!ordering.isOrdered(tuples)) {
        if (j == i) {
          continue outer;
        }
        pairs.remove(pairs.size() - 1);
      }
    }
    if (!pairs.isEmpty()) {
      list.add(RelCollations.of(Pair.left(pairs)));
    }
  }
  return list;
}
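The probing step relies on Guava orderings: a candidate collation prefix is kept only while the compound ordering still reports the tuples as sorted. A minimal sketch of that check, with Integer rows standing in for RexLiteral tuples (class name and data are illustrative):

import com.google.common.collect.Ordering;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class ValuesCollationSketch {
  public static void main(String[] args) {
    // rows of a hypothetical two-column Values
    List<List<Integer>> tuples = Arrays.asList(
        Arrays.asList(1, 5),
        Arrays.asList(1, 7),
        Arrays.asList(2, 3));

    Ordering<List<Integer>> byCol0 =
        Ordering.from(Comparator.comparing((List<Integer> row) -> row.get(0)));
    Ordering<List<Integer>> byCol0ThenCol1 =
        byCol0.compound(Comparator.comparing((List<Integer> row) -> row.get(1)));
    Ordering<List<Integer>> byCol1 =
        Ordering.from(Comparator.comparing((List<Integer> row) -> row.get(1)));

    // the data is ordered by column 0, and also by (column 0, column 1),
    // so the algorithm would keep extending the collation prefix
    System.out.println(byCol0.isOrdered(tuples));         // true
    System.out.println(byCol0ThenCol1.isOrdered(tuples)); // true

    // but it is not ordered by column 1 alone, so no collation starts there
    System.out.println(byCol1.isOrdered(tuples));         // false
  }
}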