use of org.apache.calcite.util.Pair in project drill by apache.
the class MapRDBStatistics method computeSelectivity.
/*
* Compute the selectivity of the given rowCondition. Retrieve the selectivity
* for index conditions from the cache
*/
private Pair<Double, Boolean> computeSelectivity(RexNode condition, IndexDescriptor idx, double totalRows, RelNode scanRel, Map<String, Double> baseConditionMap) {
double selectivity;
boolean guess = false;
if (totalRows <= 0) {
return new Pair<>(1.0, true);
}
String conditionAsStr = convertRexToString(condition, scanRel.getRowType());
if (condition.getKind() == SqlKind.AND) {
selectivity = 1.0;
for (RexNode pred : RelOptUtil.conjunctions(condition)) {
Pair<Double, Boolean> selPayload = computeSelectivity(pred, idx, totalRows, scanRel, baseConditionMap);
if (selPayload.left > 0) {
// At least one AND branch is a guess
if (selPayload.right == true) {
guess = true;
}
selectivity *= selPayload.left;
}
}
} else if (condition.getKind() == SqlKind.OR) {
selectivity = 0.0;
for (RexNode pred : RelOptUtil.disjunctions(condition)) {
Pair<Double, Boolean> selPayload = computeSelectivity(pred, idx, totalRows, scanRel, baseConditionMap);
if (selPayload.left > 0.0) {
// At least one OR branch is a guess
if (selPayload.right == true) {
guess = true;
}
selectivity += selPayload.left;
}
}
// Cap selectivity of OR'ed predicates at 0.25 if at least one predicate is a guess (Calcite does the same)
if (guess && selectivity > 0.25) {
selectivity = 0.25;
}
} else {
guess = false;
if (baseConditionMap.get(conditionAsStr) != null) {
double rowCount = baseConditionMap.get(conditionAsStr);
if (rowCount != -1.0) {
selectivity = rowCount / totalRows;
} else {
// Ignore
selectivity = -1.0;
guess = true;
}
} else {
selectivity = RelMdUtil.guessSelectivity(condition);
guess = true;
}
return new Pair<>(selectivity, guess);
}
// Cap selectivity to be between 0.0 and 1.0
selectivity = Math.min(1.0, selectivity);
selectivity = Math.max(0.0, selectivity);
logger.debug("Statistics: computeSelectivity: Cache MISS: Computed {} -> {}", conditionAsStr, selectivity);
return new Pair<>(selectivity, guess);
}
use of org.apache.calcite.util.Pair in project drill by apache.
the class IndexIntersectPlanGenerator method buildRestrictedDBScan.
private Pair<RelNode, DbGroupScan> buildRestrictedDBScan(RexNode remnant, boolean isAnyIndexAsync) {
DbGroupScan origDbGroupScan = (DbGroupScan) IndexPlanUtils.getGroupScan(origScan);
List<SchemaPath> cols = new ArrayList<SchemaPath>(origDbGroupScan.getColumns());
if (!checkRowKey(cols)) {
cols.add(origDbGroupScan.getRowKeyPath());
}
// Create a restricted groupscan from the primary table's groupscan
DbGroupScan restrictedGroupScan = origDbGroupScan.getRestrictedScan(cols);
if (restrictedGroupScan == null) {
logger.error("Null restricted groupscan in IndexIntersectPlanGenerator.convertChild");
return null;
}
DrillDistributionTrait partition = IndexPlanUtils.scanIsPartition(IndexPlanUtils.getGroupScan(origScan)) ? DrillDistributionTrait.RANDOM_DISTRIBUTED : DrillDistributionTrait.SINGLETON;
RelNode lastRelNode;
RelDataType dbscanRowType = convertRowType(origScan.getRowType(), origScan.getCluster().getTypeFactory());
ScanPrel dbScan = new ScanPrel(origScan.getCluster(), origScan.getTraitSet().plus(Prel.DRILL_PHYSICAL).plus(partition), restrictedGroupScan, dbscanRowType, origScan.getTable());
lastRelNode = dbScan;
// build the row type for the left Project
List<RexNode> leftProjectExprs = Lists.newArrayList();
int leftRowKeyIndex = getRowKeyIndex(dbScan.getRowType(), origScan);
final RelDataTypeField leftRowKeyField = dbScan.getRowType().getFieldList().get(leftRowKeyIndex);
final RelDataTypeFactory.FieldInfoBuilder leftFieldTypeBuilder = dbScan.getCluster().getTypeFactory().builder();
FilterPrel leftIndexFilterPrel = null;
// full original condition.
if (isAnyIndexAsync) {
new FilterPrel(dbScan.getCluster(), dbScan.getTraitSet(), dbScan, indexContext.getOrigCondition());
lastRelNode = leftIndexFilterPrel;
}
// new Project's rowtype is original Project's rowtype [plus rowkey if rowkey is not in original rowtype]
ProjectPrel leftIndexProjectPrel = null;
if (origProject != null) {
RelDataType origRowType = origProject.getRowType();
List<RelDataTypeField> origProjFields = origRowType.getFieldList();
leftFieldTypeBuilder.addAll(origProjFields);
// get the exprs from the original Project IFF there is a project
leftProjectExprs.addAll(IndexPlanUtils.getProjects(origProject));
// add the rowkey IFF rowkey is not in orig scan
if (getRowKeyIndex(origRowType, origScan) < 0) {
leftFieldTypeBuilder.add(leftRowKeyField);
leftProjectExprs.add(RexInputRef.of(leftRowKeyIndex, dbScan.getRowType()));
}
final RelDataType leftProjectRowType = leftFieldTypeBuilder.build();
leftIndexProjectPrel = new ProjectPrel(dbScan.getCluster(), dbScan.getTraitSet(), leftIndexFilterPrel == null ? dbScan : leftIndexFilterPrel, leftProjectExprs, leftProjectRowType);
lastRelNode = leftIndexProjectPrel;
}
final RelTraitSet leftTraits = dbScan.getTraitSet().plus(Prel.DRILL_PHYSICAL);
// final RelNode convertedLeft = convert(leftIndexProjectPrel, leftTraits);
final RelNode convertedLeft = Prule.convert(lastRelNode, leftTraits);
return new Pair<>(convertedLeft, restrictedGroupScan);
}
use of org.apache.calcite.util.Pair in project flink by apache.
the class HiveParserCalcitePlanner method genOBLogicalPlan.
private Pair<Sort, RelNode> genOBLogicalPlan(HiveParserQB qb, RelNode srcRel, boolean outermostOB) throws SemanticException {
Sort sortRel = null;
RelNode originalOBInput = null;
HiveParserQBParseInfo qbp = qb.getParseInfo();
String dest = qbp.getClauseNames().iterator().next();
HiveParserASTNode obAST = qbp.getOrderByForClause(dest);
if (obAST != null) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be specified
Integer limit = qb.getParseInfo().getDestLimit(dest);
if (limit == null) {
String mapRedMode = semanticAnalyzer.getConf().getVar(HiveConf.ConfVars.HIVEMAPREDMODE);
boolean banLargeQuery = Boolean.parseBoolean(semanticAnalyzer.getConf().get("hive.strict.checks.large.query", "false"));
if ("strict".equalsIgnoreCase(mapRedMode) || banLargeQuery) {
throw new SemanticException(generateErrorMessage(obAST, "Order by-s without limit"));
}
}
// 2. Walk through OB exprs and extract field collations and additional
// virtual columns needed
final List<RexNode> virtualCols = new ArrayList<>();
final List<RelFieldCollation> fieldCollations = new ArrayList<>();
int fieldIndex;
List<Node> obASTExprLst = obAST.getChildren();
HiveParserASTNode obASTExpr;
HiveParserASTNode nullOrderASTExpr;
List<Pair<HiveParserASTNode, TypeInfo>> vcASTAndType = new ArrayList<>();
HiveParserRowResolver inputRR = relToRowResolver.get(srcRel);
HiveParserRowResolver outputRR = new HiveParserRowResolver();
HiveParserRexNodeConverter converter = new HiveParserRexNodeConverter(cluster, srcRel.getRowType(), relToHiveColNameCalcitePosMap.get(srcRel), 0, false, funcConverter);
int numSrcFields = srcRel.getRowType().getFieldCount();
for (Node node : obASTExprLst) {
// 2.1 Convert AST Expr to ExprNode
obASTExpr = (HiveParserASTNode) node;
nullOrderASTExpr = (HiveParserASTNode) obASTExpr.getChild(0);
HiveParserASTNode ref = (HiveParserASTNode) nullOrderASTExpr.getChild(0);
Map<HiveParserASTNode, ExprNodeDesc> astToExprNodeDesc = semanticAnalyzer.genAllExprNodeDesc(ref, inputRR);
ExprNodeDesc obExprNodeDesc = astToExprNodeDesc.get(ref);
if (obExprNodeDesc == null) {
throw new SemanticException("Invalid order by expression: " + obASTExpr.toString());
}
// 2.2 Convert ExprNode to RexNode
RexNode rexNode = converter.convert(obExprNodeDesc).accept(funcConverter);
// present in the child (& hence we add a child Project Rel)
if (rexNode instanceof RexInputRef) {
fieldIndex = ((RexInputRef) rexNode).getIndex();
} else {
fieldIndex = numSrcFields + virtualCols.size();
virtualCols.add(rexNode);
vcASTAndType.add(new Pair<>(ref, obExprNodeDesc.getTypeInfo()));
}
// 2.4 Determine the Direction of order by
RelFieldCollation.Direction direction = RelFieldCollation.Direction.DESCENDING;
if (obASTExpr.getType() == HiveASTParser.TOK_TABSORTCOLNAMEASC) {
direction = RelFieldCollation.Direction.ASCENDING;
}
RelFieldCollation.NullDirection nullOrder;
if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_FIRST) {
nullOrder = RelFieldCollation.NullDirection.FIRST;
} else if (nullOrderASTExpr.getType() == HiveASTParser.TOK_NULLS_LAST) {
nullOrder = RelFieldCollation.NullDirection.LAST;
} else {
throw new SemanticException("Unexpected null ordering option: " + nullOrderASTExpr.getType());
}
// 2.5 Add to field collations
fieldCollations.add(new RelFieldCollation(fieldIndex, direction, nullOrder));
}
// 3. Add Child Project Rel if needed, Generate Output RR, input Sel Rel
// for top constraining Sel
RelNode obInputRel = srcRel;
if (!virtualCols.isEmpty()) {
List<RexNode> originalInputRefs = srcRel.getRowType().getFieldList().stream().map(input -> new RexInputRef(input.getIndex(), input.getType())).collect(Collectors.toList());
HiveParserRowResolver obSyntheticProjectRR = new HiveParserRowResolver();
if (!HiveParserRowResolver.add(obSyntheticProjectRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
int vcolPos = inputRR.getRowSchema().getSignature().size();
for (Pair<HiveParserASTNode, TypeInfo> astTypePair : vcASTAndType) {
obSyntheticProjectRR.putExpression(astTypePair.getKey(), new ColumnInfo(getColumnInternalName(vcolPos), astTypePair.getValue(), null, false));
vcolPos++;
}
obInputRel = genSelectRelNode(CompositeList.of(originalInputRefs, virtualCols), obSyntheticProjectRR, srcRel);
if (outermostOB) {
if (!HiveParserRowResolver.add(outputRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
} else {
if (!HiveParserRowResolver.add(outputRR, obSyntheticProjectRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
}
originalOBInput = srcRel;
} else {
if (!HiveParserRowResolver.add(outputRR, inputRR)) {
throw new SemanticException("Duplicates detected when adding columns to RR: see previous message");
}
}
// 4. Construct SortRel
RelTraitSet traitSet = cluster.traitSet();
RelCollation canonizedCollation = traitSet.canonize(RelCollationImpl.of(fieldCollations));
sortRel = LogicalSort.create(obInputRel, canonizedCollation, null, null);
// 5. Update the maps
Map<String, Integer> hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToRowResolver.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
}
return (new Pair<>(sortRel, originalOBInput));
}
use of org.apache.calcite.util.Pair in project flink by apache.
the class FlinkAggregateJoinTransposeRule method toRegularAggregate.
/**
* Convert aggregate with AUXILIARY_GROUP to regular aggregate. Return original aggregate and
* null project if the given aggregate does not contain AUXILIARY_GROUP, else new aggregate
* without AUXILIARY_GROUP and a project to permute output columns if needed.
*/
private Pair<Aggregate, List<RexNode>> toRegularAggregate(Aggregate aggregate) {
Tuple2<int[], Seq<AggregateCall>> auxGroupAndRegularAggCalls = AggregateUtil.checkAndSplitAggCalls(aggregate);
final int[] auxGroup = auxGroupAndRegularAggCalls._1;
final Seq<AggregateCall> regularAggCalls = auxGroupAndRegularAggCalls._2;
if (auxGroup.length != 0) {
int[] fullGroupSet = AggregateUtil.checkAndGetFullGroupSet(aggregate);
ImmutableBitSet newGroupSet = ImmutableBitSet.of(fullGroupSet);
List<AggregateCall> aggCalls = JavaConverters.seqAsJavaListConverter(regularAggCalls).asJava();
final Aggregate newAgg = aggregate.copy(aggregate.getTraitSet(), aggregate.getInput(), aggregate.indicator, newGroupSet, com.google.common.collect.ImmutableList.of(newGroupSet), aggCalls);
final List<RelDataTypeField> aggFields = aggregate.getRowType().getFieldList();
final List<RexNode> projectAfterAgg = new ArrayList<>();
for (int i = 0; i < fullGroupSet.length; ++i) {
int group = fullGroupSet[i];
int index = newGroupSet.indexOf(group);
projectAfterAgg.add(new RexInputRef(index, aggFields.get(i).getType()));
}
int fieldCntOfAgg = aggFields.size();
for (int i = fullGroupSet.length; i < fieldCntOfAgg; ++i) {
projectAfterAgg.add(new RexInputRef(i, aggFields.get(i).getType()));
}
Preconditions.checkArgument(projectAfterAgg.size() == fieldCntOfAgg);
return new Pair<>(newAgg, projectAfterAgg);
} else {
return new Pair<>(aggregate, null);
}
}
use of org.apache.calcite.util.Pair in project flink by apache.
the class FlinkAggregateExpandDistinctAggregatesRule method createSelectDistinct.
/**
* Given an {@link org.apache.calcite.rel.core.Aggregate} and the ordinals of the arguments to a
* particular call to an aggregate function, creates a 'select distinct' relational expression
* which projects the group columns and those arguments but nothing else.
*
* <p>For example, given
*
* <blockquote>
*
* <pre>select f0, count(distinct f1), count(distinct f2)
* from t group by f0</pre>
*
* </blockquote>
*
* <p>and the argument list
*
* <blockquote>
*
* {2}
*
* </blockquote>
*
* <p>returns
*
* <blockquote>
*
* <pre>select distinct f0, f2 from t</pre>
*
* </blockquote>
*
* <p>The <code>sourceOf</code> map is populated with the source of each column; in this case
* sourceOf.get(0) = 0, and sourceOf.get(1) = 2.
*
* @param relBuilder Relational expression builder
* @param aggregate Aggregate relational expression
* @param argList Ordinals of columns to make distinct
* @param filterArg Ordinal of column to filter on, or -1
* @param sourceOf Out parameter, is populated with a map of where each output field came from
* @return Aggregate relational expression which projects the required columns
*/
private RelBuilder createSelectDistinct(RelBuilder relBuilder, Aggregate aggregate, List<Integer> argList, int filterArg, Map<Integer, Integer> sourceOf) {
relBuilder.push(aggregate.getInput());
final List<Pair<RexNode, String>> projects = new ArrayList<>();
final List<RelDataTypeField> childFields = relBuilder.peek().getRowType().getFieldList();
for (int i : aggregate.getGroupSet()) {
sourceOf.put(i, projects.size());
projects.add(RexInputRef.of2(i, childFields));
}
if (filterArg >= 0) {
sourceOf.put(filterArg, projects.size());
projects.add(RexInputRef.of2(filterArg, childFields));
}
for (Integer arg : argList) {
if (filterArg >= 0) {
// Implement
// agg(DISTINCT arg) FILTER $f
// by generating
// SELECT DISTINCT ... CASE WHEN $f THEN arg ELSE NULL END AS arg
// and then applying
// agg(arg)
// as usual.
//
// It works except for (rare) agg functions that need to see null
// values.
final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
final RexInputRef filterRef = RexInputRef.of(filterArg, childFields);
final Pair<RexNode, String> argRef = RexInputRef.of2(arg, childFields);
RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.CASE, filterRef, argRef.left, rexBuilder.makeNullLiteral(argRef.left.getType()));
sourceOf.put(arg, projects.size());
projects.add(Pair.of(condition, "i$" + argRef.right));
continue;
}
if (sourceOf.get(arg) != null) {
continue;
}
sourceOf.put(arg, projects.size());
projects.add(RexInputRef.of2(arg, childFields));
}
relBuilder.project(Pair.left(projects), Pair.right(projects));
// Get the distinct values of the GROUP BY fields and the arguments
// to the agg functions.
relBuilder.push(aggregate.copy(aggregate.getTraitSet(), relBuilder.build(), ImmutableBitSet.range(projects.size()), null, com.google.common.collect.ImmutableList.<AggregateCall>of()));
return relBuilder;
}
Aggregations