Use of io.prestosql.spi.plan.AggregationNode.Aggregation in project hetu-core by openlookeng.
Class TestEffectivePredicateExtractor, method testAggregation.
/**
 * Checks the effective predicate extracted from an aggregation that sits on top of a
 * filtered table scan: the expected conjuncts are written only over A, B and C, the
 * symbols of the aggregation's single grouping set.
 */
@Test
public void testAggregation() {
    // Both aggregation outputs (C and D) use the handle returned by countFunction(), with no arguments.
    Aggregation countIntoC = new Aggregation(
            new CallExpression("test", new FunctionResolution(metadata.getFunctionAndTypeManager()).countFunction(), BIGINT, ImmutableList.of()),
            ImmutableList.of(),
            false,
            Optional.empty(),
            Optional.empty(),
            Optional.empty());
    Aggregation countIntoD = new Aggregation(
            new CallExpression("test", new FunctionResolution(metadata.getFunctionAndTypeManager()).countFunction(), BIGINT, ImmutableList.of()),
            ImmutableList.of(),
            false,
            Optional.empty(),
            Optional.empty(),
            Optional.empty());

    PlanNode aggregationNode = new AggregationNode(
            newId(),
            filter(
                    baseTableScan,
                    and(
                            equals(AE, DE),
                            equals(BE, EE),
                            equals(CE, FE),
                            lessThan(DE, bigintLiteral(10)),
                            lessThan(CE, DE),
                            greaterThan(AE, bigintLiteral(2)),
                            equals(EE, FE))),
            ImmutableMap.of(C, countIntoC, D, countIntoD),
            singleGroupingSet(ImmutableList.of(A, B, C)),
            ImmutableList.of(),
            AggregationNode.Step.FINAL,
            Optional.empty(),
            Optional.empty(),
            AggregationNode.AggregationType.HASH,
            Optional.empty());

    Expression extracted = effectivePredicateExtractor.extract(SESSION, aggregationNode, TypeProvider.empty(), typeAnalyzer);

    // The source predicate is expected to come back rewritten in terms of the group-by symbols.
    assertEquals(
            normalizeConjuncts(extracted),
            normalizeConjunctsSet(
                    lessThan(AE, bigintLiteral(10)),
                    lessThan(BE, AE),
                    greaterThan(AE, bigintLiteral(2)),
                    equals(BE, CE)));
}
Use of io.prestosql.spi.plan.AggregationNode.Aggregation in project hetu-core by openlookeng.
Class AggregationStatsRule, method groupBy.
/**
 * Estimates the statistics of an aggregation that groups {@code sourceStats} by
 * {@code groupBySymbols} and computes {@code aggregations}.
 *
 * <p>Row count: the product, over all grouping symbols with known statistics, of
 * (distinct values + 1 if the symbol may be null), capped by the source row count.
 * When there are no grouping symbols at all (global aggregation) the product is the
 * empty product, 1, so at most one output row is estimated. Only when grouping symbols
 * exist but none of them has usable statistics does the estimate fall back to
 * {@code sourceRowCount * UNKNOWN_FILTER_COEFFICIENT}.
 *
 * @param sourceStats statistics of the aggregation input
 * @param groupBySymbols grouping keys; may be empty
 * @param aggregations aggregation output symbols and their definitions
 * @return estimate containing the row count plus per-symbol statistics for every
 *         grouping key and aggregation output
 */
public static PlanNodeStatsEstimate groupBy(PlanNodeStatsEstimate sourceStats, Collection<Symbol> groupBySymbols, Map<Symbol, Aggregation> aggregations) {
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();

    // Grouping keys keep their source statistics, except for the nulls fraction:
    // a key without nulls stays at 0, otherwise nulls are counted as one extra group.
    for (Symbol groupBySymbol : groupBySymbols) {
        SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupBySymbol);
        result.addSymbolStatistics(groupBySymbol, symbolStatistics.mapNullsFraction(nullsFraction -> {
            if (nullsFraction == 0.0) {
                return 0.0;
            }
            return 1.0 / (symbolStatistics.getDistinctValuesCount() + 1);
        }));
    }

    double rowsCount = 1;
    // A global aggregation has no keys to inspect; the empty product (1) is already a
    // usable estimate, so it must not fall into the "no statistics" fallback below.
    boolean knowsStatsFound = groupBySymbols.isEmpty();
    for (Symbol groupBySymbol : groupBySymbols) {
        SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupBySymbol);
        if (symbolStatistics.isUnknown()) {
            // Symbols whose statistics are unknown are left out of the row-count product for now.
            continue;
        }
        knowsStatsFound = true;
        int nullRow = (symbolStatistics.getNullsFraction() == 0.0) ? 0 : 1;
        rowsCount *= symbolStatistics.getDistinctValuesCount() + nullRow;
    }

    if (knowsStatsFound) {
        // The aggregation can never produce more rows than it consumes.
        result.setOutputRowCount(min(rowsCount, sourceStats.getOutputRowCount()));
    } else {
        // If there was no proper stats in any of the grouping symbols, we just take fixed % of source.
        result.setOutputRowCount(sourceStats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    }

    for (Map.Entry<Symbol, Aggregation> aggregationEntry : aggregations.entrySet()) {
        result.addSymbolStatistics(aggregationEntry.getKey(), estimateAggregationStats(aggregationEntry.getValue(), sourceStats));
    }
    return result.build();
}
Use of io.prestosql.spi.plan.AggregationNode.Aggregation in project hetu-core by openlookeng.
Class QueryPlanner, method aggregate.
/**
 * Plans the aggregation of a query specification on top of {@code inputSubPlan}.
 *
 * <p>When {@code analysis.isAggregation(node)} is false the input plan is returned unchanged.
 * Otherwise the method proceeds in the numbered steps marked in the body:
 * (1) project all group-by expressions and aggregate inputs (arguments, ORDER BY sort keys,
 * FILTER expressions), (2) translate arguments and grouping columns, materialize the grouping
 * sets, add a GroupIdNode or ProjectNode and build the AggregationNode, (3) re-apply coercions
 * that were stripped from aggregate calls, (4) hand over to {@code handleGroupingOperations}.
 *
 * @param inputSubPlan plan built so far for the query specification
 * @param node the query specification being planned
 * @return the plan with the aggregation (and any follow-up projections) applied
 */
private PlanBuilder aggregate(PlanBuilder inputSubPlan, QuerySpecification node) {
PlanBuilder subPlan = inputSubPlan;
if (!analysis.isAggregation(node)) {
return subPlan;
}
// 1. Pre-project all scalar inputs (arguments and non-trivial group by expressions)
Set<Expression> groupByExpressions = ImmutableSet.copyOf(analysis.getGroupByExpressions(node));
ImmutableList.Builder<Expression> arguments = ImmutableList.builder();
analysis.getAggregates(node).stream().map(FunctionCall::getArguments).flatMap(List::stream).filter(// lambda expression is generated at execution time
exp -> !(exp instanceof LambdaExpression)).forEach(arguments::add);
// sort keys of aggregate ORDER BY clauses are inputs as well
analysis.getAggregates(node).stream().map(FunctionCall::getOrderBy).filter(Optional::isPresent).map(Optional::get).map(OrderBy::getSortItems).flatMap(List::stream).map(SortItem::getSortKey).forEach(arguments::add);
// filter expressions need to be projected first
analysis.getAggregates(node).stream().map(FunctionCall::getFilter).filter(Optional::isPresent).map(Optional::get).forEach(arguments::add);
Iterable<Expression> inputs = Iterables.concat(groupByExpressions, arguments.build());
subPlan = handleSubqueries(subPlan, node, inputs);
if (!Iterables.isEmpty(inputs)) {
// avoid an empty projection if the only aggregation is COUNT (which has no arguments)
subPlan = project(subPlan, inputs);
}
// 2. Aggregate
// 2.a. Rewrite aggregate arguments
TranslationMap argumentTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
ImmutableList.Builder<Symbol> aggregationArgumentsBuilder = ImmutableList.builder();
for (Expression argument : arguments.build()) {
Symbol symbol = subPlan.translate(argument);
argumentTranslations.put(argument, symbol);
aggregationArgumentsBuilder.add(symbol);
}
List<Symbol> aggregationArguments = aggregationArgumentsBuilder.build();
// 2.b. Rewrite grouping columns
TranslationMap groupingTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
// Maps each newly allocated grouping output symbol to the input symbol it is copied from (insertion-ordered)
Map<Symbol, Symbol> groupingSetMappings = new LinkedHashMap<>();
for (Expression expression : groupByExpressions) {
Symbol input = subPlan.translate(expression);
Symbol output = planSymbolAllocator.newSymbol(expression, analysis.getTypeWithCoercions(expression), "gid");
groupingTranslations.put(expression, output);
groupingSetMappings.put(output, input);
}
// This tracks the grouping sets before complex expressions are considered (see comments below)
// It's also used to compute the descriptors needed to implement grouping()
// Both lists default to one empty grouping set, which is what remains when there is no GROUP BY clause
List<Set<FieldId>> columnOnlyGroupingSets = ImmutableList.of(ImmutableSet.of());
List<List<Symbol>> groupingSets = ImmutableList.of(ImmutableList.of());
if (node.getGroupBy().isPresent()) {
// For the purpose of "distinct", we need to canonicalize column references that may have varying
// syntactic forms (e.g., "t.a" vs "a"). Thus we need to enumerate grouping sets based on the underlying
// fieldId associated with each column reference expression.
// The catch is that simple group-by expressions can be arbitrary expressions (this is a departure from the SQL specification).
// But, they don't affect the number of grouping sets or the behavior of "distinct" . We can compute all the candidate
// grouping sets in terms of fieldId, dedup as appropriate and then cross-join them with the complex expressions.
Analysis.GroupingSetAnalysis groupingSetAnalysis = analysis.getGroupingSets(node);
columnOnlyGroupingSets = enumerateGroupingSets(groupingSetAnalysis);
if (node.getGroupBy().get().isDistinct()) {
columnOnlyGroupingSets = columnOnlyGroupingSets.stream().distinct().collect(toImmutableList());
}
// add in the complex expressions and materialize the grouping sets in terms of plan columns
ImmutableList.Builder<List<Symbol>> groupingSetBuilder = ImmutableList.builder();
for (Set<FieldId> groupingSet : columnOnlyGroupingSets) {
ImmutableList.Builder<Symbol> columns = ImmutableList.builder();
// every grouping set starts with the symbols of all complex expressions, followed by its own column symbols
groupingSetAnalysis.getComplexExpressions().stream().map(groupingTranslations::get).forEach(columns::add);
groupingSet.stream().map(field -> groupingTranslations.get(new FieldReference(field.getFieldIndex()))).forEach(columns::add);
groupingSetBuilder.add(columns.build());
}
groupingSets = groupingSetBuilder.build();
}
// 2.c. Generate GroupIdNode (multiple grouping sets) or ProjectNode (single grouping set)
Optional<Symbol> groupIdSymbol = Optional.empty();
if (groupingSets.size() > 1) {
groupIdSymbol = Optional.of(planSymbolAllocator.newSymbol("groupId", BIGINT));
GroupIdNode groupId = new GroupIdNode(idAllocator.getNextId(), subPlan.getRoot(), groupingSets, groupingSetMappings, aggregationArguments, groupIdSymbol.get());
subPlan = new PlanBuilder(groupingTranslations, groupId);
} else {
// single grouping set: pass the aggregation arguments through and copy each grouping input into its output symbol
Assignments.Builder assignments = Assignments.builder();
aggregationArguments.forEach(symbol -> assignments.put(symbol, castToRowExpression(toSymbolReference(symbol))));
groupingSetMappings.forEach((key, value) -> assignments.put(key, castToRowExpression(toSymbolReference(value))));
ProjectNode project = new ProjectNode(idAllocator.getNextId(), subPlan.getRoot(), assignments.build());
subPlan = new PlanBuilder(groupingTranslations, project);
}
TranslationMap aggregationTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToSymbolMap);
aggregationTranslations.copyMappingsFrom(groupingTranslations);
// 2.d. Rewrite aggregates
ImmutableMap.Builder<Symbol, Aggregation> aggregationsBuilder = ImmutableMap.builder();
boolean needPostProjectionCoercion = false;
for (FunctionCall aggregate : analysis.getAggregates(node)) {
Expression rewritten = argumentTranslations.rewrite(aggregate);
Symbol newSymbol = planSymbolAllocator.newSymbol(rewritten, analysis.getType(aggregate));
// The rewritten aggregate can come back wrapped in an implicit Cast; strip it here and
// remember to re-apply the coercion in a post-projection (step 3)
if (rewritten instanceof Cast) {
rewritten = ((Cast) rewritten).getExpression();
needPostProjectionCoercion = true;
}
aggregationTranslations.put(aggregate, newSymbol);
// NOTE(review): assumes the expression left after unwrapping a Cast is always a FunctionCall - confirm
FunctionCall functionCall = (FunctionCall) rewritten;
// the converted argument list is passed twice: once inside the call expression and once as the Aggregation's own arguments
aggregationsBuilder.put(newSymbol, new Aggregation(call(aggregate.getName().getSuffix(), analysis.getFunctionHandle(aggregate), analysis.getType(aggregate), functionCall.getArguments().stream().map(OriginalExpressionUtils::castToRowExpression).collect(toImmutableList())), functionCall.getArguments().stream().map(OriginalExpressionUtils::castToRowExpression).collect(toImmutableList()), functionCall.isDistinct(), functionCall.getFilter().map(SymbolUtils::from), functionCall.getOrderBy().map(OrderingSchemeUtils::fromOrderBy), Optional.empty()));
}
Map<Symbol, Aggregation> aggregations = aggregationsBuilder.build();
// collect the indexes of the empty grouping sets
ImmutableSet.Builder<Integer> globalGroupingSets = ImmutableSet.builder();
for (int i = 0; i < groupingSets.size(); i++) {
if (groupingSets.get(i).isEmpty()) {
globalGroupingSets.add(i);
}
}
// grouping keys: the distinct symbols across all grouping sets, plus the group id symbol when one was allocated
ImmutableList.Builder<Symbol> groupingKeys = ImmutableList.builder();
groupingSets.stream().flatMap(List::stream).distinct().forEach(groupingKeys::add);
groupIdSymbol.ifPresent(groupingKeys::add);
AggregationNode aggregationNode = new AggregationNode(idAllocator.getNextId(), subPlan.getRoot(), aggregations, groupingSets(groupingKeys.build(), groupingSets.size(), globalGroupingSets.build()), ImmutableList.of(), AggregationNode.Step.SINGLE, Optional.empty(), groupIdSymbol, AggregationNode.AggregationType.HASH, Optional.empty());
subPlan = new PlanBuilder(aggregationTranslations, aggregationNode);
// 3. Post-projection: add back the implicit casts that were stripped in 2.d
// TODO: this is a hack, we should change type coercions to coerce the inputs to functions/operators instead of coercing the output
if (needPostProjectionCoercion) {
// group-by expressions and the group id symbol are listed as already coerced; only the aggregates are passed for coercion
ImmutableList.Builder<Expression> alreadyCoerced = ImmutableList.builder();
alreadyCoerced.addAll(groupByExpressions);
groupIdSymbol.map(SymbolUtils::toSymbolReference).ifPresent(alreadyCoerced::add);
subPlan = explicitCoercionFields(subPlan, alreadyCoerced.build(), analysis.getAggregates(node));
}
// 4. Project and re-write all grouping functions
return handleGroupingOperations(subPlan, node, groupIdSymbol, columnOnlyGroupingSets);
}
Use of io.prestosql.spi.plan.AggregationNode.Aggregation in project hetu-core by openlookeng.
Class ImplementFilteredAggregations, method apply.
/**
 * Replaces the filter of every aggregate with a mask symbol.
 *
 * <p>For each aggregate that has a filter, a new BOOLEAN symbol is allocated, projected from
 * the filter symbol and used as the aggregate's mask; the rebuilt aggregate carries no filter.
 * The source is wrapped in a ProjectNode (new mask symbols plus identity projections of all
 * source outputs) and a FilterNode. The filter predicate is the disjunction of all masks when
 * the aggregation has no non-empty grouping set and every aggregate had a filter; otherwise
 * it is TRUE.
 */
@Override
public Result apply(AggregationNode aggregationNode, Captures captures, Context context) {
    Assignments.Builder projections = Assignments.builder();
    ImmutableMap.Builder<Symbol, Aggregation> strippedAggregations = ImmutableMap.builder();
    ImmutableList.Builder<Expression> maskReferences = ImmutableList.builder();
    boolean sawUnfilteredAggregate = false;

    for (Map.Entry<Symbol, Aggregation> entry : aggregationNode.getAggregations().entrySet()) {
        Aggregation original = entry.getValue();
        Optional<Symbol> mask = original.getMask();
        Optional<Symbol> filter = original.getFilter();

        if (!filter.isPresent()) {
            sawUnfilteredAggregate = true;
        } else {
            // Turn the filter symbol into a freshly allocated mask symbol.
            Symbol maskSymbol = context.getSymbolAllocator().newSymbol(filter.get().getName(), BOOLEAN);
            verify(!mask.isPresent(), "Expected aggregation without mask symbols, see Rule pattern");
            projections.put(maskSymbol, castToRowExpression(toSymbolReference(filter.get())));
            maskReferences.add(toSymbolReference(maskSymbol));
            mask = Optional.of(maskSymbol);
        }

        // Same aggregate, but with the filter stripped and the (possibly new) mask attached.
        strippedAggregations.put(
                entry.getKey(),
                new Aggregation(
                        original.getFunctionCall(),
                        original.getArguments(),
                        original.isDistinct(),
                        Optional.empty(),
                        original.getOrderingScheme(),
                        mask));
    }

    // Rows can only be pre-filtered when there is no non-empty grouping set and no aggregate lacks a filter.
    Expression predicate = TRUE_LITERAL;
    if (!(aggregationNode.hasNonEmptyGroupingSet() || sawUnfilteredAggregate)) {
        predicate = combineDisjunctsWithDefault(maskReferences.build(), TRUE_LITERAL);
    }

    // identity projection for all existing inputs
    projections.putAll(AssignmentUtils.identityAsSymbolReferences(aggregationNode.getSource().getOutputSymbols()));

    return Result.ofPlanNode(new AggregationNode(
            context.getIdAllocator().getNextId(),
            new FilterNode(
                    context.getIdAllocator().getNextId(),
                    new ProjectNode(
                            context.getIdAllocator().getNextId(),
                            aggregationNode.getSource(),
                            projections.build()),
                    castToRowExpression(predicate)),
            strippedAggregations.build(),
            aggregationNode.getGroupingSets(),
            ImmutableList.of(),
            aggregationNode.getStep(),
            aggregationNode.getHashSymbol(),
            aggregationNode.getGroupIdSymbol(),
            aggregationNode.getAggregationType(),
            aggregationNode.getFinalizeSymbol()));
}
Use of io.prestosql.spi.plan.AggregationNode.Aggregation in project hetu-core by openlookeng.
Class SingleDistinctAggregationToGroupBy, method apply.
/**
 * Rewrites an aggregation over a single set of distinct arguments into two stacked aggregations:
 * an inner SINGLE-step aggregation with no aggregate functions that groups by the original
 * grouping keys plus the argument symbols, and an outer aggregation that reuses the original
 * node id and settings, with each aggregate passed through {@code removeDistinct}.
 */
@Override
public Result apply(AggregationNode aggregation, Captures captures, Context context) {
    List<Set<Expression>> argumentSets = extractArgumentSets(aggregation).collect(Collectors.toList());
    // Exactly one argument set is expected here; getOnlyElement fails otherwise.
    Set<Expression> onlyArgumentSet = Iterables.getOnlyElement(argumentSets);
    Set<Symbol> symbols = onlyArgumentSet.stream()
            .map(SymbolUtils::from)
            .collect(Collectors.toSet());

    // Inner node: group by the original keys followed by the argument symbols, no aggregates.
    AggregationNode distinctGrouping = new AggregationNode(
            context.getIdAllocator().getNextId(),
            aggregation.getSource(),
            ImmutableMap.of(),
            singleGroupingSet(ImmutableList.<Symbol>builder()
                    .addAll(aggregation.getGroupingKeys())
                    .addAll(symbols)
                    .build()),
            ImmutableList.of(),
            SINGLE,
            Optional.empty(),
            Optional.empty(),
            aggregation.getAggregationType(),
            aggregation.getFinalizeSymbol());

    // remove DISTINCT flag from function calls
    Map<Symbol, AggregationNode.Aggregation> withoutDistinct = aggregation.getAggregations().entrySet().stream()
            .collect(Collectors.toMap(Map.Entry::getKey, e -> removeDistinct(e.getValue())));

    return Result.ofPlanNode(new AggregationNode(
            aggregation.getId(),
            distinctGrouping,
            withoutDistinct,
            aggregation.getGroupingSets(),
            emptyList(),
            aggregation.getStep(),
            aggregation.getHashSymbol(),
            aggregation.getGroupIdSymbol(),
            aggregation.getAggregationType(),
            aggregation.getFinalizeSymbol()));
}
Aggregations