use of com.facebook.presto.spi.plan.AggregationNode.Aggregation in project presto by prestodb.
the class QueryPlanner method aggregate.
private PlanBuilder aggregate(PlanBuilder subPlan, QuerySpecification node) {
if (!analysis.isAggregation(node)) {
return subPlan;
}
// 1. Pre-project all scalar inputs (arguments and non-trivial group by expressions)
Set<Expression> groupByExpressions = ImmutableSet.copyOf(analysis.getGroupByExpressions(node));
ImmutableList.Builder<Expression> arguments = ImmutableList.builder();
analysis.getAggregates(node).stream().map(FunctionCall::getArguments).flatMap(List::stream).filter(// lambda expression is generated at execution time
exp -> !(exp instanceof LambdaExpression)).forEach(arguments::add);
analysis.getAggregates(node).stream().map(FunctionCall::getOrderBy).filter(Optional::isPresent).map(Optional::get).map(OrderBy::getSortItems).flatMap(List::stream).map(SortItem::getSortKey).forEach(arguments::add);
// filter expressions need to be projected first
analysis.getAggregates(node).stream().map(FunctionCall::getFilter).filter(Optional::isPresent).map(Optional::get).forEach(arguments::add);
Iterable<Expression> inputs = Iterables.concat(groupByExpressions, arguments.build());
subPlan = handleSubqueries(subPlan, node, inputs);
if (!Iterables.isEmpty(inputs)) {
// avoid an empty projection if the only aggregation is COUNT (which has no arguments)
subPlan = project(subPlan, inputs);
}
// 2. Aggregate
// 2.a. Rewrite aggregate arguments
TranslationMap argumentTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToVariableMap);
ImmutableList.Builder<VariableReferenceExpression> aggregationArgumentsBuilder = ImmutableList.builder();
for (Expression argument : arguments.build()) {
VariableReferenceExpression variable = subPlan.translate(argument);
argumentTranslations.put(argument, variable);
aggregationArgumentsBuilder.add(variable);
}
List<VariableReferenceExpression> aggregationArguments = aggregationArgumentsBuilder.build();
// 2.b. Rewrite grouping columns
TranslationMap groupingTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToVariableMap);
Map<VariableReferenceExpression, VariableReferenceExpression> groupingSetMappings = new LinkedHashMap<>();
for (Expression expression : groupByExpressions) {
VariableReferenceExpression input = subPlan.translate(expression);
VariableReferenceExpression output = variableAllocator.newVariable(expression, analysis.getTypeWithCoercions(expression), "gid");
groupingTranslations.put(expression, output);
groupingSetMappings.put(output, input);
}
// This tracks the grouping sets before complex expressions are considered (see comments below)
// It's also used to compute the descriptors needed to implement grouping()
List<Set<FieldId>> columnOnlyGroupingSets = ImmutableList.of(ImmutableSet.of());
List<List<VariableReferenceExpression>> groupingSets = ImmutableList.of(ImmutableList.of());
if (node.getGroupBy().isPresent()) {
// For the purpose of "distinct", we need to canonicalize column references that may have varying
// syntactic forms (e.g., "t.a" vs "a"). Thus we need to enumerate grouping sets based on the underlying
// fieldId associated with each column reference expression.
// The catch is that simple group-by expressions can be arbitrary expressions (this is a departure from the SQL specification).
// But, they don't affect the number of grouping sets or the behavior of "distinct" . We can compute all the candidate
// grouping sets in terms of fieldId, dedup as appropriate and then cross-join them with the complex expressions.
Analysis.GroupingSetAnalysis groupingSetAnalysis = analysis.getGroupingSets(node);
columnOnlyGroupingSets = enumerateGroupingSets(groupingSetAnalysis);
if (node.getGroupBy().get().isDistinct()) {
columnOnlyGroupingSets = columnOnlyGroupingSets.stream().distinct().collect(toImmutableList());
}
// add in the complex expressions an turn materialize the grouping sets in terms of plan columns
ImmutableList.Builder<List<VariableReferenceExpression>> groupingSetBuilder = ImmutableList.builder();
for (Set<FieldId> groupingSet : columnOnlyGroupingSets) {
ImmutableList.Builder<VariableReferenceExpression> columns = ImmutableList.builder();
groupingSetAnalysis.getComplexExpressions().stream().map(groupingTranslations::get).forEach(columns::add);
groupingSet.stream().map(field -> groupingTranslations.get(new FieldReference(field.getFieldIndex()))).forEach(columns::add);
groupingSetBuilder.add(columns.build());
}
groupingSets = groupingSetBuilder.build();
}
// 2.c. Generate GroupIdNode (multiple grouping sets) or ProjectNode (single grouping set)
Optional<VariableReferenceExpression> groupIdVariable = Optional.empty();
if (groupingSets.size() > 1) {
groupIdVariable = Optional.of(variableAllocator.newVariable("groupId", BIGINT));
GroupIdNode groupId = new GroupIdNode(subPlan.getRoot().getSourceLocation(), idAllocator.getNextId(), subPlan.getRoot(), groupingSets, groupingSetMappings, aggregationArguments, groupIdVariable.get());
subPlan = new PlanBuilder(groupingTranslations, groupId);
} else {
Assignments.Builder assignments = Assignments.builder();
aggregationArguments.stream().map(AssignmentUtils::identityAsSymbolReference).forEach(assignments::put);
groupingSetMappings.forEach((key, value) -> assignments.put(key, castToRowExpression(asSymbolReference(value))));
ProjectNode project = new ProjectNode(subPlan.getRoot().getSourceLocation(), idAllocator.getNextId(), subPlan.getRoot(), assignments.build(), LOCAL);
subPlan = new PlanBuilder(groupingTranslations, project);
}
TranslationMap aggregationTranslations = new TranslationMap(subPlan.getRelationPlan(), analysis, lambdaDeclarationToVariableMap);
aggregationTranslations.copyMappingsFrom(groupingTranslations);
// 2.d. Rewrite aggregates
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> aggregationsBuilder = ImmutableMap.builder();
boolean needPostProjectionCoercion = false;
for (FunctionCall aggregate : analysis.getAggregates(node)) {
Expression rewritten = argumentTranslations.rewrite(aggregate);
VariableReferenceExpression newVariable = variableAllocator.newVariable(rewritten, analysis.getType(aggregate));
// Therefore we can end up with this implicit cast, and have to move it into a post-projection
if (rewritten instanceof Cast) {
rewritten = ((Cast) rewritten).getExpression();
needPostProjectionCoercion = true;
}
aggregationTranslations.put(aggregate, newVariable);
FunctionCall rewrittenFunction = (FunctionCall) rewritten;
aggregationsBuilder.put(newVariable, new Aggregation(new CallExpression(getSourceLocation(rewrittenFunction), aggregate.getName().getSuffix(), analysis.getFunctionHandle(aggregate), analysis.getType(aggregate), rewrittenFunction.getArguments().stream().map(OriginalExpressionUtils::castToRowExpression).collect(toImmutableList())), rewrittenFunction.getFilter().map(OriginalExpressionUtils::castToRowExpression), rewrittenFunction.getOrderBy().map(orderBy -> toOrderingScheme(orderBy, variableAllocator.getTypes())), rewrittenFunction.isDistinct(), Optional.empty()));
}
Map<VariableReferenceExpression, Aggregation> aggregations = aggregationsBuilder.build();
ImmutableSet.Builder<Integer> globalGroupingSets = ImmutableSet.builder();
for (int i = 0; i < groupingSets.size(); i++) {
if (groupingSets.get(i).isEmpty()) {
globalGroupingSets.add(i);
}
}
ImmutableList.Builder<VariableReferenceExpression> groupingKeys = ImmutableList.builder();
groupingSets.stream().flatMap(List::stream).distinct().forEach(groupingKeys::add);
groupIdVariable.ifPresent(groupingKeys::add);
AggregationNode aggregationNode = new AggregationNode(subPlan.getRoot().getSourceLocation(), idAllocator.getNextId(), subPlan.getRoot(), aggregations, groupingSets(groupingKeys.build(), groupingSets.size(), globalGroupingSets.build()), ImmutableList.of(), AggregationNode.Step.SINGLE, Optional.empty(), groupIdVariable);
subPlan = new PlanBuilder(aggregationTranslations, aggregationNode);
// TODO: this is a hack, we should change type coercions to coerce the inputs to functions/operators instead of coercing the output
if (needPostProjectionCoercion) {
ImmutableList.Builder<Expression> alreadyCoerced = ImmutableList.builder();
alreadyCoerced.addAll(groupByExpressions);
groupIdVariable.map(ExpressionTreeUtils::createSymbolReference).ifPresent(alreadyCoerced::add);
subPlan = explicitCoercionFields(subPlan, alreadyCoerced.build(), analysis.getAggregates(node));
}
// 4. Project and re-write all grouping functions
return handleGroupingOperations(subPlan, node, groupIdVariable, columnOnlyGroupingSets);
}
use of com.facebook.presto.spi.plan.AggregationNode.Aggregation in project presto by prestodb.
the class CanonicalPlanGenerator method visitAggregation.
@Override
public Optional<PlanNode> visitAggregation(AggregationNode node, Map<VariableReferenceExpression, VariableReferenceExpression> context) {
Optional<PlanNode> source = node.getSource().accept(this, context);
if (!source.isPresent()) {
return Optional.empty();
}
// Steps to get canonical aggregations:
// 1. Transform aggregation into canonical form
// 2. Sort based on canonical aggregation expression
// 3. Get new variable reference for aggregation expression
// 4. Record mapping from original variable reference to the new one
List<AggregationReference> aggregationReferences = node.getAggregations().entrySet().stream().map(entry -> new AggregationReference(getCanonicalAggregation(entry.getValue(), context), entry.getKey())).sorted(comparing(aggregationReference -> aggregationReference.getAggregation().getCall().toString())).collect(toImmutableList());
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> aggregations = ImmutableMap.builder();
for (AggregationReference aggregationReference : aggregationReferences) {
VariableReferenceExpression reference = variableAllocator.newVariable(aggregationReference.getAggregation().getCall());
context.put(aggregationReference.getVariableReferenceExpression(), reference);
aggregations.put(reference, aggregationReference.getAggregation());
}
return Optional.of(new AggregationNode(node.getSourceLocation(), planNodeidAllocator.getNextId(), source.get(), aggregations.build(), getCanonicalGroupingSetDescriptor(node.getGroupingSets(), context), node.getPreGroupedVariables().stream().map(context::get).collect(toImmutableList()), node.getStep(), node.getHashVariable().map(ignored -> variableAllocator.newHashVariable()), node.getGroupIdVariable().map(context::get)));
}
use of com.facebook.presto.spi.plan.AggregationNode.Aggregation in project presto by prestodb.
the class ScalarAggregationToJoinRewriter method createAggregationNode.
private Optional<AggregationNode> createAggregationNode(AggregationNode scalarAggregation, JoinNode leftOuterJoin, VariableReferenceExpression nonNull) {
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> aggregations = ImmutableMap.builder();
for (Map.Entry<VariableReferenceExpression, Aggregation> entry : scalarAggregation.getAggregations().entrySet()) {
VariableReferenceExpression variable = entry.getKey();
if (functionResolution.isCountFunction(entry.getValue().getFunctionHandle())) {
Type scalarAggregationSourceType = nonNull.getType();
aggregations.put(variable, new Aggregation(new CallExpression(variable.getSourceLocation(), "count", functionResolution.countFunction(scalarAggregationSourceType), BIGINT, ImmutableList.of(castToRowExpression(asSymbolReference(nonNull)))), Optional.empty(), Optional.empty(), false, entry.getValue().getMask()));
} else {
aggregations.put(variable, entry.getValue());
}
}
return Optional.of(new AggregationNode(scalarAggregation.getSourceLocation(), idAllocator.getNextId(), leftOuterJoin, aggregations.build(), singleGroupingSet(leftOuterJoin.getLeft().getOutputVariables()), ImmutableList.of(), scalarAggregation.getStep(), scalarAggregation.getHashVariable(), Optional.empty()));
}
use of com.facebook.presto.spi.plan.AggregationNode.Aggregation in project presto by prestodb.
the class StatisticAggregations method split.
private Parts split(PlanVariableAllocator variableAllocator, FunctionAndTypeManager functionAndTypeManager, boolean intermediate) {
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> finalOrIntermediateAggregations = ImmutableMap.builder();
ImmutableMap.Builder<VariableReferenceExpression, Aggregation> partialAggregations = ImmutableMap.builder();
for (Map.Entry<VariableReferenceExpression, Aggregation> entry : aggregations.entrySet()) {
Aggregation originalAggregation = entry.getValue();
FunctionHandle functionHandle = originalAggregation.getFunctionHandle();
InternalAggregationFunction function = functionAndTypeManager.getAggregateFunctionImplementation(functionHandle);
// create partial aggregation
VariableReferenceExpression partialVariable = variableAllocator.newVariable(entry.getValue().getCall().getSourceLocation(), functionAndTypeManager.getFunctionMetadata(functionHandle).getName().getObjectName(), function.getIntermediateType());
partialAggregations.put(partialVariable, new Aggregation(new CallExpression(originalAggregation.getCall().getSourceLocation(), originalAggregation.getCall().getDisplayName(), functionHandle, function.getIntermediateType(), originalAggregation.getArguments()), originalAggregation.getFilter(), originalAggregation.getOrderBy(), originalAggregation.isDistinct(), originalAggregation.getMask()));
// create final aggregation
finalOrIntermediateAggregations.put(entry.getKey(), new Aggregation(new CallExpression(originalAggregation.getCall().getSourceLocation(), originalAggregation.getCall().getDisplayName(), functionHandle, intermediate ? function.getIntermediateType() : function.getFinalType(), ImmutableList.of(partialVariable)), Optional.empty(), Optional.empty(), false, Optional.empty()));
}
StatisticAggregations finalOrIntermediateAggregation = new StatisticAggregations(finalOrIntermediateAggregations.build(), groupingVariables);
return new Parts(intermediate ? Optional.empty() : Optional.of(finalOrIntermediateAggregation), intermediate ? Optional.of(finalOrIntermediateAggregation) : Optional.empty(), new StatisticAggregations(partialAggregations.build(), groupingVariables));
}
use of com.facebook.presto.spi.plan.AggregationNode.Aggregation in project presto by prestodb.
the class AggregatedParquetPageSource method getNextPage.
@Override
public Page getNextPage() {
if (completed) {
return null;
}
long start = System.nanoTime();
Block[] blocks = new Block[columnHandles.size()];
for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
HiveColumnHandle columnHandle = columnHandles.get(fieldId);
Aggregation aggregation = columnHandle.getPartialAggregation().get();
Type type = typeManager.getType(columnHandle.getTypeSignature());
BlockBuilder blockBuilder = type.createBlockBuilder(null, batchSize, 0);
int columnIndex = columnHandle.getHiveColumnIndex();
FunctionHandle functionHandle = aggregation.getFunctionHandle();
if (functionResolution.isCountFunction(functionHandle)) {
long rowCount = getRowCountFromParquetMetadata(parquetMetadata);
if (!aggregation.getArguments().isEmpty()) {
rowCount -= getNumNulls(parquetMetadata, columnIndex);
}
blockBuilder = blockBuilder.writeLong(rowCount);
} else if (functionResolution.isMaxFunction(functionHandle)) {
writeMinMax(parquetMetadata, columnIndex, blockBuilder, type, columnHandle.getHiveType(), false);
} else if (functionResolution.isMinFunction(functionHandle)) {
writeMinMax(parquetMetadata, columnIndex, blockBuilder, type, columnHandle.getHiveType(), true);
} else {
throw new UnsupportedOperationException(aggregation.getFunctionHandle().toString() + " is not supported");
}
blocks[fieldId] = blockBuilder.build();
}
completed = true;
readTimeNanos += System.nanoTime() - start;
return new Page(batchSize, blocks);
}
Aggregations