Use of io.trino.sql.planner.Partitioning in project trino by trinodb.
The class PushPartialAggregationThroughExchange, method apply:
@Override
public Result apply(AggregationNode aggregationNode, Captures captures, Context context) {
ExchangeNode exchangeNode = captures.get(EXCHANGE_NODE);
boolean decomposable = aggregationNode.isDecomposable(plannerContext.getMetadata());
if (aggregationNode.getStep() == SINGLE && aggregationNode.hasEmptyGroupingSet() && aggregationNode.hasNonEmptyGroupingSet() && exchangeNode.getType() == REPARTITION) {
// single-step aggregation w/ empty grouping sets in a partitioned stage, so we need a partial that will produce
// the default intermediates for the empty grouping set that will be routed to the appropriate final aggregation.
// TODO: technically, AddExchanges generates a broken plan that this rule "fixes"
checkState(decomposable, "Distributed aggregation with empty grouping set requires partial but functions are not decomposable");
return Result.ofPlanNode(split(aggregationNode, context));
}
if (!decomposable || !preferPartialAggregation(context.getSession())) {
return Result.empty();
}
// partial aggregation can only be pushed through an exchange that doesn't change
// the cardinality of the stream (i.e., gather or repartition)
if ((exchangeNode.getType() != GATHER && exchangeNode.getType() != REPARTITION) || exchangeNode.getPartitioningScheme().isReplicateNullsAndAny()) {
return Result.empty();
}
if (exchangeNode.getType() == REPARTITION) {
// if partitioning columns are not a subset of grouping keys,
// we can't push this through
List<Symbol> partitioningColumns = exchangeNode.getPartitioningScheme().getPartitioning().getArguments().stream()
        .filter(Partitioning.ArgumentBinding::isVariable)
        .map(Partitioning.ArgumentBinding::getColumn)
        .collect(Collectors.toList());
if (!aggregationNode.getGroupingKeys().containsAll(partitioningColumns)) {
return Result.empty();
}
}
// currently, we only support plans that don't use pre-computed hash functions
if (aggregationNode.getHashSymbol().isPresent() || exchangeNode.getPartitioningScheme().getHashColumn().isPresent()) {
return Result.empty();
}
switch (aggregationNode.getStep()) {
case SINGLE:
// Split it into a FINAL on top of a PARTIAL and reprocess the resulting plan
// to push the partial below the exchange (see the PARTIAL case below)
return Result.ofPlanNode(split(aggregationNode, context));
case PARTIAL:
// Push it underneath each branch of the exchange
return Result.ofPlanNode(pushPartial(aggregationNode, exchangeNode, context));
default:
return Result.empty();
}
}
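The Partitioning usage in this rule comes down to one gating check: a partial aggregation may only be pushed below a REPARTITION exchange when every variable partitioning column is also a grouping key, otherwise rows of the same group could land on different partitions. A minimal sketch of that check as a standalone helper follows; the class and method names are hypothetical, and the imports are assumed from the Trino types used in the snippet above.

import java.util.List;
import java.util.stream.Collectors;

import io.trino.sql.planner.Partitioning;
import io.trino.sql.planner.Symbol;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.ExchangeNode;

final class PartitioningChecks
{
    private PartitioningChecks() {}

    // Hypothetical helper mirroring the REPARTITION branch above: collect the
    // variable (non-constant) partitioning columns and require them to be a
    // subset of the aggregation's grouping keys.
    static boolean partitioningColumnsAreSubsetOfGroupingKeys(AggregationNode aggregation, ExchangeNode exchange)
    {
        List<Symbol> partitioningColumns = exchange.getPartitioningScheme().getPartitioning().getArguments().stream()
                .filter(Partitioning.ArgumentBinding::isVariable)
                .map(Partitioning.ArgumentBinding::getColumn)
                .collect(Collectors.toList());
        return aggregation.getGroupingKeys().containsAll(partitioningColumns);
    }
}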
Use of io.trino.sql.planner.Partitioning in project trino by trinodb.
The class PlanPrinter, method formatFragment:
private static String formatFragment(
        Function<TableScanNode, TableInfo> tableInfoSupplier,
        Map<DynamicFilterId, DynamicFilterDomainStats> dynamicFilterDomainStats,
        ValuePrinter valuePrinter,
        PlanFragment fragment,
        Optional<StageInfo> stageInfo,
        Optional<Map<PlanNodeId, PlanNodeStats>> planNodeStats,
        boolean verbose,
        TypeProvider typeProvider) {
StringBuilder builder = new StringBuilder();
builder.append(format("Fragment %s [%s]\n", fragment.getId(), fragment.getPartitioning()));
if (stageInfo.isPresent()) {
StageStats stageStats = stageInfo.get().getStageStats();
double avgPositionsPerTask = stageInfo.get().getTasks().stream()
        .mapToLong(task -> task.getStats().getProcessedInputPositions())
        .average().orElse(Double.NaN);
double squaredDifferences = stageInfo.get().getTasks().stream()
        .mapToDouble(task -> Math.pow(task.getStats().getProcessedInputPositions() - avgPositionsPerTask, 2))
        .sum();
double sdAmongTasks = Math.sqrt(squaredDifferences / stageInfo.get().getTasks().size());
builder.append(indentString(1)).append(format(
        "CPU: %s, Scheduled: %s, Input: %s (%s); per task: avg.: %s std.dev.: %s, Output: %s (%s)\n",
        stageStats.getTotalCpuTime().convertToMostSuccinctTimeUnit(),
        stageStats.getTotalScheduledTime().convertToMostSuccinctTimeUnit(),
        formatPositions(stageStats.getProcessedInputPositions()), stageStats.getProcessedInputDataSize(),
        formatDouble(avgPositionsPerTask), formatDouble(sdAmongTasks),
        formatPositions(stageStats.getOutputPositions()), stageStats.getOutputDataSize()));
}
PartitioningScheme partitioningScheme = fragment.getPartitioningScheme();
builder.append(indentString(1)).append(format("Output layout: [%s]\n", Joiner.on(", ").join(partitioningScheme.getOutputLayout())));
boolean replicateNullsAndAny = partitioningScheme.isReplicateNullsAndAny();
List<String> arguments = partitioningScheme.getPartitioning().getArguments().stream().map(argument -> {
if (argument.isConstant()) {
NullableValue constant = argument.getConstant();
String printableValue = valuePrinter.castToVarchar(constant.getType(), constant.getValue());
return constant.getType().getDisplayName() + "(" + printableValue + ")";
}
return argument.getColumn().toString();
}).collect(toImmutableList());
builder.append(indentString(1));
if (replicateNullsAndAny) {
builder.append(format("Output partitioning: %s (replicate nulls and any) [%s]%s\n",
        partitioningScheme.getPartitioning().getHandle(),
        Joiner.on(", ").join(arguments),
        formatHash(partitioningScheme.getHashColumn())));
} else {
builder.append(format("Output partitioning: %s [%s]%s\n",
        partitioningScheme.getPartitioning().getHandle(),
        Joiner.on(", ").join(arguments),
        formatHash(partitioningScheme.getHashColumn())));
}
builder.append(indentString(1)).append(format("Stage Execution Strategy: %s\n", fragment.getStageExecutionDescriptor().getStageExecutionStrategy()));
builder.append(new PlanPrinter(
        fragment.getRoot(),
        typeProvider,
        Optional.of(fragment.getStageExecutionDescriptor()),
        tableInfoSupplier,
        dynamicFilterDomainStats,
        valuePrinter,
        fragment.getStatsAndCosts(),
        planNodeStats)
        .toText(verbose, 1)).append("\n");
return builder.toString();
}
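The Partitioning usage here is purely presentational: each ArgumentBinding is rendered either as "type(value)" for a constant binding or as the column name for a variable binding. A minimal sketch of that rendering as a standalone helper follows; the class and method names are hypothetical, the ValuePrinter import path is assumed, and all accessors are taken from the snippet above.

import java.util.List;

import io.trino.spi.predicate.NullableValue;
import io.trino.sql.planner.Partitioning;
import io.trino.sql.planner.planprinter.ValuePrinter;

import static com.google.common.collect.ImmutableList.toImmutableList;

final class PartitioningFormat
{
    private PartitioningFormat() {}

    // Hypothetical helper extracted from the lambda above: constant bindings
    // print as "type(value)", variable bindings print as the column symbol.
    static List<String> formatArguments(Partitioning partitioning, ValuePrinter valuePrinter)
    {
        return partitioning.getArguments().stream()
                .map(argument -> {
                    if (argument.isConstant()) {
                        NullableValue constant = argument.getConstant();
                        String printableValue = valuePrinter.castToVarchar(constant.getType(), constant.getValue());
                        return constant.getType().getDisplayName() + "(" + printableValue + ")";
                    }
                    return argument.getColumn().toString();
                })
                .collect(toImmutableList());
    }
}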
Aggregations