Search in sources :

Example 1 with Partitioning

use of io.trino.sql.planner.Partitioning in project trino by trinodb.

the class PushPartialAggregationThroughExchange method apply.

/**
 * Decides whether the matched aggregation should be split into partial/final steps or have its
 * partial step pushed below the captured exchange.
 *
 * <p>Returns {@code Result.empty()} whenever one of the guards below rejects the plan shape.
 *
 * @param aggregationNode the aggregation matched by this rule
 * @param captures pattern captures; the exchange is read via {@code EXCHANGE_NODE}
 * @param context rule context, used for the session and handed to the rewrite helpers
 * @return the rewritten plan node, or an empty result when the rule does not apply
 */
@Override
public Result apply(AggregationNode aggregationNode, Captures captures, Context context) {
    ExchangeNode exchangeNode = captures.get(EXCHANGE_NODE);
    boolean canDecompose = aggregationNode.isDecomposable(plannerContext.getMetadata());

    // A single-step aggregation that has both an empty and a non-empty grouping set, sitting on a
    // repartitioning exchange, must be split: the partial step produces the default intermediates
    // for the empty grouping set so they can be routed to the appropriate final aggregation.
    // TODO: technically, AddExchanges generates a broken plan that this rule "fixes"
    if (aggregationNode.getStep() == SINGLE
            && aggregationNode.hasEmptyGroupingSet()
            && aggregationNode.hasNonEmptyGroupingSet()
            && exchangeNode.getType() == REPARTITION) {
        checkState(canDecompose, "Distributed aggregation with empty grouping set requires partial but functions are not decomposable");
        return Result.ofPlanNode(split(aggregationNode, context));
    }

    // Beyond the mandatory case above, the rewrite is optional: it needs decomposable functions
    // and a session that prefers partial aggregation.
    if (!(canDecompose && preferPartialAggregation(context.getSession()))) {
        return Result.empty();
    }

    // Only GATHER and REPARTITION exchanges are eligible, and never one whose partitioning scheme
    // replicates nulls and any.
    boolean gatherOrRepartition = exchangeNode.getType() == GATHER || exchangeNode.getType() == REPARTITION;
    if (!gatherOrRepartition || exchangeNode.getPartitioningScheme().isReplicateNullsAndAny()) {
        return Result.empty();
    }

    if (exchangeNode.getType() == REPARTITION) {
        // Every variable partitioning argument has to be one of the grouping keys;
        // otherwise the aggregation cannot be pushed through this exchange.
        List<Symbol> partitionSymbols = exchangeNode.getPartitioningScheme()
                .getPartitioning()
                .getArguments()
                .stream()
                .filter(Partitioning.ArgumentBinding::isVariable)
                .map(Partitioning.ArgumentBinding::getColumn)
                .collect(Collectors.toList());
        boolean coveredByGroupingKeys = aggregationNode.getGroupingKeys().containsAll(partitionSymbols);
        if (!coveredByGroupingKeys) {
            return Result.empty();
        }
    }

    // Plans that rely on a pre-computed hash (on either node) are not supported yet.
    if (aggregationNode.getHashSymbol().isPresent()
            || exchangeNode.getPartitioningScheme().getHashColumn().isPresent()) {
        return Result.empty();
    }

    switch (aggregationNode.getStep()) {
        case PARTIAL:
            // Move the partial aggregation underneath each branch of the exchange.
            return Result.ofPlanNode(pushPartial(aggregationNode, exchangeNode, context));
        case SINGLE:
            // Replace the single-step aggregation with the result of split(...).
            return Result.ofPlanNode(split(aggregationNode, context));
        default:
            return Result.empty();
    }
}
Also used : Partitioning(io.trino.sql.planner.Partitioning) ExchangeNode(io.trino.sql.planner.plan.ExchangeNode) Symbol(io.trino.sql.planner.Symbol)

Example 2 with Partitioning

use of io.trino.sql.planner.Partitioning in project trino by trinodb.

the class PlanPrinter method formatFragment.

/**
 * Renders a single plan fragment as text.
 *
 * <p>The output consists of, in order: a {@code Fragment <id> [<partitioning>]} header, an optional
 * stage statistics line, the output layout, the output partitioning, the stage execution strategy,
 * and finally the text produced by a {@code PlanPrinter} for the fragment root.
 *
 * @param tableInfoSupplier handed to the {@code PlanPrinter} that prints the fragment root
 * @param dynamicFilterDomainStats handed to the {@code PlanPrinter} that prints the fragment root
 * @param valuePrinter used to render constant partitioning arguments; also handed to the {@code PlanPrinter}
 * @param fragment the fragment to print
 * @param stageInfo when present, a statistics line is added from its stage stats and tasks
 * @param planNodeStats handed to the {@code PlanPrinter} that prints the fragment root
 * @param verbose forwarded to {@code PlanPrinter.toText}
 * @param typeProvider handed to the {@code PlanPrinter} that prints the fragment root
 * @return the formatted fragment text
 */
private static String formatFragment(Function<TableScanNode, TableInfo> tableInfoSupplier, Map<DynamicFilterId, DynamicFilterDomainStats> dynamicFilterDomainStats, ValuePrinter valuePrinter, PlanFragment fragment, Optional<StageInfo> stageInfo, Optional<Map<PlanNodeId, PlanNodeStats>> planNodeStats, boolean verbose, TypeProvider typeProvider) {
    StringBuilder text = new StringBuilder();
    text.append(format("Fragment %s [%s]\n", fragment.getId(), fragment.getPartitioning()));

    if (stageInfo.isPresent()) {
        StageInfo stage = stageInfo.get();
        StageStats stageStats = stage.getStageStats();

        // Mean and standard deviation (dividing by the task count) of processed input positions per task.
        double meanPositions = stage.getTasks().stream()
                .mapToLong(task -> task.getStats().getProcessedInputPositions())
                .average()
                .orElse(Double.NaN);
        double sumOfSquares = stage.getTasks().stream()
                .mapToDouble(task -> Math.pow(task.getStats().getProcessedInputPositions() - meanPositions, 2))
                .sum();
        double deviationAcrossTasks = Math.sqrt(sumOfSquares / stage.getTasks().size());

        text.append(indentString(1));
        text.append(format(
                "CPU: %s, Scheduled: %s, Input: %s (%s); per task: avg.: %s std.dev.: %s, Output: %s (%s)\n",
                stageStats.getTotalCpuTime().convertToMostSuccinctTimeUnit(),
                stageStats.getTotalScheduledTime().convertToMostSuccinctTimeUnit(),
                formatPositions(stageStats.getProcessedInputPositions()),
                stageStats.getProcessedInputDataSize(),
                formatDouble(meanPositions),
                formatDouble(deviationAcrossTasks),
                formatPositions(stageStats.getOutputPositions()),
                stageStats.getOutputDataSize()));
    }

    PartitioningScheme scheme = fragment.getPartitioningScheme();
    text.append(indentString(1));
    text.append(format("Output layout: [%s]\n", Joiner.on(", ").join(scheme.getOutputLayout())));

    boolean replicateNullsAndAny = scheme.isReplicateNullsAndAny();

    // Constants are shown as "<type display name>(<value cast to varchar>)"; anything else as its column.
    List<String> renderedArguments = scheme.getPartitioning().getArguments().stream()
            .map(binding -> {
                if (!binding.isConstant()) {
                    return binding.getColumn().toString();
                }
                NullableValue value = binding.getConstant();
                String printable = valuePrinter.castToVarchar(value.getType(), value.getValue());
                return value.getType().getDisplayName() + "(" + printable + ")";
            })
            .collect(toImmutableList());

    text.append(indentString(1));
    // Both variants take the same three arguments; only the template differs.
    String partitioningTemplate = replicateNullsAndAny
            ? "Output partitioning: %s (replicate nulls and any) [%s]%s\n"
            : "Output partitioning: %s [%s]%s\n";
    text.append(format(
            partitioningTemplate,
            scheme.getPartitioning().getHandle(),
            Joiner.on(", ").join(renderedArguments),
            formatHash(scheme.getHashColumn())));

    text.append(indentString(1));
    text.append(format("Stage Execution Strategy: %s\n", fragment.getStageExecutionDescriptor().getStageExecutionStrategy()));

    text.append(new PlanPrinter(
            fragment.getRoot(),
            typeProvider,
            Optional.of(fragment.getStageExecutionDescriptor()),
            tableInfoSupplier,
            dynamicFilterDomainStats,
            valuePrinter,
            fragment.getStatsAndCosts(),
            planNodeStats).toText(verbose, 1));
    text.append("\n");
    return text.toString();
}
Also used : PlanFragment(io.trino.sql.planner.PlanFragment) Scope(io.trino.sql.planner.plan.ExchangeNode.Scope) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) AggregationValuePointer(io.trino.sql.planner.rowpattern.AggregationValuePointer) CorrelatedJoinNode(io.trino.sql.planner.plan.CorrelatedJoinNode) PlanNode(io.trino.sql.planner.plan.PlanNode) RemoteSourceNode(io.trino.sql.planner.plan.RemoteSourceNode) TypedSymbol(io.trino.sql.planner.planprinter.NodeRepresentation.TypedSymbol) DynamicFilters(io.trino.sql.DynamicFilters) PlanNodeId(io.trino.sql.planner.plan.PlanNodeId) Map(java.util.Map) TextRenderer.formatPositions(io.trino.sql.planner.planprinter.TextRenderer.formatPositions) OutputNode(io.trino.sql.planner.plan.OutputNode) TableScanNode(io.trino.sql.planner.plan.TableScanNode) PlanNodeStatsEstimate(io.trino.cost.PlanNodeStatsEstimate) ExplainAnalyzeNode(io.trino.sql.planner.plan.ExplainAnalyzeNode) Range(io.trino.spi.predicate.Range) Domain(io.trino.spi.predicate.Domain) StatisticsWriterNode(io.trino.sql.planner.plan.StatisticsWriterNode) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Assignments(io.trino.sql.planner.plan.Assignments) Set(java.util.Set) SemiJoinNode(io.trino.sql.planner.plan.SemiJoinNode) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) Collectors.joining(java.util.stream.Collectors.joining) IntersectNode(io.trino.sql.planner.plan.IntersectNode) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) SubPlan(io.trino.sql.planner.SubPlan) SymbolReference(io.trino.sql.tree.SymbolReference) TopNRankingNode(io.trino.sql.planner.plan.TopNRankingNode) PlanFragmentId(io.trino.sql.planner.plan.PlanFragmentId) ValuesNode(io.trino.sql.planner.plan.ValuesNode) Position(io.trino.sql.tree.SkipTo.Position) Joiner(com.google.common.base.Joiner) Session(io.trino.Session) 
TableExecuteNode(io.trino.sql.planner.plan.TableExecuteNode) NullableValue(io.trino.spi.predicate.NullableValue) LimitNode(io.trino.sql.planner.plan.LimitNode) StageStats(io.trino.execution.StageStats) PlanCostEstimate(io.trino.cost.PlanCostEstimate) StatsAndCosts(io.trino.cost.StatsAndCosts) DynamicFilterId(io.trino.sql.planner.plan.DynamicFilterId) ArrayList(java.util.ArrayList) RowNumberNode(io.trino.sql.planner.plan.RowNumberNode) ColumnHandle(io.trino.spi.connector.ColumnHandle) AggregationNode(io.trino.sql.planner.plan.AggregationNode) StageExecutionDescriptor.ungroupedExecution(io.trino.operator.StageExecutionDescriptor.ungroupedExecution) TextRenderer.formatDouble(io.trino.sql.planner.planprinter.TextRenderer.formatDouble) StageInfo.getAllStages(io.trino.execution.StageInfo.getAllStages) StageExecutionDescriptor(io.trino.operator.StageExecutionDescriptor) IrLabel(io.trino.sql.planner.rowpattern.ir.IrLabel) UnnestNode(io.trino.sql.planner.plan.UnnestNode) TableHandle(io.trino.metadata.TableHandle) ExceptNode(io.trino.sql.planner.plan.ExceptNode) GroupIdNode(io.trino.sql.planner.plan.GroupIdNode) OffsetNode(io.trino.sql.planner.plan.OffsetNode) TableFinishNode(io.trino.sql.planner.plan.TableFinishNode) SampleNode(io.trino.sql.planner.plan.SampleNode) IndexSourceNode(io.trino.sql.planner.plan.IndexSourceNode) GroupReference(io.trino.sql.planner.iterative.GroupReference) Aggregation(io.trino.sql.planner.plan.AggregationNode.Aggregation) TableWriterNode(io.trino.sql.planner.plan.TableWriterNode) DynamicFilters.extractDynamicFilters(io.trino.sql.DynamicFilters.extractDynamicFilters) DynamicFilterDomainStats(io.trino.server.DynamicFilterService.DynamicFilterDomainStats) Math.abs(java.lang.Math.abs) FilterNode(io.trino.sql.planner.plan.FilterNode) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) TableDeleteNode(io.trino.sql.planner.plan.TableDeleteNode) 
TextRenderer.indentString(io.trino.sql.planner.planprinter.TextRenderer.indentString) SpatialJoinNode(io.trino.sql.planner.plan.SpatialJoinNode) DeleteNode(io.trino.sql.planner.plan.DeleteNode) LogicalIndexPointer(io.trino.sql.planner.rowpattern.LogicalIndexPointer) SINGLE_DISTRIBUTION(io.trino.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION) WINDOW(io.trino.sql.tree.PatternRecognitionRelation.RowsPerMatch.WINDOW) JoinNode(io.trino.sql.planner.plan.JoinNode) AssignUniqueId(io.trino.sql.planner.plan.AssignUniqueId) FunctionCall(io.trino.sql.tree.FunctionCall) PlanNodeStatsSummarizer.aggregateStageStats(io.trino.sql.planner.planprinter.PlanNodeStatsSummarizer.aggregateStageStats) MarkDistinctNode(io.trino.sql.planner.plan.MarkDistinctNode) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) EnforceSingleRowNode(io.trino.sql.planner.plan.EnforceSingleRowNode) ExpressionTreeRewriter(io.trino.sql.tree.ExpressionTreeRewriter) Collection(java.util.Collection) IndexJoinNode(io.trino.sql.planner.plan.IndexJoinNode) Streams(com.google.common.collect.Streams) PatternRecognitionNode(io.trino.sql.planner.plan.PatternRecognitionNode) Collectors(java.util.stream.Collectors) ComparisonExpression(io.trino.sql.tree.ComparisonExpression) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) Objects(java.util.Objects) List(java.util.List) StatisticAggregationsDescriptor(io.trino.sql.planner.plan.StatisticAggregationsDescriptor) TableInfo(io.trino.execution.TableInfo) Entry(java.util.Map.Entry) Function.identity(java.util.function.Function.identity) ApplyNode(io.trino.sql.planner.plan.ApplyNode) Optional(java.util.Optional) ExchangeNode(io.trino.sql.planner.plan.ExchangeNode) Expression(io.trino.sql.tree.Expression) TableStatisticType(io.trino.spi.statistics.TableStatisticType) WindowNode(io.trino.sql.planner.plan.WindowNode) Arrays.stream(java.util.Arrays.stream) 
UnionNode(io.trino.sql.planner.plan.UnionNode) INNER(io.trino.sql.planner.plan.JoinNode.Type.INNER) Type(io.trino.spi.type.Type) Measure(io.trino.sql.planner.plan.PatternRecognitionNode.Measure) StatisticAggregations(io.trino.sql.planner.plan.StatisticAggregations) Partitioning(io.trino.sql.planner.Partitioning) PartitioningScheme(io.trino.sql.planner.PartitioningScheme) SortNode(io.trino.sql.planner.plan.SortNode) Function(java.util.function.Function) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) Objects.requireNonNull(java.util.Objects.requireNonNull) RefreshMaterializedViewNode(io.trino.sql.planner.plan.RefreshMaterializedViewNode) ExpressionRewriter(io.trino.sql.tree.ExpressionRewriter) LinkedList(java.util.LinkedList) ProjectNode(io.trino.sql.planner.plan.ProjectNode) ExpressionAndValuePointers(io.trino.sql.planner.rowpattern.LogicalIndexExtractor.ExpressionAndValuePointers) StageInfo(io.trino.execution.StageInfo) Symbol(io.trino.sql.planner.Symbol) ExpressionUtils.combineConjunctsWithDuplicates(io.trino.sql.ExpressionUtils.combineConjunctsWithDuplicates) RowsPerMatch(io.trino.sql.tree.PatternRecognitionRelation.RowsPerMatch) CaseFormat(com.google.common.base.CaseFormat) PlanVisitor(io.trino.sql.planner.plan.PlanVisitor) TopNNode(io.trino.sql.planner.plan.TopNNode) ScalarValuePointer(io.trino.sql.planner.rowpattern.ScalarValuePointer) TRUE_LITERAL(io.trino.sql.tree.BooleanLiteral.TRUE_LITERAL) TupleDomain(io.trino.spi.predicate.TupleDomain) OrderingScheme(io.trino.sql.planner.OrderingScheme) PlanNodeStatsAndCostSummary(io.trino.cost.PlanNodeStatsAndCostSummary) FunctionManager(io.trino.metadata.FunctionManager) UpdateNode(io.trino.sql.planner.plan.UpdateNode) QualifiedName(io.trino.sql.tree.QualifiedName) UPPER_UNDERSCORE(com.google.common.base.CaseFormat.UPPER_UNDERSCORE) Collectors.toList(java.util.stream.Collectors.toList) QueryStats(io.trino.execution.QueryStats) 
DistinctLimitNode(io.trino.sql.planner.plan.DistinctLimitNode) ValuePointer(io.trino.sql.planner.rowpattern.ValuePointer) Row(io.trino.sql.tree.Row) Metadata(io.trino.metadata.Metadata) TypeProvider(io.trino.sql.planner.TypeProvider) ResolvedFunction.extractFunctionName(io.trino.metadata.ResolvedFunction.extractFunctionName) StageStats(io.trino.execution.StageStats) PlanNodeStatsSummarizer.aggregateStageStats(io.trino.sql.planner.planprinter.PlanNodeStatsSummarizer.aggregateStageStats) PartitioningScheme(io.trino.sql.planner.PartitioningScheme) NullableValue(io.trino.spi.predicate.NullableValue) TextRenderer.indentString(io.trino.sql.planner.planprinter.TextRenderer.indentString)

Aggregations

Partitioning (io.trino.sql.planner.Partitioning)2 Symbol (io.trino.sql.planner.Symbol)2 ExchangeNode (io.trino.sql.planner.plan.ExchangeNode)2 CaseFormat (com.google.common.base.CaseFormat)1 UPPER_UNDERSCORE (com.google.common.base.CaseFormat.UPPER_UNDERSCORE)1 Joiner (com.google.common.base.Joiner)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 Verify.verify (com.google.common.base.Verify.verify)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Streams (com.google.common.collect.Streams)1 Duration (io.airlift.units.Duration)1 Session (io.trino.Session)1 PlanCostEstimate (io.trino.cost.PlanCostEstimate)1 PlanNodeStatsAndCostSummary (io.trino.cost.PlanNodeStatsAndCostSummary)1 PlanNodeStatsEstimate (io.trino.cost.PlanNodeStatsEstimate)1