Use of io.trino.sql.planner.plan.ExchangeNode in project trino by trinodb.
Class PushRemoteExchangeThroughAssignUniqueId, method apply.
@Override
public Result apply(ExchangeNode node, Captures captures, Context context)
{
    checkArgument(node.getOrderingScheme().isEmpty(), "Merge exchange over AssignUniqueId not supported");

    AssignUniqueId assignUniqueId = captures.get(ASSIGN_UNIQUE_ID);
    PartitioningScheme partitioningScheme = node.getPartitioningScheme();
    if (partitioningScheme.getPartitioning().getColumns().contains(assignUniqueId.getIdColumn())) {
        // The unique-id column is used by the exchange's partitioning.
        // Hence, AssignUniqueId node has to stay below the exchange node.
        return Result.empty();
    }

    return Result.ofPlanNode(new AssignUniqueId(
            assignUniqueId.getId(),
            new ExchangeNode(
                    node.getId(),
                    node.getType(),
                    node.getScope(),
                    new PartitioningScheme(
                            partitioningScheme.getPartitioning(),
                            removeSymbol(partitioningScheme.getOutputLayout(), assignUniqueId.getIdColumn()),
                            partitioningScheme.getHashColumn(),
                            partitioningScheme.isReplicateNullsAndAny(),
                            partitioningScheme.getBucketToPartition()),
                    ImmutableList.of(assignUniqueId.getSource()),
                    ImmutableList.of(removeSymbol(getOnlyElement(node.getInputs()), assignUniqueId.getIdColumn())),
                    Optional.empty()),
            assignUniqueId.getIdColumn()));
}
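The apply method above relies on a removeSymbol helper to drop the unique-id column from the exchange's output layout and input symbols before the AssignUniqueId node is recreated on top. A minimal sketch of what such a helper could look like (assumed here for illustration; the helper in the actual Trino class may be written differently):

private static List<Symbol> removeSymbol(List<Symbol> symbols, Symbol symbolToRemove)
{
    // keep every symbol except the AssignUniqueId column that is being moved above the exchange
    return symbols.stream()
            .filter(symbol -> !symbolToRemove.equals(symbol))
            .collect(Collectors.toList());
}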
Use of io.trino.sql.planner.plan.ExchangeNode in project trino by trinodb.
Class PushPartialAggregationThroughExchange, method apply.
@Override
public Result apply(AggregationNode aggregationNode, Captures captures, Context context)
{
    ExchangeNode exchangeNode = captures.get(EXCHANGE_NODE);
    boolean decomposable = aggregationNode.isDecomposable(plannerContext.getMetadata());

    if (aggregationNode.getStep() == SINGLE
            && aggregationNode.hasEmptyGroupingSet()
            && aggregationNode.hasNonEmptyGroupingSet()
            && exchangeNode.getType() == REPARTITION) {
        // single-step aggregation w/ empty grouping sets in a partitioned stage, so we need a partial that will produce
        // the default intermediates for the empty grouping set that will be routed to the appropriate final aggregation.
        // TODO: technically, AddExchanges generates a broken plan that this rule "fixes"
        checkState(decomposable, "Distributed aggregation with empty grouping set requires partial but functions are not decomposable");
        return Result.ofPlanNode(split(aggregationNode, context));
    }

    if (!decomposable || !preferPartialAggregation(context.getSession())) {
        return Result.empty();
    }

    // partial aggregation can only be pushed through an exchange that doesn't change
    // the cardinality of the stream (i.e., gather or repartition)
    if ((exchangeNode.getType() != GATHER && exchangeNode.getType() != REPARTITION)
            || exchangeNode.getPartitioningScheme().isReplicateNullsAndAny()) {
        return Result.empty();
    }
    if (exchangeNode.getType() == REPARTITION) {
        // if partitioning columns are not a subset of grouping keys,
        // we can't push this through
        List<Symbol> partitioningColumns = exchangeNode.getPartitioningScheme().getPartitioning().getArguments().stream()
                .filter(Partitioning.ArgumentBinding::isVariable)
                .map(Partitioning.ArgumentBinding::getColumn)
                .collect(Collectors.toList());
        if (!aggregationNode.getGroupingKeys().containsAll(partitioningColumns)) {
            return Result.empty();
        }
    }

    // currently, we only support plans that don't use pre-computed hash functions
    if (aggregationNode.getHashSymbol().isPresent() || exchangeNode.getPartitioningScheme().getHashColumn().isPresent()) {
        return Result.empty();
    }
    switch (aggregationNode.getStep()) {
        case SINGLE:
            // Split it into a FINAL on top of a PARTIAL, so that this rule can then push the PARTIAL through the exchange
            return Result.ofPlanNode(split(aggregationNode, context));
        case PARTIAL:
            // Push it underneath each branch of the exchange
            return Result.ofPlanNode(pushPartial(aggregationNode, exchangeNode, context));
        default:
            return Result.empty();
    }
}
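A rule like this only runs once the pattern matcher has bound the exchange below the aggregation to the EXCHANGE_NODE capture used at the top of apply. A rough sketch of how such a capture is typically declared with Trino's matching API (the names below follow Trino's Patterns/Capture helpers, but the exact constraints of the real PATTERN are omitted and assumed):

private static final Capture<ExchangeNode> EXCHANGE_NODE = Capture.newCapture();

private static final Pattern<AggregationNode> PATTERN = Patterns.aggregation()
        .with(Patterns.source().matching(
                Patterns.exchange().capturedAs(EXCHANGE_NODE)));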
Use of io.trino.sql.planner.plan.ExchangeNode in project trino by trinodb.
Class PushPartialAggregationThroughExchange, method pushPartial.
private PlanNode pushPartial(AggregationNode aggregation, ExchangeNode exchange, Context context)
{
    List<PlanNode> partials = new ArrayList<>();
    for (int i = 0; i < exchange.getSources().size(); i++) {
        PlanNode source = exchange.getSources().get(i);

        SymbolMapper.Builder mappingsBuilder = SymbolMapper.builder();
        for (int outputIndex = 0; outputIndex < exchange.getOutputSymbols().size(); outputIndex++) {
            Symbol output = exchange.getOutputSymbols().get(outputIndex);
            Symbol input = exchange.getInputs().get(i).get(outputIndex);
            if (!output.equals(input)) {
                mappingsBuilder.put(output, input);
            }
        }

        SymbolMapper symbolMapper = mappingsBuilder.build();
        AggregationNode mappedPartial = symbolMapper.map(aggregation, source, context.getIdAllocator().getNextId());

        Assignments.Builder assignments = Assignments.builder();
        for (Symbol output : aggregation.getOutputSymbols()) {
            Symbol input = symbolMapper.map(output);
            assignments.put(output, input.toSymbolReference());
        }
        partials.add(new ProjectNode(context.getIdAllocator().getNextId(), mappedPartial, assignments.build()));
    }

    for (PlanNode node : partials) {
        verify(aggregation.getOutputSymbols().equals(node.getOutputSymbols()));
    }
    // Since this exchange source is now guaranteed to have the same symbols as the inputs to the partial
    // aggregation, we don't need to rewrite symbols in the partitioning function
    PartitioningScheme partitioning = new PartitioningScheme(
            exchange.getPartitioningScheme().getPartitioning(),
            aggregation.getOutputSymbols(),
            exchange.getPartitioningScheme().getHashColumn(),
            exchange.getPartitioningScheme().isReplicateNullsAndAny(),
            exchange.getPartitioningScheme().getBucketToPartition());
    return new ExchangeNode(
            context.getIdAllocator().getNextId(),
            exchange.getType(),
            exchange.getScope(),
            partitioning,
            partials,
            ImmutableList.copyOf(Collections.nCopies(partials.size(), aggregation.getOutputSymbols())),
            Optional.empty());
}
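To make the per-source rewriting concrete, the mappingsBuilder loop above records, for every position, which input symbol of a given exchange source corresponds to each exchange output. A toy illustration with hypothetical symbol names (sum as the exchange output, sum_0 as the matching input of one source; not taken from a real plan):

SymbolMapper.Builder mappingsBuilder = SymbolMapper.builder();
mappingsBuilder.put(new Symbol("sum"), new Symbol("sum_0")); // exchange output -> this source's input
SymbolMapper symbolMapper = mappingsBuilder.build();
Symbol mapped = symbolMapper.map(new Symbol("sum")); // yields sum_0

The cloned partial aggregation below the exchange then refers to sum_0, and the identity ProjectNode on top renames it back to sum, so every branch exposes exactly the aggregation's original output symbols.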
Use of io.trino.sql.planner.plan.ExchangeNode in project trino by trinodb.
Class TestLogicalPlanner, method testUsesDistributedJoinIfNaturallyPartitionedOnProbeSymbols.
@Test
public void testUsesDistributedJoinIfNaturallyPartitionedOnProbeSymbols()
{
    Session broadcastJoin = Session.builder(this.getQueryRunner().getDefaultSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name())
            .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false))
            .setSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.toString(false))
            .build();

    // replicated join with naturally partitioned and distributed probe side is rewritten to partitioned join
    assertPlanWithSession(
            "SELECT r1.regionkey FROM (SELECT regionkey FROM region GROUP BY regionkey) r1, region r2 WHERE r2.regionkey = r1.regionkey",
            broadcastJoin,
            false,
            anyTree(join(INNER,
                    ImmutableList.of(equiJoinClause("LEFT_REGIONKEY", "RIGHT_REGIONKEY")),
                    Optional.empty(),
                    Optional.of(PARTITIONED),
                    // the only remote exchange in probe side should be below aggregation
                    aggregation(ImmutableMap.of(),
                            anyTree(exchange(REMOTE, REPARTITION,
                                    anyTree(tableScan("region", ImmutableMap.of("LEFT_REGIONKEY", "regionkey")))))),
                    anyTree(exchange(REMOTE, REPARTITION,
                            tableScan("region", ImmutableMap.of("RIGHT_REGIONKEY", "regionkey")))))),
            // make sure there are only two remote exchanges (one in probe and one in build side)
            plan -> assertEquals(countOfMatchingNodes(plan, node -> node instanceof ExchangeNode && ((ExchangeNode) node).getScope() == REMOTE), 2));

    // replicated join is preserved if probe side is single node
    assertPlanWithSession(
            "SELECT * FROM (VALUES 1, 2, 3) t(a), region r WHERE r.regionkey = t.a",
            broadcastJoin,
            false,
            anyTree(node(JoinNode.class,
                    anyTree(node(ValuesNode.class)),
                    anyTree(exchange(REMOTE, GATHER, node(TableScanNode.class))))));

    // replicated join is preserved if there are no equality criteria
    assertPlanWithSession(
            "SELECT * FROM (SELECT regionkey FROM region GROUP BY regionkey) r1, region r2 WHERE r2.regionkey > r1.regionkey",
            broadcastJoin,
            false,
            anyTree(join(INNER,
                    ImmutableList.of(),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    anyTree(node(TableScanNode.class)),
                    anyTree(exchange(REMOTE, REPLICATE, node(TableScanNode.class))))));
}
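The last assertion of the first query uses a countOfMatchingNodes helper to count the remote exchanges in the final plan. A plausible sketch of such a helper built on Trino's PlanNodeSearcher (an assumption about how the test utility is implemented, not the verbatim source):

private static int countOfMatchingNodes(Plan plan, Predicate<PlanNode> predicate)
{
    // walk the whole plan tree and count the nodes matching the predicate
    return PlanNodeSearcher.searchFrom(plan.getRoot()).where(predicate).count();
}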