use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.
the class TransformCorrelatedScalarSubquery method apply.
@Override
public Result apply(LateralJoinNode lateralJoinNode, Captures captures, Context context) {
PlanNode subquery = context.getLookup().resolve(lateralJoinNode.getSubquery());
if (!searchFrom(subquery, context.getLookup()).where(EnforceSingleRowNode.class::isInstance).recurseOnlyWhen(ProjectNode.class::isInstance).matches()) {
return Result.empty();
}
PlanNode rewrittenSubquery = searchFrom(subquery, context.getLookup()).where(EnforceSingleRowNode.class::isInstance).recurseOnlyWhen(ProjectNode.class::isInstance).removeFirst();
if (isAtMostScalar(rewrittenSubquery, context.getLookup())) {
return Result.ofPlanNode(new LateralJoinNode(lateralJoinNode.getSourceLocation(), context.getIdAllocator().getNextId(), lateralJoinNode.getInput(), rewrittenSubquery, lateralJoinNode.getCorrelation(), lateralJoinNode.getType(), lateralJoinNode.getOriginSubqueryError()));
}
VariableReferenceExpression unique = context.getVariableAllocator().newVariable("unique", BIGINT);
LateralJoinNode rewrittenLateralJoinNode = new LateralJoinNode(lateralJoinNode.getSourceLocation(), context.getIdAllocator().getNextId(), new AssignUniqueId(lateralJoinNode.getSourceLocation(), context.getIdAllocator().getNextId(), lateralJoinNode.getInput(), unique), rewrittenSubquery, lateralJoinNode.getCorrelation(), lateralJoinNode.getType(), lateralJoinNode.getOriginSubqueryError());
VariableReferenceExpression isDistinct = context.getVariableAllocator().newVariable("is_distinct", BooleanType.BOOLEAN);
MarkDistinctNode markDistinctNode = new MarkDistinctNode(rewrittenLateralJoinNode.getSourceLocation(), context.getIdAllocator().getNextId(), rewrittenLateralJoinNode, isDistinct, rewrittenLateralJoinNode.getInput().getOutputVariables(), Optional.empty());
FilterNode filterNode = new FilterNode(markDistinctNode.getSourceLocation(), context.getIdAllocator().getNextId(), markDistinctNode, castToRowExpression(new SimpleCaseExpression(createSymbolReference(isDistinct), ImmutableList.of(new WhenClause(TRUE_LITERAL, TRUE_LITERAL)), Optional.of(new Cast(new FunctionCall(QualifiedName.of("fail"), ImmutableList.of(new LongLiteral(Integer.toString(SUBQUERY_MULTIPLE_ROWS.toErrorCode().getCode())), new StringLiteral("Scalar sub-query has returned multiple rows"))), BOOLEAN)))));
return Result.ofPlanNode(new ProjectNode(context.getIdAllocator().getNextId(), filterNode, identityAssignmentsAsSymbolReferences(lateralJoinNode.getOutputVariables())));
}
use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.
the class TestHiveIntegrationSmokeTest method testMaterializedPartitioning.
private void testMaterializedPartitioning(Session materializeExchangesSession) {
// Simple smoke tests for materialized partitioning
// Comprehensive testing is done by TestHiveDistributedAggregationsWithExchangeMaterialization, TestHiveDistributedQueriesWithExchangeMaterialization
// simple aggregation
assertQuery(materializeExchangesSession, "SELECT orderkey, COUNT(*) lines FROM lineitem GROUP BY orderkey", assertRemoteMaterializedExchangesCount(1));
// simple distinct
assertQuery(materializeExchangesSession, "SELECT distinct orderkey FROM lineitem", assertRemoteMaterializedExchangesCount(1));
// more complex aggregation
assertQuery(materializeExchangesSession, "SELECT custkey, orderstatus, COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey, orderstatus", assertRemoteMaterializedExchangesCount(2));
// mark distinct
assertQuery(materializeExchangesSession, "SELECT custkey, COUNT(DISTINCT orderstatus), COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey", assertRemoteMaterializedExchangesCount(3).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof MarkDistinctNode).matches())));
// join
assertQuery(materializeExchangesSession, "SELECT * FROM (lineitem JOIN orders ON lineitem.orderkey = orders.orderkey) x", assertRemoteMaterializedExchangesCount(2));
// 3-way join
try {
assertUpdate("CREATE TABLE test_orders_part1 AS SELECT orderkey, totalprice FROM orders", "SELECT count(*) FROM orders");
assertUpdate("CREATE TABLE test_orders_part2 AS SELECT orderkey, comment FROM orders", "SELECT count(*) FROM orders");
assertQuery(materializeExchangesSession, "SELECT lineitem.orderkey, lineitem.comment, test_orders_part1.totalprice, test_orders_part2.comment ordercomment\n" + "FROM lineitem JOIN test_orders_part1\n" + "ON lineitem.orderkey = test_orders_part1.orderkey\n" + "JOIN test_orders_part2\n" + "ON lineitem.orderkey = test_orders_part2.orderkey", "SELECT lineitem.orderkey, lineitem.comment, orders.totalprice, orders.comment ordercomment\n" + "FROM lineitem JOIN orders\n" + "ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(3));
} finally {
assertUpdate("DROP TABLE IF EXISTS test_orders_part1");
assertUpdate("DROP TABLE IF EXISTS test_orders_part2");
}
try {
// join a bucketed table with an unbucketed table
assertUpdate(// bucket count has to be different from materialized bucket number
"CREATE TABLE test_bucketed_lineitem1\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['orderkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
// bucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * FROM test_bucketed_lineitem1 JOIN orders ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
// unbucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * FROM orders JOIN test_bucketed_lineitem1 ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM orders JOIN lineitem ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
// join a bucketed table with an unbucketed table; the join has constant pushdown
assertUpdate(// bucket count has to be different from materialized bucket number
"CREATE TABLE test_bucketed_lineitem2\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['partkey', 'suppkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
// bucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM test_bucketed_lineitem2 JOIN partsupp\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM lineitem JOIN partsupp\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
// unbucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM partsupp JOIN test_bucketed_lineitem2\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM partsupp JOIN lineitem\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
} finally {
assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem1");
assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem2");
}
// Window functions
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn > 5", "SELECT 41137", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof WindowNode).matches())));
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey) as rn FROM lineitem)", "SELECT 180782", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof RowNumberNode).matches())));
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn < 5", "SELECT 107455", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof TopNRowNumberNode).matches())));
// union
assertQuery(materializeExchangesSession, "SELECT partkey, count(*), sum(cost) " + "FROM ( " + " SELECT partkey, CAST(extendedprice AS BIGINT) cost FROM lineitem " + " UNION ALL " + " SELECT partkey, CAST(supplycost AS BIGINT) cost FROM partsupp " + ") " + "GROUP BY partkey", assertRemoteMaterializedExchangesCount(2));
// union over aggregation + broadcast join
Session broadcastJoinMaterializeExchangesSession = Session.builder(materializeExchangesSession).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
Session broadcastJoinStreamingExchangesSession = Session.builder(getSession()).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
// compatible union partitioning
assertQuery(broadcastJoinMaterializeExchangesSession, "WITH union_of_aggregations as ( " + " SELECT " + " partkey, " + " count(*) AS value " + " FROM lineitem " + " GROUP BY " + " 1 " + " UNION ALL " + " SELECT " + " partkey, " + " sum(suppkey) AS value " + " FROM lineitem " + " GROUP BY " + " 1 " + ") " + "SELECT " + " sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b " + "WHERE a.partkey = b.partkey ", "SELECT 12404708", assertRemoteExchangesCount(6).andThen(assertRemoteMaterializedExchangesCount(4)));
// incompatible union partitioning, requires an extra remote exchange for build and probe
String incompatiblePartitioningQuery = "WITH union_of_aggregations as ( " + " SELECT " + " partkey, " + " count(*) as value " + " FROM lineitem " + " GROUP BY " + " 1 " + " UNION ALL " + " SELECT " + " partkey, " + " suppkey as value " + " FROM lineitem " + " GROUP BY " + " 1, 2 " + ") " + "SELECT " + " sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b " + "WHERE a.partkey = b.partkey ";
// system partitioning handle is always compatible
assertQuery(broadcastJoinStreamingExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(6));
// hive partitioning handle is incompatible
assertQuery(broadcastJoinMaterializeExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(8).andThen(assertRemoteMaterializedExchangesCount(4)));
}
use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.
the class MultipleDistinctAggregationToMarkDistinct method apply.
@Override
public Result apply(AggregationNode parent, Captures captures, Context context) {
if (!SystemSessionProperties.useMarkDistinct(context.getSession())) {
return Result.empty();
}
// the distinct marker for the given set of input columns
Map<Set<VariableReferenceExpression>, VariableReferenceExpression> markers = new HashMap<>();
Map<VariableReferenceExpression, Aggregation> newAggregations = new HashMap<>();
PlanNode subPlan = parent.getSource();
for (Map.Entry<VariableReferenceExpression, Aggregation> entry : parent.getAggregations().entrySet()) {
Aggregation aggregation = entry.getValue();
if (aggregation.isDistinct() && !aggregation.getFilter().isPresent() && !aggregation.getMask().isPresent()) {
Set<VariableReferenceExpression> inputs = aggregation.getArguments().stream().map(OriginalExpressionUtils::castToExpression).map(context.getVariableAllocator()::toVariableReference).collect(toSet());
VariableReferenceExpression marker = markers.get(inputs);
if (marker == null) {
marker = context.getVariableAllocator().newVariable(Iterables.getLast(inputs).getName(), BOOLEAN, "distinct");
markers.put(inputs, marker);
ImmutableSet.Builder<VariableReferenceExpression> distinctVariables = ImmutableSet.<VariableReferenceExpression>builder().addAll(parent.getGroupingKeys()).addAll(inputs);
parent.getGroupIdVariable().ifPresent(distinctVariables::add);
subPlan = new MarkDistinctNode(subPlan.getSourceLocation(), context.getIdAllocator().getNextId(), subPlan, marker, ImmutableList.copyOf(distinctVariables.build()), Optional.empty());
}
// remove the distinct flag and set the distinct marker
newAggregations.put(entry.getKey(), new Aggregation(aggregation.getCall(), aggregation.getFilter(), aggregation.getOrderBy(), false, Optional.of(marker)));
} else {
newAggregations.put(entry.getKey(), aggregation);
}
}
return Result.ofPlanNode(new AggregationNode(parent.getSourceLocation(), parent.getId(), subPlan, newAggregations, parent.getGroupingSets(), ImmutableList.of(), parent.getStep(), parent.getHashVariable(), parent.getGroupIdVariable()));
}
use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.
the class MarkDistinctMatcher method detailMatches.
@Override
public MatchResult detailMatches(PlanNode node, StatsProvider stats, Session session, Metadata metadata, SymbolAliases symbolAliases) {
checkState(shapeMatches(node), "Plan testing framework error: shapeMatches returned false in detailMatches in %s", this.getClass().getName());
MarkDistinctNode markDistinctNode = (MarkDistinctNode) node;
if (!markDistinctNode.getHashVariable().map(variable -> new Symbol(variable.getName())).equals(hashSymbol.map(alias -> alias.toSymbol(symbolAliases)))) {
return NO_MATCH;
}
if (!markDistinctNode.getDistinctVariables().stream().map(VariableReferenceExpression::getName).map(Symbol::new).collect(toImmutableSet()).equals(distinctSymbols.stream().map(alias -> alias.toSymbol(symbolAliases)).collect(toImmutableSet()))) {
return NO_MATCH;
}
return match(markerSymbol.toString(), createSymbolReference(markDistinctNode.getMarkerVariable()));
}
Aggregations