use of com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.BROADCAST in project presto by prestodb.
the class TestHiveIntegrationSmokeTest method testMaterializedPartitioning.
private void testMaterializedPartitioning(Session materializeExchangesSession) {
// Simple smoke tests for materialized partitioning
// Comprehensive testing is done by TestHiveDistributedAggregationsWithExchangeMaterialization, TestHiveDistributedQueriesWithExchangeMaterialization
// simple aggregation
assertQuery(materializeExchangesSession, "SELECT orderkey, COUNT(*) lines FROM lineitem GROUP BY orderkey", assertRemoteMaterializedExchangesCount(1));
// simple distinct
assertQuery(materializeExchangesSession, "SELECT distinct orderkey FROM lineitem", assertRemoteMaterializedExchangesCount(1));
// more complex aggregation
assertQuery(materializeExchangesSession, "SELECT custkey, orderstatus, COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey, orderstatus", assertRemoteMaterializedExchangesCount(2));
// mark distinct
assertQuery(materializeExchangesSession, "SELECT custkey, COUNT(DISTINCT orderstatus), COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey", assertRemoteMaterializedExchangesCount(3).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof MarkDistinctNode).matches())));
// join
assertQuery(materializeExchangesSession, "SELECT * FROM (lineitem JOIN orders ON lineitem.orderkey = orders.orderkey) x", assertRemoteMaterializedExchangesCount(2));
// 3-way join
try {
assertUpdate("CREATE TABLE test_orders_part1 AS SELECT orderkey, totalprice FROM orders", "SELECT count(*) FROM orders");
assertUpdate("CREATE TABLE test_orders_part2 AS SELECT orderkey, comment FROM orders", "SELECT count(*) FROM orders");
assertQuery(materializeExchangesSession, "SELECT lineitem.orderkey, lineitem.comment, test_orders_part1.totalprice, test_orders_part2.comment ordercomment\n" + "FROM lineitem JOIN test_orders_part1\n" + "ON lineitem.orderkey = test_orders_part1.orderkey\n" + "JOIN test_orders_part2\n" + "ON lineitem.orderkey = test_orders_part2.orderkey", "SELECT lineitem.orderkey, lineitem.comment, orders.totalprice, orders.comment ordercomment\n" + "FROM lineitem JOIN orders\n" + "ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(3));
} finally {
assertUpdate("DROP TABLE IF EXISTS test_orders_part1");
assertUpdate("DROP TABLE IF EXISTS test_orders_part2");
}
try {
// join a bucketed table with an unbucketed table
assertUpdate(// bucket count has to be different from materialized bucket number
"CREATE TABLE test_bucketed_lineitem1\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['orderkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
// bucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * FROM test_bucketed_lineitem1 JOIN orders ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
// unbucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * FROM orders JOIN test_bucketed_lineitem1 ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM orders JOIN lineitem ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
// join a bucketed table with an unbucketed table; the join has constant pushdown
assertUpdate(// bucket count has to be different from materialized bucket number
"CREATE TABLE test_bucketed_lineitem2\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['partkey', 'suppkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
// bucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM test_bucketed_lineitem2 JOIN partsupp\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM lineitem JOIN partsupp\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
// unbucketed table as probe side
assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM partsupp JOIN test_bucketed_lineitem2\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM partsupp JOIN lineitem\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
} finally {
assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem1");
assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem2");
}
// Window functions
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn > 5", "SELECT 41137", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof WindowNode).matches())));
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey) as rn FROM lineitem)", "SELECT 180782", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof RowNumberNode).matches())));
assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn < 5", "SELECT 107455", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof TopNRowNumberNode).matches())));
// union
assertQuery(materializeExchangesSession, "SELECT partkey, count(*), sum(cost) " + "FROM ( " + " SELECT partkey, CAST(extendedprice AS BIGINT) cost FROM lineitem " + " UNION ALL " + " SELECT partkey, CAST(supplycost AS BIGINT) cost FROM partsupp " + ") " + "GROUP BY partkey", assertRemoteMaterializedExchangesCount(2));
// union over aggregation + broadcast join
Session broadcastJoinMaterializeExchangesSession = Session.builder(materializeExchangesSession).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
Session broadcastJoinStreamingExchangesSession = Session.builder(getSession()).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
// compatible union partitioning
assertQuery(broadcastJoinMaterializeExchangesSession, "WITH union_of_aggregations as ( " + " SELECT " + " partkey, " + " count(*) AS value " + " FROM lineitem " + " GROUP BY " + " 1 " + " UNION ALL " + " SELECT " + " partkey, " + " sum(suppkey) AS value " + " FROM lineitem " + " GROUP BY " + " 1 " + ") " + "SELECT " + " sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b " + "WHERE a.partkey = b.partkey ", "SELECT 12404708", assertRemoteExchangesCount(6).andThen(assertRemoteMaterializedExchangesCount(4)));
// incompatible union partitioning, requires an extra remote exchange for build and probe
String incompatiblePartitioningQuery = "WITH union_of_aggregations as ( " + " SELECT " + " partkey, " + " count(*) as value " + " FROM lineitem " + " GROUP BY " + " 1 " + " UNION ALL " + " SELECT " + " partkey, " + " suppkey as value " + " FROM lineitem " + " GROUP BY " + " 1, 2 " + ") " + "SELECT " + " sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b " + "WHERE a.partkey = b.partkey ";
// system partitioning handle is always compatible
assertQuery(broadcastJoinStreamingExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(6));
// hive partitioning handle is incompatible
assertQuery(broadcastJoinMaterializeExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(8).andThen(assertRemoteMaterializedExchangesCount(4)));
}
Aggregations