Use of com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE in project presto by prestodb.
From the class TestLogicalPlanner, method testBroadcastCorrelatedSubqueryAvoidsRemoteExchangeBeforeAggregation.
/**
 * Plans two correlated-subquery queries with JOIN_DISTRIBUTION_TYPE set to BROADCAST and
 * FORCE_SINGLE_NODE_OUTPUT set to false, then checks that each plan
 * <ul>
 * <li>matches a join whose second source sits behind a remote streaming REPLICATE exchange,</li>
 * <li>contains exactly one remote exchange, and</li>
 * <li>contains exactly one streamable aggregation that groups on the BIGINT variable "unique".</li>
 * </ul>
 */
@Test
public void testBroadcastCorrelatedSubqueryAvoidsRemoteExchangeBeforeAggregation() {
    Session broadcastJoin = Session.builder(this.getQueryRunner().getDefaultSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name())
            .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false))
            .build();

    // make sure there is a remote exchange on the build side
    PlanMatchPattern firstJoinSource = anyTree(node(TableScanNode.class));
    PlanMatchPattern buildSideBehindRemoteExchange = anyTree(
            exchange(REMOTE_STREAMING, ExchangeNode.Type.REPLICATE, anyTree(node(TableScanNode.class))));
    PlanMatchPattern joinBuildSideWithRemoteExchange = anyTree(
            node(JoinNode.class, firstJoinSource, buildSideBehindRemoteExchange));

    // validates that there exists only one remote exchange
    Consumer<Plan> validateSingleRemoteExchange = plan -> assertEquals(
            countOfMatchingNodes(plan, node -> {
                if (!(node instanceof ExchangeNode)) {
                    return false;
                }
                return ((ExchangeNode) node).getScope().isRemote();
            }),
            1);

    // validates that there exists only one streamable aggregation grouped on "unique"
    VariableReferenceExpression uniqueVariable = new VariableReferenceExpression(Optional.empty(), "unique", BIGINT);
    Consumer<Plan> validateSingleStreamingAggregation = plan -> assertEquals(
            countOfMatchingNodes(plan, node -> {
                if (!(node instanceof AggregationNode)) {
                    return false;
                }
                AggregationNode aggregation = (AggregationNode) node;
                return aggregation.getGroupingKeys().contains(uniqueVariable) && aggregation.isStreamable();
            }),
            1);

    Consumer<Plan> validatePlan = validateSingleRemoteExchange.andThen(validateSingleStreamingAggregation);

    // region is unpartitioned, AssignUniqueId should provide satisfying partitioning for count(*) after LEFT JOIN
    String unpartitionedQuery = "SELECT (SELECT COUNT(*) FROM region r2 WHERE r2.regionkey > r1.regionkey) FROM region r1";
    assertPlanWithSession(unpartitionedQuery, broadcastJoin, false, joinBuildSideWithRemoteExchange, validatePlan);

    // orders is naturally partitioned, AssignUniqueId should not overwrite its natural partitioning
    String partitionedQuery = "SELECT COUNT(COUNT) "
            + "FROM (SELECT o1.orderkey orderkey, (SELECT COUNT(*) FROM orders o2 WHERE o2.orderkey > o1.orderkey) COUNT FROM orders o1) "
            + "GROUP BY orderkey";
    assertPlanWithSession(partitionedQuery, broadcastJoin, false, joinBuildSideWithRemoteExchange, validatePlan);
}
Use of com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE in project presto by prestodb.
From the class TestHiveIntegrationSmokeTest, method testMaterializedPartitioning.
/**
 * Smoke-tests materialized partitioning: runs aggregation, distinct, join, window-function and
 * union queries and asserts, for each one, how many remote (materialized) exchanges its plan
 * contains. Temporary tables created along the way are dropped in {@code finally} blocks.
 *
 * @param materializeExchangesSession session passed to every query whose materialized exchange
 *        count is asserted; it is also the base for the broadcast-join materializing session
 */
private void testMaterializedPartitioning(Session materializeExchangesSession) {
    // Simple smoke tests for materialized partitioning
    // Comprehensive testing is done by TestHiveDistributedAggregationsWithExchangeMaterialization, TestHiveDistributedQueriesWithExchangeMaterialization

    // simple aggregation
    String simpleAggregation = "SELECT orderkey, COUNT(*) lines FROM lineitem GROUP BY orderkey";
    assertQuery(materializeExchangesSession, simpleAggregation, assertRemoteMaterializedExchangesCount(1));

    // simple distinct
    String simpleDistinct = "SELECT distinct orderkey FROM lineitem";
    assertQuery(materializeExchangesSession, simpleDistinct, assertRemoteMaterializedExchangesCount(1));

    // more complex aggregation
    String complexAggregation = "SELECT custkey, orderstatus, COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey, orderstatus";
    assertQuery(materializeExchangesSession, complexAggregation, assertRemoteMaterializedExchangesCount(2));

    // mark distinct
    String markDistinct = "SELECT custkey, COUNT(DISTINCT orderstatus), COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey";
    assertQuery(
            materializeExchangesSession,
            markDistinct,
            assertRemoteMaterializedExchangesCount(3)
                    .andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(MarkDistinctNode.class::isInstance).matches())));

    // join
    String twoWayJoin = "SELECT * FROM (lineitem JOIN orders ON lineitem.orderkey = orders.orderkey) x";
    assertQuery(materializeExchangesSession, twoWayJoin, assertRemoteMaterializedExchangesCount(2));

    // 3-way join
    String ordersRowCount = "SELECT count(*) FROM orders";
    String threeWayJoin = "SELECT lineitem.orderkey, lineitem.comment, test_orders_part1.totalprice, test_orders_part2.comment ordercomment\n"
            + "FROM lineitem JOIN test_orders_part1\n"
            + "ON lineitem.orderkey = test_orders_part1.orderkey\n"
            + "JOIN test_orders_part2\n"
            + "ON lineitem.orderkey = test_orders_part2.orderkey";
    String threeWayJoinExpected = "SELECT lineitem.orderkey, lineitem.comment, orders.totalprice, orders.comment ordercomment\n"
            + "FROM lineitem JOIN orders\n"
            + "ON lineitem.orderkey = orders.orderkey";
    try {
        assertUpdate("CREATE TABLE test_orders_part1 AS SELECT orderkey, totalprice FROM orders", ordersRowCount);
        assertUpdate("CREATE TABLE test_orders_part2 AS SELECT orderkey, comment FROM orders", ordersRowCount);
        assertQuery(materializeExchangesSession, threeWayJoin, threeWayJoinExpected, assertRemoteMaterializedExchangesCount(3));
    }
    finally {
        assertUpdate("DROP TABLE IF EXISTS test_orders_part1");
        assertUpdate("DROP TABLE IF EXISTS test_orders_part2");
    }

    String lineitemRowCount = "SELECT count(*) from lineitem";
    try {
        // join a bucketed table with an unbucketed table
        // bucket count has to be different from materialized bucket number
        String createBucketedByOrderkey = "CREATE TABLE test_bucketed_lineitem1\n"
                + "WITH (bucket_count = 17, bucketed_by = ARRAY['orderkey']) AS\n"
                + "SELECT * FROM lineitem";
        assertUpdate(createBucketedByOrderkey, lineitemRowCount);

        // bucketed table as probe side
        assertQuery(
                materializeExchangesSession,
                "SELECT * FROM test_bucketed_lineitem1 JOIN orders ON test_bucketed_lineitem1.orderkey = orders.orderkey",
                "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey",
                assertRemoteMaterializedExchangesCount(1));

        // unbucketed table as probe side
        assertQuery(
                materializeExchangesSession,
                "SELECT * FROM orders JOIN test_bucketed_lineitem1 ON test_bucketed_lineitem1.orderkey = orders.orderkey",
                "SELECT * FROM orders JOIN lineitem ON lineitem.orderkey = orders.orderkey",
                assertRemoteMaterializedExchangesCount(1));

        // join a bucketed table with an unbucketed table; the join has constant pushdown
        // bucket count has to be different from materialized bucket number
        String createBucketedByPartAndSupp = "CREATE TABLE test_bucketed_lineitem2\n"
                + "WITH (bucket_count = 17, bucketed_by = ARRAY['partkey', 'suppkey']) AS\n"
                + "SELECT * FROM lineitem";
        assertUpdate(createBucketedByPartAndSupp, lineitemRowCount);

        // bucketed table as probe side
        String bucketedProbe = "SELECT * \n"
                + "FROM test_bucketed_lineitem2 JOIN partsupp\n"
                + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n"
                + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n"
                + "WHERE test_bucketed_lineitem2.suppkey = 42";
        String bucketedProbeExpected = "SELECT * \n"
                + "FROM lineitem JOIN partsupp\n"
                + "ON lineitem.partkey = partsupp.partkey AND\n"
                + "lineitem.suppkey = partsupp.suppkey\n"
                + "WHERE lineitem.suppkey = 42";
        assertQuery(materializeExchangesSession, bucketedProbe, bucketedProbeExpected, assertRemoteMaterializedExchangesCount(1));

        // unbucketed table as probe side
        String unbucketedProbe = "SELECT * \n"
                + "FROM partsupp JOIN test_bucketed_lineitem2\n"
                + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n"
                + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n"
                + "WHERE test_bucketed_lineitem2.suppkey = 42";
        String unbucketedProbeExpected = "SELECT * \n"
                + "FROM partsupp JOIN lineitem\n"
                + "ON lineitem.partkey = partsupp.partkey AND\n"
                + "lineitem.suppkey = partsupp.suppkey\n"
                + "WHERE lineitem.suppkey = 42";
        assertQuery(materializeExchangesSession, unbucketedProbe, unbucketedProbeExpected, assertRemoteMaterializedExchangesCount(1));
    }
    finally {
        assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem1");
        assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem2");
    }

    // Window functions
    // each query must keep a single materialized exchange and plan the named node type
    assertQuery(
            materializeExchangesSession,
            "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn > 5",
            "SELECT 41137",
            assertRemoteMaterializedExchangesCount(1)
                    .andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(WindowNode.class::isInstance).matches())));
    assertQuery(
            materializeExchangesSession,
            "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey) as rn FROM lineitem)",
            "SELECT 180782",
            assertRemoteMaterializedExchangesCount(1)
                    .andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(RowNumberNode.class::isInstance).matches())));
    assertQuery(
            materializeExchangesSession,
            "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn < 5",
            "SELECT 107455",
            assertRemoteMaterializedExchangesCount(1)
                    .andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(TopNRowNumberNode.class::isInstance).matches())));

    // union
    String unionQuery = "SELECT partkey, count(*), sum(cost) "
            + "FROM ( "
            + " SELECT partkey, CAST(extendedprice AS BIGINT) cost FROM lineitem "
            + " UNION ALL "
            + " SELECT partkey, CAST(supplycost AS BIGINT) cost FROM partsupp "
            + ") "
            + "GROUP BY partkey";
    assertQuery(materializeExchangesSession, unionQuery, assertRemoteMaterializedExchangesCount(2));

    // union over aggregation + broadcast join
    Session broadcastJoinMaterializeExchangesSession = Session.builder(materializeExchangesSession)
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name())
            .build();
    Session broadcastJoinStreamingExchangesSession = Session.builder(getSession())
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name())
            .build();

    // compatible union partitioning
    String compatiblePartitioningQuery = "WITH union_of_aggregations as ( "
            + " SELECT "
            + " partkey, "
            + " count(*) AS value "
            + " FROM lineitem "
            + " GROUP BY "
            + " 1 "
            + " UNION ALL "
            + " SELECT "
            + " partkey, "
            + " sum(suppkey) AS value "
            + " FROM lineitem "
            + " GROUP BY "
            + " 1 "
            + ") "
            + "SELECT "
            + " sum(a.value + b.value) "
            + "FROM union_of_aggregations a, union_of_aggregations b "
            + "WHERE a.partkey = b.partkey ";
    assertQuery(
            broadcastJoinMaterializeExchangesSession,
            compatiblePartitioningQuery,
            "SELECT 12404708",
            assertRemoteExchangesCount(6).andThen(assertRemoteMaterializedExchangesCount(4)));

    // incompatible union partitioning, requires an extra remote exchange for build and probe
    String incompatiblePartitioningQuery = "WITH union_of_aggregations as ( "
            + " SELECT "
            + " partkey, "
            + " count(*) as value "
            + " FROM lineitem "
            + " GROUP BY "
            + " 1 "
            + " UNION ALL "
            + " SELECT "
            + " partkey, "
            + " suppkey as value "
            + " FROM lineitem "
            + " GROUP BY "
            + " 1, 2 "
            + ") "
            + "SELECT "
            + " sum(a.value + b.value) "
            + "FROM union_of_aggregations a, union_of_aggregations b "
            + "WHERE a.partkey = b.partkey ";

    // system partitioning handle is always compatible
    assertQuery(broadcastJoinStreamingExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(6));

    // hive partitioning handle is incompatible
    assertQuery(
            broadcastJoinMaterializeExchangesSession,
            incompatiblePartitioningQuery,
            "SELECT 4639006",
            assertRemoteExchangesCount(8).andThen(assertRemoteMaterializedExchangesCount(4)));
}
Use of com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE in project presto by prestodb.
From the class TestDetermineJoinDistributionType, method testChoosesLeftWhenCriteriaEmpty.
/**
 * Feeds a RIGHT join with no equi-join criteria (valuesA: 100,000 rows, valuesB: 10 rows) to the
 * rule under test with JOIN_DISTRIBUTION_TYPE = AUTOMATIC and JOIN_MAX_BROADCAST_TABLE_SIZE = 1PB,
 * and expects a REPLICATED LEFT join with no criteria whose sources are aliased B1 then A1.
 */
@Test
public void testChoosesLeftWhenCriteriaEmpty() {
    // Plain digit grouping: these are the same values as the previous 1_000__00 and 1_0 spellings,
    // whose irregular underscores hid the actual magnitudes.
    int aRows = 100_000;
    int bRows = 10;
    assertDetermineJoinDistributionType(new CostComparator(75, 10, 15))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "1PB")
            .overrideStats("valuesA", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(aRows)
                    .addVariableStatistics(ImmutableMap.of(
                            new VariableReferenceExpression(Optional.empty(), "A1", BIGINT),
                            new VariableStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            .overrideStats("valuesB", PlanNodeStatsEstimate.builder()
                    .setOutputRowCount(bRows)
                    .addVariableStatistics(ImmutableMap.of(
                            new VariableReferenceExpression(Optional.empty(), "B1", BIGINT),
                            new VariableStatsEstimate(0, 100, 0, 640000, 100)))
                    .build())
            // input plan: valuesA RIGHT JOIN valuesB with an empty criteria list
            .on(p -> p.join(
                    RIGHT,
                    p.values(new PlanNodeId("valuesA"), aRows, p.variable("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.variable("B1", BIGINT)),
                    ImmutableList.of(),
                    ImmutableList.of(p.variable("A1", BIGINT), p.variable("B1", BIGINT)),
                    Optional.empty()))
            // expected plan: LEFT join, REPLICATED, sources listed as B1 then A1
            .matches(join(
                    LEFT,
                    ImmutableList.of(),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("B1", 0)),
                    values(ImmutableMap.of("A1", 0))));
}
Use of com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE in project presto by prestodb.
From the class TestDetermineJoinDistributionType, method testFlipAndReplicateRightOuterJoinWhenJoinCardinalityUnknown.
/**
 * Feeds a RIGHT equi-join (valuesA: 10 rows, valuesB: 1,000,000 rows) whose variable statistics
 * are unknown to the rule under test with JOIN_DISTRIBUTION_TYPE = AUTOMATIC, and expects a
 * REPLICATED LEFT join.
 */
@Test
public void testFlipAndReplicateRightOuterJoinWhenJoinCardinalityUnknown() {
    int aRows = 10;
    int bRows = 1_000_000;

    // Row counts are known, but per-variable statistics are deliberately unknown.
    PlanNodeStatsEstimate valuesAStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(aRows)
            .addVariableStatistics(ImmutableMap.of(
                    new VariableReferenceExpression(Optional.empty(), "A1", BIGINT),
                    VariableStatsEstimate.unknown()))
            .build();
    PlanNodeStatsEstimate valuesBStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(bRows)
            .addVariableStatistics(ImmutableMap.of(
                    new VariableReferenceExpression(Optional.empty(), "B1", BIGINT),
                    VariableStatsEstimate.unknown()))
            .build();

    // NOTE(review): the expected pattern keeps the aliases in A1, B1 order even though the join
    // type flips from RIGHT to LEFT — confirm how values(...) binds aliases to the matched sources.
    assertDetermineJoinDistributionType(new CostComparator(75, 10, 15))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", valuesAStats)
            .overrideStats("valuesB", valuesBStats)
            .on(p -> p.join(
                    RIGHT,
                    p.values(new PlanNodeId("valuesA"), aRows, p.variable("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.variable("B1", BIGINT)),
                    ImmutableList.of(new JoinNode.EquiJoinClause(p.variable("A1", BIGINT), p.variable("B1", BIGINT))),
                    ImmutableList.of(p.variable("A1", BIGINT), p.variable("B1", BIGINT)),
                    Optional.empty()))
            .matches(join(
                    LEFT,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
Use of com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE in project presto by prestodb.
From the class TestDetermineJoinDistributionType, method testReplicateLeftOuterJoin.
/**
 * Feeds a LEFT equi-join (valuesA: 10,000 rows, valuesB: 10 rows) to the rule under test with
 * JOIN_DISTRIBUTION_TYPE = AUTOMATIC and expects the join to stay a LEFT join on A1 = B1 with
 * REPLICATED distribution.
 */
@Test
public void testReplicateLeftOuterJoin() {
    int aRows = 10_000;
    int bRows = 10;

    PlanNodeStatsEstimate valuesAStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(aRows)
            .addVariableStatistics(ImmutableMap.of(
                    new VariableReferenceExpression(Optional.empty(), "A1", BIGINT),
                    new VariableStatsEstimate(0, 100, 0, 640000, 100)))
            .build();
    PlanNodeStatsEstimate valuesBStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(bRows)
            .addVariableStatistics(ImmutableMap.of(
                    new VariableReferenceExpression(Optional.empty(), "B1", BIGINT),
                    new VariableStatsEstimate(0, 100, 0, 640000, 100)))
            .build();

    assertDetermineJoinDistributionType(new CostComparator(75, 10, 15))
            .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.AUTOMATIC.name())
            .overrideStats("valuesA", valuesAStats)
            .overrideStats("valuesB", valuesBStats)
            // input plan: valuesA LEFT JOIN valuesB ON A1 = B1
            .on(p -> p.join(
                    LEFT,
                    p.values(new PlanNodeId("valuesA"), aRows, p.variable("A1", BIGINT)),
                    p.values(new PlanNodeId("valuesB"), bRows, p.variable("B1", BIGINT)),
                    ImmutableList.of(new JoinNode.EquiJoinClause(p.variable("A1", BIGINT), p.variable("B1", BIGINT))),
                    ImmutableList.of(p.variable("A1", BIGINT), p.variable("B1", BIGINT)),
                    Optional.empty()))
            // expected plan: same join shape, distribution resolved to REPLICATED
            .matches(join(
                    LEFT,
                    ImmutableList.of(equiJoinClause("A1", "B1")),
                    Optional.empty(),
                    Optional.of(REPLICATED),
                    values(ImmutableMap.of("A1", 0)),
                    values(ImmutableMap.of("B1", 0))));
}
Aggregations