Search in sources :

Example 1 with MarkDistinctNode

use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.

In class TransformCorrelatedScalarSubquery, method apply:

/*
 * Rewrites a lateral join whose subquery contains an EnforceSingleRowNode (searched for from the
 * subquery root, descending only through ProjectNodes).
 *
 * - No such node found: the rule does not fire (Result.empty()).
 * - The subquery without that node is at most scalar (per isAtMostScalar): the lateral join is
 *   rebuilt over the stripped subquery.
 * - Otherwise: the input is wrapped in AssignUniqueId, a MarkDistinctNode is placed over the
 *   rebuilt join, and a filter evaluates
 *   CASE is_distinct WHEN TRUE THEN TRUE ELSE CAST(fail(...) AS BOOLEAN) END
 *   so that the "Scalar sub-query has returned multiple rows" failure is the ELSE branch.
 *   A final projection restores the original lateral join's output variables.
 */
@Override
public Result apply(LateralJoinNode lateralJoinNode, Captures captures, Context context) {
    PlanNode resolvedSubquery = context.getLookup().resolve(lateralJoinNode.getSubquery());

    boolean enforcesSingleRow = searchFrom(resolvedSubquery, context.getLookup())
            .where(EnforceSingleRowNode.class::isInstance)
            .recurseOnlyWhen(ProjectNode.class::isInstance)
            .matches();
    if (!enforcesSingleRow) {
        return Result.empty();
    }

    // Same search as above, this time removing the first EnforceSingleRowNode that was found.
    PlanNode strippedSubquery = searchFrom(resolvedSubquery, context.getLookup())
            .where(EnforceSingleRowNode.class::isInstance)
            .recurseOnlyWhen(ProjectNode.class::isInstance)
            .removeFirst();

    if (isAtMostScalar(strippedSubquery, context.getLookup())) {
        return Result.ofPlanNode(new LateralJoinNode(
                lateralJoinNode.getSourceLocation(),
                context.getIdAllocator().getNextId(),
                lateralJoinNode.getInput(),
                strippedSubquery,
                lateralJoinNode.getCorrelation(),
                lateralJoinNode.getType(),
                lateralJoinNode.getOriginSubqueryError()));
    }

    VariableReferenceExpression uniqueId = context.getVariableAllocator().newVariable("unique", BIGINT);

    // The AssignUniqueId node is deliberately built inline: arguments are evaluated left to
    // right, so the join's id is requested from the allocator before the AssignUniqueId's id.
    LateralJoinNode joinOverUniqueInput = new LateralJoinNode(
            lateralJoinNode.getSourceLocation(),
            context.getIdAllocator().getNextId(),
            new AssignUniqueId(
                    lateralJoinNode.getSourceLocation(),
                    context.getIdAllocator().getNextId(),
                    lateralJoinNode.getInput(),
                    uniqueId),
            strippedSubquery,
            lateralJoinNode.getCorrelation(),
            lateralJoinNode.getType(),
            lateralJoinNode.getOriginSubqueryError());

    VariableReferenceExpression distinctMarker = context.getVariableAllocator().newVariable("is_distinct", BooleanType.BOOLEAN);
    MarkDistinctNode markDistinct = new MarkDistinctNode(
            joinOverUniqueInput.getSourceLocation(),
            context.getIdAllocator().getNextId(),
            joinOverUniqueInput,
            distinctMarker,
            joinOverUniqueInput.getInput().getOutputVariables(),
            Optional.empty());

    // fail(<error code>, <message>) wrapped in a CAST to BOOLEAN so it can serve as the ELSE branch.
    FunctionCall failCall = new FunctionCall(
            QualifiedName.of("fail"),
            ImmutableList.of(
                    new LongLiteral(Integer.toString(SUBQUERY_MULTIPLE_ROWS.toErrorCode().getCode())),
                    new StringLiteral("Scalar sub-query has returned multiple rows")));
    SimpleCaseExpression singleRowCheck = new SimpleCaseExpression(
            createSymbolReference(distinctMarker),
            ImmutableList.of(new WhenClause(TRUE_LITERAL, TRUE_LITERAL)),
            Optional.of(new Cast(failCall, BOOLEAN)));

    FilterNode singleRowFilter = new FilterNode(
            markDistinct.getSourceLocation(),
            context.getIdAllocator().getNextId(),
            markDistinct,
            castToRowExpression(singleRowCheck));

    return Result.ofPlanNode(new ProjectNode(
            context.getIdAllocator().getNextId(),
            singleRowFilter,
            identityAssignmentsAsSymbolReferences(lateralJoinNode.getOutputVariables())));
}
Also used : Cast(com.facebook.presto.sql.tree.Cast) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) LongLiteral(com.facebook.presto.sql.tree.LongLiteral) FilterNode(com.facebook.presto.spi.plan.FilterNode) SimpleCaseExpression(com.facebook.presto.sql.tree.SimpleCaseExpression) WhenClause(com.facebook.presto.sql.tree.WhenClause) PlanNode(com.facebook.presto.spi.plan.PlanNode) AssignUniqueId(com.facebook.presto.sql.planner.plan.AssignUniqueId) LateralJoinNode(com.facebook.presto.sql.planner.plan.LateralJoinNode) StringLiteral(com.facebook.presto.sql.tree.StringLiteral) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) EnforceSingleRowNode(com.facebook.presto.sql.planner.plan.EnforceSingleRowNode) ProjectNode(com.facebook.presto.spi.plan.ProjectNode) FunctionCall(com.facebook.presto.sql.tree.FunctionCall)

Example 2 with MarkDistinctNode

use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.

In class TestHiveIntegrationSmokeTest, method testMaterializedPartitioning:

/*
 * Smoke test for materialized partitioning, run with the supplied session.
 *
 * Each query goes through assertQuery together with a plan assertion, usually
 * assertRemoteMaterializedExchangesCount(n) -- presumably the expected number of
 * REMOTE_MATERIALIZED exchanges in the plan; confirm against the helper. Some cases also
 * assert that a particular plan node type is present in the plan.
 *
 * The join sections create temporary tables and drop them in finally blocks, so statement
 * order within this method matters.
 */
private void testMaterializedPartitioning(Session materializeExchangesSession) {
    // Simple smoke tests for materialized partitioning
    // Comprehensive testing is done by TestHiveDistributedAggregationsWithExchangeMaterialization, TestHiveDistributedQueriesWithExchangeMaterialization
    // simple aggregation
    assertQuery(materializeExchangesSession, "SELECT orderkey, COUNT(*) lines FROM lineitem GROUP BY orderkey", assertRemoteMaterializedExchangesCount(1));
    // simple distinct
    assertQuery(materializeExchangesSession, "SELECT distinct orderkey FROM lineitem", assertRemoteMaterializedExchangesCount(1));
    // more complex aggregation
    assertQuery(materializeExchangesSession, "SELECT custkey, orderstatus, COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey, orderstatus", assertRemoteMaterializedExchangesCount(2));
    // mark distinct: two DISTINCT aggregates over different columns; the plan must contain a MarkDistinctNode
    assertQuery(materializeExchangesSession, "SELECT custkey, COUNT(DISTINCT orderstatus), COUNT(DISTINCT orderkey) FROM orders GROUP BY custkey", assertRemoteMaterializedExchangesCount(3).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof MarkDistinctNode).matches())));
    // join
    assertQuery(materializeExchangesSession, "SELECT * FROM (lineitem JOIN orders ON lineitem.orderkey = orders.orderkey) x", assertRemoteMaterializedExchangesCount(2));
    // 3-way join: orders is split into two temporary tables; the result is compared with the same join against orders
    try {
        assertUpdate("CREATE TABLE test_orders_part1 AS SELECT orderkey, totalprice FROM orders", "SELECT count(*) FROM orders");
        assertUpdate("CREATE TABLE test_orders_part2 AS SELECT orderkey, comment FROM orders", "SELECT count(*) FROM orders");
        assertQuery(materializeExchangesSession, "SELECT lineitem.orderkey, lineitem.comment, test_orders_part1.totalprice, test_orders_part2.comment ordercomment\n" + "FROM lineitem JOIN test_orders_part1\n" + "ON lineitem.orderkey = test_orders_part1.orderkey\n" + "JOIN test_orders_part2\n" + "ON lineitem.orderkey = test_orders_part2.orderkey", "SELECT lineitem.orderkey, lineitem.comment, orders.totalprice, orders.comment ordercomment\n" + "FROM lineitem JOIN orders\n" + "ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(3));
    } finally {
        // Always drop the temporary tables, even when an assertion above fails
        assertUpdate("DROP TABLE IF EXISTS test_orders_part1");
        assertUpdate("DROP TABLE IF EXISTS test_orders_part2");
    }
    try {
        // join a bucketed table with an unbucketed table
        assertUpdate(// bucket count has to be different from materialized bucket number
        "CREATE TABLE test_bucketed_lineitem1\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['orderkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
        // bucketed table as probe side
        assertQuery(materializeExchangesSession, "SELECT * FROM test_bucketed_lineitem1 JOIN orders ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM lineitem JOIN orders ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
        // unbucketed table as probe side
        assertQuery(materializeExchangesSession, "SELECT * FROM orders JOIN test_bucketed_lineitem1 ON test_bucketed_lineitem1.orderkey = orders.orderkey", "SELECT * FROM orders JOIN lineitem ON lineitem.orderkey = orders.orderkey", assertRemoteMaterializedExchangesCount(1));
        // join a bucketed table with an unbucketed table; the join has constant pushdown
        assertUpdate(// bucket count has to be different from materialized bucket number
        "CREATE TABLE test_bucketed_lineitem2\n" + "WITH (bucket_count = 17, bucketed_by = ARRAY['partkey', 'suppkey']) AS\n" + "SELECT * FROM lineitem", "SELECT count(*) from lineitem");
        // bucketed table as probe side
        assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM test_bucketed_lineitem2 JOIN partsupp\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM lineitem JOIN partsupp\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
        // unbucketed table as probe side
        assertQuery(materializeExchangesSession, "SELECT * \n" + "FROM partsupp JOIN test_bucketed_lineitem2\n" + "ON test_bucketed_lineitem2.partkey = partsupp.partkey AND\n" + "test_bucketed_lineitem2.suppkey = partsupp.suppkey\n" + "WHERE test_bucketed_lineitem2.suppkey = 42", "SELECT * \n" + "FROM partsupp JOIN lineitem\n" + "ON lineitem.partkey = partsupp.partkey AND\n" + "lineitem.suppkey = partsupp.suppkey\n" + "WHERE lineitem.suppkey = 42", assertRemoteMaterializedExchangesCount(1));
    } finally {
        // Always drop the bucketed temporary tables, even when an assertion above fails
        assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem1");
        assertUpdate("DROP TABLE IF EXISTS test_bucketed_lineitem2");
    }
    // Window functions: each query is additionally required to contain a specific plan node
    // (WindowNode, RowNumberNode and TopNRowNumberNode respectively)
    assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn > 5", "SELECT 41137", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof WindowNode).matches())));
    assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey) as rn FROM lineitem)", "SELECT 180782", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof RowNumberNode).matches())));
    assertQuery(materializeExchangesSession, "SELECT sum(rn) FROM (SELECT row_number() OVER(PARTITION BY orderkey ORDER BY linenumber) as rn FROM lineitem) WHERE rn < 5", "SELECT 107455", assertRemoteMaterializedExchangesCount(1).andThen(plan -> assertTrue(searchFrom(plan.getRoot()).where(node -> node instanceof TopNRowNumberNode).matches())));
    // union
    assertQuery(materializeExchangesSession, "SELECT partkey, count(*), sum(cost) " + "FROM ( " + "  SELECT partkey, CAST(extendedprice AS BIGINT) cost FROM lineitem " + "  UNION ALL " + "  SELECT partkey, CAST(supplycost AS BIGINT) cost FROM partsupp " + ") " + "GROUP BY partkey", assertRemoteMaterializedExchangesCount(2));
    // union over aggregation + broadcast join
    // Two sessions with JOIN_DISTRIBUTION_TYPE forced to BROADCAST: one derived from the
    // materializing session passed in, one derived from getSession()
    Session broadcastJoinMaterializeExchangesSession = Session.builder(materializeExchangesSession).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
    Session broadcastJoinStreamingExchangesSession = Session.builder(getSession()).setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()).build();
    // compatible union partitioning
    assertQuery(broadcastJoinMaterializeExchangesSession, "WITH union_of_aggregations as ( " + "    SELECT " + "        partkey, " + "        count(*) AS value " + "    FROM lineitem " + "    GROUP BY  " + "        1 " + "    UNION ALL " + "    SELECT " + "        partkey, " + "        sum(suppkey) AS value " + "    FROM lineitem " + "    GROUP BY  " + "        1        " + ") " + "SELECT " + "    sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b  " + "WHERE a.partkey = b.partkey ", "SELECT 12404708", assertRemoteExchangesCount(6).andThen(assertRemoteMaterializedExchangesCount(4)));
    // incompatible union partitioning, requires an extra remote exchange for build and probe
    String incompatiblePartitioningQuery = "WITH union_of_aggregations as ( " + "    SELECT " + "        partkey, " + "        count(*) as value " + "    FROM lineitem " + "    GROUP BY  " + "        1 " + "    UNION ALL " + "    SELECT " + "        partkey, " + "        suppkey as value " + "    FROM lineitem " + "    GROUP BY  " + "        1, 2        " + ") " + "SELECT " + "    sum(a.value + b.value) " + "FROM union_of_aggregations a, union_of_aggregations b  " + "WHERE a.partkey = b.partkey ";
    // system partitioning handle is always compatible
    assertQuery(broadcastJoinStreamingExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(6));
    // hive partitioning handle is incompatible
    assertQuery(broadcastJoinMaterializeExchangesSession, incompatiblePartitioningQuery, "SELECT 4639006", assertRemoteExchangesCount(8).andThen(assertRemoteMaterializedExchangesCount(4)));
}
Also used : CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) FILE_SIZE_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME) PUSH_THROUGH_LOW_MEMORY_OPERATORS(com.facebook.presto.sql.analyzer.FeaturesConfig.PartialMergePushdownStrategy.PUSH_THROUGH_LOW_MEMORY_OPERATORS) MaterializedResult.resultBuilder(com.facebook.presto.testing.MaterializedResult.resultBuilder) QueryRunner(com.facebook.presto.testing.QueryRunner) Test(org.testng.annotations.Test) PARTIAL_MERGE_PUSHDOWN_STRATEGY(com.facebook.presto.SystemSessionProperties.PARTIAL_MERGE_PUSHDOWN_STRATEGY) TypeSignature(com.facebook.presto.common.type.TypeSignature) BigDecimal(java.math.BigDecimal) Map(java.util.Map) QualifiedObjectName(com.facebook.presto.common.QualifiedObjectName) FileAssert.assertFile(org.testng.FileAssert.assertFile) TableColumnInfo(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.IOPlan.TableColumnInfo) CUSTOMER(io.airlift.tpch.TpchTable.CUSTOMER) Assert.assertFalse(org.testng.Assert.assertFalse) EXCHANGE_MATERIALIZATION_STRATEGY(com.facebook.presto.SystemSessionProperties.EXCHANGE_MATERIALIZATION_STRATEGY) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) RowNumberNode(com.facebook.presto.sql.planner.plan.RowNumberNode) PATH_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME) JOIN_DISTRIBUTION_TYPE(com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) PartialMergePushdownStrategy(com.facebook.presto.sql.analyzer.FeaturesConfig.PartialMergePushdownStrategy) JsonCodec.jsonCodec(com.facebook.airlift.json.JsonCodec.jsonCodec) MANIFEST_VERIFICATION_ENABLED(com.facebook.presto.hive.HiveSessionProperties.MANIFEST_VERIFICATION_ENABLED) QueryAssertions(com.facebook.presto.tests.QueryAssertions) Collectors.joining(java.util.stream.Collectors.joining) 
ConnectorSession(com.facebook.presto.spi.ConnectorSession) QueryInfo(com.facebook.presto.execution.QueryInfo) CONCURRENT_LIFESPANS_PER_NODE(com.facebook.presto.SystemSessionProperties.CONCURRENT_LIFESPANS_PER_NODE) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) LocalDateTime(java.time.LocalDateTime) BUCKET_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) HiveUtil.columnExtraInfo(com.facebook.presto.hive.HiveUtil.columnExtraInfo) PAGEFILE(com.facebook.presto.hive.HiveStorageFormat.PAGEFILE) ArrayList(java.util.ArrayList) TableWriterMergeNode(com.facebook.presto.sql.planner.plan.TableWriterMergeNode) HiveQueryRunner.createBucketedSession(com.facebook.presto.hive.HiveQueryRunner.createBucketedSession) QueryAssertions.assertEqualsIgnoreOrder(com.facebook.presto.tests.QueryAssertions.assertEqualsIgnoreOrder) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) InsertTableHandle(com.facebook.presto.metadata.InsertTableHandle) Identity(com.facebook.presto.spi.security.Identity) Files.asCharSink(com.google.common.io.Files.asCharSink) BiConsumer(java.util.function.BiConsumer) ResultWithQueryId(com.facebook.presto.tests.ResultWithQueryId) EXACTLY(com.facebook.presto.common.predicate.Marker.Bound.EXACTLY) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) TableHandle(com.facebook.presto.spi.TableHandle) SORTED_WRITE_TEMP_PATH_SUBDIRECTORY_COUNT(com.facebook.presto.hive.HiveSessionProperties.SORTED_WRITE_TEMP_PATH_SUBDIRECTORY_COUNT) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) LongStream(java.util.stream.LongStream) HIVE_CATALOG(com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG) WindowNode(com.facebook.presto.sql.planner.plan.WindowNode) Language(org.intellij.lang.annotations.Language) Session(com.facebook.presto.Session) ELIMINATE_CROSS_JOINS(com.facebook.presto.sql.analyzer.FeaturesConfig.JoinReorderingStrategy.ELIMINATE_CROSS_JOINS) Constraint(com.facebook.presto.spi.Constraint) 
Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) FormattedRange(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.FormattedRange) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) PUSHDOWN_FILTER_ENABLED(com.facebook.presto.hive.HiveSessionProperties.PUSHDOWN_FILTER_ENABLED) COLOCATED_JOIN(com.facebook.presto.SystemSessionProperties.COLOCATED_JOIN) Metadata(com.facebook.presto.metadata.Metadata) BROADCAST(com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.BROADCAST) SORTED_WRITE_TO_TEMP_PATH_ENABLED(com.facebook.presto.hive.HiveSessionProperties.SORTED_WRITE_TO_TEMP_PATH_ENABLED) TableMetadata(com.facebook.presto.metadata.TableMetadata) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) LINE_ITEM(io.airlift.tpch.TpchTable.LINE_ITEM) PREFER_MANIFESTS_TO_LIST_FILES(com.facebook.presto.hive.HiveSessionProperties.PREFER_MANIFESTS_TO_LIST_FILES) PlanPrinter.textLogicalPlan(com.facebook.presto.sql.planner.planPrinter.PlanPrinter.textLogicalPlan) DistributedQueryRunner(com.facebook.presto.tests.DistributedQueryRunner) FILE_MODIFIED_TIME_COLUMN_NAME(com.facebook.presto.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME) HiveQueryRunner.createMaterializeExchangesSession(com.facebook.presto.hive.HiveQueryRunner.createMaterializeExchangesSession) RCFILE_OPTIMIZED_WRITER_ENABLED(com.facebook.presto.hive.HiveSessionProperties.RCFILE_OPTIMIZED_WRITER_ENABLED) TopNRowNumberNode(com.facebook.presto.sql.planner.plan.TopNRowNumberNode) TransactionBuilder.transaction(com.facebook.presto.transaction.TransactionBuilder.transaction) HiveSessionProperties.getInsertExistingPartitionsBehavior(com.facebook.presto.hive.HiveSessionProperties.getInsertExistingPartitionsBehavior) 
BUCKETED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) Path(org.apache.hadoop.fs.Path) ImmutableSet(com.google.common.collect.ImmutableSet) Files.createTempDir(com.google.common.io.Files.createTempDir) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) PART_SUPPLIER(io.airlift.tpch.TpchTable.PART_SUPPLIER) TPCH_SCHEMA(com.facebook.presto.hive.HiveQueryRunner.TPCH_SCHEMA) Assert.assertNotNull(org.testng.Assert.assertNotNull) Instant(java.time.Instant) InsertExistingPartitionsBehavior(com.facebook.presto.hive.HiveSessionProperties.InsertExistingPartitionsBehavior) PARTITIONED(com.facebook.presto.sql.analyzer.FeaturesConfig.JoinDistributionType.PARTITIONED) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) STORAGE_FORMAT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) JOIN_REORDERING_STRATEGY(com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY) REMOTE_MATERIALIZED(com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.REMOTE_MATERIALIZED) LocalDate(java.time.LocalDate) Optional(java.util.Optional) AbstractTestIntegrationSmokeTest(com.facebook.presto.tests.AbstractTestIntegrationSmokeTest) ConnectorId(com.facebook.presto.spi.ConnectorId) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) NATION(io.airlift.tpch.TpchTable.NATION) Assert.assertNull(org.testng.Assert.assertNull) Assert.assertEquals(com.facebook.presto.testing.assertions.Assert.assertEquals) PARTITIONING_PROVIDER_CATALOG(com.facebook.presto.SystemSessionProperties.PARTITIONING_PROVIDER_CATALOG) ORDERS(io.airlift.tpch.TpchTable.ORDERS) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) HashMap(java.util.HashMap) ColumnConstraint(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.ColumnConstraint) 
Function(java.util.function.Function) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ROLE(com.facebook.presto.spi.security.SelectedRole.Type.ROLE) OPTIMIZED_PARTITION_UPDATE_SERIALIZATION_ENABLED(com.facebook.presto.hive.HiveSessionProperties.OPTIMIZED_PARTITION_UPDATE_SERIALIZATION_ENABLED) ImmutableList(com.google.common.collect.ImmutableList) PARTITIONED_BY_PROPERTY(com.facebook.presto.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Files(com.google.common.io.Files) Verify.verify(com.google.common.base.Verify.verify) CatalogSchemaTableName(com.facebook.presto.spi.CatalogSchemaTableName) FormattedMarker(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.FormattedMarker) Objects.requireNonNull(java.util.Objects.requireNonNull) Plan(com.facebook.presto.sql.planner.Plan) Type(com.facebook.presto.common.type.Type) PlanNodeSearcher.searchFrom(com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom) GROUPED_EXECUTION(com.facebook.presto.SystemSessionProperties.GROUPED_EXECUTION) SelectedRole(com.facebook.presto.spi.security.SelectedRole) TableLayout(com.facebook.presto.metadata.TableLayout) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) TestingSession.testSessionBuilder(com.facebook.presto.testing.TestingSession.testSessionBuilder) FILE_RENAMING_ENABLED(com.facebook.presto.hive.HiveSessionProperties.FILE_RENAMING_ENABLED) Consumer(java.util.function.Consumer) FormattedDomain(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.FormattedDomain) MaterializedResult(com.facebook.presto.testing.MaterializedResult) TEMPORARY_STAGING_DIRECTORY_ENABLED(com.facebook.presto.hive.HiveSessionProperties.TEMPORARY_STAGING_DIRECTORY_ENABLED) BUCKET_COUNT_PROPERTY(com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) MaterializedRow(com.facebook.presto.testing.MaterializedRow) 
StatsAndCosts(com.facebook.presto.cost.StatsAndCosts) Assert.assertTrue(org.testng.Assert.assertTrue) Collections(java.util.Collections) IOPlan(com.facebook.presto.sql.planner.planPrinter.IOPlanPrinter.IOPlan) ExchangeNode(com.facebook.presto.sql.planner.plan.ExchangeNode) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) WindowNode(com.facebook.presto.sql.planner.plan.WindowNode) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) TopNRowNumberNode(com.facebook.presto.sql.planner.plan.TopNRowNumberNode) RowNumberNode(com.facebook.presto.sql.planner.plan.RowNumberNode) TopNRowNumberNode(com.facebook.presto.sql.planner.plan.TopNRowNumberNode) ConnectorSession(com.facebook.presto.spi.ConnectorSession) HiveQueryRunner.createBucketedSession(com.facebook.presto.hive.HiveQueryRunner.createBucketedSession) Session(com.facebook.presto.Session) HiveQueryRunner.createMaterializeExchangesSession(com.facebook.presto.hive.HiveQueryRunner.createMaterializeExchangesSession)

Example 3 with MarkDistinctNode

use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.

In class MultipleDistinctAggregationToMarkDistinct, method apply:

/*
 * Replaces DISTINCT aggregations (those without a filter and without a mask) by non-distinct
 * aggregations masked with a marker variable produced by a MarkDistinctNode.
 *
 * One MarkDistinctNode is stacked on the source per distinct set of argument variables;
 * aggregations sharing the same argument set reuse the same marker. All other aggregations
 * are carried over unchanged. The rule does not fire when the session has mark-distinct
 * disabled (SystemSessionProperties.useMarkDistinct).
 */
@Override
public Result apply(AggregationNode parent, Captures captures, Context context) {
    if (!SystemSessionProperties.useMarkDistinct(context.getSession())) {
        return Result.empty();
    }

    // Marker variable already created for a given set of aggregation input variables.
    Map<Set<VariableReferenceExpression>, VariableReferenceExpression> markerByInputs = new HashMap<>();
    Map<VariableReferenceExpression, Aggregation> rewrittenAggregations = new HashMap<>();
    PlanNode source = parent.getSource();

    for (Map.Entry<VariableReferenceExpression, Aggregation> entry : parent.getAggregations().entrySet()) {
        VariableReferenceExpression output = entry.getKey();
        Aggregation original = entry.getValue();

        // Only unfiltered, unmasked DISTINCT aggregations are rewritten.
        if (!original.isDistinct() || original.getFilter().isPresent() || original.getMask().isPresent()) {
            rewrittenAggregations.put(output, original);
            continue;
        }

        Set<VariableReferenceExpression> arguments = original.getArguments().stream()
                .map(OriginalExpressionUtils::castToExpression)
                .map(context.getVariableAllocator()::toVariableReference)
                .collect(toSet());

        VariableReferenceExpression marker = markerByInputs.get(arguments);
        if (marker == null) {
            // First aggregation over this argument set: allocate a marker and add a MarkDistinctNode.
            marker = context.getVariableAllocator().newVariable(Iterables.getLast(arguments).getName(), BOOLEAN, "distinct");
            markerByInputs.put(arguments, marker);

            // Distinctness is determined over grouping keys + arguments (+ group id, when present).
            ImmutableSet.Builder<VariableReferenceExpression> distinctOver = ImmutableSet.<VariableReferenceExpression>builder();
            distinctOver.addAll(parent.getGroupingKeys());
            distinctOver.addAll(arguments);
            parent.getGroupIdVariable().ifPresent(distinctOver::add);

            source = new MarkDistinctNode(
                    source.getSourceLocation(),
                    context.getIdAllocator().getNextId(),
                    source,
                    marker,
                    ImmutableList.copyOf(distinctOver.build()),
                    Optional.empty());
        }

        // Same call, filter and ordering; distinct flag cleared and the marker used as mask.
        rewrittenAggregations.put(output, new Aggregation(
                original.getCall(),
                original.getFilter(),
                original.getOrderBy(),
                false,
                Optional.of(marker)));
    }

    return Result.ofPlanNode(new AggregationNode(
            parent.getSourceLocation(),
            parent.getId(),
            source,
            rewrittenAggregations,
            parent.getGroupingSets(),
            ImmutableList.of(),
            parent.getStep(),
            parent.getHashVariable(),
            parent.getGroupIdVariable()));
}
Also used : ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) Collectors.toSet(java.util.stream.Collectors.toSet) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) HashMap(java.util.HashMap) AggregationNode(com.facebook.presto.spi.plan.AggregationNode) Aggregation(com.facebook.presto.spi.plan.AggregationNode.Aggregation) PlanNode(com.facebook.presto.spi.plan.PlanNode) ImmutableSet(com.google.common.collect.ImmutableSet) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) OriginalExpressionUtils(com.facebook.presto.sql.relational.OriginalExpressionUtils) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with MarkDistinctNode

use of com.facebook.presto.spi.plan.MarkDistinctNode in project presto by prestodb.

In class MarkDistinctMatcher, method detailMatches:

/*
 * Compares a MarkDistinctNode against this matcher's expectations.
 *
 * Returns NO_MATCH when the node's hash variable (as an optional Symbol) differs from the
 * expected hash symbol resolved through symbolAliases, or when the set of the node's distinct
 * variables differs from the set of expected distinct symbols. Otherwise returns a match that
 * associates markerSymbol.toString() with a reference to the node's marker variable.
 *
 * Throws IllegalStateException (via checkState) if shapeMatches(node) is false.
 */
@Override
public MatchResult detailMatches(PlanNode node, StatsProvider stats, Session session, Metadata metadata, SymbolAliases symbolAliases) {
    checkState(shapeMatches(node), "Plan testing framework error: shapeMatches returned false in detailMatches in %s", this.getClass().getName());
    MarkDistinctNode markDistinct = (MarkDistinctNode) node;

    boolean hashMatches = markDistinct.getHashVariable()
            .map(hashVariable -> new Symbol(hashVariable.getName()))
            .equals(hashSymbol.map(expected -> expected.toSymbol(symbolAliases)));
    if (!hashMatches) {
        return NO_MATCH;
    }

    // Compared as sets, so neither ordering nor duplicates in the two collections matter.
    boolean distinctMatches = markDistinct.getDistinctVariables().stream()
            .map(VariableReferenceExpression::getName)
            .map(Symbol::new)
            .collect(toImmutableSet())
            .equals(distinctSymbols.stream()
                    .map(expected -> expected.toSymbol(symbolAliases))
                    .collect(toImmutableSet()));
    if (!distinctMatches) {
        return NO_MATCH;
    }

    return match(markerSymbol.toString(), createSymbolReference(markDistinct.getMarkerVariable()));
}
Also used : Session(com.facebook.presto.Session) StatsProvider(com.facebook.presto.cost.StatsProvider) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) Preconditions.checkState(com.google.common.base.Preconditions.checkState) PlanNode(com.facebook.presto.spi.plan.PlanNode) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Symbol(com.facebook.presto.sql.planner.Symbol) MatchResult.match(com.facebook.presto.sql.planner.assertions.MatchResult.match) Objects.requireNonNull(java.util.Objects.requireNonNull) Optional(java.util.Optional) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) NO_MATCH(com.facebook.presto.sql.planner.assertions.MatchResult.NO_MATCH) Metadata(com.facebook.presto.metadata.Metadata) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) ExpressionTreeUtils.createSymbolReference(com.facebook.presto.sql.analyzer.ExpressionTreeUtils.createSymbolReference) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) MarkDistinctNode(com.facebook.presto.spi.plan.MarkDistinctNode) Symbol(com.facebook.presto.sql.planner.Symbol) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression)

Aggregations

MarkDistinctNode (com.facebook.presto.spi.plan.MarkDistinctNode)4 PlanNode (com.facebook.presto.spi.plan.PlanNode)3 VariableReferenceExpression (com.facebook.presto.spi.relation.VariableReferenceExpression)3 Session (com.facebook.presto.Session)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 JsonCodec.jsonCodec (com.facebook.airlift.json.JsonCodec.jsonCodec)1 COLOCATED_JOIN (com.facebook.presto.SystemSessionProperties.COLOCATED_JOIN)1 CONCURRENT_LIFESPANS_PER_NODE (com.facebook.presto.SystemSessionProperties.CONCURRENT_LIFESPANS_PER_NODE)1 EXCHANGE_MATERIALIZATION_STRATEGY (com.facebook.presto.SystemSessionProperties.EXCHANGE_MATERIALIZATION_STRATEGY)1 GROUPED_EXECUTION (com.facebook.presto.SystemSessionProperties.GROUPED_EXECUTION)1 JOIN_DISTRIBUTION_TYPE (com.facebook.presto.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE)1 JOIN_REORDERING_STRATEGY (com.facebook.presto.SystemSessionProperties.JOIN_REORDERING_STRATEGY)1 PARTIAL_MERGE_PUSHDOWN_STRATEGY (com.facebook.presto.SystemSessionProperties.PARTIAL_MERGE_PUSHDOWN_STRATEGY)1 PARTITIONING_PROVIDER_CATALOG (com.facebook.presto.SystemSessionProperties.PARTITIONING_PROVIDER_CATALOG)1 QualifiedObjectName (com.facebook.presto.common.QualifiedObjectName)1 EXACTLY (com.facebook.presto.common.predicate.Marker.Bound.EXACTLY)1 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)1 BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN)1