Example 96 with MetastoreContext

use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb.

the class AbstractTestHiveClient method doInsertIntoNewPartition.

private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName, PageSinkContext pageSinkContext) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    // insert the data
    String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
    Set<String> existingFiles;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        // verify partitions were created
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
        // verify the node versions in partitions
        Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
        assertEquals(partitions.size(), partitionNames.size());
        for (String partitionName : partitionNames) {
            Partition partition = partitions.get(partitionName).get();
            assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
            assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
        }
        // load the new table
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the data
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // test rollback
        existingFiles = listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        // test statistics
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(metastoreContext, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
            assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
            assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
            assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
        }
    }
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        HiveInsertTableHandle insertTableHandle = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, pageSinkContext);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        if (pageSinkContext.isCommitRequired()) {
            assertValidPageSinkCommitFragments(fragments);
            metadata.commitPageSinkAsync(session, insertTableHandle, fragments).get();
        }
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), insertTableHandle.getLocationHandle().getTargetPath().toString(), false);
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertTrue(new Path(filePath).getName().startsWith(session.getQueryId()));
        }
        // rollback insert
        transaction.rollback();
    }
    // verify the data is unchanged
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, newSession(), TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
        // verify we did not modify the table directory
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        assertEquals(listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), stagingPathRoot.toString(), false);
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
    }
}
Also used: Path (org.apache.hadoop.fs.Path), Partition (com.facebook.presto.hive.metastore.Partition), HiveColumnHandle.bucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle (com.facebook.presto.spi.ColumnHandle), Optional (java.util.Optional), MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext), ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession), ConnectorSession (com.facebook.presto.spi.ConnectorSession), ConnectorMetadata (com.facebook.presto.spi.connector.ConnectorMetadata), MaterializedResult (com.facebook.presto.testing.MaterializedResult), ConnectorPageSink (com.facebook.presto.spi.ConnectorPageSink)
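
Every transaction block in these examples rebuilds the same seven-argument MetastoreContext from the ConnectorSession. A minimal sketch of a helper that factors out that repetition; the helper name is hypothetical and the comment on the boolean flag is an assumption, neither is part of AbstractTestHiveClient:

private MetastoreContext createMetastoreContext(ConnectorSession session) {
    // Hypothetical helper, shown only to highlight the construction pattern.
    // Arguments mirror the inline constructions above: caller identity, query
    // id, optional client info and source, optional metastore headers, a
    // boolean flag (always passed as false in these tests), and the default
    // column converter provider.
    return new MetastoreContext(
            session.getIdentity(),
            session.getQueryId(),
            session.getClientInfo(),
            session.getSource(),
            getMetastoreHeaders(session),
            false,
            DEFAULT_COLUMN_CONVERTER_PROVIDER);
}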

Example 97 with MetastoreContext

use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb.

the class AbstractTestHiveClient method doInsertIntoExistingPartition.

private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, SchemaTableName tableName, PageSinkContext pageSinkContext) throws Exception {
    // creating the table
    doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
    MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
    for (int i = 0; i < 3; i++) {
        // insert the data
        insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
            MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
            // verify partitions were created
            List<String> partitionNames = transaction.getMetastore().getPartitionNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
            assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
            // load the new table
            List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
            // verify the data
            resultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
            MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
            // test statistics
            for (String partitionName : partitionNames) {
                HiveBasicStatistics statistics = getBasicStatisticsForPartition(metastoreContext, transaction, tableName, partitionName);
                assertEquals(statistics.getRowCount().getAsLong(), i + 1L);
                assertEquals(statistics.getFileCount().getAsLong(), i + 1L);
                assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
                assertGreaterThan(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
            }
        }
    }
    // test rollback
    Set<String> existingFiles;
    Path stagingPathRoot;
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        existingFiles = listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName());
        assertFalse(existingFiles.isEmpty());
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        // "stage" insert data
        HiveInsertTableHandle insertTableHandle = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
        stagingPathRoot = getStagingPathRoot(insertTableHandle);
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, pageSinkContext);
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        if (pageSinkContext.isCommitRequired()) {
            assertValidPageSinkCommitFragments(fragments);
            metadata.commitPageSinkAsync(session, insertTableHandle, fragments).get();
        }
        metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
        // verify all temp files start with the unique prefix
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), insertTableHandle.getLocationHandle().getTargetPath().toString(), false);
        Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
        assertFalse(tempFiles.isEmpty());
        for (String filePath : tempFiles) {
            assertTrue(new Path(filePath).getName().startsWith(session.getQueryId()));
        }
        // verify statistics are visible from within the current transaction
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(metastoreContext, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L);
        }
        // rollback insert
        transaction.rollback();
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
        // verify the data is unchanged
        MaterializedResult result = readTable(transaction, tableHandle, columnHandles, newSession(), TupleDomain.all(), OptionalInt.empty(), Optional.empty());
        assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows());
        // verify we did not modify the table directory
        assertEquals(listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
        // verify temp directory is empty
        HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), stagingPathRoot.toString(), false);
        assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
        // verify statistics have been rolled back
        List<String> partitionNames = transaction.getMetastore().getPartitionNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
        for (String partitionName : partitionNames) {
            HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(metastoreContext, transaction, tableName, partitionName);
            assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L);
        }
    }
}
Also used: Path (org.apache.hadoop.fs.Path), HiveColumnHandle.bucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle), ColumnHandle (com.facebook.presto.spi.ColumnHandle), MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext), Constraint (com.facebook.presto.spi.Constraint), ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle), Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice), Slice (io.airlift.slice.Slice), TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession), ConnectorSession (com.facebook.presto.spi.ConnectorSession), ConnectorMetadata (com.facebook.presto.spi.connector.ConnectorMetadata), MaterializedResult (com.facebook.presto.testing.MaterializedResult), ConnectorPageSink (com.facebook.presto.spi.ConnectorPageSink)
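
The row-count assertions in this example follow simple insert arithmetic: the loop commits three one-row-per-partition inserts (statistics grow 1, 2, 3), the staged insert appends the same page twice (3 + 2 = 5, visible only inside the open transaction), and the rollback restores 3. A sketch of that bookkeeping; all names are illustrative, none come from the test class:

// Per-partition row counts mirrored by the assertions above.
long committedRows = 3;   // the for-loop inserts CREATE_TABLE_PARTITIONED_DATA three times
long stagedRows = 2;      // sink.appendPage(...) is called twice before rollback
long visibleInTransaction = committedRows + stagedRows;  // 5, asserted before transaction.rollback()
long afterRollback = committedRows;                      // 3, asserted in the final transaction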

Example 98 with MetastoreContext

use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb.

the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithEmptyPartitions.

@Test
public void testMetadataAggregationFoldingWithEmptyPartitions() {
    QueryRunner queryRunner = getQueryRunner();
    Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).build();
    Session optimizeMetadataQueriesIgnoreStats = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES_IGNORE_STATS, Boolean.toString(true)).build();
    Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
    queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_empty_partitions WITH (partitioned_by = ARRAY['ds']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds FROM orders WHERE orderkey < 1000");
    ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
    MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_empty_partitions").get();
    // Add one partition with no statistics.
    String partitionNameNoStats = "ds=2020-07-20";
    Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
    // Add one partition with statistics indicating that it has no rows.
    String emptyPartitionName = "ds=2020-06-30";
    Partition emptyPartition = createDummyPartition(table, emptyPartitionName);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(emptyPartition, emptyPartitionName, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0)).build())));
    try {
        // Max ds doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Ignore metastore stats. Enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-20"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Max ds matching the filter has stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds <= '2020-07-02')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-02"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Min ds partition stats indicates that it is an empty partition. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
        // Min ds partition matching the filter has non-empty stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds >= '2020-07-01')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-01"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
        // Test the non-reducible code path.
        // Disable rewrite as there are partitions with empty stats.
        assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02", "2020-07-20"))))));
        // Enable rewrite as all matching partitions have stats.
        assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
        // One of two resulting partitions doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02"))))));
        // Ignore metadata stats. Always enable rewrite.
        assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-06-30'"), "max", expression("'2020-07-02'")), anyTree(values()))));
        // Both resulting partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
    } finally {
        queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_empty_partitions");
    }
}
Also used: Partition (com.facebook.presto.hive.metastore.Partition), Table (com.facebook.presto.hive.metastore.Table), PartitionWithStatistics (com.facebook.presto.hive.metastore.PartitionWithStatistics), StringLiteral (com.facebook.presto.sql.tree.StringLiteral), MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext), ExtendedHiveMetastore (com.facebook.presto.hive.metastore.ExtendedHiveMetastore), QueryRunner (com.facebook.presto.testing.QueryRunner), DistributedQueryRunner (com.facebook.presto.tests.DistributedQueryRunner), Session (com.facebook.presto.Session), Test (org.testng.annotations.Test)
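
The plans above hinge on the difference between a partition with no statistics at all and one whose statistics explicitly say it holds zero rows. A condensed sketch of the two registrations from this example; the argument order of HiveBasicStatistics (file count, row count, in-memory size, on-disk size) is inferred from the statistics assertions in Examples 96 and 97:

// No statistics at all: the optimizer cannot tell whether the partition is
// empty, so folding is unsafe whenever this partition would supply the answer.
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(),
        ImmutableList.of(new PartitionWithStatistics(
                createDummyPartition(table, "ds=2020-07-20"),
                "ds=2020-07-20",
                PartitionStatistics.empty())));

// Statistics recording zero rows: the optimizer knows the partition contributes
// no data, so folding MIN(ds) to it would return a value no row actually has;
// the rewrite is likewise disabled when it is the extremal partition.
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(),
        ImmutableList.of(new PartitionWithStatistics(
                createDummyPartition(table, "ds=2020-06-30"),
                "ds=2020-06-30",
                PartitionStatistics.builder()
                        .setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0))
                        .build())));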

Example 99 with MetastoreContext

use of com.facebook.presto.hive.metastore.MetastoreContext in project presto by prestodb.

the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithTwoPartitionColumns.

@Test
public void testMetadataAggregationFoldingWithTwoPartitionColumns() {
    QueryRunner queryRunner = getQueryRunner();
    Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).build();
    Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
    // Create a table with partitions: ds=2020-07-01/status=A, ds=2020-07-01/status=B, ds=2020-07-02/status=A, ds=2020-07-02/status=B
    queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_two_partitions_columns WITH (partitioned_by = ARRAY['ds', 'status']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds, IF(orderkey % 2 = 1, 'A', 'B') status " + "FROM orders WHERE orderkey < 1000");
    ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
    MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_two_partitions_columns").get();
    // Add one partition with no statistics.
    String partitionNameNoStats = "ds=2020-07-03/status=C";
    Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
    metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
    try {
        // All matching partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_two_partitions_columns WHERE ds BETWEEN '2020-07-01' AND '2020-07-02'", anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
        // All matching partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(status), MAX(ds) FROM test_metadata_aggregation_folding_with_two_partitions_columns WHERE ds BETWEEN '2020-07-01' AND '2020-07-02'", anyTree(project(ImmutableMap.of("min", expression("'A'"), "max", expression("'2020-07-02'")), anyTree(values()))));
        // All matching partitions have stats. Enable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MIN(ds) ds, MIN(status) status FROM test_metadata_aggregation_folding_with_two_partitions_columns", anyTree(project(ImmutableMap.of("ds", expression("'2020-07-01'"), "status", expression("'A'")), anyTree(values()))));
        // Resulting partition doesn't have stats. Disable rewrite.
        assertPlan(optimizeMetadataQueries, "SELECT MAX(status) status FROM test_metadata_aggregation_folding_with_two_partitions_columns", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_two_partitions_columns", ImmutableMap.of("status", multipleValues(VarcharType.createVarcharType(1), utf8Slices("A", "B", "C")), "ds", multipleValues(VARCHAR, utf8Slices("2020-07-01", "2020-07-02", "2020-07-03"))))));
    } finally {
        queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_two_partitions_columns");
    }
}
Also used: Partition (com.facebook.presto.hive.metastore.Partition), Table (com.facebook.presto.hive.metastore.Table), PartitionWithStatistics (com.facebook.presto.hive.metastore.PartitionWithStatistics), MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext), ExtendedHiveMetastore (com.facebook.presto.hive.metastore.ExtendedHiveMetastore), QueryRunner (com.facebook.presto.testing.QueryRunner), DistributedQueryRunner (com.facebook.presto.tests.DistributedQueryRunner), Session (com.facebook.presto.Session), Test (org.testng.annotations.Test)
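
For intuition, the folded plans answer MIN/MAX from partition names alone; the last plan cannot fold because the partition that would supply MAX(status) carries no statistics. A runnable sketch of the name-side computation, illustrative only and not the optimizer's actual code:

// Partition names for this table after the dummy partition is added.
List<String> names = ImmutableList.of(
        "ds=2020-07-01/status=A", "ds=2020-07-01/status=B",
        "ds=2020-07-02/status=A", "ds=2020-07-02/status=B",
        "ds=2020-07-03/status=C");
// MAX(status) over the names folds to "C" ...
String maxStatus = names.stream()
        .map(name -> name.substring(name.indexOf("status=") + "status=".length()))
        .max(String::compareTo)
        .get();
// ... but "ds=2020-07-03/status=C" was registered with PartitionStatistics.empty(),
// so the optimizer cannot trust the folded answer and keeps the table scan,
// which is exactly what the final assertPlan verifies.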

Aggregations

MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext): 99 uses
Table (com.facebook.presto.hive.metastore.Table): 59 uses
PrestoException (com.facebook.presto.spi.PrestoException): 53 uses
SchemaTableName (com.facebook.presto.spi.SchemaTableName): 52 uses
TableNotFoundException (com.facebook.presto.spi.TableNotFoundException): 49 uses
Column (com.facebook.presto.hive.metastore.Column): 42 uses
ImmutableList (com.google.common.collect.ImmutableList): 41 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 41 uses
ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 40 uses
Map (java.util.Map): 38 uses
Optional (java.util.Optional): 38 uses
PrestoPrincipal (com.facebook.presto.spi.security.PrestoPrincipal): 37 uses
List (java.util.List): 37 uses
Set (java.util.Set): 37 uses
PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics): 35 uses
Objects.requireNonNull (java.util.Objects.requireNonNull): 35 uses
Type (com.facebook.presto.common.type.Type): 34 uses
HivePrivilegeInfo (com.facebook.presto.hive.metastore.HivePrivilegeInfo): 34 uses
ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap): 34 uses
Domain (com.facebook.presto.common.predicate.Domain): 33 uses