use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class AbstractTestHiveClient method eraseStatistics.
private void eraseStatistics(SchemaTableName schemaTableName) {
ExtendedHiveMetastore metastoreClient = getMetastoreClient();
metastoreClient.updateTableStatistics(METASTORE_CONTEXT, schemaTableName.getSchemaName(), schemaTableName.getTableName(), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
Table table = metastoreClient.getTable(METASTORE_CONTEXT, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElseThrow(() -> new TableNotFoundException(schemaTableName));
List<String> partitionColumns = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
if (!table.getPartitionColumns().isEmpty()) {
List<String> partitionNames = metastoreClient.getPartitionNames(METASTORE_CONTEXT, schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElse(ImmutableList.of());
List<Partition> partitions = metastoreClient.getPartitionsByNames(METASTORE_CONTEXT, schemaTableName.getSchemaName(), schemaTableName.getTableName(), partitionNames).entrySet().stream().map(Map.Entry::getValue).filter(Optional::isPresent).map(Optional::get).collect(toImmutableList());
for (Partition partition : partitions) {
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, schemaTableName.getSchemaName(), schemaTableName.getTableName(), makePartName(partitionColumns, partition.getValues()), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
}
}
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class TestAbstractDwrfEncryptionInformationSource method testGetReadEncryptionInformationForPartitionedTableWithTableLevelEncryption.
@Test
public void testGetReadEncryptionInformationForPartitionedTableWithTableLevelEncryption() {
Table table = createTable(DWRF, Optional.of(forTable("table_level_key", "algo", "provider")), true);
Optional<Map<String, EncryptionInformation>> encryptionInformation = encryptionInformationSource.getReadEncryptionInformation(SESSION, table, Optional.of(ImmutableSet.of(// hiveColumnIndex value does not matter in this test
new HiveColumnHandle("col_bigint", HIVE_LONG, HIVE_LONG.getTypeSignature(), 0, REGULAR, Optional.empty(), Optional.empty()), new HiveColumnHandle("col_struct", STRUCT_TYPE, STRUCT_TYPE.getTypeSignature(), 0, REGULAR, Optional.empty(), ImmutableList.of(new Subfield("col_struct.a"), new Subfield("col_struct.b.b2")), Optional.empty()))), ImmutableMap.of("ds=2020-01-01", new Partition("dbName", "tableName", ImmutableList.of("2020-01-01"), table.getStorage(), table.getDataColumns(), ImmutableMap.of(), Optional.empty(), false, true, 0), "ds=2020-01-02", new Partition("dbName", "tableName", ImmutableList.of("2020-01-02"), table.getStorage(), table.getDataColumns(), ImmutableMap.of(), Optional.empty(), false, true, 0)));
assertTrue(encryptionInformation.isPresent());
assertEquals(encryptionInformation.get(), ImmutableMap.of("ds=2020-01-01", EncryptionInformation.fromEncryptionMetadata(DwrfEncryptionMetadata.forTable("table_level_key".getBytes(), ImmutableMap.of(TEST_EXTRA_METADATA, "ds=2020-01-01"), "algo", "provider")), "ds=2020-01-02", EncryptionInformation.fromEncryptionMetadata(DwrfEncryptionMetadata.forTable("table_level_key".getBytes(), ImmutableMap.of(TEST_EXTRA_METADATA, "ds=2020-01-02"), "algo", "provider"))));
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class AbstractTestHiveClient method doInsertIntoNewPartition.
private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTableName tableName, PageSinkContext pageSinkContext) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
// insert the data
String queryId = insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
Set<String> existingFiles;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
// verify partitions were created
List<String> partitionNames = transaction.getMetastore().getPartitionNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
// verify the node versions in partitions
Map<String, Optional<Partition>> partitions = getMetastoreClient().getPartitionsByNames(metastoreContext, tableName.getSchemaName(), tableName.getTableName(), partitionNames);
assertEquals(partitions.size(), partitionNames.size());
for (String partitionName : partitionNames) {
Partition partition = partitions.get(partitionName).get();
assertEquals(partition.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
assertEquals(partition.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
}
// load the new table
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// test rollback
existingFiles = listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(existingFiles.isEmpty());
// test statistics
for (String partitionName : partitionNames) {
HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(metastoreContext, transaction, tableName, partitionName);
assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L);
assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L);
assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertGreaterThan(partitionStatistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
Path stagingPathRoot;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// "stage" insert data
HiveInsertTableHandle insertTableHandle = (HiveInsertTableHandle) metadata.beginInsert(session, tableHandle);
stagingPathRoot = getStagingPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle, pageSinkContext);
sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
if (pageSinkContext.isCommitRequired()) {
assertValidPageSinkCommitFragments(fragments);
metadata.commitPageSinkAsync(session, insertTableHandle, fragments).get();
}
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
// verify all temp files start with the unique prefix
HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), insertTableHandle.getLocationHandle().getTargetPath().toString(), false);
Set<String> tempFiles = listAllDataFiles(context, getStagingPathRoot(insertTableHandle));
assertTrue(!tempFiles.isEmpty());
for (String filePath : tempFiles) {
assertTrue(new Path(filePath).getName().startsWith(session.getQueryId()));
}
// rollback insert
transaction.rollback();
}
// verify the data is unchanged
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, newSession(), TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
// verify we did not modify the table directory
MetastoreContext metastoreContext = new MetastoreContext(session.getIdentity(), session.getQueryId(), session.getClientInfo(), session.getSource(), getMetastoreHeaders(session), false, DEFAULT_COLUMN_CONVERTER_PROVIDER);
assertEquals(listAllDataFiles(metastoreContext, transaction, tableName.getSchemaName(), tableName.getTableName()), existingFiles);
// verify temp directory is empty
HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName(), stagingPathRoot.toString(), false);
assertTrue(listAllDataFiles(context, stagingPathRoot).isEmpty());
}
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithEmptyPartitions.
@Test
public void testMetadataAggregationFoldingWithEmptyPartitions() {
QueryRunner queryRunner = getQueryRunner();
Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).build();
Session optimizeMetadataQueriesIgnoreStats = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES_IGNORE_STATS, Boolean.toString(true)).build();
Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_empty_partitions WITH (partitioned_by = ARRAY['ds']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds FROM orders WHERE orderkey < 1000");
ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_empty_partitions").get();
// Add one partition with no statistics.
String partitionNameNoStats = "ds=2020-07-20";
Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
// Add one partition with statistics indicating that it has no rows.
String emptyPartitionName = "ds=2020-06-30";
Partition emptyPartition = createDummyPartition(table, emptyPartitionName);
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(emptyPartition, emptyPartitionName, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0)).build())));
try {
// Max ds doesn't have stats. Disable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
// Ignore metastore stats. Enable rewrite.
assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-20"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
// Max ds matching the filter has stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT max(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds <= '2020-07-02')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-02"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
// Min ds partition stats indicates that it is an empty partition. Disable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions)", anyTree(anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions")), anyTree(PlanMatchPattern.tableScan("test_metadata_aggregation_folding_with_empty_partitions"))));
// Min ds partition matching the filter has non-empty stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT * FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds = (SELECT min(ds) from test_metadata_aggregation_folding_with_empty_partitions WHERE ds >= '2020-07-01')", anyTree(join(INNER, ImmutableList.of(), tableScan("test_metadata_aggregation_folding_with_empty_partitions", getSingleValueColumnDomain("ds", "2020-07-01"), TRUE_CONSTANT, ImmutableSet.of("ds")), anyTree(any()))));
// Test the non-reducible code path.
// Disable rewrite as there are partitions with empty stats.
assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02", "2020-07-20"))))));
// Enable rewrite as all matching partitions have stats.
assertPlan(optimizeMetadataQueries, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
// One of two resulting partitions doesn't have stats. Disable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-06-30", "2020-07-01", "2020-07-02"))))));
// Ignore metadata stats. Always enable rewrite.
assertPlan(optimizeMetadataQueriesIgnoreStats, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-06-30' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-06-30'"), "max", expression("'2020-07-02'")), anyTree(values()))));
// Both resulting partitions have stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_empty_partitions WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
} finally {
queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_empty_partitions");
}
}
use of com.facebook.presto.hive.metastore.Partition in project presto by prestodb.
the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithTwoPartitionColumns.
@Test
public void testMetadataAggregationFoldingWithTwoPartitionColumns() {
QueryRunner queryRunner = getQueryRunner();
Session optimizeMetadataQueries = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).build();
Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
// Create a table with partitions: ds=2020-07-01/status=A, ds=2020-07-01/status=B, ds=2020-07-02/status=A, ds=2020-07-02/status=B
queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_two_partitions_columns WITH (partitioned_by = ARRAY['ds', 'status']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds, IF(orderkey % 2 = 1, 'A', 'B') status " + "FROM orders WHERE orderkey < 1000");
ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_two_partitions_columns").get();
// Add one partition with no statistics.
String partitionNameNoStats = "ds=2020-07-03/status=C";
Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
try {
// All matching partitions have stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MIN(ds), MAX(ds) FROM test_metadata_aggregation_folding_with_two_partitions_columns WHERE ds BETWEEN '2020-07-01' AND '2020-07-02'", anyTree(project(ImmutableMap.of("min", expression("'2020-07-01'"), "max", expression("'2020-07-02'")), anyTree(values()))));
// All matching partitions have stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MIN(status), MAX(ds) FROM test_metadata_aggregation_folding_with_two_partitions_columns WHERE ds BETWEEN '2020-07-01' AND '2020-07-02'", anyTree(project(ImmutableMap.of("min", expression("'A'"), "max", expression("'2020-07-02'")), anyTree(values()))));
// All matching partitions have stats. Enable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MIN(ds) ds, MIN(status) status FROM test_metadata_aggregation_folding_with_two_partitions_columns", anyTree(project(ImmutableMap.of("ds", expression("'2020-07-01'"), "status", expression("'A'")), anyTree(values()))));
// Resulting partition doesn't have stats. Disable rewrite.
assertPlan(optimizeMetadataQueries, "SELECT MAX(status) status FROM test_metadata_aggregation_folding_with_two_partitions_columns", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_two_partitions_columns", ImmutableMap.of("status", multipleValues(VarcharType.createVarcharType(1), utf8Slices("A", "B", "C")), "ds", multipleValues(VARCHAR, utf8Slices("2020-07-01", "2020-07-02", "2020-07-03"))))));
} finally {
queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_two_partitions_columns");
}
}
Aggregations