use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
the class AbstractTestHiveClient method createDummyPartitionedTable.
protected void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns) throws Exception {
doCreateEmptyTable(tableName, ORC, columns);
ExtendedHiveMetastore metastoreClient = getMetastoreClient();
Table table = metastoreClient.getTable(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> firstPartitionValues = ImmutableList.of("2016-01-01");
List<String> secondPartitionValues = ImmutableList.of("2016-01-02");
String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues);
String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues);
List<PartitionWithStatistics> partitions = ImmutableList.of(firstPartitionName, secondPartitionName).stream().map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())).collect(toImmutableList());
metastoreClient.addPartitions(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitions);
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
}
use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
the class AbstractTestHiveClient method testStorePartitionWithStatistics.
protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics, Duration delayBetweenAlters) throws Exception {
SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
try {
doCreateEmptyTable(tableName, ORC, columns);
ExtendedHiveMetastore metastoreClient = getMetastoreClient();
Table table = metastoreClient.getTable(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> partitionValues = ImmutableList.of("2016-01-01");
String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
Partition partition = createDummyPartition(table, partitionName);
// create partition with stats for all columns
metastoreClient.addPartitions(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
assertEquals(metastoreClient.getPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(ORC));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one with other stats
Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(DWRF)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
assertEquals(metastoreClient.getPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(DWRF));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one with stats for only subset of columns
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
sleep(delayBetweenAlters.toMillis());
// alter the partition into one without stats
modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
metastoreClient.alterPartition(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
assertThat(metastoreClient.getPartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
} finally {
dropTable(tableName);
}
}
use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
the class TestHiveLogicalPlanner method testMetadataAggregationFoldingWithEmptyPartitionsAndMetastoreThreshold.
@Test
public void testMetadataAggregationFoldingWithEmptyPartitionsAndMetastoreThreshold() {
QueryRunner queryRunner = getQueryRunner();
Session optimizeMetadataQueriesWithHighThreshold = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).setSystemProperty(OPTIMIZE_METADATA_QUERIES_CALL_THRESHOLD, Integer.toString(100)).build();
Session optimizeMetadataQueriesWithLowThreshold = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES, Boolean.toString(true)).setSystemProperty(OPTIMIZE_METADATA_QUERIES_CALL_THRESHOLD, Integer.toString(1)).build();
Session optimizeMetadataQueriesIgnoreStatsWithLowThreshold = Session.builder(this.getQueryRunner().getDefaultSession()).setSystemProperty(OPTIMIZE_METADATA_QUERIES_IGNORE_STATS, Boolean.toString(true)).setSystemProperty(OPTIMIZE_METADATA_QUERIES_CALL_THRESHOLD, Integer.toString(1)).build();
Session shufflePartitionColumns = Session.builder(this.getQueryRunner().getDefaultSession()).setCatalogSessionProperty(HIVE_CATALOG, SHUFFLE_PARTITIONED_COLUMNS_FOR_TABLE_WRITE, Boolean.toString(true)).build();
queryRunner.execute(shufflePartitionColumns, "CREATE TABLE test_metadata_aggregation_folding_with_empty_partitions_with_threshold WITH (partitioned_by = ARRAY['ds']) AS " + "SELECT orderkey, CAST(to_iso8601(date_add('DAY', orderkey % 2, date('2020-07-01'))) AS VARCHAR) AS ds FROM orders WHERE orderkey < 1000");
ExtendedHiveMetastore metastore = replicateHiveMetastore((DistributedQueryRunner) queryRunner);
MetastoreContext metastoreContext = new MetastoreContext(getSession().getUser(), getSession().getQueryId().getId(), Optional.empty(), Optional.empty(), Optional.empty(), false, HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
Table table = metastore.getTable(metastoreContext, getSession().getSchema().get(), "test_metadata_aggregation_folding_with_empty_partitions_with_threshold").get();
// Add one partition with no statistics.
String partitionNameNoStats = "ds=2020-07-20";
Partition partitionNoStats = createDummyPartition(table, partitionNameNoStats);
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(partitionNoStats, partitionNameNoStats, PartitionStatistics.empty())));
// Add one partition with statistics indicating that it has no rows.
String emptyPartitionName = "ds=2020-06-30";
Partition emptyPartition = createDummyPartition(table, emptyPartitionName);
metastore.addPartitions(metastoreContext, table.getDatabaseName(), table.getTableName(), ImmutableList.of(new PartitionWithStatistics(emptyPartition, emptyPartitionName, PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(1, 0, 0, 0)).build())));
try {
// Test the non-reducible code path.
// Enable rewrite as all matching partitions have stats.
assertPlan(optimizeMetadataQueriesWithHighThreshold, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions_with_threshold WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
// All matching partitions have stats, Metastore threshold is reached, Disable rewrite
assertPlan(optimizeMetadataQueriesWithLowThreshold, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions_with_threshold WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(tableScanWithConstraint("test_metadata_aggregation_folding_with_empty_partitions_with_threshold", ImmutableMap.of("ds", multipleValues(VARCHAR, utf8Slices("2020-07-01", "2020-07-02"))))));
// All matching partitions have stats, Metastore threshold is reached, but IgnoreStats will overwrite, Enable rewrite
assertPlan(optimizeMetadataQueriesIgnoreStatsWithLowThreshold, "SELECT DISTINCT ds FROM test_metadata_aggregation_folding_with_empty_partitions_with_threshold WHERE ds BETWEEN '2020-07-01' AND '2020-07-03'", anyTree(values(ImmutableList.of("ds"), ImmutableList.of(ImmutableList.of(new StringLiteral("2020-07-01")), ImmutableList.of(new StringLiteral("2020-07-02"))))));
} finally {
queryRunner.execute("DROP TABLE IF EXISTS test_metadata_aggregation_folding_with_empty_partitions_with_threshold");
}
}
use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
the class TestHiveClientFileMetastore method createDummyPartitionedTable.
private void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns, Map<String, String> dynamicPartitionParameters) throws Exception {
doCreateEmptyTable(tableName, ORC, columns);
ExtendedHiveMetastore metastoreClient = getMetastoreClient();
Table table = metastoreClient.getTable(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
List<String> firstPartitionValues = ImmutableList.of("2020-01-01");
List<String> secondPartitionValues = ImmutableList.of("2020-01-02");
List<String> thirdPartitionValues = ImmutableList.of("2020-01-03");
String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues);
String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues);
String thirdPartitionName = makePartName(ImmutableList.of("ds"), thirdPartitionValues);
List<PartitionWithStatistics> partitions = ImmutableList.of(firstPartitionName, secondPartitionName).stream().map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())).collect(toImmutableList());
metastoreClient.addPartitions(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitions);
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
List<PartitionWithStatistics> partitionsNotReadable = ImmutableList.of(thirdPartitionName).stream().map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName, dynamicPartitionParameters), partitionName, PartitionStatistics.empty())).collect(toImmutableList());
metastoreClient.addPartitions(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), partitionsNotReadable);
metastoreClient.updatePartitionStatistics(METASTORE_CONTEXT, tableName.getSchemaName(), tableName.getTableName(), thirdPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
}
use of com.facebook.presto.hive.metastore.PartitionWithStatistics in project presto by prestodb.
the class ThriftHiveMetastore method addPartitions.
@Override
public void addPartitions(MetastoreContext metastoreContext, String databaseName, String tableName, List<PartitionWithStatistics> partitionsWithStatistics) {
List<Partition> partitions = partitionsWithStatistics.stream().map(part -> ThriftMetastoreUtil.toMetastoreApiPartition(part, metastoreContext.getColumnConverter())).collect(toImmutableList());
addPartitionsWithoutStatistics(metastoreContext, databaseName, tableName, partitions);
for (PartitionWithStatistics partitionWithStatistics : partitionsWithStatistics) {
storePartitionColumnStatistics(metastoreContext, databaseName, tableName, partitionWithStatistics.getPartitionName(), partitionWithStatistics);
}
}
Aggregations