use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class MetastoreUtil method getHiveBasicStatistics.
public static HiveBasicStatistics getHiveBasicStatistics(Map<String, String> parameters) {
OptionalLong numFiles = parse(parameters.get(NUM_FILES));
OptionalLong numRows = parse(parameters.get(NUM_ROWS));
OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE));
OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE));
return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes);
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class SemiTransactionalHiveMetastore method updatePartitionStatistics.
// For HiveBasicStatistics, we only overwrite the original statistics if the new one is not empty.
// For HiveColumnStatistics, we always overwrite every statistics.
// TODO: Collect file count, on-disk size and in-memory size during ANALYZE
private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()), firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()), firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()), firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
return new PartitionStatistics(updatedBasicStatistics, newPartitionStats.getColumnStatistics());
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class ThriftHiveMetastore method getTableStatistics.
@Override
public PartitionStatistics getTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName) {
Table table = getTable(metastoreContext, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
List<String> dataColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(toImmutableList());
HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());
Map<String, HiveColumnStatistics> columnStatistics = getTableColumnStatistics(metastoreContext, databaseName, tableName, dataColumns, basicStatistics.getRowCount());
return new PartitionStatistics(basicStatistics, columnStatistics);
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class ThriftHiveMetastore method updateTableStatistics.
@Override
public synchronized void updateTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update) {
PartitionStatistics currentStatistics = getTableStatistics(metastoreContext, databaseName, tableName);
PartitionStatistics updatedStatistics = update.apply(currentStatistics);
Table originalTable = getTable(metastoreContext, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
Table modifiedTable = originalTable.deepCopy();
HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
alterTable(metastoreContext, databaseName, tableName, modifiedTable);
com.facebook.presto.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable, metastoreContext.getColumnConverter());
OptionalLong rowCount = basicStatistics.getRowCount();
List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream().map(entry -> createMetastoreColumnStatistics(entry.getKey(), table.getColumn(entry.getKey()).get().getType(), entry.getValue(), rowCount)).collect(toImmutableList());
if (!metastoreColumnStatistics.isEmpty()) {
setTableColumnStatistics(metastoreContext, databaseName, tableName, metastoreColumnStatistics);
}
Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(metastoreContext, databaseName, tableName, column));
}
use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.
the class FileHiveMetastore method getPartitionStatistics.
@Override
public synchronized Map<String, PartitionStatistics> getPartitionStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Set<String> partitionNames) {
Table table = getRequiredTable(metastoreContext, databaseName, tableName);
ImmutableMap.Builder<String, PartitionStatistics> statistics = ImmutableMap.builder();
for (String partitionName : partitionNames) {
List<String> partitionValues = extractPartitionValues(partitionName);
Path partitionDirectory = getPartitionMetadataDirectory(table, ImmutableList.copyOf(partitionValues));
PartitionMetadata partitionMetadata = readSchemaFile("partition", partitionDirectory, partitionCodec).orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues));
HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partitionMetadata.getParameters());
statistics.put(partitionName, new PartitionStatistics(basicStatistics, partitionMetadata.getColumnStatistics()));
}
return statistics.build();
}
Aggregations