Search in sources:

Example 6 with HiveBasicStatistics

use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.

The method getHiveBasicStatistics of the class MetastoreUtil.

/**
 * Builds a {@link HiveBasicStatistics} from a map of metastore parameters.
 *
 * <p>The values stored under {@code NUM_FILES}, {@code NUM_ROWS}, {@code RAW_DATA_SIZE} and
 * {@code TOTAL_SIZE} are each run through {@code parse} and handed to the constructor as the
 * file count, row count, in-memory data size and on-disk data size, in that order.
 *
 * @param parameters metastore parameters to read the statistics from
 * @return the basic statistics assembled from the four parameter values
 */
public static HiveBasicStatistics getHiveBasicStatistics(Map<String, String> parameters) {
    // Arguments are evaluated left to right, so lookups and parsing happen in the
    // order: file count, row count, in-memory size, on-disk size.
    return new HiveBasicStatistics(
            parse(parameters.get(NUM_FILES)),
            parse(parameters.get(NUM_ROWS)),
            parse(parameters.get(RAW_DATA_SIZE)),
            parse(parameters.get(TOTAL_SIZE)));
}
Also used : OptionalLong(java.util.OptionalLong) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics)

Example 7 with HiveBasicStatistics

use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.

The method updatePartitionStatistics of the class SemiTransactionalHiveMetastore.

// Merges freshly computed partition statistics into the previously stored ones.
// Basic statistics: each field is chosen via firstPresent(new, old), i.e. the original
// value is only overwritten when the new one is not empty.
// Column statistics: always taken wholesale from the new statistics.
// TODO: Collect file count, on-disk size and in-memory size during ANALYZE
private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
    HiveBasicStatistics previous = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics incoming = newPartitionStats.getBasicStatistics();

    OptionalLong fileCount = firstPresent(incoming.getFileCount(), previous.getFileCount());
    OptionalLong rowCount = firstPresent(incoming.getRowCount(), previous.getRowCount());
    OptionalLong inMemorySize = firstPresent(incoming.getInMemoryDataSizeInBytes(), previous.getInMemoryDataSizeInBytes());
    OptionalLong onDiskSize = firstPresent(incoming.getOnDiskDataSizeInBytes(), previous.getOnDiskDataSizeInBytes());

    HiveBasicStatistics merged = new HiveBasicStatistics(fileCount, rowCount, inMemorySize, onDiskSize);
    return new PartitionStatistics(merged, newPartitionStats.getColumnStatistics());
}
Also used : HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics)

Example 8 with HiveBasicStatistics

use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.

The method getTableStatistics of the class ThriftHiveMetastore.

/**
 * Loads the basic and column statistics of a table.
 *
 * <p>Basic statistics are derived from the table parameters; column statistics are fetched
 * for every column of the table's storage descriptor, using the table's row count.
 *
 * @throws TableNotFoundException if {@code getTable} yields no table for the given names
 */
@Override
public PartitionStatistics getTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName) {
    Table metastoreTable = getTable(metastoreContext, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));

    List<String> columnNames = metastoreTable.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    HiveBasicStatistics tableBasicStatistics = getHiveBasicStatistics(metastoreTable.getParameters());
    OptionalLong rowCount = tableBasicStatistics.getRowCount();

    return new PartitionStatistics(
            tableBasicStatistics,
            getTableColumnStatistics(metastoreContext, databaseName, tableName, columnNames, rowCount));
}
Also used : TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Table(org.apache.hadoop.hive.metastore.api.Table) ThriftMetastoreUtil.fromMetastoreApiTable(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName)

Example 9 with HiveBasicStatistics

use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.

The method updateTableStatistics of the class ThriftHiveMetastore.

/**
 * Applies {@code update} to the current statistics of a table and writes the result back.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>read the current statistics and compute the updated ones;</li>
 *   <li>alter the table so that its parameters carry the updated basic statistics;</li>
 *   <li>store the updated column statistics, if there are any;</li>
 *   <li>delete statistics for columns present before the update but absent afterwards.</li>
 * </ol>
 *
 * @throws TableNotFoundException if {@code getTable} yields no table for the given names
 */
@Override
public synchronized void updateTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update) {
    PartitionStatistics before = getTableStatistics(metastoreContext, databaseName, tableName);
    PartitionStatistics after = update.apply(before);

    Table fetchedTable = getTable(metastoreContext, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    // Work on a copy so the fetched table object itself is left untouched.
    Table alteredTable = fetchedTable.deepCopy();
    HiveBasicStatistics newBasicStatistics = after.getBasicStatistics();
    alteredTable.setParameters(updateStatisticsParameters(alteredTable.getParameters(), newBasicStatistics));
    alterTable(metastoreContext, databaseName, tableName, alteredTable);

    // The converted table is used below to look up each column's type.
    com.facebook.presto.hive.metastore.Table prestoTable = fromMetastoreApiTable(alteredTable, metastoreContext.getColumnConverter());
    OptionalLong newRowCount = newBasicStatistics.getRowCount();
    List<ColumnStatisticsObj> columnStatisticsToStore = after.getColumnStatistics().entrySet().stream()
            .map(entry -> createMetastoreColumnStatistics(
                    entry.getKey(),
                    prestoTable.getColumn(entry.getKey()).get().getType(),
                    entry.getValue(),
                    newRowCount))
            .collect(toImmutableList());
    if (!columnStatisticsToStore.isEmpty()) {
        setTableColumnStatistics(metastoreContext, databaseName, tableName, columnStatisticsToStore);
    }

    // Columns that had statistics before the update but have none afterwards.
    Set<String> droppedColumns = difference(before.getColumnStatistics().keySet(), after.getColumnStatistics().keySet());
    for (String droppedColumn : droppedColumns) {
        deleteTableColumnStatistics(metastoreContext, databaseName, tableName, droppedColumn);
    }
}
Also used : SchemaAlreadyExistsException(com.facebook.presto.hive.SchemaAlreadyExistsException) EXCLUSIVE(org.apache.hadoop.hive.metastore.api.LockType.EXCLUSIVE) NUMBER_OF_NON_NULL_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) PrestoPrincipal(com.facebook.presto.spi.security.PrestoPrincipal) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InetAddress(java.net.InetAddress) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) Sets.difference(com.google.common.collect.Sets.difference) MAX_VALUE_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) ThriftMetastoreUtil.toMetastoreApiPartition(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) TableAlreadyExistsException(com.facebook.presto.hive.TableAlreadyExistsException) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) TypeUtils.isNumericType(com.facebook.presto.common.type.TypeUtils.isNumericType) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) MIN_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE) NUMBER_OF_DISTINCT_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) 
InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) Iterables(com.google.common.collect.Iterables) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) Flatten(org.weakref.jmx.Flatten) ACQUIRED(org.apache.hadoop.hive.metastore.api.LockState.ACQUIRED) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Callable(java.util.concurrent.Callable) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) MetastoreUtil.convertPredicateToParts(com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts) HiveBasicStatistics.createEmptyStatistics(com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics) DATE(com.facebook.presto.common.type.DateType.DATE) OptionalLong(java.util.OptionalLong) Lists(com.google.common.collect.Lists) Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) UnlockRequest(org.apache.hadoop.hive.metastore.api.UnlockRequest) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) CheckLockRequest(org.apache.hadoop.hive.metastore.api.CheckLockRequest) RetryDriver(com.facebook.presto.hive.RetryDriver) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TException(org.apache.thrift.TException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Domain(com.facebook.presto.common.predicate.Domain) Table(org.apache.hadoop.hive.metastore.api.Table) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) 
ColumnStatisticType(com.facebook.presto.spi.statistics.ColumnStatisticType) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TableType(org.apache.hadoop.hive.metastore.TableType) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) RowType(com.facebook.presto.common.type.RowType) NUMBER_OF_TRUE_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) PartitionNotFoundException(com.facebook.presto.hive.PartitionNotFoundException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HIVE_METASTORE_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR) ThriftMetastoreUtil.fromMetastoreApiPrincipalType(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType) SchemaNotFoundException(com.facebook.presto.spi.SchemaNotFoundException) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) PRESTO_VIEW_FLAG(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_VIEW_FLAG) HiveViewNotSupportedException(com.facebook.presto.hive.HiveViewNotSupportedException) Collectors.toSet(java.util.stream.Collectors.toSet) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) ImmutableSet(com.google.common.collect.ImmutableSet) WAITING(org.apache.hadoop.hive.metastore.api.LockState.WAITING) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) MAX_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE) Collectors(java.util.stream.Collectors) 
ThriftMetastoreUtil.createMetastoreColumnStatistics(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics) String.format(java.lang.String.format) List(java.util.List) RoleGrant(com.facebook.presto.spi.security.RoleGrant) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ThriftMetastoreUtil.parsePrivilege(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) TOTAL_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES) ThriftMetastoreUtil.fromMetastoreApiTable(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) MapType(com.facebook.presto.common.type.MapType) Column(com.facebook.presto.hive.metastore.Column) HiveType(com.facebook.presto.hive.HiveType) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PrestoException(com.facebook.presto.spi.PrestoException) Partition(org.apache.hadoop.hive.metastore.api.Partition) Function(java.util.function.Function) OWNERSHIP(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP) Inject(javax.inject.Inject) HashSet(java.util.HashSet) LockLevel(org.apache.hadoop.hive.metastore.api.LockLevel) ImmutableList(com.google.common.collect.ImmutableList) ALREADY_EXISTS(com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.updateStatisticsParameters(com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) Type(com.facebook.presto.common.type.Type) ThriftMetastoreUtil.fromRolePrincipalGrants(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants) USER(com.facebook.presto.spi.security.PrincipalType.USER) 
TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) Iterator(java.util.Iterator) ThriftMetastoreUtil.fromPrestoPrincipalType(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromPrestoPrincipalType) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Database(org.apache.hadoop.hive.metastore.api.Database) Table(org.apache.hadoop.hive.metastore.api.Table) ThriftMetastoreUtil.fromMetastoreApiTable(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) OptionalLong(java.util.OptionalLong)

Example 10 with HiveBasicStatistics

use of com.facebook.presto.hive.HiveBasicStatistics in project presto by prestodb.

The method getPartitionStatistics of the class FileHiveMetastore.

/**
 * Reads the statistics of the requested partitions of a table.
 *
 * <p>For each partition name the partition schema file is read from the partition's metadata
 * directory; basic statistics come from the stored parameters and column statistics are taken
 * from the stored partition metadata.
 *
 * @return a map from partition name to its statistics
 * @throws PartitionNotFoundException if no schema file can be read for a requested partition
 */
@Override
public synchronized Map<String, PartitionStatistics> getPartitionStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Set<String> partitionNames) {
    Table requiredTable = getRequiredTable(metastoreContext, databaseName, tableName);
    ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
    for (String name : partitionNames) {
        List<String> values = extractPartitionValues(name);
        Path metadataDirectory = getPartitionMetadataDirectory(requiredTable, ImmutableList.copyOf(values));
        PartitionMetadata metadata = readSchemaFile("partition", metadataDirectory, partitionCodec)
                .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), values));
        result.put(
                name,
                new PartitionStatistics(getHiveBasicStatistics(metadata.getParameters()), metadata.getColumnStatistics()));
    }
    return result.build();
}
Also used : Path(org.apache.hadoop.fs.Path) PartitionNotFoundException(com.facebook.presto.hive.PartitionNotFoundException) Table(com.facebook.presto.hive.metastore.Table) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableMap(com.google.common.collect.ImmutableMap)

Aggregations

HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)17 PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)11 MetastoreUtil.getHiveBasicStatistics (com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics)9 SchemaTableName (com.facebook.presto.spi.SchemaTableName)7 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)7 HiveColumnStatistics (com.facebook.presto.hive.metastore.HiveColumnStatistics)6 PrestoException (com.facebook.presto.spi.PrestoException)6 Column (com.facebook.presto.hive.metastore.Column)5 Domain (com.facebook.presto.common.predicate.Domain)4 Type (com.facebook.presto.common.type.Type)4 HIVE_METASTORE_ERROR (com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR)4 HiveType (com.facebook.presto.hive.HiveType)4 PartitionNotFoundException (com.facebook.presto.hive.PartitionNotFoundException)4 HivePrivilegeInfo (com.facebook.presto.hive.metastore.HivePrivilegeInfo)4 MetastoreContext (com.facebook.presto.hive.metastore.MetastoreContext)4 MetastoreUtil.convertPredicateToParts (com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts)4 PartitionWithStatistics (com.facebook.presto.hive.metastore.PartitionWithStatistics)4 ThriftMetastoreUtil.fromMetastoreApiTable (com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable)4 OptionalLong (java.util.OptionalLong)4 Test (org.testng.annotations.Test)4