Search in sources :

Example 11 with HiveBasicStatistics

use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.

the class ThriftHiveMetastore method getPartitionStatistics.

@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(toImmutableList());
    List<String> partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(toImmutableList());
    Map<String, HiveBasicStatistics> partitionBasicStatistics = partitions.stream().collect(toImmutableMap(partition -> makePartName(partitionColumns, partition.getValues()), partition -> getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> partitionRowCounts = partitionBasicStatistics.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));
    Map<String, Map<String, HiveColumnStatistics>> partitionColumnStatistics = getPartitionColumnStatistics(identity, table.getDbName(), table.getTableName(), partitionBasicStatistics.keySet(), dataColumns, partitionRowCounts);
    ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
    for (String partitionName : partitionBasicStatistics.keySet()) {
        HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName);
        Map<String, HiveColumnStatistics> columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of());
        result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics));
    }
    return result.buildOrThrow();
}
Also used : ThriftMetastoreUtil.updateStatisticsParameters(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters) LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) USER(io.trino.spi.security.PrincipalType.USER) UNKNOWN_METHOD(org.apache.thrift.TApplicationException.UNKNOWN_METHOD) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) NoSuchTxnException(org.apache.hadoop.hive.metastore.api.NoSuchTxnException) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) Sets.difference(com.google.common.collect.Sets.difference) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) HivePartition(io.trino.plugin.hive.HivePartition) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) OWNERSHIP(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP) MetastoreUtil.partitionKeyFilterToStringList(io.trino.plugin.hive.metastore.MetastoreUtil.partitionKeyFilterToStringList) TRUE(java.lang.Boolean.TRUE) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) LockType(org.apache.hadoop.hive.metastore.api.LockType) TxnAbortedException(org.apache.hadoop.hive.metastore.api.TxnAbortedException) Verify.verifyNotNull(com.google.common.base.Verify.verifyNotNull) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) HiveViewNotSupportedException(io.trino.plugin.hive.HiveViewNotSupportedException) Flatten(org.weakref.jmx.Flatten) ThriftMetastoreUtil.toMetastoreApiPartition(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) ThriftMetastoreUtil.isAvroTableWithSchemaSet(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.isAvroTableWithSchemaSet) HIVE_TABLE_LOCK_NOT_ACQUIRED(io.trino.plugin.hive.HiveErrorCode.HIVE_TABLE_LOCK_NOT_ACQUIRED) ThriftMetastoreUtil.fromMetastoreApiPrincipalType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) SchemaAlreadyExistsException(io.trino.plugin.hive.SchemaAlreadyExistsException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) ThriftMetastoreAuthenticationType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreAuthenticationConfig.ThriftMetastoreAuthenticationType) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) RoleGrant(io.trino.spi.security.RoleGrant) Table(org.apache.hadoop.hive.metastore.api.Table) System.nanoTime(java.lang.System.nanoTime) TableType(org.apache.hadoop.hive.metastore.TableType) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) Duration(io.airlift.units.Duration) NonEvictableLoadingCache(io.trino.collect.cache.NonEvictableLoadingCache) AcidTransactionOwner(io.trino.plugin.hive.metastore.AcidTransactionOwner) ThriftMetastoreUtil.fromMetastoreApiTable(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) Path(org.apache.hadoop.fs.Path) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) ThriftMetastoreUtil.fromTrinoPrincipalType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromTrinoPrincipalType) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) NoSuchLockException(org.apache.hadoop.hive.metastore.api.NoSuchLockException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) Predicate(java.util.function.Predicate) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) TableAlreadyExistsException(io.trino.plugin.hive.TableAlreadyExistsException) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) CacheLoader(com.google.common.cache.CacheLoader) SafeCaches.buildNonEvictableCache(io.trino.collect.cache.SafeCaches.buildNonEvictableCache) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Pattern(java.util.regex.Pattern) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) RetryDriver(io.trino.plugin.hive.util.RetryDriver) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) Partition(org.apache.hadoop.hive.metastore.api.Partition) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) TxnToWriteId(org.apache.hadoop.hive.metastore.api.TxnToWriteId) Inject(javax.inject.Inject) HashSet(java.util.HashSet) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) LockLevel(org.apache.hadoop.hive.metastore.api.LockLevel) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) TApplicationException(org.apache.thrift.TApplicationException) FALSE(java.lang.Boolean.FALSE) TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) Iterator(java.util.Iterator) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) TupleDomain(io.trino.spi.predicate.TupleDomain) ThriftMetastoreUtil.fromRolePrincipalGrants(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants) ThriftMetastoreUtil.createMetastoreColumnStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ThriftMetastoreUtil.parsePrivilege(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege) Closeable(java.io.Closeable) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) HiveConfig(io.trino.plugin.hive.HiveConfig) SECONDS(java.util.concurrent.TimeUnit.SECONDS) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 12 with HiveBasicStatistics

use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.

the class ThriftHiveMetastore method updatePartitionStatistics.

@Override
public void updatePartitionStatistics(HiveIdentity identity, Table table, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<Partition> partitions = getPartitionsByNames(identity, table.getDbName(), table.getTableName(), ImmutableList.of(partitionName));
    if (partitions.size() != 1) {
        throw new TrinoException(HIVE_METASTORE_ERROR, "Metastore returned multiple partitions for name: " + partitionName);
    }
    Partition originalPartition = getOnlyElement(partitions);
    PartitionStatistics currentStatistics = requireNonNull(getPartitionStatistics(identity, table, partitions).get(partitionName), "getPartitionStatistics() did not return statistics for partition");
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Partition modifiedPartition = originalPartition.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedPartition.setParameters(updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
    alterPartitionWithoutStatistics(identity, table.getDbName(), table.getTableName(), modifiedPartition);
    Map<String, HiveType> columns = modifiedPartition.getSd().getCols().stream().collect(toImmutableMap(FieldSchema::getName, schema -> HiveType.valueOf(schema.getType())));
    setPartitionColumnStatistics(identity, table.getDbName(), table.getTableName(), partitionName, columns, updatedStatistics.getColumnStatistics(), basicStatistics.getRowCount());
    Set<String> removedStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedStatistics.forEach(column -> deletePartitionColumnStatistics(identity, table.getDbName(), table.getTableName(), partitionName, column));
}
Also used : ThriftMetastoreUtil.updateStatisticsParameters(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters) LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) USER(io.trino.spi.security.PrincipalType.USER) UNKNOWN_METHOD(org.apache.thrift.TApplicationException.UNKNOWN_METHOD) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) NoSuchTxnException(org.apache.hadoop.hive.metastore.api.NoSuchTxnException) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) Sets.difference(com.google.common.collect.Sets.difference) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Column(io.trino.plugin.hive.metastore.Column) Map(java.util.Map) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ConnectorIdentity(io.trino.spi.security.ConnectorIdentity) HivePartition(io.trino.plugin.hive.HivePartition) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) SchemaTableName(io.trino.spi.connector.SchemaTableName) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Stream(java.util.stream.Stream) OWNERSHIP(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP) MetastoreUtil.partitionKeyFilterToStringList(io.trino.plugin.hive.metastore.MetastoreUtil.partitionKeyFilterToStringList) TRUE(java.lang.Boolean.TRUE) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) LockType(org.apache.hadoop.hive.metastore.api.LockType) TxnAbortedException(org.apache.hadoop.hive.metastore.api.TxnAbortedException) Verify.verifyNotNull(com.google.common.base.Verify.verifyNotNull) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) HiveViewNotSupportedException(io.trino.plugin.hive.HiveViewNotSupportedException) Flatten(org.weakref.jmx.Flatten) ThriftMetastoreUtil.toMetastoreApiPartition(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Strings.isNullOrEmpty(com.google.common.base.Strings.isNullOrEmpty) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) ThriftMetastoreUtil.isAvroTableWithSchemaSet(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.isAvroTableWithSchemaSet) HIVE_TABLE_LOCK_NOT_ACQUIRED(io.trino.plugin.hive.HiveErrorCode.HIVE_TABLE_LOCK_NOT_ACQUIRED) ThriftMetastoreUtil.fromMetastoreApiPrincipalType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) SchemaAlreadyExistsException(io.trino.plugin.hive.SchemaAlreadyExistsException) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) ThriftMetastoreAuthenticationType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreAuthenticationConfig.ThriftMetastoreAuthenticationType) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) RoleGrant(io.trino.spi.security.RoleGrant) Table(org.apache.hadoop.hive.metastore.api.Table) System.nanoTime(java.lang.System.nanoTime) TableType(org.apache.hadoop.hive.metastore.TableType) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) SchemaNotFoundException(io.trino.spi.connector.SchemaNotFoundException) Duration(io.airlift.units.Duration) NonEvictableLoadingCache(io.trino.collect.cache.NonEvictableLoadingCache) AcidTransactionOwner(io.trino.plugin.hive.metastore.AcidTransactionOwner) ThriftMetastoreUtil.fromMetastoreApiTable(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ALREADY_EXISTS(io.trino.spi.StandardErrorCode.ALREADY_EXISTS) Path(org.apache.hadoop.fs.Path) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) ThriftMetastoreUtil.fromTrinoPrincipalType(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromTrinoPrincipalType) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) NoSuchLockException(org.apache.hadoop.hive.metastore.api.NoSuchLockException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) Predicate(java.util.function.Predicate) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) TableAlreadyExistsException(io.trino.plugin.hive.TableAlreadyExistsException) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) CacheLoader(com.google.common.cache.CacheLoader) SafeCaches.buildNonEvictableCache(io.trino.collect.cache.SafeCaches.buildNonEvictableCache) PRESTO_VIEW_FLAG(io.trino.plugin.hive.ViewReaderUtil.PRESTO_VIEW_FLAG) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) Optional(java.util.Optional) CacheBuilder(com.google.common.cache.CacheBuilder) Pattern(java.util.regex.Pattern) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) RetryDriver(io.trino.plugin.hive.util.RetryDriver) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) Partition(org.apache.hadoop.hive.metastore.api.Partition) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) TxnToWriteId(org.apache.hadoop.hive.metastore.api.TxnToWriteId) Inject(javax.inject.Inject) HashSet(java.util.HashSet) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) LockLevel(org.apache.hadoop.hive.metastore.api.LockLevel) ImmutableList(com.google.common.collect.ImmutableList) Verify.verify(com.google.common.base.Verify.verify) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) TApplicationException(org.apache.thrift.TApplicationException) FALSE(java.lang.Boolean.FALSE) TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) Iterator(java.util.Iterator) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) TupleDomain(io.trino.spi.predicate.TupleDomain) ThriftMetastoreUtil.fromRolePrincipalGrants(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants) ThriftMetastoreUtil.createMetastoreColumnStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ThriftMetastoreUtil.parsePrivilege(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege) Closeable(java.io.Closeable) Database(org.apache.hadoop.hive.metastore.api.Database) Collections(java.util.Collections) HiveConfig(io.trino.plugin.hive.HiveConfig) SECONDS(java.util.concurrent.TimeUnit.SECONDS) HivePartition(io.trino.plugin.hive.HivePartition) ThriftMetastoreUtil.toMetastoreApiPartition(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TrinoException(io.trino.spi.TrinoException) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) HiveType(io.trino.plugin.hive.HiveType)

Example 13 with HiveBasicStatistics

use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.

the class MetastoreHiveStatisticsProvider method validatePartitionStatistics.

@VisibleForTesting
static void validatePartitionStatistics(SchemaTableName table, Map<String, PartitionStatistics> partitionStatistics) {
    partitionStatistics.forEach((partition, statistics) -> {
        HiveBasicStatistics basicStatistics = statistics.getBasicStatistics();
        OptionalLong rowCount = basicStatistics.getRowCount();
        rowCount.ifPresent(count -> checkStatistics(count >= 0, table, partition, "rowCount must be greater than or equal to zero: %s", count));
        basicStatistics.getFileCount().ifPresent(count -> checkStatistics(count >= 0, table, partition, "fileCount must be greater than or equal to zero: %s", count));
        basicStatistics.getInMemoryDataSizeInBytes().ifPresent(size -> checkStatistics(size >= 0, table, partition, "inMemoryDataSizeInBytes must be greater than or equal to zero: %s", size));
        basicStatistics.getOnDiskDataSizeInBytes().ifPresent(size -> checkStatistics(size >= 0, table, partition, "onDiskDataSizeInBytes must be greater than or equal to zero: %s", size));
        statistics.getColumnStatistics().forEach((column, columnStatistics) -> validateColumnStatistics(table, partition, column, rowCount, columnStatistics));
    });
}
Also used : OptionalLong(java.util.OptionalLong) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 14 with HiveBasicStatistics

use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.

@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
    assertEquals(statisticsProvider.getTableStatistics(SESSION, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))), expected);
}
Also used : DoubleRange(io.trino.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.trino.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HivePartition(io.trino.plugin.hive.HivePartition) Test(org.testng.annotations.Test)

Example 15 with HiveBasicStatistics

use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.

the class SemiTransactionalHiveMetastore method updatePartitionStatistics.

// For HiveBasicStatistics, we only overwrite the original statistics if the new one is not empty.
// For HiveColumnStatistics, only overwrite the original statistics for columns present in the new ones and preserve the others.
private static PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
    HiveBasicStatistics oldBasicStatistics = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics newBasicStatistics = newPartitionStats.getBasicStatistics();
    HiveBasicStatistics updatedBasicStatistics = new HiveBasicStatistics(firstPresent(newBasicStatistics.getFileCount(), oldBasicStatistics.getFileCount()), firstPresent(newBasicStatistics.getRowCount(), oldBasicStatistics.getRowCount()), firstPresent(newBasicStatistics.getInMemoryDataSizeInBytes(), oldBasicStatistics.getInMemoryDataSizeInBytes()), firstPresent(newBasicStatistics.getOnDiskDataSizeInBytes(), oldBasicStatistics.getOnDiskDataSizeInBytes()));
    Map<String, HiveColumnStatistics> updatedColumnStatistics = updateColumnStatistics(oldPartitionStats.getColumnStatistics(), newPartitionStats.getColumnStatistics());
    return new PartitionStatistics(updatedBasicStatistics, updatedColumnStatistics);
}
Also used : PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics)

Aggregations

HiveBasicStatistics (io.trino.plugin.hive.HiveBasicStatistics)22 PartitionStatistics (io.trino.plugin.hive.PartitionStatistics)13 ThriftMetastoreUtil.getHiveBasicStatistics (io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics)13 HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics)11 TrinoException (io.trino.spi.TrinoException)11 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)9 ImmutableMap (com.google.common.collect.ImmutableMap)9 Column (io.trino.plugin.hive.metastore.Column)8 List (java.util.List)8 Map (java.util.Map)8 ImmutableList (com.google.common.collect.ImmutableList)7 OptionalLong (java.util.OptionalLong)7 HIVE_METASTORE_ERROR (io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR)6 ColumnStatisticType (io.trino.spi.statistics.ColumnStatisticType)6 Type (io.trino.spi.type.Type)6 ArrayList (java.util.ArrayList)6 Optional (java.util.Optional)6 Set (java.util.Set)6 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)5 Sets.difference (com.google.common.collect.Sets.difference)5