Search in sources :

Example 51 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class ThriftHiveMetastore method updateTableStatistics.

@Override
public synchronized void updateTableStatistics(MetastoreContext metastoreContext, String databaseName, String tableName, Function<PartitionStatistics, PartitionStatistics> update) {
    PartitionStatistics currentStatistics = getTableStatistics(metastoreContext, databaseName, tableName);
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Table originalTable = getTable(metastoreContext, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    Table modifiedTable = originalTable.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedTable.setParameters(updateStatisticsParameters(modifiedTable.getParameters(), basicStatistics));
    alterTable(metastoreContext, databaseName, tableName, modifiedTable);
    com.facebook.presto.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable, metastoreContext.getColumnConverter());
    OptionalLong rowCount = basicStatistics.getRowCount();
    List<ColumnStatisticsObj> metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream().map(entry -> createMetastoreColumnStatistics(entry.getKey(), table.getColumn(entry.getKey()).get().getType(), entry.getValue(), rowCount)).collect(toImmutableList());
    if (!metastoreColumnStatistics.isEmpty()) {
        setTableColumnStatistics(metastoreContext, databaseName, tableName, metastoreColumnStatistics);
    }
    Set<String> removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet());
    removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(metastoreContext, databaseName, tableName, column));
}
Also used : SchemaAlreadyExistsException(com.facebook.presto.hive.SchemaAlreadyExistsException) EXCLUSIVE(org.apache.hadoop.hive.metastore.api.LockType.EXCLUSIVE) NUMBER_OF_NON_NULL_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics) PrestoPrincipal(com.facebook.presto.spi.security.PrestoPrincipal) MetastoreContext(com.facebook.presto.hive.metastore.MetastoreContext) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InetAddress(java.net.InetAddress) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) Sets.difference(com.google.common.collect.Sets.difference) MAX_VALUE_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) ThriftMetastoreUtil.toMetastoreApiPartition(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) TableAlreadyExistsException(com.facebook.presto.hive.TableAlreadyExistsException) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) TypeUtils.isNumericType(com.facebook.presto.common.type.TypeUtils.isNumericType) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) MIN_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE) NUMBER_OF_DISTINCT_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) HivePrivilegeInfo(com.facebook.presto.hive.metastore.HivePrivilegeInfo) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) Iterables(com.google.common.collect.Iterables) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) Flatten(org.weakref.jmx.Flatten) ACQUIRED(org.apache.hadoop.hive.metastore.api.LockState.ACQUIRED) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Callable(java.util.concurrent.Callable) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) MetastoreUtil.convertPredicateToParts(com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts) HiveBasicStatistics.createEmptyStatistics(com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics) DATE(com.facebook.presto.common.type.DateType.DATE) OptionalLong(java.util.OptionalLong) Lists(com.google.common.collect.Lists) Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) UnlockRequest(org.apache.hadoop.hive.metastore.api.UnlockRequest) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) ArrayType(com.facebook.presto.common.type.ArrayType) CheckLockRequest(org.apache.hadoop.hive.metastore.api.CheckLockRequest) RetryDriver(com.facebook.presto.hive.RetryDriver) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TException(org.apache.thrift.TException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Domain(com.facebook.presto.common.predicate.Domain) Table(org.apache.hadoop.hive.metastore.api.Table) HivePrivilege(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege) ColumnStatisticType(com.facebook.presto.spi.statistics.ColumnStatisticType) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) TableType(org.apache.hadoop.hive.metastore.TableType) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) RowType(com.facebook.presto.common.type.RowType) NUMBER_OF_TRUE_VALUES(com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) PartitionNotFoundException(com.facebook.presto.hive.PartitionNotFoundException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) HIVE_METASTORE_ERROR(com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR) ThriftMetastoreUtil.fromMetastoreApiPrincipalType(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType) SchemaNotFoundException(com.facebook.presto.spi.SchemaNotFoundException) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) PRESTO_VIEW_FLAG(com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_VIEW_FLAG) HiveViewNotSupportedException(com.facebook.presto.hive.HiveViewNotSupportedException) Collectors.toSet(java.util.stream.Collectors.toSet) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) ImmutableSet(com.google.common.collect.ImmutableSet) WAITING(org.apache.hadoop.hive.metastore.api.LockState.WAITING) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) MAX_VALUE(com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE) Collectors(java.util.stream.Collectors) ThriftMetastoreUtil.createMetastoreColumnStatistics(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics) String.format(java.lang.String.format) List(java.util.List) RoleGrant(com.facebook.presto.spi.security.RoleGrant) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) ThriftMetastoreUtil.parsePrivilege(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) TOTAL_SIZE_IN_BYTES(com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES) ThriftMetastoreUtil.fromMetastoreApiTable(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) MapType(com.facebook.presto.common.type.MapType) Column(com.facebook.presto.hive.metastore.Column) HiveType(com.facebook.presto.hive.HiveType) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) PrestoException(com.facebook.presto.spi.PrestoException) Partition(org.apache.hadoop.hive.metastore.api.Partition) Function(java.util.function.Function) OWNERSHIP(com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP) Inject(javax.inject.Inject) HashSet(java.util.HashSet) LockLevel(org.apache.hadoop.hive.metastore.api.LockLevel) ImmutableList(com.google.common.collect.ImmutableList) ALREADY_EXISTS(com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS) Objects.requireNonNull(java.util.Objects.requireNonNull) MetastoreUtil.updateStatisticsParameters(com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters) MetastoreClientConfig(com.facebook.presto.hive.MetastoreClientConfig) Type(com.facebook.presto.common.type.Type) ThriftMetastoreUtil.fromRolePrincipalGrants(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants) USER(com.facebook.presto.spi.security.PrincipalType.USER) TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) Iterator(java.util.Iterator) ThriftMetastoreUtil.fromPrestoPrincipalType(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromPrestoPrincipalType) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) Database(org.apache.hadoop.hive.metastore.api.Database) Table(org.apache.hadoop.hive.metastore.api.Table) ThriftMetastoreUtil.fromMetastoreApiTable(com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable) MetastoreUtil.getHiveBasicStatistics(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(com.facebook.presto.hive.HiveBasicStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PartitionStatistics(com.facebook.presto.hive.metastore.PartitionStatistics) OptionalLong(java.util.OptionalLong)

Example 52 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class ThriftMetastoreUtil method createStringStatistics.

private static ColumnStatisticsObj createStringStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) {
    StringColumnStatsData data = new StringColumnStatsData();
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs);
    data.setMaxColLen(statistics.getMaxValueSizeInBytes().orElse(0));
    data.setAvgColLen(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
    return new ColumnStatisticsObj(columnName, columnType.toString(), stringStats(data));
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData)

Example 53 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class ThriftMetastoreUtil method createDecimalStatistics.

private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    DecimalColumnStatsData data = new DecimalColumnStatsData();
    statistics.getDecimalStatistics().ifPresent(decimalStatistics -> {
        decimalStatistics.getMin().ifPresent(value -> data.setLowValue(toMetastoreDecimal(value)));
        decimalStatistics.getMax().ifPresent(value -> data.setHighValue(toMetastoreDecimal(value)));
    });
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs);
    return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(data));
}
Also used : DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)

Example 54 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class ThriftMetastoreUtil method createBinaryStatistics.

private static ColumnStatisticsObj createBinaryStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) {
    BinaryColumnStatsData data = new BinaryColumnStatsData();
    statistics.getNullsCount().ifPresent(data::setNumNulls);
    data.setMaxColLen(statistics.getMaxValueSizeInBytes().orElse(0));
    data.setAvgColLen(getAverageColumnLength(statistics.getTotalSizeInBytes(), rowCount, statistics.getNullsCount()).orElse(0));
    return new ColumnStatisticsObj(columnName, columnType.toString(), binaryStats(data));
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 55 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testEmptyDecimalStatsToColumnStatistics.

@Test
public void testEmptyDecimalStatsToColumnStatistics() {
    DecimalColumnStatsData emptyDecimalColumnStatsData = new DecimalColumnStatsData();
    ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(emptyDecimalColumnStatsData));
    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty());
    assertEquals(actual.getIntegerStatistics(), Optional.empty());
    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty())));
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.empty());
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getNullsCount(), OptionalLong.empty());
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Aggregations

ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)219 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)104 ArrayList (java.util.ArrayList)98 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)82 Test (org.junit.Test)79 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)68 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)43 Table (org.apache.hadoop.hive.metastore.api.Table)43 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)35 List (java.util.List)34 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)29 HashMap (java.util.HashMap)28 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22