Search in sources:

Example 1 with HiveColumnStatistics

use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.

Method storePartitionColumnStatistics of the class ThriftHiveMetastore.

/**
 * Forwards the column statistics attached to a partition to
 * {@code setPartitionColumnStatistics}; does nothing when the partition
 * carries no column statistics.
 *
 * @param identity identity passed through to {@code setPartitionColumnStatistics}
 * @param databaseName database name passed through unchanged
 * @param tableName table name passed through unchanged
 * @param partitionName partition name passed through unchanged
 * @param partitionWithStatistics source of the partition columns and its statistics
 */
private void storePartitionColumnStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, PartitionWithStatistics partitionWithStatistics) {
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();
    Map<String, HiveColumnStatistics> statisticsByColumn = partitionStatistics.getColumnStatistics();
    if (!statisticsByColumn.isEmpty()) {
        // Column name -> Hive type, built from the partition's own column list.
        Map<String, HiveType> typeByColumn = partitionWithStatistics.getPartition()
                .getColumns()
                .stream()
                .collect(toImmutableMap(Column::getName, Column::getType));
        setPartitionColumnStatistics(
                identity,
                databaseName,
                tableName,
                partitionName,
                typeByColumn,
                statisticsByColumn,
                partitionStatistics.getBasicStatistics().getRowCount());
    }
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveType(io.prestosql.plugin.hive.HiveType)

Example 2 with HiveColumnStatistics

use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.

Method getTableStatistics of the class ThriftHiveMetastore.

/**
 * Builds the statistics of a table from its parameters (basic statistics)
 * and from {@code getTableColumnStatistics} (per-column statistics).
 *
 * @param identity identity passed through to {@code getTableColumnStatistics}
 * @param table metastore table whose storage-descriptor columns and parameters are read
 * @return basic and column statistics combined into one {@code PartitionStatistics}
 */
@Override
public PartitionStatistics getTableStatistics(HiveIdentity identity, Table table) {
    // Only the columns listed in the storage descriptor are queried.
    List<String> columnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    HiveBasicStatistics tableBasics = ThriftMetastoreUtil.getHiveBasicStatistics(table.getParameters());
    return new PartitionStatistics(
            tableBasics,
            getTableColumnStatistics(identity, table.getDbName(), table.getTableName(), columnNames, tableBasics.getRowCount()));
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics)

Example 3 with HiveColumnStatistics

use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.

Method getPartitionStatistics of the class ThriftHiveMetastore.

/**
 * Collects basic and column statistics for the given partitions, keyed by
 * the partition name produced by {@code makePartName}.
 *
 * Partitions for which {@code getPartitionColumnStatistics} returns no entry
 * get an empty column-statistics map.
 *
 * @param identity identity passed through to {@code getPartitionColumnStatistics}
 * @param table metastore table providing data columns and partition keys
 * @param partitions partitions whose statistics are requested
 * @return immutable map from partition name to its statistics
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    // Basic statistics are derived from each partition's parameters.
    Map<String, HiveBasicStatistics> basicsByPartition = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionKeyNames, partition.getValues()),
                    partition -> ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> rowCountByPartition = basicsByPartition.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, basicsEntry -> basicsEntry.getValue().getRowCount()));

    Map<String, Map<String, HiveColumnStatistics>> columnStatsByPartition = getPartitionColumnStatistics(
            identity,
            table.getDbName(),
            table.getTableName(),
            basicsByPartition.keySet(),
            dataColumnNames,
            rowCountByPartition);

    ImmutableMap.Builder<String, PartitionStatistics> statisticsByPartition = ImmutableMap.builder();
    for (Map.Entry<String, HiveBasicStatistics> basicsEntry : basicsByPartition.entrySet()) {
        String name = basicsEntry.getKey();
        // Fall back to an empty map when no column statistics were returned for this partition.
        Map<String, HiveColumnStatistics> columnStats = columnStatsByPartition.getOrDefault(name, ImmutableMap.of());
        statisticsByPartition.put(name, new PartitionStatistics(basicsEntry.getValue(), columnStats));
    }
    return statisticsByPartition.build();
}
Also used : LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) HiveViewNotSupportedException(io.prestosql.plugin.hive.HiveViewNotSupportedException) ShowLocksResponse(org.apache.hadoop.hive.metastore.api.ShowLocksResponse) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) RoleGrant(io.prestosql.spi.security.RoleGrant) NoSuchTxnException(org.apache.hadoop.hive.metastore.api.NoSuchTxnException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) SchemaAlreadyExistsException(io.prestosql.spi.connector.SchemaAlreadyExistsException) Sets.difference(com.google.common.collect.Sets.difference) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Map(java.util.Map) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) TxnAbortedException(org.apache.hadoop.hive.metastore.api.TxnAbortedException) Verify.verifyNotNull(com.google.common.base.Verify.verifyNotNull) ALREADY_EXISTS(io.prestosql.spi.StandardErrorCode.ALREADY_EXISTS) Iterables(com.google.common.collect.Iterables) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) Flatten(org.weakref.jmx.Flatten) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Callable(java.util.concurrent.Callable) ArrayList(java.util.ArrayList) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) OptionalLong(java.util.OptionalLong) 
Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TException(org.apache.thrift.TException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) USER(io.prestosql.spi.security.PrincipalType.USER) Table(org.apache.hadoop.hive.metastore.api.Table) System.nanoTime(java.lang.System.nanoTime) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) Duration(io.airlift.units.Duration) RetryDriver(io.prestosql.plugin.hive.util.RetryDriver) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Locale(java.util.Locale) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) Type(io.prestosql.spi.type.Type) Collectors.toSet(java.util.stream.Collectors.toSet) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) PrestoException(io.prestosql.spi.PrestoException) NoSuchLockException(org.apache.hadoop.hive.metastore.api.NoSuchLockException) ImmutableSet(com.google.common.collect.ImmutableSet) 
ImmutableMap(com.google.common.collect.ImmutableMap) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) HiveType(io.prestosql.plugin.hive.HiveType) ShowLocksRequest(org.apache.hadoop.hive.metastore.api.ShowLocksRequest) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) HivePartition(io.prestosql.plugin.hive.HivePartition) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) Logger(io.airlift.log.Logger) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) Partition(org.apache.hadoop.hive.metastore.api.Partition) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) Inject(javax.inject.Inject) HashSet(java.util.HashSet) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) HivePrivilegeInfo(io.prestosql.plugin.hive.metastore.HivePrivilegeInfo) Objects.requireNonNull(java.util.Objects.requireNonNull) TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Iterator(java.util.Iterator) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) SchemaNotFoundException(io.prestosql.spi.connector.SchemaNotFoundException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Column(io.prestosql.plugin.hive.metastore.Column) Closeable(java.io.Closeable) 
Database(org.apache.hadoop.hive.metastore.api.Database) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 4 with HiveColumnStatistics

use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.

Method createColumnStatisticsForEmptyPartition of the class Statistics.

/**
 * Builds column statistics for a partition that contains no rows: every
 * requested counter is set to zero and min/max handling is delegated to
 * {@code setMinMaxForEmptyPartition}.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes statistic kinds to populate
 * @return the populated column statistics
 * @throws PrestoException if a requested statistic kind is not handled
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();
    for (ColumnStatisticType requestedType : columnStatisticTypes) {
        applyEmptyPartitionStatistic(builder, columnType, requestedType);
    }
    return builder.build();
}

/** Writes the empty-partition value of a single statistic kind into the builder. */
private static void applyEmptyPartitionStatistic(HiveColumnStatistics.Builder builder, Type columnType, ColumnStatisticType columnStatisticType) {
    switch (columnStatisticType) {
        case MIN_VALUE:
        case MAX_VALUE:
            // Min and max share one handler.
            setMinMaxForEmptyPartition(columnType, builder);
            return;
        case NUMBER_OF_TRUE_VALUES:
            builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
            return;
        case NUMBER_OF_NON_NULL_VALUES:
            // The non-null statistic is stored as a nulls count.
            builder.setNullsCount(0);
            return;
        case NUMBER_OF_DISTINCT_VALUES:
            builder.setDistinctValuesCount(0);
            return;
        case TOTAL_SIZE_IN_BYTES:
            builder.setTotalSizeInBytes(0);
            return;
        case MAX_VALUE_SIZE_IN_BYTES:
            builder.setMaxValueSizeInBytes(0);
            return;
        default:
            throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + columnStatisticType.name());
    }
}
Also used : ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) PrestoException(io.prestosql.spi.PrestoException)

Example 5 with HiveColumnStatistics

use of io.prestosql.plugin.hive.metastore.HiveColumnStatistics in project hetu-core by openlookeng.

Method createHiveColumnStatistics of the class Statistics.

/**
 * Converts the statistic blocks computed for one column into
 * {@code HiveColumnStatistics}.
 *
 * Min/max must be supplied together or not at all. The nulls count, the
 * distinct-values count and the boolean statistics are only produced when
 * the non-null count is present, because each of them is derived from it.
 *
 * @param session session passed through to the value-extraction helpers
 * @param computedStatistics computed statistic blocks keyed by statistic kind
 * @param columnType type of the column, used for min/max extraction
 * @param rowCount total row count, used to derive the nulls count
 * @return the assembled column statistics
 */
private static HiveColumnStatistics createHiveColumnStatistics(ConnectorSession session, Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // Min and max are requested as a pair: both present or both absent.
    boolean hasMinValue = computedStatistics.containsKey(MIN_VALUE);
    verify(hasMinValue == computedStatistics.containsKey(MAX_VALUE));
    if (hasMinValue) {
        setMinMax(session, columnType, computedStatistics.get(MIN_VALUE), computedStatistics.get(MAX_VALUE), builder);
    }

    // Size statistics.
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        builder.setMaxValueSizeInBytes(getIntegerValue(session, BIGINT, computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES)));
    }
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        builder.setTotalSizeInBytes(getIntegerValue(session, BIGINT, computedStatistics.get(TOTAL_SIZE_IN_BYTES)));
    }

    // Every remaining statistic is derived from the non-null count.
    if (!computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) {
        return builder.build();
    }
    long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);
    builder.setNullsCount(rowCount - nonNullCount);

    if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
        // The distinct count is an HLL estimate and may exceed the non-null count, so cap it.
        long estimatedDistinctCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
        builder.setDistinctValuesCount(Math.min(estimatedDistinctCount, nonNullCount));
    }

    if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
        // The false count is whatever remains of the non-null values.
        long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
        builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(nonNullCount - trueCount)));
    }
    return builder.build();
}
Also used : BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics)

Aggregations

HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)24 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)17 Test (org.testng.annotations.Test)16 PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics)6 HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)5 BooleanStatistics (io.prestosql.plugin.hive.metastore.BooleanStatistics)4 DateStatistics (io.prestosql.plugin.hive.metastore.DateStatistics)4 DecimalStatistics (io.prestosql.plugin.hive.metastore.DecimalStatistics)4 DoubleStatistics (io.prestosql.plugin.hive.metastore.DoubleStatistics)4 IntegerStatistics (io.prestosql.plugin.hive.metastore.IntegerStatistics)4 PrestoException (io.prestosql.spi.PrestoException)4 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 Logger (io.airlift.log.Logger)3 HiveErrorCode (io.prestosql.plugin.hive.HiveErrorCode)3 HivePartition (io.prestosql.plugin.hive.HivePartition)3 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)3 LocalDate (java.time.LocalDate)3