Search in sources :

Example 6 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class ThriftHiveMetastore method getPartitionStatistics.

/**
 * Collects basic and column-level statistics for the given partitions of {@code table}.
 *
 * <p>Partition names are derived from the table's partition key names and each
 * partition's values via {@code makePartName}. Basic statistics are read from the
 * partition parameters; column statistics are fetched for the table's data columns
 * through {@code getPartitionColumnStatistics}.
 *
 * @param identity identity forwarded to the column statistics lookup
 * @param table table the partitions belong to
 * @param partitions partitions to compute statistics for
 * @return immutable map from partition name to its statistics; a partition with no
 *         column statistics entry gets an empty column statistics map
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, Table table, List<Partition> partitions) {
    List<String> dataColumnNames = table.getSd().getCols().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());
    List<String> partitionKeyNames = table.getPartitionKeys().stream()
            .map(FieldSchema::getName)
            .collect(toImmutableList());

    // Basic statistics (row count, etc.) come straight from each partition's parameters.
    Map<String, HiveBasicStatistics> basicStatisticsByName = partitions.stream()
            .collect(toImmutableMap(
                    partition -> makePartName(partitionKeyNames, partition.getValues()),
                    partition -> ThriftMetastoreUtil.getHiveBasicStatistics(partition.getParameters())));
    Map<String, OptionalLong> rowCountsByName = basicStatisticsByName.entrySet().stream()
            .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));

    Map<String, Map<String, HiveColumnStatistics>> columnStatisticsByName = getPartitionColumnStatistics(
            identity,
            table.getDbName(),
            table.getTableName(),
            basicStatisticsByName.keySet(),
            dataColumnNames,
            rowCountsByName);

    ImmutableMap.Builder<String, PartitionStatistics> statisticsByName = ImmutableMap.builder();
    for (Map.Entry<String, HiveBasicStatistics> entry : basicStatisticsByName.entrySet()) {
        String name = entry.getKey();
        // Partitions without a column statistics entry fall back to an empty map.
        Map<String, HiveColumnStatistics> columnStatistics = columnStatisticsByName.getOrDefault(name, ImmutableMap.of());
        statisticsByName.put(name, new PartitionStatistics(entry.getValue(), columnStatistics));
    }
    return statisticsByName.build();
}
Also used : LockComponentBuilder(org.apache.hadoop.hive.metastore.LockComponentBuilder) HiveViewNotSupportedException(io.prestosql.plugin.hive.HiveViewNotSupportedException) ShowLocksResponse(org.apache.hadoop.hive.metastore.api.ShowLocksResponse) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) RoleGrant(io.prestosql.spi.security.RoleGrant) NoSuchTxnException(org.apache.hadoop.hive.metastore.api.NoSuchTxnException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) SchemaAlreadyExistsException(io.prestosql.spi.connector.SchemaAlreadyExistsException) Sets.difference(com.google.common.collect.Sets.difference) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Map(java.util.Map) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) ThreadSafe(javax.annotation.concurrent.ThreadSafe) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) TxnAbortedException(org.apache.hadoop.hive.metastore.api.TxnAbortedException) Verify.verifyNotNull(com.google.common.base.Verify.verifyNotNull) ALREADY_EXISTS(io.prestosql.spi.StandardErrorCode.ALREADY_EXISTS) Iterables(com.google.common.collect.Iterables) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) Flatten(org.weakref.jmx.Flatten) HIVE_FILTER_FIELD_PARAMS(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.HIVE_FILTER_FIELD_PARAMS) Callable(java.util.concurrent.Callable) ArrayList(java.util.ArrayList) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) OptionalLong(java.util.OptionalLong) 
Managed(org.weakref.jmx.Managed) LockState(org.apache.hadoop.hive.metastore.api.LockState) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TException(org.apache.thrift.TException) PrincipalType(org.apache.hadoop.hive.metastore.api.PrincipalType) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) USER(io.prestosql.spi.security.PrincipalType.USER) Table(org.apache.hadoop.hive.metastore.api.Table) System.nanoTime(java.lang.System.nanoTime) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LockRequest(org.apache.hadoop.hive.metastore.api.LockRequest) Duration(io.airlift.units.Duration) RetryDriver(io.prestosql.plugin.hive.util.RetryDriver) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Throwables.propagateIfPossible(com.google.common.base.Throwables.propagateIfPossible) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Locale(java.util.Locale) LockComponent(org.apache.hadoop.hive.metastore.api.LockComponent) Type(io.prestosql.spi.type.Type) Collectors.toSet(java.util.stream.Collectors.toSet) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) PrestoException(io.prestosql.spi.PrestoException) NoSuchLockException(org.apache.hadoop.hive.metastore.api.NoSuchLockException) ImmutableSet(com.google.common.collect.ImmutableSet) 
ImmutableMap(com.google.common.collect.ImmutableMap) LockRequestBuilder(org.apache.hadoop.hive.metastore.LockRequestBuilder) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) LockResponse(org.apache.hadoop.hive.metastore.api.LockResponse) HiveType(io.prestosql.plugin.hive.HiveType) ShowLocksRequest(org.apache.hadoop.hive.metastore.api.ShowLocksRequest) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) HivePartition(io.prestosql.plugin.hive.HivePartition) List(java.util.List) PRESTO_VIEW_FLAG(io.prestosql.plugin.hive.HiveUtil.PRESTO_VIEW_FLAG) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) Logger(io.airlift.log.Logger) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) Partition(org.apache.hadoop.hive.metastore.api.Partition) Function(java.util.function.Function) ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) Inject(javax.inject.Inject) HashSet(java.util.HashSet) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) HivePrivilegeInfo(io.prestosql.plugin.hive.metastore.HivePrivilegeInfo) Objects.requireNonNull(java.util.Objects.requireNonNull) TABLE(org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Iterator(java.util.Iterator) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) SchemaNotFoundException(io.prestosql.spi.connector.SchemaNotFoundException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Column(io.prestosql.plugin.hive.metastore.Column) Closeable(java.io.Closeable) 
Database(org.apache.hadoop.hive.metastore.api.Database) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 7 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class ThriftHiveMetastore method updatePartitionStatistics.

/**
 * Applies {@code update} to the current statistics of a single partition and writes
 * the result back.
 *
 * <p>The partition is looked up by name, its current statistics are read via
 * {@code getPartitionStatistics}, and the updated basic statistics are written into
 * the parameters of a deep copy of the partition. That copy is then passed to
 * {@code alterPartitionWithoutStatistics}, followed by
 * {@code updatePartitionColumnStatistics}.
 *
 * @param identity identity used for the metastore calls
 * @param databaseName database containing the table
 * @param tableName table containing the partition
 * @param partitionName name of the partition to update
 * @param update function mapping the current statistics to the new statistics
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the lookup by name
 *         does not return exactly one partition
 * @throws TableNotFoundException if the table cannot be found
 */
@Override
public void updatePartitionStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, ImmutableList.of(partitionName));
    // Report the empty case separately: "multiple partitions" is misleading when none came back.
    if (partitions.isEmpty()) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned no partitions for name: " + partitionName);
    }
    if (partitions.size() != 1) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned multiple partitions for name: " + partitionName);
    }
    Table table = getTable(identity, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = requireNonNull(getPartitionStatistics(identity, table, partitions).get(partitionName), "getPartitionStatistics() returned null");
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Partition originalPartition = getOnlyElement(partitions);
    // Work on a copy so the partition object returned by the lookup is left untouched.
    Partition modifiedPartition = originalPartition.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
    alterPartitionWithoutStatistics(identity, databaseName, tableName, modifiedPartition);
    updatePartitionColumnStatistics(identity, modifiedPartition, databaseName, tableName, partitionName, basicStatistics, currentStatistics, updatedStatistics);
}
Also used : HivePartition(io.prestosql.plugin.hive.HivePartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Table(org.apache.hadoop.hive.metastore.api.Table) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 8 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class ThriftHiveMetastore method updatePartitionsStatistics.

/**
 * Applies per-partition update functions to the statistics of several partitions of
 * one table and writes the results back.
 *
 * <p>All requested partitions and their current statistics are fetched first. For
 * each partition the matching update function is applied and the resulting basic
 * statistics are written into the parameters of a deep copy of the partition. The
 * copies are passed to {@code alterPartitionsWithoutStatistics} in one call, after
 * which {@code updatePartitionColumnStatistics} is invoked once per partition.
 *
 * @param identity identity used for the metastore calls
 * @param databaseName database containing the table
 * @param tableName table containing the partitions
 * @param partNamesUpdateFunctionMap map from partition name to the function that
 *        turns that partition's current statistics into its new statistics
 * @throws TableNotFoundException if the table cannot be found
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the number of returned
 *         partitions or statistics does not match the number of requested partitions
 * @throws NullPointerException if a returned partition has no statistics entry or no
 *         update function registered under its name
 */
@Override
public synchronized void updatePartitionsStatistics(HiveIdentity identity, String databaseName, String tableName, Map<String, Function<PartitionStatistics, PartitionStatistics>> partNamesUpdateFunctionMap) {
    ImmutableList.Builder<Partition> modifiedPartitionBuilder = ImmutableList.builder();
    ImmutableMap.Builder<String, PartitionInfo> partitionInfoMapBuilder = ImmutableMap.builder();
    // Fail with a descriptive exception instead of a bare Optional.get() NoSuchElementException.
    Table table = getTable(identity, databaseName, tableName)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, partNamesUpdateFunctionMap.keySet().stream().collect(Collectors.toList()));
    Map<String, PartitionStatistics> partitionsStatistics = getPartitionStatistics(identity, table, partitions);
    if (partitions.size() != partitionsStatistics.size() || partitions.size() != partNamesUpdateFunctionMap.size()) {
        throw new PrestoException(
                HiveErrorCode.HIVE_METASTORE_ERROR,
                "Metastore returned an unexpected number of partitions: requested " + partNamesUpdateFunctionMap.size()
                        + ", got " + partitions.size() + " partitions with " + partitionsStatistics.size() + " statistics entries");
    }
    List<String> partColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(toImmutableList());
    for (Partition originalPartition : partitions) {
        String partitionName = makePartName(partColumns, originalPartition.getValues());
        PartitionStatistics currentStatistics = requireNonNull(partitionsStatistics.get(partitionName), "getPartitionStatistics() returned null");
        // Equal sizes do not guarantee that the returned names match the requested keys.
        Function<PartitionStatistics, PartitionStatistics> update = requireNonNull(
                partNamesUpdateFunctionMap.get(partitionName),
                "no update function registered for partition: " + partitionName);
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);
        Partition modifiedPartition = originalPartition.deepCopy();
        HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
        modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
        // NOTE(review): the fetched partition is updated in place as well, and it (not the
        // deep copy) is what PartitionInfo receives — confirm this is intentional.
        originalPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(originalPartition.getParameters(), basicStatistics));
        modifiedPartitionBuilder.add(modifiedPartition);
        partitionInfoMapBuilder.put(partitionName, new PartitionInfo(basicStatistics, currentStatistics, originalPartition, updatedStatistics));
    }
    alterPartitionsWithoutStatistics(databaseName, tableName, modifiedPartitionBuilder.build());
    ImmutableMap<String, PartitionInfo> partitionInfoMap = partitionInfoMapBuilder.build();
    partitionInfoMap.forEach((partName, partInfo) -> updatePartitionColumnStatistics(identity, partInfo.modifiedPartition, databaseName, tableName, partName, partInfo.basicStatistics, partInfo.currentStatistics, partInfo.updatedStatistics));
}
Also used : HivePartition(io.prestosql.plugin.hive.HivePartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics)

Example 9 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class GlueInputConverter method convertPartition.

/**
 * Converts a partition together with its statistics into a Glue {@code PartitionInput}.
 *
 * <p>The partition itself is converted by the single-partition overload; the basic
 * statistics are then merged into the input's parameters.
 *
 * @param partitionWithStatistics partition and statistics to convert
 * @return the partition input with statistics parameters applied
 * @throws PrestoException with {@code NOT_SUPPORTED} if the statistics contain any
 *         column-level statistics
 */
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) {
    PartitionInput partitionInput = convertPartition(partitionWithStatistics.getPartition());
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();

    boolean hasColumnStatistics = !partitionStatistics.getColumnStatistics().isEmpty();
    if (hasColumnStatistics) {
        throw new PrestoException(NOT_SUPPORTED, "Glue metastore does not support column level statistics");
    }

    partitionInput.setParameters(
            updateStatisticsParameters(
                    partitionInput.getParameters(),
                    partitionStatistics.getBasicStatistics()));
    return partitionInput;
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) PrestoException(io.prestosql.spi.PrestoException) PartitionInput(com.amazonaws.services.glue.model.PartitionInput)

Example 10 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project hetu-core by openlookeng.

the class MetastoreHiveStatisticsProvider method getTableStatistics.

/**
 * Produces table statistics from the statistics of a sample of the given partitions.
 *
 * <p>Returns empty statistics when statistics are disabled for the session, and zero
 * statistics when there are no partitions. The partition sample is taken from
 * {@code samplePartitionCache} when column statistics are not requested and the cached
 * entry was built for the same number of partitions; otherwise a new sample is drawn
 * and stored in the cache.
 *
 * <p>Without column statistics, the result carries a row count (average rows per
 * sampled partition times the number of partitions, when an average is available),
 * a file count and an on-disk size. With column statistics, the sampled statistics
 * are validated and passed to the column-aware overload.
 *
 * @param session session supplying the statistics-related settings
 * @param schemaTableName name of the table
 * @param columns column handles by column name
 * @param columnTypes column types by column name
 * @param partitions partitions of the table to estimate over
 * @param includeColumnStatistics whether column-level statistics are requested
 * @param table table used as the sample cache key and passed to the statistics provider
 * @return the computed statistics, or empty statistics when corrupted column
 *         statistics are encountered and the session is set to ignore them
 * @throws PrestoException any other failure raised while obtaining or validating statistics
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, SchemaTableName schemaTableName, Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, boolean includeColumnStatistics, Table table) {
    if (!isStatisticsEnabled(session)) {
        return TableStatistics.empty();
    }
    if (partitions.isEmpty()) {
        return createZeroStatistics(columns, columnTypes);
    }

    int sampleSize = getPartitionStatisticsSampleSize(session);
    SamplePartition cachedSample = samplePartitionCache.get(table);
    // A cached sample is only reused for basic statistics and an unchanged partition count.
    boolean reuseCachedSample = !includeColumnStatistics
            && cachedSample != null
            && cachedSample.partitionCount == partitions.size();

    List<HivePartition> sampledPartitions;
    if (reuseCachedSample) {
        sampledPartitions = cachedSample.partitionsSample;
    } else {
        sampledPartitions = getPartitionsSample(partitions, sampleSize);
        samplePartitionCache.put(table, new SamplePartition(partitions.size(), sampledPartitions));
    }

    try {
        Map<String, PartitionStatistics> sampledStatistics = statisticsProvider.getPartitionsStatistics(session, schemaTableName, sampledPartitions, table);
        if (includeColumnStatistics) {
            validatePartitionStatistics(schemaTableName, sampledStatistics);
            return getTableStatistics(columns, columnTypes, partitions, sampledStatistics);
        }

        OptionalDouble averageRowsPerPartition = calculateAverageRowsPerPartition(sampledStatistics.values());
        TableStatistics.Builder builder = TableStatistics.builder();
        // Scale the per-partition average up to the full partition count.
        averageRowsPerPartition.ifPresent(average -> builder.setRowCount(Estimate.of(average * partitions.size())));
        builder.setFileCount(calulateFileCount(sampledStatistics.values()));
        builder.setOnDiskDataSizeInBytes(calculateTotalOnDiskSizeInBytes(sampledStatistics.values()));
        return builder.build();
    } catch (PrestoException e) {
        boolean corruptedStatistics = e.getErrorCode().equals(HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
        if (!corruptedStatistics || !isIgnoreCorruptedStatistics(session)) {
            throw e;
        }
        log.error(e);
        return TableStatistics.empty();
    }
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TableStatistics(io.prestosql.spi.statistics.TableStatistics) PrestoException(io.prestosql.spi.PrestoException) OptionalDouble(java.util.OptionalDouble) HivePartition(io.prestosql.plugin.hive.HivePartition)

Aggregations

PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics)62 PrestoException (io.prestosql.spi.PrestoException)32 HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)31 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)31 HivePartition (io.prestosql.plugin.hive.HivePartition)20 HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity)18 TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException)18 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)16 ImmutableMap (com.google.common.collect.ImmutableMap)16 PartitionNotFoundException (io.prestosql.plugin.hive.PartitionNotFoundException)16 ArrayList (java.util.ArrayList)15 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)14 List (java.util.List)14 OptionalLong (java.util.OptionalLong)14 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)12 ImmutableList (com.google.common.collect.ImmutableList)12 Logger (io.airlift.log.Logger)12 HiveErrorCode (io.prestosql.plugin.hive.HiveErrorCode)12 Type (io.prestosql.spi.type.Type)12 Map (java.util.Map)12