Search in sources :

Example 26 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.

the class ThriftHiveMetastore method updatePartitionStatistics.

/**
 * Applies {@code update} to the current statistics of one partition and writes the result back.
 *
 * <p>The partition is looked up by name, its current statistics are read, and the statistics
 * returned by {@code update} are stored in two steps: the basic statistics are folded into the
 * parameters of a copy of the partition, which is passed to {@code alterPartitionWithoutStatistics},
 * and then {@code updatePartitionColumnStatistics} is called with both the current and the
 * updated statistics.
 *
 * @param identity identity passed through to every metastore call made here
 * @param databaseName name of the database containing the table
 * @param tableName name of the table owning the partition
 * @param partitionName name of the partition whose statistics are updated
 * @param update function mapping the current statistics to the statistics to store
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if the lookup by name does not
 *         return exactly one partition
 * @throws TableNotFoundException if {@code getTable} returns no table
 * @throws NullPointerException if the statistics lookup has no entry for {@code partitionName}
 */
@Override
public void updatePartitionStatistics(HiveIdentity identity, String databaseName, String tableName, String partitionName, Function<PartitionStatistics, PartitionStatistics> update) {
    List<Partition> partitions = getPartitionsByNames(identity, databaseName, tableName, ImmutableList.of(partitionName));
    // An empty result is reported separately: calling it "multiple partitions" would be misleading.
    if (partitions.isEmpty()) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned no partitions for name: " + partitionName);
    }
    if (partitions.size() > 1) {
        throw new PrestoException(HiveErrorCode.HIVE_METASTORE_ERROR, "Metastore returned multiple partitions for name: " + partitionName);
    }
    Table table = getTable(identity, databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    PartitionStatistics currentStatistics = requireNonNull(getPartitionStatistics(identity, table, partitions).get(partitionName), "getPartitionStatistics() returned null");
    PartitionStatistics updatedStatistics = update.apply(currentStatistics);
    Partition originalPartition = getOnlyElement(partitions);
    // Work on a deep copy so the partition object returned by the lookup is left untouched.
    Partition modifiedPartition = originalPartition.deepCopy();
    HiveBasicStatistics basicStatistics = updatedStatistics.getBasicStatistics();
    modifiedPartition.setParameters(ThriftMetastoreUtil.updateStatisticsParameters(modifiedPartition.getParameters(), basicStatistics));
    alterPartitionWithoutStatistics(identity, databaseName, tableName, modifiedPartition);
    updatePartitionColumnStatistics(identity, modifiedPartition, databaseName, tableName, partitionName, basicStatistics, currentStatistics, updatedStatistics);
}
Also used : HivePartition(io.prestosql.plugin.hive.HivePartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Table(org.apache.hadoop.hive.metastore.api.Table) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 27 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.

the class FileHiveMetastore method getPartitionStatistics.

/**
 * Loads the statistics stored for a single partition.
 *
 * <p>Reads the {@code "partition"} schema file from the partition's metadata directory, derives
 * the basic statistics from the stored parameters, and pairs them with the stored column
 * statistics. The {@code identity} argument is not used by this method.
 *
 * @param identity caller identity (unused here)
 * @param partitionValues values identifying the partition
 * @param table table that owns the partition
 * @return the basic and column statistics read from the partition metadata
 * @throws PartitionNotFoundException if no schema file is found for the partition
 */
private synchronized PartitionStatistics getPartitionStatistics(HiveIdentity identity, List<String> partitionValues, Table table) {
    Path metadataDirectory = getPartitionMetadataDirectory(table, ImmutableList.copyOf(partitionValues));
    PartitionMetadata metadata = readSchemaFile("partition", metadataDirectory, partitionCodec)
            .orElseThrow(() -> new PartitionNotFoundException(table.getSchemaTableName(), partitionValues));
    return new PartitionStatistics(
            getHiveBasicStatistics(metadata.getParameters()),
            metadata.getColumnStatistics());
}
Also used : Path(org.apache.hadoop.fs.Path) PartitionNotFoundException(io.prestosql.plugin.hive.PartitionNotFoundException) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.prestosql.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics)

Example 28 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.

the class SemiTransactionalHiveMetastore method updatePartitionStatistics.

/**
 * Combines previously stored partition statistics with newly collected ones.
 *
 * <p>For HiveBasicStatistics, the original value is only overwritten if the new one is not empty:
 * each field is chosen with {@code firstPresent(newValue, oldValue)}.
 * For HiveColumnStatistics, the new statistics always replace the old ones entirely.
 *
 * <p>TODO: Collect file count, on-disk size and in-memory size during ANALYZE
 *
 * @param oldPartitionStats statistics currently recorded for the partition
 * @param newPartitionStats statistics that should be merged in
 * @return the merged statistics
 */
private PartitionStatistics updatePartitionStatistics(PartitionStatistics oldPartitionStats, PartitionStatistics newPartitionStats) {
    HiveBasicStatistics previous = oldPartitionStats.getBasicStatistics();
    HiveBasicStatistics incoming = newPartitionStats.getBasicStatistics();
    HiveBasicStatistics merged = new HiveBasicStatistics(
            firstPresent(incoming.getFileCount(), previous.getFileCount()),
            firstPresent(incoming.getRowCount(), previous.getRowCount()),
            firstPresent(incoming.getInMemoryDataSizeInBytes(), previous.getInMemoryDataSizeInBytes()),
            firstPresent(incoming.getOnDiskDataSizeInBytes(), previous.getOnDiskDataSizeInBytes()));
    return new PartitionStatistics(merged, newPartitionStats.getColumnStatistics());
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics)

Example 29 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.

the class InMemoryThriftMetastore method getPartitionStatistics.

/**
 * Looks up the recorded statistics for each requested partition.
 *
 * <p>Partitions without an entry in {@code partitionColumnStatistics} are reported with empty
 * basic statistics and no column statistics. The {@code identity} argument is not used.
 *
 * @param identity caller identity (unused here)
 * @param databaseName database containing the table
 * @param tableName table owning the partitions
 * @param partitionNames names of the partitions to look up
 * @return a map from each requested partition name to its statistics
 */
private ImmutableMap<String, PartitionStatistics> getPartitionStatistics(HiveIdentity identity, String databaseName, String tableName, Set<String> partitionNames) {
    ImmutableMap.Builder<String, PartitionStatistics> statisticsByName = ImmutableMap.builder();
    for (String partitionName : partitionNames) {
        PartitionStatistics stored = partitionColumnStatistics.get(PartitionName.partition(databaseName, tableName, partitionName));
        // Fall back to empty statistics when nothing has been recorded for this partition.
        statisticsByName.put(
                partitionName,
                stored != null ? stored : new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()));
    }
    return statisticsByName.build();
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 30 with PartitionStatistics

use of io.prestosql.plugin.hive.PartitionStatistics in project boostkit-bigdata by kunpengcompute.

the class InMemoryThriftMetastore method getTableStatistics.

/**
 * Returns the statistics recorded for a table.
 *
 * <p>When {@code columnStatistics} has no entry for the table, empty basic statistics with no
 * column statistics are returned instead. The {@code identity} argument is not used.
 *
 * @param identity caller identity (unused here)
 * @param databaseName database containing the table
 * @param tableName name of the table
 * @return the recorded statistics, or empty statistics if none are recorded
 */
private PartitionStatistics getTableStatistics(HiveIdentity identity, String databaseName, String tableName) {
    PartitionStatistics stored = columnStatistics.get(new SchemaTableName(databaseName, tableName));
    if (stored != null) {
        return stored;
    }
    return new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of());
}
Also used : PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Aggregations

PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics): 62, PrestoException (io.prestosql.spi.PrestoException): 32, HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics): 31, SchemaTableName (io.prestosql.spi.connector.SchemaTableName): 31, HivePartition (io.prestosql.plugin.hive.HivePartition): 20, HiveIdentity (io.prestosql.plugin.hive.authentication.HiveIdentity): 18, TableNotFoundException (io.prestosql.spi.connector.TableNotFoundException): 18, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 16, ImmutableMap (com.google.common.collect.ImmutableMap): 16, PartitionNotFoundException (io.prestosql.plugin.hive.PartitionNotFoundException): 16, ArrayList (java.util.ArrayList): 15, HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics): 14, List (java.util.List): 14, OptionalLong (java.util.OptionalLong): 14, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 12, ImmutableList (com.google.common.collect.ImmutableList): 12, Logger (io.airlift.log.Logger): 12, HiveErrorCode (io.prestosql.plugin.hive.HiveErrorCode): 12, Type (io.prestosql.spi.type.Type): 12, Map (java.util.Map): 12