Search in sources :

Example 26 with Partition

use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.

the class CachingHiveMetastore method getPartitionStatistics.

/**
 * Returns statistics for the given partitions of {@code table}, keyed by partition name.
 *
 * <p>Every partition is turned into a {@link HivePartitionName} key (table key plus the name
 * produced by {@code makePartitionName}); the values are then obtained with
 * {@code getAll(partitionStatisticsCache, ...)} and re-keyed by the plain partition-name string.
 *
 * @param table the table owning the partitions
 * @param partitions the partitions whose statistics are requested
 * @return an immutable map from partition name to its statistics
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List<Partition> partitions) {
    HiveTableName tableKey = hiveTableName(table.getDatabaseName(), table.getTableName());

    List<HivePartitionName> lookupKeys = partitions.stream()
            .map(partition -> makePartitionName(table, partition))
            .map(name -> hivePartitionName(tableKey, name))
            .collect(toImmutableList());

    Map<HivePartitionName, PartitionStatistics> statisticsByKey = getAll(partitionStatisticsCache, lookupKeys);

    // Keys were built with a partition name above, so orElseThrow() is expected to succeed here.
    return statisticsByKey.entrySet().stream()
            .collect(toImmutableMap(
                    entry -> entry.getKey().getPartitionName().orElseThrow(),
                    Entry::getValue));
}
Also used : CacheStatsMBean(io.airlift.jmx.CacheStatsMBean) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) LoadingCache(com.google.common.cache.LoadingCache) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) Database(io.trino.plugin.hive.metastore.Database) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) Duration(io.airlift.units.Duration) AcidTransactionOwner(io.trino.plugin.hive.metastore.AcidTransactionOwner) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) EvictableCacheBuilder(io.trino.collect.cache.EvictableCacheBuilder) PartitionFilter(io.trino.plugin.hive.metastore.PartitionFilter) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) Map(java.util.Map) HiveTableName.hiveTableName(io.trino.plugin.hive.metastore.HiveTableName.hiveTableName) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) PartitionFilter.partitionFilter(io.trino.plugin.hive.metastore.PartitionFilter.partitionFilter) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Functions.identity(com.google.common.base.Functions.identity) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HivePartition(io.trino.plugin.hive.HivePartition) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HivePartitionName.hivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName.hivePartitionName) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) 
SchemaTableName(io.trino.spi.connector.SchemaTableName) CacheLoader(com.google.common.cache.CacheLoader) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) HiveTableName(io.trino.plugin.hive.metastore.HiveTableName) Partition(io.trino.plugin.hive.metastore.Partition) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) Nested(org.weakref.jmx.Nested) ImmutableSetMultimap.toImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap.toImmutableSetMultimap) HivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName) Type(io.trino.spi.type.Type) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ImmutableList(com.google.common.collect.ImmutableList) Managed(org.weakref.jmx.Managed) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) UserTableKey(io.trino.plugin.hive.metastore.UserTableKey) Executor(java.util.concurrent.Executor) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TupleDomain(io.trino.spi.predicate.TupleDomain) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) SetMultimap(com.google.common.collect.SetMultimap) RoleGrant(io.trino.spi.security.RoleGrant) ExecutionException(java.util.concurrent.ExecutionException) 
Streams.stream(com.google.common.collect.Streams.stream) TablesWithParameterCacheKey(io.trino.plugin.hive.metastore.TablesWithParameterCacheKey) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) CacheLoader.asyncReloading(com.google.common.cache.CacheLoader.asyncReloading) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName) HiveTableName(io.trino.plugin.hive.metastore.HiveTableName)

Example 27 with Partition

use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.

the class CachingHiveMetastore method loadPartitionsColumnStatistics.

/**
 * Loads statistics for the requested partition keys, handling one table at a time.
 *
 * <p>The keys are grouped by their {@link HiveTableName}. For each table, the table and its
 * partitions are resolved through {@code getExistingTable} / {@code getExistingPartitionsByNames}
 * and the statistics are fetched from {@code delegate} in a single call.
 *
 * @param keys partition keys to load; {@code getPartitionName().orElseThrow()} is applied to each,
 *         so every key must carry a partition name
 * @return an immutable map from each requested key to the statistics returned by {@code delegate}
 */
private Map<HivePartitionName, PartitionStatistics> loadPartitionsColumnStatistics(Iterable<? extends HivePartitionName> keys) {
    SetMultimap<HiveTableName, HivePartitionName> keysByTable = stream(keys)
            .collect(toImmutableSetMultimap(HivePartitionName::getHiveTableName, Function.identity()));
    ImmutableMap.Builder<HivePartitionName, PartitionStatistics> loaded = ImmutableMap.builder();

    for (HiveTableName tableName : keysByTable.keySet()) {
        Set<HivePartitionName> tableKeys = keysByTable.get(tableName);
        Set<String> requestedNames = tableKeys.stream()
                .map(key -> key.getPartitionName().orElseThrow())
                .collect(toImmutableSet());

        Table table = getExistingTable(tableName.getDatabaseName(), tableName.getTableName());
        List<Partition> partitions = getExistingPartitionsByNames(table, ImmutableList.copyOf(requestedNames));
        Map<String, PartitionStatistics> statisticsByName = delegate.getPartitionStatistics(table, partitions);

        // Translate the delegate's name-keyed result back to the original lookup keys.
        tableKeys.forEach(key -> {
            String name = key.getPartitionName().orElseThrow();
            loaded.put(key, statisticsByName.get(name));
        });
    }

    return loaded.buildOrThrow();
}
Also used : CacheStatsMBean(io.airlift.jmx.CacheStatsMBean) HivePartitionManager.extractPartitionValues(io.trino.plugin.hive.HivePartitionManager.extractPartitionValues) LoadingCache(com.google.common.cache.LoadingCache) MetastoreUtil.makePartitionName(io.trino.plugin.hive.metastore.MetastoreUtil.makePartitionName) Database(io.trino.plugin.hive.metastore.Database) Throwables.throwIfUnchecked(com.google.common.base.Throwables.throwIfUnchecked) AcidOperation(io.trino.plugin.hive.acid.AcidOperation) Duration(io.airlift.units.Duration) AcidTransactionOwner(io.trino.plugin.hive.metastore.AcidTransactionOwner) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) EvictableCacheBuilder(io.trino.collect.cache.EvictableCacheBuilder) PartitionFilter(io.trino.plugin.hive.metastore.PartitionFilter) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Maps.immutableEntry(com.google.common.collect.Maps.immutableEntry) Map(java.util.Map) HiveTableName.hiveTableName(io.trino.plugin.hive.metastore.HiveTableName.hiveTableName) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) PartitionFilter.partitionFilter(io.trino.plugin.hive.metastore.PartitionFilter.partitionFilter) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Functions.identity(com.google.common.base.Functions.identity) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) HivePartition(io.trino.plugin.hive.HivePartition) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HivePartitionName.hivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName.hivePartitionName) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) ThreadSafe(javax.annotation.concurrent.ThreadSafe) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) 
SchemaTableName(io.trino.spi.connector.SchemaTableName) CacheLoader(com.google.common.cache.CacheLoader) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Entry(java.util.Map.Entry) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) HiveTableName(io.trino.plugin.hive.metastore.HiveTableName) Partition(io.trino.plugin.hive.metastore.Partition) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) Iterables(com.google.common.collect.Iterables) Nested(org.weakref.jmx.Nested) ImmutableSetMultimap.toImmutableSetMultimap(com.google.common.collect.ImmutableSetMultimap.toImmutableSetMultimap) HivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName) Type(io.trino.spi.type.Type) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DataOperationType(org.apache.hadoop.hive.metastore.api.DataOperationType) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ImmutableList(com.google.common.collect.ImmutableList) Managed(org.weakref.jmx.Managed) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) UserTableKey(io.trino.plugin.hive.metastore.UserTableKey) Executor(java.util.concurrent.Executor) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) TupleDomain(io.trino.spi.predicate.TupleDomain) Throwables.throwIfInstanceOf(com.google.common.base.Throwables.throwIfInstanceOf) SetMultimap(com.google.common.collect.SetMultimap) RoleGrant(io.trino.spi.security.RoleGrant) ExecutionException(java.util.concurrent.ExecutionException) 
Streams.stream(com.google.common.collect.Streams.stream) TablesWithParameterCacheKey(io.trino.plugin.hive.metastore.TablesWithParameterCacheKey) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) CacheLoader.asyncReloading(com.google.common.cache.CacheLoader.asyncReloading) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) HivePartition(io.trino.plugin.hive.HivePartition) Partition(io.trino.plugin.hive.metastore.Partition) Table(io.trino.plugin.hive.metastore.Table) HivePartitionName(io.trino.plugin.hive.metastore.HivePartitionName) HiveTableName(io.trino.plugin.hive.metastore.HiveTableName) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics)

Example 28 with Partition

use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.

the class FileHiveMetastore method alterPartition.

/**
 * Writes the partition metadata for the partition carried by {@code partitionWithStatistics}.
 *
 * <p>The table is looked up with {@code getRequiredTable}, the partition is checked with
 * {@code verifiedPartition}, and a {@code PartitionMetadata} built from the table and the
 * supplied partition-with-statistics is written to the partition's metadata directory.
 *
 * @param databaseName name of the database containing the table
 * @param tableName name of the table owning the partition
 * @param partitionWithStatistics the partition (and its statistics) to store
 */
@Override
public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics) {
    Table owningTable = getRequiredTable(databaseName, tableName);
    Partition updatedPartition = partitionWithStatistics.getPartition();
    verifiedPartition(owningTable, updatedPartition);

    Path metadataDirectory = getPartitionMetadataDirectory(owningTable, updatedPartition.getValues());
    PartitionMetadata metadata = new PartitionMetadata(owningTable, partitionWithStatistics);
    // NOTE(review): the trailing 'true' presumably allows overwriting an existing schema file — confirm against writeSchemaFile.
    writeSchemaFile(PARTITION, metadataDirectory, partitionCodec, metadata, true);
}
Also used : Path(org.apache.hadoop.fs.Path) Partition(io.trino.plugin.hive.metastore.Partition) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable)

Example 29 with Partition

use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.

the class DefaultGlueColumnStatisticsProvider method updatePartitionStatistics.

/**
 * Applies a set of per-partition column-statistics updates through the Glue client.
 *
 * <p>For each update, the new column statistics are converted with
 * {@code toGlueColumnStatistics}, filtered by {@code isGlueWritable}, split into chunks of
 * {@code GLUE_COLUMN_WRITE_STAT_PAGE_SIZE} and submitted asynchronously on {@code writeExecutor}.
 * Columns that currently have statistics but are absent from the update are deleted, one
 * asynchronous request per column. The method then waits for all submitted requests.
 *
 * @param partitionStatisticsUpdates the updates to apply
 * @throws TrinoException with {@code HIVE_PARTITION_NOT_FOUND} when waiting fails with a
 *         {@link RuntimeException} whose cause is an {@code EntityNotFoundException}, and with
 *         {@code HIVE_METASTORE_ERROR} for any other {@link RuntimeException}
 */
@Override
public void updatePartitionStatistics(Set<PartitionStatisticsUpdate> partitionStatisticsUpdates) {
    List<Partition> targetPartitions = partitionStatisticsUpdates.stream()
            .map(PartitionStatisticsUpdate::getPartition)
            .collect(toImmutableList());
    Map<Partition, Map<String, HiveColumnStatistics>> existingStatistics = getPartitionColumnStatistics(targetPartitions);

    List<CompletableFuture<Void>> pending = new ArrayList<>();
    for (PartitionStatisticsUpdate update : partitionStatisticsUpdates) {
        Partition partition = update.getPartition();
        Map<String, HiveColumnStatistics> requestedStatistics = update.getColumnStatistics();
        HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partition.getParameters());

        List<ColumnStatistics> writableStatistics = toGlueColumnStatistics(partition, requestedStatistics, basicStatistics.getRowCount()).stream()
                .filter(this::isGlueWritable)
                .collect(toUnmodifiableList());

        // Writes are issued in pages of GLUE_COLUMN_WRITE_STAT_PAGE_SIZE columns.
        for (List<ColumnStatistics> chunk : Lists.partition(writableStatistics, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE)) {
            pending.add(runAsync(
                    () -> stats.getUpdateColumnStatisticsForPartition().call(
                            () -> glueClient.updateColumnStatisticsForPartition(
                                    new UpdateColumnStatisticsForPartitionRequest()
                                            .withCatalogId(catalogId)
                                            .withDatabaseName(partition.getDatabaseName())
                                            .withTableName(partition.getTableName())
                                            .withPartitionValues(partition.getValues())
                                            .withColumnStatisticsList(chunk))),
                    writeExecutor));
        }

        // Columns with stored statistics that are missing from the update get their statistics deleted.
        Set<String> staleColumns = difference(existingStatistics.get(partition).keySet(), requestedStatistics.keySet());
        for (String column : staleColumns) {
            pending.add(runAsync(
                    () -> stats.getDeleteColumnStatisticsForPartition().call(
                            () -> glueClient.deleteColumnStatisticsForPartition(
                                    new DeleteColumnStatisticsForPartitionRequest()
                                            .withCatalogId(catalogId)
                                            .withDatabaseName(partition.getDatabaseName())
                                            .withTableName(partition.getTableName())
                                            .withPartitionValues(partition.getValues())
                                            .withColumnName(column))),
                    writeExecutor));
        }
    }

    try {
        getFutureValue(allOf(pending.toArray(CompletableFuture[]::new)));
    } catch (RuntimeException ex) {
        Throwable cause = ex.getCause();
        // instanceof is false for null, so no separate null check is needed.
        if (cause instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, cause);
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Also used : GlueStatConverter.fromGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.fromGlueColumnStatistics) ColumnStatistics(com.amazonaws.services.glue.model.ColumnStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) GlueStatConverter.toGlueColumnStatistics(io.trino.plugin.hive.metastore.glue.converter.GlueStatConverter.toGlueColumnStatistics) Partition(io.trino.plugin.hive.metastore.Partition) ArrayList(java.util.ArrayList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) DeleteColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.DeleteColumnStatisticsForPartitionRequest) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) CompletableFuture(java.util.concurrent.CompletableFuture) UpdateColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.UpdateColumnStatisticsForPartitionRequest) TrinoException(io.trino.spi.TrinoException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 30 with Partition

use of io.trino.plugin.hive.metastore.Partition in project trino by trinodb.

the class DefaultGlueColumnStatisticsProvider method getPartitionColumnStatistics.

/**
 * Fetches column statistics for the given partitions through the Glue client.
 *
 * <p>For every partition, its columns are split into chunks of
 * {@code GLUE_COLUMN_READ_STAT_PAGE_SIZE} and one asynchronous request per chunk is submitted on
 * {@code readExecutor}. Once all requests are submitted, the results are collected per partition
 * and converted with {@code fromGlueColumnStatistics}, using the row count taken from the
 * partition's parameters.
 *
 * @param partitions the partitions to read statistics for
 * @return an immutable map from each partition to its column statistics keyed by column name
 * @throws TrinoException with {@code HIVE_PARTITION_NOT_FOUND} when collecting results fails with
 *         a {@link RuntimeException} whose cause is an {@code EntityNotFoundException}, and with
 *         {@code HIVE_METASTORE_ERROR} for any other {@link RuntimeException}
 */
@Override
public Map<Partition, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(Collection<Partition> partitions) {
    Map<Partition, List<CompletableFuture<GetColumnStatisticsForPartitionResult>>> pendingByPartition = new HashMap<>();
    for (Partition partition : partitions) {
        ImmutableList.Builder<CompletableFuture<GetColumnStatisticsForPartitionResult>> requests = ImmutableList.builder();
        // Reads are issued in pages of GLUE_COLUMN_READ_STAT_PAGE_SIZE columns.
        for (List<Column> chunk : Lists.partition(partition.getColumns(), GLUE_COLUMN_READ_STAT_PAGE_SIZE)) {
            List<String> chunkColumnNames = chunk.stream()
                    .map(Column::getName)
                    .collect(toImmutableList());
            GetColumnStatisticsForPartitionRequest request = new GetColumnStatisticsForPartitionRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(partition.getDatabaseName())
                    .withTableName(partition.getTableName())
                    .withColumnNames(chunkColumnNames)
                    .withPartitionValues(partition.getValues());
            requests.add(supplyAsync(
                    () -> stats.getGetColumnStatisticsForPartition().call(() -> glueClient.getColumnStatisticsForPartition(request)),
                    readExecutor));
        }
        pendingByPartition.put(partition, requests.build());
    }

    try {
        ImmutableMap.Builder<Partition, Map<String, HiveColumnStatistics>> statisticsByPartition = ImmutableMap.builder();
        for (Map.Entry<Partition, List<CompletableFuture<GetColumnStatisticsForPartitionResult>>> pending : pendingByPartition.entrySet()) {
            Partition partition = pending.getKey();
            HiveBasicStatistics partitionBasicStatistics = getHiveBasicStatistics(partition.getParameters());
            ImmutableMap.Builder<String, HiveColumnStatistics> statisticsByColumn = ImmutableMap.builder();
            for (CompletableFuture<GetColumnStatisticsForPartitionResult> resultFuture : pending.getValue()) {
                GetColumnStatisticsForPartitionResult chunkResult = getFutureValue(resultFuture);
                chunkResult.getColumnStatisticsList().forEach(glueStatistics -> statisticsByColumn.put(
                        glueStatistics.getColumnName(),
                        fromGlueColumnStatistics(glueStatistics.getStatisticsData(), partitionBasicStatistics.getRowCount())));
            }
            statisticsByPartition.put(partition, statisticsByColumn.buildOrThrow());
        }
        return statisticsByPartition.buildOrThrow();
    } catch (RuntimeException ex) {
        Throwable cause = ex.getCause();
        // instanceof is false for null, so no separate null check is needed.
        if (cause instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, cause);
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Also used : Partition(io.trino.plugin.hive.metastore.Partition) HashMap(java.util.HashMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) GetColumnStatisticsForPartitionRequest(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionRequest) CompletableFuture(java.util.concurrent.CompletableFuture) GetColumnStatisticsForPartitionResult(com.amazonaws.services.glue.model.GetColumnStatisticsForPartitionResult) Column(io.trino.plugin.hive.metastore.Column) TrinoException(io.trino.spi.TrinoException) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) ArrayList(java.util.ArrayList) Collectors.toUnmodifiableList(java.util.stream.Collectors.toUnmodifiableList) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Aggregations

Partition (io.trino.plugin.hive.metastore.Partition)42 Table (io.trino.plugin.hive.metastore.Table)33 TrinoException (io.trino.spi.TrinoException)31 List (java.util.List)28 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)27 ImmutableMap (com.google.common.collect.ImmutableMap)27 Map (java.util.Map)26 Optional (java.util.Optional)26 ImmutableList (com.google.common.collect.ImmutableList)25 ArrayList (java.util.ArrayList)23 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)21 TableNotFoundException (io.trino.spi.connector.TableNotFoundException)21 SchemaTableName (io.trino.spi.connector.SchemaTableName)20 Objects.requireNonNull (java.util.Objects.requireNonNull)20 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)19 Column (io.trino.plugin.hive.metastore.Column)19 HivePrincipal (io.trino.plugin.hive.metastore.HivePrincipal)19 TupleDomain (io.trino.spi.predicate.TupleDomain)19 Type (io.trino.spi.type.Type)19 OptionalLong (java.util.OptionalLong)19