Search in sources:

Example 26 with PartitionStatistics

use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

the class GlueInputConverter method convertPartition.

/**
 * Converts a partition bundled with its statistics into a Glue {@link PartitionInput}.
 *
 * <p>The wrapped partition is converted by the single-partition overload first. The
 * parameters of that result are then passed, together with the basic statistics, through
 * {@code updateStatisticsParameters}, and the returned parameters replace the ones on the
 * input.
 *
 * @param partitionWithStatistics the partition and the statistics that accompany it
 * @return the converted partition input carrying the updated parameters
 */
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) {
    PartitionInput partitionInput = convertPartition(partitionWithStatistics.getPartition());
    PartitionStatistics partitionStatistics = partitionWithStatistics.getStatistics();

    partitionInput.setParameters(
            updateStatisticsParameters(
                    partitionInput.getParameters(),
                    partitionStatistics.getBasicStatistics()));

    return partitionInput;
}
Also used : PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) PartitionInput(com.amazonaws.services.glue.model.PartitionInput)

Example 27 with PartitionStatistics

use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

the class GlueHiveMetastore method updatePartitionStatisticsBatch.

/**
 * Applies the supplied statistics update functions to a batch of partitions of {@code table}.
 *
 * <p>The partitions named by the keys of {@code updates} are fetched, their current column
 * statistics are read from the column statistics provider, and each update function is
 * applied to the current statistics. The updated partition parameters are sent with
 * asynchronous batch update requests, while the updated column statistics are written
 * through the column statistics provider.
 *
 * @param table the table owning the partitions
 * @param updates update functions keyed by partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when an
 *         {@link AmazonServiceException} is raised while updating column statistics or
 *         while waiting for the batch update requests
 */
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> requestEntries = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnUpdates = ImmutableSet.builder();

    // Lookup from partition values back to the partition name used as key in `updates`
    Map<List<String>, String> nameByValues = updates.keySet().stream()
            .collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));

    List<Partition> partitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> statisticsPerPartition = columnStatisticsProvider.getPartitionColumnStatistics(partitions);

    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : statisticsPerPartition.entrySet()) {
        Partition existingPartition = entry.getKey();
        Map<String, HiveColumnStatistics> existingColumnStatistics = entry.getValue();

        Function<PartitionStatistics, PartitionStatistics> update = updates.get(nameByValues.get(existingPartition.getValues()));
        PartitionStatistics before = new PartitionStatistics(getHiveBasicStatistics(existingPartition.getParameters()), existingColumnStatistics);
        PartitionStatistics after = update.apply(before);

        // Rebuild the partition so that its parameters reflect the updated basic statistics
        Map<String, String> newParameters = updateStatisticsParameters(existingPartition.getParameters(), after.getBasicStatistics());
        Partition updatedPartition = Partition.builder(existingPartition)
                .setParameters(newParameters)
                .build();
        Map<String, HiveColumnStatistics> newColumnStatistics = after.getColumnStatistics();

        PartitionInput partitionInput = GlueInputConverter.convertPartition(updatedPartition);
        partitionInput.setParameters(updatedPartition.getParameters());

        BatchUpdatePartitionRequestEntry requestEntry = new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(updatedPartition.getValues())
                .withPartitionInput(partitionInput);
        requestEntries.add(requestEntry);
        columnUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(updatedPartition, newColumnStatistics));
    }

    List<List<BatchUpdatePartitionRequestEntry>> pages = Lists.partition(requestEntries.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE);
    List<Future<BatchUpdatePartitionResult>> pendingBatchUpdates = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> page : pages) {
        // Update basic statistics
        long startTimestamp = System.currentTimeMillis();
        BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(table.getDatabaseName())
                .withTableName(table.getTableName())
                .withEntries(page);
        pendingBatchUpdates.add(glueClient.batchUpdatePartitionAsync(
                request,
                new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    }

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnUpdates.build());
        // Don't block on the batch update call until the column statistics have finished updating
        for (Future<BatchUpdatePartitionResult> pending : pendingBatchUpdates) {
            MoreFutures.getFutureValue(pending);
        }
    } catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) MoreFutures(io.airlift.concurrent.MoreFutures)

Example 28 with PartitionStatistics

use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

the class FileHiveMetastore method updateTableStatistics.

/**
 * Rewrites the stored statistics of a table by applying {@code update} to its current
 * statistics.
 *
 * <p>The current statistics are read and transformed first. The table's schema file is then
 * loaded from its metadata directory, its writer version is checked, and a copy carrying the
 * updated parameters and column statistics is written back.
 *
 * @param databaseName name of the database containing the table
 * @param tableName name of the table
 * @param transaction not referenced by this implementation
 * @param update function producing the new statistics from the current ones
 * @throws TableNotFoundException if no schema file is found for the table
 */
@Override
public synchronized void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update) {
    PartitionStatistics newStatistics = update.apply(getTableStatistics(databaseName, tableName));

    Path metadataDirectory = getTableMetadataDirectory(databaseName, tableName);
    TableMetadata existingMetadata = readSchemaFile(TABLE, metadataDirectory, tableCodec)
            .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
    checkVersion(existingMetadata.getWriterVersion());

    // Basic statistics live in the table parameters; column statistics are stored separately
    TableMetadata withNewParameters = existingMetadata.withParameters(
            currentVersion,
            updateStatisticsParameters(existingMetadata.getParameters(), newStatistics.getBasicStatistics()));
    TableMetadata rewrittenMetadata = withNewParameters.withColumnStatistics(currentVersion, newStatistics.getColumnStatistics());

    // NOTE(review): the trailing `true` presumably allows overwriting the existing file — confirm against writeSchemaFile
    writeSchemaFile(TABLE, metadataDirectory, tableCodec, rewrittenMetadata, true);
}
Also used : Path(org.apache.hadoop.fs.Path) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) SchemaTableName(io.trino.spi.connector.SchemaTableName)

Example 29 with PartitionStatistics

use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

the class FileHiveMetastore method updatePartitionStatistics.

/**
 * Rewrites the stored statistics of the given partitions of {@code table}.
 *
 * <p>For every entry of {@code updates}, the partition's current statistics are read, the
 * update function is applied, and the partition's schema file is written back with the
 * updated parameters and column statistics.
 *
 * @param table the table owning the partitions
 * @param updates update functions keyed by partition name
 * @throws PartitionNotFoundException if no schema file is found for one of the partitions
 */
@Override
public synchronized void updatePartitionStatistics(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    updates.forEach((partitionName, update) -> {
        // Parse the partition name once; the values are needed both for the statistics
        // lookup and for locating the partition's metadata directory.
        List<String> partitionValues = extractPartitionValues(partitionName);

        PartitionStatistics originalStatistics = getPartitionStatisticsInternal(table, partitionValues);
        PartitionStatistics updatedStatistics = update.apply(originalStatistics);

        Path partitionDirectory = getPartitionMetadataDirectory(table, partitionValues);
        PartitionMetadata partitionMetadata = readSchemaFile(PARTITION, partitionDirectory, partitionCodec)
                .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionValues));

        PartitionMetadata updatedMetadata = partitionMetadata
                .withParameters(updateStatisticsParameters(partitionMetadata.getParameters(), updatedStatistics.getBasicStatistics()))
                .withColumnStatistics(updatedStatistics.getColumnStatistics());
        writeSchemaFile(PARTITION, partitionDirectory, partitionCodec, updatedMetadata, true);
    });
}
Also used : Path(org.apache.hadoop.fs.Path) PartitionNotFoundException(io.trino.plugin.hive.PartitionNotFoundException) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) SchemaTableName(io.trino.spi.connector.SchemaTableName)

Example 30 with PartitionStatistics

use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.

the class AlluxioHiveMetastore method getPartitionStatistics.

/**
 * Returns the statistics of the given partitions, keyed by partition name.
 *
 * <p>Basic statistics are derived from each partition's parameters. Column statistics are
 * requested from the client for the table's data columns; partitions for which the client
 * returns no or an empty list of column statistics get an empty column statistics map.
 *
 * @param table the table owning the partitions
 * @param partitions the partitions to look up
 * @return statistics per partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} wrapping any exception raised
 *         while collecting the statistics
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List<Partition> partitions) {
    try {
        List<String> dataColumnNames = table.getDataColumns().stream()
                .map(Column::getName)
                .collect(toImmutableList());
        List<String> partitionColumnNames = table.getPartitionColumns().stream()
                .map(Column::getName)
                .collect(toImmutableList());

        Map<String, HiveBasicStatistics> basicByPartition = partitions.stream()
                .collect(toImmutableMap(
                        partition -> makePartName(partitionColumnNames, partition.getValues()),
                        partition -> getHiveBasicStatistics(partition.getParameters())));
        Map<String, OptionalLong> rowCountByPartition = basicByPartition.entrySet().stream()
                .collect(toImmutableMap(
                        Map.Entry::getKey,
                        entry -> entry.getValue().getRowCount()));

        Map<String, List<ColumnStatisticsInfo>> rawColumnStatistics = client.getPartitionColumnStatistics(
                table.getDatabaseName(),
                table.getTableName(),
                basicByPartition.keySet().stream().collect(toImmutableList()),
                dataColumnNames);

        // Partitions with an empty statistics list are dropped here and fall back to an empty map below
        Map<String, Map<String, HiveColumnStatistics>> columnsByPartition = rawColumnStatistics.entrySet().stream()
                .filter(entry -> !entry.getValue().isEmpty())
                .collect(toImmutableMap(
                        Map.Entry::getKey,
                        entry -> groupStatisticsByColumn(
                                entry.getValue(),
                                rowCountByPartition.getOrDefault(entry.getKey(), OptionalLong.empty()))));

        ImmutableMap.Builder<String, PartitionStatistics> statisticsByPartition = ImmutableMap.builder();
        basicByPartition.forEach((partitionName, basicStatistics) -> {
            Map<String, HiveColumnStatistics> columnStatistics = columnsByPartition.getOrDefault(partitionName, ImmutableMap.of());
            statisticsByPartition.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics));
        });
        return statisticsByPartition.buildOrThrow();
    } catch (Exception e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) Database(io.trino.plugin.hive.metastore.Database) Function(java.util.function.Function) ColumnStatisticType(io.trino.spi.statistics.ColumnStatisticType) ArrayList(java.util.ArrayList) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ThriftMetastoreUtil(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) Column(io.trino.plugin.hive.metastore.Column) HIVE_METASTORE_ERROR(io.trino.plugin.hive.HiveErrorCode.HIVE_METASTORE_ERROR) TableMasterClient(alluxio.client.table.TableMasterClient) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Constraint(alluxio.grpc.table.Constraint) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) TableInfo(alluxio.grpc.table.TableInfo) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) AcidTransaction(io.trino.plugin.hive.acid.AcidTransaction) Table(io.trino.plugin.hive.metastore.Table) ImmutableMap(com.google.common.collect.ImmutableMap) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TrinoException(io.trino.spi.TrinoException) 
TupleDomain(io.trino.spi.predicate.TupleDomain) NotFoundException(alluxio.exception.status.NotFoundException) Collectors(java.util.stream.Collectors) RoleGrant(io.trino.spi.security.RoleGrant) PartitionInfo(alluxio.grpc.table.layout.hive.PartitionInfo) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Optional(java.util.Optional) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) Collections(java.util.Collections) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) Partition(io.trino.plugin.hive.metastore.Partition) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ThriftMetastoreUtil.getHiveBasicStatistics(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) AlluxioStatusException(alluxio.exception.status.AlluxioStatusException) TrinoException(io.trino.spi.TrinoException) NotFoundException(alluxio.exception.status.NotFoundException) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) OptionalLong(java.util.OptionalLong) TrinoException(io.trino.spi.TrinoException) ArrayList(java.util.ArrayList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap)

Aggregations

PartitionStatistics (io.trino.plugin.hive.PartitionStatistics): 36, SchemaTableName (io.trino.spi.connector.SchemaTableName): 21, HiveBasicStatistics (io.trino.plugin.hive.HiveBasicStatistics): 16, HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics): 16, TrinoException (io.trino.spi.TrinoException): 15, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList): 13, ImmutableMap (com.google.common.collect.ImmutableMap): 13, List (java.util.List): 12, Map (java.util.Map): 11, OptionalLong (java.util.OptionalLong): 11, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 10, ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet): 10, TableNotFoundException (io.trino.spi.connector.TableNotFoundException): 10, Type (io.trino.spi.type.Type): 10, ArrayList (java.util.ArrayList): 10, Objects.requireNonNull (java.util.Objects.requireNonNull): 10, Optional (java.util.Optional): 10, Set (java.util.Set): 10, ImmutableList (com.google.common.collect.ImmutableList): 9, PartitionNotFoundException (io.trino.plugin.hive.PartitionNotFoundException): 8