Search in sources :

Example 1 with BatchUpdatePartitionRequest

Use of com.amazonaws.services.glue.model.BatchUpdatePartitionRequest in project trino by trinodb.

The class GlueHiveMetastore, method updatePartitionStatisticsBatch.

/**
 * Applies the supplied statistics update functions to a batch of partitions of {@code table}.
 *
 * <p>For every partition returned by the column statistics provider, the matching update function is
 * applied to that partition's current statistics. The resulting basic statistics are folded into the
 * partition parameters and submitted through asynchronous batch-update requests; the resulting column
 * statistics are passed to the column statistics provider.
 *
 * @param table table whose partitions are being updated
 * @param updates update functions keyed by partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when an {@code AmazonServiceException} is
 *         caught while writing column statistics or while waiting for the batch updates
 */
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    // Reverse lookup: partition values -> the key under which its update function is stored
    Map<List<String>, String> namesByValues = updates.keySet().stream()
            .collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));

    List<Partition> fetchedPartitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> currentColumnStatistics = columnStatisticsProvider.getPartitionColumnStatistics(fetchedPartitions);

    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> requestEntries = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnUpdates = ImmutableSet.builder();

    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : currentColumnStatistics.entrySet()) {
        Partition existing = entry.getKey();
        String partitionName = namesByValues.get(existing.getValues());
        Function<PartitionStatistics, PartitionStatistics> update = updates.get(partitionName);

        PartitionStatistics before = new PartitionStatistics(getHiveBasicStatistics(existing.getParameters()), entry.getValue());
        PartitionStatistics after = update.apply(before);

        // Rebuild the partition so its parameters carry the updated basic statistics
        Map<String, String> refreshedParameters = updateStatisticsParameters(existing.getParameters(), after.getBasicStatistics());
        Partition refreshed = Partition.builder(existing)
                .setParameters(refreshedParameters)
                .build();
        Map<String, HiveColumnStatistics> refreshedColumnStatistics = after.getColumnStatistics();

        PartitionInput input = GlueInputConverter.convertPartition(refreshed);
        input.setParameters(refreshed.getParameters());

        BatchUpdatePartitionRequestEntry requestEntry = new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(refreshed.getValues())
                .withPartitionInput(input);
        requestEntries.add(requestEntry);
        columnUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(refreshed, refreshedColumnStatistics));
    }

    // Update basic statistics: submit one asynchronous request per page of at most BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE entries
    List<Future<BatchUpdatePartitionResult>> pendingBatches = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> page : Lists.partition(requestEntries.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE)) {
        long startTimestamp = System.currentTimeMillis();
        BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(table.getDatabaseName())
                .withTableName(table.getTableName())
                .withEntries(page);
        pendingBatches.add(glueClient.batchUpdatePartitionAsync(request, new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    }

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnUpdates.build());
        // Only now wait for the batch updates, so they were not blocked on while column statistics were being written
        for (Future<BatchUpdatePartitionResult> pending : pendingBatches) {
            MoreFutures.getFutureValue(pending);
        }
    } catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) PartitionInput(com.amazonaws.services.glue.model.PartitionInput) BatchUpdatePartitionRequestEntry(com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) PartitionValueList(com.amazonaws.services.glue.model.PartitionValueList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) GlueInputConverter.convertPartition(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition) Partition(io.trino.plugin.hive.metastore.Partition) BatchUpdatePartitionRequest(com.amazonaws.services.glue.model.BatchUpdatePartitionRequest) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) AmazonServiceException(com.amazonaws.AmazonServiceException) Future(java.util.concurrent.Future) TrinoException(io.trino.spi.TrinoException) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) MoreFutures(io.airlift.concurrent.MoreFutures)

Aggregations

AmazonServiceException (com.amazonaws.AmazonServiceException)1 BatchUpdatePartitionRequest (com.amazonaws.services.glue.model.BatchUpdatePartitionRequest)1 BatchUpdatePartitionRequestEntry (com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry)1 PartitionInput (com.amazonaws.services.glue.model.PartitionInput)1 PartitionValueList (com.amazonaws.services.glue.model.PartitionValueList)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)1 MoreFutures (io.airlift.concurrent.MoreFutures)1 PartitionStatistics (io.trino.plugin.hive.PartitionStatistics)1 HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics)1 Partition (io.trino.plugin.hive.metastore.Partition)1 GlueInputConverter.convertPartition (io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition)1 TrinoException (io.trino.spi.TrinoException)1 ArrayList (java.util.ArrayList)1 List (java.util.List)1 Map (java.util.Map)1