Use of com.amazonaws.services.glue.model.BatchUpdatePartitionRequestEntry in project trino by trinodb.
From the class GlueHiveMetastore, method updatePartitionStatisticsBatch.
/**
 * Applies the supplied statistics update functions to a batch of partitions of {@code table}.
 *
 * <p>The partitions named by the keys of {@code updates} are fetched together with their current
 * column statistics. For each partition the matching update function is applied to its current
 * statistics; the resulting basic statistics are folded into the partition parameters and queued
 * as a {@code BatchUpdatePartitionRequestEntry}, while the resulting column statistics are queued
 * for {@code columnStatisticsProvider}. The queued entries are then sent through
 * {@code batchUpdatePartitionAsync} in pages of {@code BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE}.
 *
 * @param table table owning the partitions; supplies the database and table name of each request
 * @param updates statistics update functions keyed by partition name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} when an {@code AmazonServiceException}
 *         is raised while updating column statistics or while waiting for the batch update futures
 */
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
    // Partitions come back identified by their values, so index the requested names by values.
    Map<List<String>, String> nameByValues = updates.keySet().stream()
            .collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));
    List<Partition> fetchedPartitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> currentColumnStatistics =
            columnStatisticsProvider.getPartitionColumnStatistics(fetchedPartitions);

    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> requestEntries = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnUpdates = ImmutableSet.builder();
    for (Map.Entry<Partition, Map<String, HiveColumnStatistics>> entry : currentColumnStatistics.entrySet()) {
        Partition existingPartition = entry.getKey();
        Function<PartitionStatistics, PartitionStatistics> updater =
                updates.get(nameByValues.get(existingPartition.getValues()));

        PartitionStatistics statisticsBefore = new PartitionStatistics(
                getHiveBasicStatistics(existingPartition.getParameters()),
                entry.getValue());
        PartitionStatistics statisticsAfter = updater.apply(statisticsBefore);

        // Basic statistics are stored in the partition parameters, so rebuild the partition with them.
        Map<String, String> parametersAfter =
                updateStatisticsParameters(existingPartition.getParameters(), statisticsAfter.getBasicStatistics());
        Partition updatedPartition = Partition.builder(existingPartition)
                .setParameters(parametersAfter)
                .build();
        Map<String, HiveColumnStatistics> columnStatisticsAfter = statisticsAfter.getColumnStatistics();

        PartitionInput glueInput = GlueInputConverter.convertPartition(updatedPartition);
        glueInput.setParameters(updatedPartition.getParameters());

        BatchUpdatePartitionRequestEntry requestEntry = new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(updatedPartition.getValues())
                .withPartitionInput(glueInput);
        requestEntries.add(requestEntry);
        columnUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(updatedPartition, columnStatisticsAfter));
    }

    // Update basic statistics: one asynchronous request per page of entries.
    // These requests are issued before the try block below.
    List<Future<BatchUpdatePartitionResult>> pendingBatches = new ArrayList<>();
    for (List<BatchUpdatePartitionRequestEntry> page : Lists.partition(requestEntries.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE)) {
        long startTimestamp = System.currentTimeMillis();
        BatchUpdatePartitionRequest request = new BatchUpdatePartitionRequest()
                .withCatalogId(catalogId)
                .withDatabaseName(table.getDatabaseName())
                .withTableName(table.getTableName())
                .withEntries(page);
        pendingBatches.add(glueClient.batchUpdatePartitionAsync(
                request,
                new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    }

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnUpdates.build());
        // Only wait for the batch update calls once the column statistics update has returned.
        // NOTE(review): the BatchUpdatePartitionResult values are discarded here, so any
        // per-entry failures they carry are not inspected - confirm this is intended.
        for (Future<BatchUpdatePartitionResult> pendingBatch : pendingBatches) {
            MoreFutures.getFutureValue(pendingBatch);
        }
    }
    catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
Aggregations