Use of io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition in project trino by trinodb.
From the class GlueHiveMetastore, method addPartitions:
@Override
public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitions)
{
    try {
        stats.getCreatePartitions().call(() -> {
            List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>();
            // Glue caps the number of entries per BatchCreatePartition request,
            // so submit the partitions in pages and collect the async results
            for (List<PartitionWithStatistics> partitionBatch : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
                List<PartitionInput> partitionInputs = mappedCopy(partitionBatch, GlueInputConverter::convertPartition);
                long startTime = System.currentTimeMillis();
                futures.add(glueClient.batchCreatePartitionAsync(
                        new BatchCreatePartitionRequest()
                                .withCatalogId(catalogId)
                                .withDatabaseName(databaseName)
                                .withTableName(tableName)
                                .withPartitionInputList(partitionInputs),
                        new StatsRecordingAsyncHandler(stats.getBatchCreatePartition(), startTime)));
            }
            for (Future<BatchCreatePartitionResult> future : futures) {
                try {
                    // Surface any per-partition errors reported by Glue as a TrinoException
                    BatchCreatePartitionResult result = future.get();
                    propagatePartitionErrorToTrinoException(databaseName, tableName, result.getErrors());
                }
                catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new TrinoException(HIVE_METASTORE_ERROR, e);
                }
            }
            Set<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> updates = partitions.stream()
                    .map(partitionWithStatistics -> new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(
                            partitionWithStatistics.getPartition(),
                            partitionWithStatistics.getStatistics().getColumnStatistics()))
                    .collect(toImmutableSet());
            columnStatisticsProvider.updatePartitionStatistics(updates);
            return null;
        });
    }
    catch (AmazonServiceException | ExecutionException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
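The paging above leans on Guava's Lists.partition, which views a list as consecutive sublists of at most the given size. Below is a minimal standalone sketch of the same pattern; the class name is invented for illustration, and the page size of 100 is an assumption matching Glue's documented per-request limit for BatchCreatePartition (in the real code it comes from the BATCH_CREATE_PARTITION_MAX_PAGE_SIZE constant in GlueHiveMetastore).

import com.google.common.collect.Lists;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class PartitionPagingSketch
{
    // Assumed page size; AWS Glue caps BatchCreatePartition at 100 entries per request
    private static final int BATCH_CREATE_PARTITION_MAX_PAGE_SIZE = 100;

    public static void main(String[] args)
    {
        // Stand-in for the incoming partition list
        List<Integer> partitions = IntStream.rangeClosed(1, 250).boxed().collect(Collectors.toList());

        // Lists.partition yields consecutive sublists: sizes 100, 100, 50
        for (List<Integer> page : Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
            System.out.printf("page of %d starting at %d%n", page.size(), page.get(0));
        }
    }
}

Each page maps to one async BatchCreatePartition call in the snippet above, and the futures are drained afterwards so every page's errors are checked.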
Use of io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter.convertPartition in project trino by trinodb.
From the class GlueHiveMetastore, method updatePartitionStatisticsBatch:
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates)
{
    ImmutableList.Builder<BatchUpdatePartitionRequestEntry> partitionUpdateRequests = ImmutableList.builder();
    ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnStatisticsUpdates = ImmutableSet.builder();

    // Reverse lookup from a partition's value list back to the partition name keying the updates map
    Map<List<String>, String> partitionValuesToName = updates.keySet().stream()
            .collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));

    List<Partition> partitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
    Map<Partition, Map<String, HiveColumnStatistics>> statisticsPerPartition = columnStatisticsProvider.getPartitionColumnStatistics(partitions);

    statisticsPerPartition.forEach((partition, columnStatistics) -> {
        Function<PartitionStatistics, PartitionStatistics> update = updates.get(partitionValuesToName.get(partition.getValues()));

        // Apply the caller's update function to the current statistics
        PartitionStatistics currentStatistics = new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), columnStatistics);
        PartitionStatistics updatedStatistics = update.apply(currentStatistics);

        // Fold the updated basic statistics back into the partition parameters
        Map<String, String> updatedStatisticsParameters = updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics());
        partition = Partition.builder(partition).setParameters(updatedStatisticsParameters).build();
        Map<String, HiveColumnStatistics> updatedColumnStatistics = updatedStatistics.getColumnStatistics();

        PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
        partitionInput.setParameters(partition.getParameters());

        partitionUpdateRequests.add(new BatchUpdatePartitionRequestEntry()
                .withPartitionValueList(partition.getValues())
                .withPartitionInput(partitionInput));
        columnStatisticsUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(partition, updatedColumnStatistics));
    });

    List<List<BatchUpdatePartitionRequestEntry>> partitionUpdateRequestsPartitioned = Lists.partition(partitionUpdateRequests.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE);
    List<Future<BatchUpdatePartitionResult>> partitionUpdateRequestsFutures = new ArrayList<>();
    partitionUpdateRequestsPartitioned.forEach(partitionUpdateRequestsPartition -> {
        // Update basic statistics
        long startTimestamp = System.currentTimeMillis();
        partitionUpdateRequestsFutures.add(glueClient.batchUpdatePartitionAsync(
                new BatchUpdatePartitionRequest()
                        .withCatalogId(catalogId)
                        .withDatabaseName(table.getDatabaseName())
                        .withTableName(table.getTableName())
                        .withEntries(partitionUpdateRequestsPartition),
                new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
    });

    try {
        // Update column statistics
        columnStatisticsProvider.updatePartitionStatistics(columnStatisticsUpdates.build());
        // Don't block on the batch update calls until the column statistics have finished updating
        partitionUpdateRequestsFutures.forEach(MoreFutures::getFutureValue);
    }
    catch (AmazonServiceException e) {
        throw new TrinoException(HIVE_METASTORE_ERROR, e);
    }
}
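The reverse map built with toImmutableMap works because HiveUtil.toPartitionValues decomposes a Hive partition name into its ordered value list, so each Partition fetched back from Glue can be matched to the name that keys the updates map via partition.getValues(). A small illustrative sketch follows; the class name and partition name are made up, and the import path assumes HiveUtil lives in io.trino.plugin.hive.util.

import io.trino.plugin.hive.util.HiveUtil;

import java.util.List;

public class PartitionNameSketch
{
    public static void main(String[] args)
    {
        // A hypothetical two-level partition name, as stored in the metastore
        String partitionName = "ds=2021-09-01/hour=03";

        // toPartitionValues extracts just the values, in partition-column order
        List<String> values = HiveUtil.toPartitionValues(partitionName);
        System.out.println(values); // prints: [2021-09-01, 03]
    }
}

Keying the lookup on the value list avoids having to re-derive the partition name from the Partition objects returned by batchGetPartition.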