Example usage of io.trino.plugin.hive.metastore.Partition in the Trino project (trinodb):
class CachingHiveMetastore, method getPartitionStatistics.
/**
 * Returns partition statistics for the given partitions of {@code table},
 * keyed by partition-name string, loading them through the partition-statistics cache.
 */
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List<Partition> partitions)
{
    // Build the table-level cache key once, then derive one cache key per partition.
    HiveTableName tableKey = hiveTableName(table.getDatabaseName(), table.getTableName());
    ImmutableList.Builder<HivePartitionName> keys = ImmutableList.builder();
    for (Partition partition : partitions) {
        keys.add(hivePartitionName(tableKey, makePartitionName(table, partition)));
    }

    // Bulk-load through the cache, then re-key the result by the partition-name string.
    Map<HivePartitionName, PartitionStatistics> cached = getAll(partitionStatisticsCache, keys.build());
    ImmutableMap.Builder<String, PartitionStatistics> byName = ImmutableMap.builder();
    for (Entry<HivePartitionName, PartitionStatistics> entry : cached.entrySet()) {
        byName.put(entry.getKey().getPartitionName().orElseThrow(), entry.getValue());
    }
    return byName.buildOrThrow();
}
Example usage of io.trino.plugin.hive.metastore.Partition in the Trino project (trinodb):
class CachingHiveMetastore, method loadPartitionsColumnStatistics.
/**
 * Cache-loader for partition statistics: groups the requested partition keys by owning
 * table so the delegate metastore is queried once per table rather than once per partition.
 */
private Map<HivePartitionName, PartitionStatistics> loadPartitionsColumnStatistics(Iterable<? extends HivePartitionName> keys)
{
    SetMultimap<HiveTableName, HivePartitionName> partitionsByTable =
            stream(keys).collect(toImmutableSetMultimap(HivePartitionName::getHiveTableName, Function.identity()));

    ImmutableMap.Builder<HivePartitionName, PartitionStatistics> statistics = ImmutableMap.builder();
    for (HiveTableName tableName : partitionsByTable.keySet()) {
        Set<HivePartitionName> names = partitionsByTable.get(tableName);
        Set<String> nameStrings = names.stream()
                .map(name -> name.getPartitionName().orElseThrow())
                .collect(toImmutableSet());

        Table table = getExistingTable(tableName.getDatabaseName(), tableName.getTableName());
        List<Partition> partitions = getExistingPartitionsByNames(table, ImmutableList.copyOf(nameStrings));
        // Delegate results are keyed by the partition-name string; map them back to the cache keys.
        Map<String, PartitionStatistics> byNameString = delegate.getPartitionStatistics(table, partitions);
        for (HivePartitionName name : names) {
            statistics.put(name, byNameString.get(name.getPartitionName().orElseThrow()));
        }
    }
    return statistics.buildOrThrow();
}
Example usage of io.trino.plugin.hive.metastore.Partition in the Trino project (trinodb):
class FileHiveMetastore, method alterPartition.
/**
 * Replaces the on-disk metadata of an existing partition with the partition
 * (and statistics) carried by {@code partitionWithStatistics}.
 */
@Override
public synchronized void alterPartition(String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics)
{
    // getRequiredTable throws if the table does not exist.
    Table table = getRequiredTable(databaseName, tableName);

    Partition updatedPartition = partitionWithStatistics.getPartition();
    // NOTE(review): presumably validates the partition's consistency with the table
    // (e.g. its storage location) — confirm against verifiedPartition.
    verifiedPartition(table, updatedPartition);

    // Rewrite the partition's schema file; the trailing 'true' presumably permits
    // overwriting the existing file — confirm against writeSchemaFile.
    Path metadataDirectory = getPartitionMetadataDirectory(table, updatedPartition.getValues());
    writeSchemaFile(PARTITION, metadataDirectory, partitionCodec, new PartitionMetadata(table, partitionWithStatistics), true);
}
Example usage of io.trino.plugin.hive.metastore.Partition in the Trino project (trinodb):
class DefaultGlueColumnStatisticsProvider, method updatePartitionStatistics.
/**
 * Pushes updated column statistics for a set of partitions to Glue, and deletes
 * statistics for columns that are present in Glue but absent from the update.
 * All writes run asynchronously on {@code writeExecutor} and are awaited together.
 *
 * @throws TrinoException with HIVE_PARTITION_NOT_FOUND if Glue reports the partition
 *         missing, or HIVE_METASTORE_ERROR for any other failure
 */
@Override
public void updatePartitionStatistics(Set<PartitionStatisticsUpdate> partitionStatisticsUpdates)
{
    // Fetch the current statistics first so that columns dropped from the update
    // can be explicitly deleted from Glue below.
    Map<Partition, Map<String, HiveColumnStatistics>> currentStatistics = getPartitionColumnStatistics(
            partitionStatisticsUpdates.stream()
                    .map(PartitionStatisticsUpdate::getPartition)
                    .collect(toImmutableList()));

    List<CompletableFuture<Void>> updateFutures = new ArrayList<>();
    for (PartitionStatisticsUpdate update : partitionStatisticsUpdates) {
        Partition partition = update.getPartition();
        Map<String, HiveColumnStatistics> updatedColumnStatistics = update.getColumnStatistics();

        HiveBasicStatistics partitionStats = getHiveBasicStatistics(partition.getParameters());
        List<ColumnStatistics> columnStats = toGlueColumnStatistics(partition, updatedColumnStatistics, partitionStats.getRowCount()).stream()
                .filter(this::isGlueWritable)
                .collect(toUnmodifiableList());

        // Glue caps the number of column statistics per request, so write in pages.
        List<List<ColumnStatistics>> columnChunks = Lists.partition(columnStats, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE);
        columnChunks.forEach(columnChunk -> updateFutures.add(runAsync(
                () -> stats.getUpdateColumnStatisticsForPartition().call(
                        () -> glueClient.updateColumnStatisticsForPartition(new UpdateColumnStatisticsForPartitionRequest()
                                .withCatalogId(catalogId)
                                .withDatabaseName(partition.getDatabaseName())
                                .withTableName(partition.getTableName())
                                .withPartitionValues(partition.getValues())
                                .withColumnStatisticsList(columnChunk))),
                writeExecutor)));

        // Columns that had statistics in Glue but are not part of this update: delete them.
        Set<String> removedStatistics = difference(currentStatistics.get(partition).keySet(), updatedColumnStatistics.keySet());
        removedStatistics.forEach(column -> updateFutures.add(runAsync(
                () -> stats.getDeleteColumnStatisticsForPartition().call(
                        () -> glueClient.deleteColumnStatisticsForPartition(new DeleteColumnStatisticsForPartitionRequest()
                                .withCatalogId(catalogId)
                                .withDatabaseName(partition.getDatabaseName())
                                .withTableName(partition.getTableName())
                                .withPartitionValues(partition.getValues())
                                .withColumnName(column))),
                writeExecutor)));
    }
    try {
        getFutureValue(allOf(updateFutures.toArray(CompletableFuture[]::new)));
    }
    catch (RuntimeException ex) {
        // instanceof is null-safe, so no separate getCause() != null check is needed.
        if (ex.getCause() instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, ex.getCause());
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Example usage of io.trino.plugin.hive.metastore.Partition in the Trino project (trinodb):
class DefaultGlueColumnStatisticsProvider, method getPartitionColumnStatistics.
/**
 * Fetches column statistics for each partition from Glue, paging the column list
 * (Glue caps the number of columns per request) and fanning the page requests out
 * asynchronously on {@code readExecutor} before joining the results per partition.
 *
 * @return statistics keyed by partition, then by column name
 * @throws TrinoException with HIVE_PARTITION_NOT_FOUND if Glue reports the partition
 *         missing, or HIVE_METASTORE_ERROR for any other failure
 */
@Override
public Map<Partition, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(Collection<Partition> partitions)
{
    Map<Partition, List<CompletableFuture<GetColumnStatisticsForPartitionResult>>> resultsForPartition = new HashMap<>();
    for (Partition partition : partitions) {
        ImmutableList.Builder<CompletableFuture<GetColumnStatisticsForPartitionResult>> futures = ImmutableList.builder();
        List<List<Column>> columnChunks = Lists.partition(partition.getColumns(), GLUE_COLUMN_READ_STAT_PAGE_SIZE);
        for (List<Column> partialPartitionColumns : columnChunks) {
            List<String> columnsNames = partialPartitionColumns.stream()
                    .map(Column::getName)
                    .collect(toImmutableList());
            GetColumnStatisticsForPartitionRequest request = new GetColumnStatisticsForPartitionRequest()
                    .withCatalogId(catalogId)
                    .withDatabaseName(partition.getDatabaseName())
                    .withTableName(partition.getTableName())
                    .withColumnNames(columnsNames)
                    .withPartitionValues(partition.getValues());
            futures.add(supplyAsync(
                    () -> stats.getGetColumnStatisticsForPartition().call(() -> glueClient.getColumnStatisticsForPartition(request)),
                    readExecutor));
        }
        resultsForPartition.put(partition, futures.build());
    }
    try {
        ImmutableMap.Builder<Partition, Map<String, HiveColumnStatistics>> partitionStatistics = ImmutableMap.builder();
        resultsForPartition.forEach((partition, futures) -> {
            // Row count from the partition's basic statistics is needed to decode Glue stats.
            HiveBasicStatistics tableStatistics = getHiveBasicStatistics(partition.getParameters());
            ImmutableMap.Builder<String, HiveColumnStatistics> columnStatsMapBuilder = ImmutableMap.builder();
            for (CompletableFuture<GetColumnStatisticsForPartitionResult> getColumnStatisticsResultFuture : futures) {
                GetColumnStatisticsForPartitionResult getColumnStatisticsResult = getFutureValue(getColumnStatisticsResultFuture);
                getColumnStatisticsResult.getColumnStatisticsList().forEach(columnStatistics -> columnStatsMapBuilder.put(
                        columnStatistics.getColumnName(),
                        fromGlueColumnStatistics(columnStatistics.getStatisticsData(), tableStatistics.getRowCount())));
            }
            partitionStatistics.put(partition, columnStatsMapBuilder.buildOrThrow());
        });
        return partitionStatistics.buildOrThrow();
    }
    catch (RuntimeException ex) {
        // instanceof is null-safe, so no separate getCause() != null check is needed.
        if (ex.getCause() instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, ex.getCause());
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Aggregations