use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class GlueInputConverter method convertPartition.
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) {
PartitionInput input = convertPartition(partitionWithStatistics.getPartition());
PartitionStatistics statistics = partitionWithStatistics.getStatistics();
input.setParameters(updateStatisticsParameters(input.getParameters(), statistics.getBasicStatistics()));
return input;
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class GlueHiveMetastore method updatePartitionStatisticsBatch.
private void updatePartitionStatisticsBatch(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
ImmutableList.Builder<BatchUpdatePartitionRequestEntry> partitionUpdateRequests = ImmutableList.builder();
ImmutableSet.Builder<GlueColumnStatisticsProvider.PartitionStatisticsUpdate> columnStatisticsUpdates = ImmutableSet.builder();
Map<List<String>, String> partitionValuesToName = updates.keySet().stream().collect(toImmutableMap(HiveUtil::toPartitionValues, identity()));
List<Partition> partitions = batchGetPartition(table, ImmutableList.copyOf(updates.keySet()));
Map<Partition, Map<String, HiveColumnStatistics>> statisticsPerPartition = columnStatisticsProvider.getPartitionColumnStatistics(partitions);
statisticsPerPartition.forEach((partition, columnStatistics) -> {
Function<PartitionStatistics, PartitionStatistics> update = updates.get(partitionValuesToName.get(partition.getValues()));
PartitionStatistics currentStatistics = new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), columnStatistics);
PartitionStatistics updatedStatistics = update.apply(currentStatistics);
Map<String, String> updatedStatisticsParameters = updateStatisticsParameters(partition.getParameters(), updatedStatistics.getBasicStatistics());
partition = Partition.builder(partition).setParameters(updatedStatisticsParameters).build();
Map<String, HiveColumnStatistics> updatedColumnStatistics = updatedStatistics.getColumnStatistics();
PartitionInput partitionInput = GlueInputConverter.convertPartition(partition);
partitionInput.setParameters(partition.getParameters());
partitionUpdateRequests.add(new BatchUpdatePartitionRequestEntry().withPartitionValueList(partition.getValues()).withPartitionInput(partitionInput));
columnStatisticsUpdates.add(new GlueColumnStatisticsProvider.PartitionStatisticsUpdate(partition, updatedColumnStatistics));
});
List<List<BatchUpdatePartitionRequestEntry>> partitionUpdateRequestsPartitioned = Lists.partition(partitionUpdateRequests.build(), BATCH_UPDATE_PARTITION_MAX_PAGE_SIZE);
List<Future<BatchUpdatePartitionResult>> partitionUpdateRequestsFutures = new ArrayList<>();
partitionUpdateRequestsPartitioned.forEach(partitionUpdateRequestsPartition -> {
// Update basic statistics
long startTimestamp = System.currentTimeMillis();
partitionUpdateRequestsFutures.add(glueClient.batchUpdatePartitionAsync(new BatchUpdatePartitionRequest().withCatalogId(catalogId).withDatabaseName(table.getDatabaseName()).withTableName(table.getTableName()).withEntries(partitionUpdateRequestsPartition), new StatsRecordingAsyncHandler(stats.getBatchUpdatePartition(), startTimestamp)));
});
try {
// Update column statistics
columnStatisticsProvider.updatePartitionStatistics(columnStatisticsUpdates.build());
// Don't block on the batch update call until the column statistics have finished updating
partitionUpdateRequestsFutures.forEach(MoreFutures::getFutureValue);
} catch (AmazonServiceException e) {
throw new TrinoException(HIVE_METASTORE_ERROR, e);
}
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class FileHiveMetastore method updateTableStatistics.
@Override
public synchronized void updateTableStatistics(String databaseName, String tableName, AcidTransaction transaction, Function<PartitionStatistics, PartitionStatistics> update) {
PartitionStatistics originalStatistics = getTableStatistics(databaseName, tableName);
PartitionStatistics updatedStatistics = update.apply(originalStatistics);
Path tableMetadataDirectory = getTableMetadataDirectory(databaseName, tableName);
TableMetadata tableMetadata = readSchemaFile(TABLE, tableMetadataDirectory, tableCodec).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName)));
checkVersion(tableMetadata.getWriterVersion());
TableMetadata updatedMetadata = tableMetadata.withParameters(currentVersion, updateStatisticsParameters(tableMetadata.getParameters(), updatedStatistics.getBasicStatistics())).withColumnStatistics(currentVersion, updatedStatistics.getColumnStatistics());
writeSchemaFile(TABLE, tableMetadataDirectory, tableCodec, updatedMetadata, true);
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class FileHiveMetastore method updatePartitionStatistics.
@Override
public synchronized void updatePartitionStatistics(Table table, Map<String, Function<PartitionStatistics, PartitionStatistics>> updates) {
updates.forEach((partitionName, update) -> {
PartitionStatistics originalStatistics = getPartitionStatisticsInternal(table, extractPartitionValues(partitionName));
PartitionStatistics updatedStatistics = update.apply(originalStatistics);
List<String> partitionValues = extractPartitionValues(partitionName);
Path partitionDirectory = getPartitionMetadataDirectory(table, partitionValues);
PartitionMetadata partitionMetadata = readSchemaFile(PARTITION, partitionDirectory, partitionCodec).orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(table.getDatabaseName(), table.getTableName()), partitionValues));
PartitionMetadata updatedMetadata = partitionMetadata.withParameters(updateStatisticsParameters(partitionMetadata.getParameters(), updatedStatistics.getBasicStatistics())).withColumnStatistics(updatedStatistics.getColumnStatistics());
writeSchemaFile(PARTITION, partitionDirectory, partitionCodec, updatedMetadata, true);
});
}
use of io.trino.plugin.hive.PartitionStatistics in project trino by trinodb.
the class AlluxioHiveMetastore method getPartitionStatistics.
@Override
public Map<String, PartitionStatistics> getPartitionStatistics(Table table, List<Partition> partitions) {
try {
List<String> dataColumns = table.getDataColumns().stream().map(Column::getName).collect(toImmutableList());
List<String> partitionColumns = table.getPartitionColumns().stream().map(Column::getName).collect(toImmutableList());
Map<String, HiveBasicStatistics> partitionBasicStatistics = partitions.stream().collect(toImmutableMap(partition -> makePartName(partitionColumns, partition.getValues()), partition -> getHiveBasicStatistics(partition.getParameters())));
Map<String, OptionalLong> partitionRowCounts = partitionBasicStatistics.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount()));
Map<String, List<ColumnStatisticsInfo>> colStatsMap = client.getPartitionColumnStatistics(table.getDatabaseName(), table.getTableName(), partitionBasicStatistics.keySet().stream().collect(toImmutableList()), dataColumns);
Map<String, Map<String, HiveColumnStatistics>> partitionColumnStatistics = colStatsMap.entrySet().stream().filter(entry -> !entry.getValue().isEmpty()).collect(toImmutableMap(Map.Entry::getKey, entry -> groupStatisticsByColumn(entry.getValue(), partitionRowCounts.getOrDefault(entry.getKey(), OptionalLong.empty()))));
ImmutableMap.Builder<String, PartitionStatistics> result = ImmutableMap.builder();
for (String partitionName : partitionBasicStatistics.keySet()) {
HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName);
Map<String, HiveColumnStatistics> columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of());
result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics));
}
return result.buildOrThrow();
} catch (Exception e) {
throw new TrinoException(HIVE_METASTORE_ERROR, e);
}
}
Aggregations