Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
In class FileHiveMetastore, method getPartitionStatisticsInternal.
/**
 * Reads the statistics recorded for one partition of {@code table}.
 *
 * <p>The partition's metadata directory is resolved from the table and the partition values,
 * the {@code PARTITION} schema file in that directory is read with {@code partitionCodec}, and
 * the result combines the basic statistics derived from the stored parameters with the stored
 * column statistics. The method is {@code synchronized} on this instance.
 *
 * @param table the table that owns the partition
 * @param partitionValues the values identifying the partition
 * @return the basic and column statistics of the partition
 * @throws PartitionNotFoundException if no partition schema file could be read
 */
private synchronized PartitionStatistics getPartitionStatisticsInternal(Table table, List<String> partitionValues) {
    Path metadataDirectory = getPartitionMetadataDirectory(table, ImmutableList.copyOf(partitionValues));
    PartitionMetadata metadata = readSchemaFile(PARTITION, metadataDirectory, partitionCodec)
            .orElseThrow(() -> new PartitionNotFoundException(table.getSchemaTableName(), partitionValues));
    return new PartitionStatistics(
            getHiveBasicStatistics(metadata.getParameters()),
            metadata.getColumnStatistics());
}
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
In class DefaultGlueColumnStatisticsProvider, method getTableColumnStatistics.
/**
 * Fetches the Glue column statistics for the columns returned by {@code getAllColumns(table)}.
 *
 * <p>The column names are split into pages of {@code GLUE_COLUMN_READ_STAT_PAGE_SIZE} and one
 * asynchronous Glue request per page is submitted to {@code readExecutor}. Each returned entry
 * is converted with {@code fromGlueColumnStatistics}, which also receives the row count taken
 * from the table parameters.
 *
 * @param table the table whose column statistics are requested
 * @return converted column statistics keyed by column name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} wrapping any {@link RuntimeException}
 *         raised while submitting, awaiting or converting the results
 */
@Override
public Map<String, HiveColumnStatistics> getTableColumnStatistics(Table table) {
    try {
        List<String> allColumnNames = getAllColumns(table);
        List<List<String>> columnPages = Lists.partition(allColumnNames, GLUE_COLUMN_READ_STAT_PAGE_SIZE);

        // All page requests are submitted up front; results are awaited afterwards in page order.
        List<CompletableFuture<GetColumnStatisticsForTableResult>> pendingPages = columnPages.stream()
                .map(columnPage -> supplyAsync(() -> {
                    GetColumnStatisticsForTableRequest pageRequest = new GetColumnStatisticsForTableRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(table.getDatabaseName())
                            .withTableName(table.getTableName())
                            .withColumnNames(columnPage);
                    return stats.getGetColumnStatisticsForTable().call(() -> glueClient.getColumnStatisticsForTable(pageRequest));
                }, readExecutor))
                .collect(toImmutableList());

        HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());
        ImmutableMap.Builder<String, HiveColumnStatistics> statisticsByColumn = ImmutableMap.builder();
        for (CompletableFuture<GetColumnStatisticsForTableResult> pendingPage : pendingPages) {
            getFutureValue(pendingPage, TrinoException.class)
                    .getColumnStatisticsList()
                    .forEach(glueStatistics -> statisticsByColumn.put(
                            glueStatistics.getColumnName(),
                            fromGlueColumnStatistics(glueStatistics.getStatisticsData(), basicStatistics.getRowCount())));
        }
        return statisticsByColumn.buildOrThrow();
    } catch (RuntimeException ex) {
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
In class DefaultGlueColumnStatisticsProvider, method updatePartitionStatistics.
/**
 * Writes the supplied column statistics to Glue for each partition in the update set and deletes
 * statistics for columns that currently have an entry but are absent from the update.
 *
 * <p>The current statistics of all affected partitions are read first. For every update the new
 * statistics are converted to Glue form, filtered through {@code isGlueWritable}, split into
 * pages of {@code GLUE_COLUMN_WRITE_STAT_PAGE_SIZE} and submitted asynchronously to
 * {@code writeExecutor}; one asynchronous delete is submitted per removed column. The method
 * then waits for all submitted operations together.
 *
 * @param partitionStatisticsUpdates the partitions and the column statistics to store for them
 * @throws TrinoException with {@code HIVE_PARTITION_NOT_FOUND} when waiting fails with a
 *         {@link RuntimeException} whose cause is an {@code EntityNotFoundException}, otherwise
 *         with {@code HIVE_METASTORE_ERROR} for any other {@link RuntimeException} while waiting
 */
@Override
public void updatePartitionStatistics(Set<PartitionStatisticsUpdate> partitionStatisticsUpdates) {
    List<Partition> targetPartitions = partitionStatisticsUpdates.stream()
            .map(PartitionStatisticsUpdate::getPartition)
            .collect(toImmutableList());
    Map<Partition, Map<String, HiveColumnStatistics>> existingStatistics = getPartitionColumnStatistics(targetPartitions);

    List<CompletableFuture<Void>> pendingWrites = new ArrayList<>();
    for (PartitionStatisticsUpdate statisticsUpdate : partitionStatisticsUpdates) {
        Partition partition = statisticsUpdate.getPartition();
        Map<String, HiveColumnStatistics> requestedStatistics = statisticsUpdate.getColumnStatistics();
        HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partition.getParameters());

        List<ColumnStatistics> writableStatistics = toGlueColumnStatistics(partition, requestedStatistics, basicStatistics.getRowCount()).stream()
                .filter(this::isGlueWritable)
                .collect(toUnmodifiableList());

        // Updates are sent one page at a time.
        for (List<ColumnStatistics> statisticsPage : Lists.partition(writableStatistics, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE)) {
            pendingWrites.add(runAsync(
                    () -> stats.getUpdateColumnStatisticsForPartition().call(() -> glueClient.updateColumnStatisticsForPartition(
                            new UpdateColumnStatisticsForPartitionRequest()
                                    .withCatalogId(catalogId)
                                    .withDatabaseName(partition.getDatabaseName())
                                    .withTableName(partition.getTableName())
                                    .withPartitionValues(partition.getValues())
                                    .withColumnStatisticsList(statisticsPage))),
                    writeExecutor));
        }

        // Columns that have statistics now but are missing from the update get their statistics deleted.
        Set<String> staleColumns = difference(existingStatistics.get(partition).keySet(), requestedStatistics.keySet());
        for (String staleColumn : staleColumns) {
            pendingWrites.add(runAsync(
                    () -> stats.getDeleteColumnStatisticsForPartition().call(() -> glueClient.deleteColumnStatisticsForPartition(
                            new DeleteColumnStatisticsForPartitionRequest()
                                    .withCatalogId(catalogId)
                                    .withDatabaseName(partition.getDatabaseName())
                                    .withTableName(partition.getTableName())
                                    .withPartitionValues(partition.getValues())
                                    .withColumnName(staleColumn))),
                    writeExecutor));
        }
    }

    try {
        getFutureValue(allOf(pendingWrites.toArray(CompletableFuture[]::new)));
    } catch (RuntimeException ex) {
        Throwable cause = ex.getCause();
        // instanceof is false for null, so no separate null check is required.
        if (cause instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, cause);
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
In class DefaultGlueColumnStatisticsProvider, method getPartitionColumnStatistics.
/**
 * Fetches the Glue column statistics for each of the given partitions.
 *
 * <p>For every partition the column list is split into pages of
 * {@code GLUE_COLUMN_READ_STAT_PAGE_SIZE} and one asynchronous Glue request per page is submitted
 * to {@code readExecutor}. After all requests have been submitted, the results are awaited and
 * converted with {@code fromGlueColumnStatistics}, which also receives the row count taken from
 * the partition parameters.
 *
 * @param partitions the partitions whose column statistics are requested
 * @return for each partition, its converted column statistics keyed by column name
 * @throws TrinoException with {@code HIVE_PARTITION_NOT_FOUND} when collecting the results fails
 *         with a {@link RuntimeException} whose cause is an {@code EntityNotFoundException},
 *         otherwise with {@code HIVE_METASTORE_ERROR} for any other {@link RuntimeException}
 *         raised while collecting
 */
@Override
public Map<Partition, Map<String, HiveColumnStatistics>> getPartitionColumnStatistics(Collection<Partition> partitions) {
    Map<Partition, List<CompletableFuture<GetColumnStatisticsForPartitionResult>>> pendingByPartition = new HashMap<>();
    for (Partition partition : partitions) {
        List<CompletableFuture<GetColumnStatisticsForPartitionResult>> pendingPages = Lists.partition(partition.getColumns(), GLUE_COLUMN_READ_STAT_PAGE_SIZE).stream()
                .map(columnPage -> {
                    List<String> pageColumnNames = columnPage.stream()
                            .map(Column::getName)
                            .collect(toImmutableList());
                    GetColumnStatisticsForPartitionRequest pageRequest = new GetColumnStatisticsForPartitionRequest()
                            .withCatalogId(catalogId)
                            .withDatabaseName(partition.getDatabaseName())
                            .withTableName(partition.getTableName())
                            .withColumnNames(pageColumnNames)
                            .withPartitionValues(partition.getValues());
                    return supplyAsync(() -> stats.getGetColumnStatisticsForPartition().call(() -> glueClient.getColumnStatisticsForPartition(pageRequest)), readExecutor);
                })
                .collect(toImmutableList());
        pendingByPartition.put(partition, pendingPages);
    }

    try {
        ImmutableMap.Builder<Partition, Map<String, HiveColumnStatistics>> statisticsByPartition = ImmutableMap.builder();
        for (Map.Entry<Partition, List<CompletableFuture<GetColumnStatisticsForPartitionResult>>> pendingEntry : pendingByPartition.entrySet()) {
            Partition partition = pendingEntry.getKey();
            // The row count comes from the partition's own parameters.
            HiveBasicStatistics partitionBasicStatistics = getHiveBasicStatistics(partition.getParameters());
            ImmutableMap.Builder<String, HiveColumnStatistics> statisticsByColumn = ImmutableMap.builder();
            for (CompletableFuture<GetColumnStatisticsForPartitionResult> pendingPage : pendingEntry.getValue()) {
                GetColumnStatisticsForPartitionResult pageResult = getFutureValue(pendingPage);
                for (ColumnStatistics glueStatistics : pageResult.getColumnStatisticsList()) {
                    statisticsByColumn.put(
                            glueStatistics.getColumnName(),
                            fromGlueColumnStatistics(glueStatistics.getStatisticsData(), partitionBasicStatistics.getRowCount()));
                }
            }
            statisticsByPartition.put(partition, statisticsByColumn.buildOrThrow());
        }
        return statisticsByPartition.buildOrThrow();
    } catch (RuntimeException ex) {
        Throwable cause = ex.getCause();
        // instanceof is false for null, so no separate null check is required.
        if (cause instanceof EntityNotFoundException) {
            throw new TrinoException(HIVE_PARTITION_NOT_FOUND, cause);
        }
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Use of io.trino.plugin.hive.HiveBasicStatistics in project trino by trinodb.
In class DefaultGlueColumnStatisticsProvider, method updateTableColumnStatistics.
/**
 * Writes the supplied column statistics for {@code table} to Glue and deletes statistics for
 * columns that currently have an entry but are absent from the update.
 *
 * <p>The new statistics are converted to Glue form using the row count from the table
 * parameters, filtered through {@code isGlueWritable}, split into pages of
 * {@code GLUE_COLUMN_WRITE_STAT_PAGE_SIZE} and submitted asynchronously to {@code writeExecutor}.
 * The current statistics are then read via {@code getTableColumnStatistics}, one asynchronous
 * delete is submitted per removed column, and the method waits for all operations together.
 *
 * @param table the table whose column statistics are replaced
 * @param updatedTableColumnStatistics the column statistics to store, keyed by column name
 * @throws TrinoException with {@code HIVE_METASTORE_ERROR} wrapping any {@link RuntimeException}
 *         raised along the way
 */
@Override
public void updateTableColumnStatistics(Table table, Map<String, HiveColumnStatistics> updatedTableColumnStatistics) {
    try {
        HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters());
        List<ColumnStatistics> writableStatistics = toGlueColumnStatistics(table, updatedTableColumnStatistics, basicStatistics.getRowCount()).stream()
                .filter(this::isGlueWritable)
                .collect(toUnmodifiableList());

        ImmutableList.Builder<CompletableFuture<Void>> pendingOperations = ImmutableList.builder();

        // Update requests are submitted before the current statistics are read.
        for (List<ColumnStatistics> statisticsPage : Lists.partition(writableStatistics, GLUE_COLUMN_WRITE_STAT_PAGE_SIZE)) {
            pendingOperations.add(runAsync(
                    () -> stats.getUpdateColumnStatisticsForTable().call(() -> glueClient.updateColumnStatisticsForTable(
                            new UpdateColumnStatisticsForTableRequest()
                                    .withCatalogId(catalogId)
                                    .withDatabaseName(table.getDatabaseName())
                                    .withTableName(table.getTableName())
                                    .withColumnStatisticsList(statisticsPage))),
                    writeExecutor));
        }

        // Columns that have statistics now but are missing from the update get their statistics deleted.
        Map<String, HiveColumnStatistics> existingStatistics = getTableColumnStatistics(table);
        Set<String> staleColumns = difference(existingStatistics.keySet(), updatedTableColumnStatistics.keySet());
        for (String staleColumn : staleColumns) {
            pendingOperations.add(runAsync(
                    () -> stats.getDeleteColumnStatisticsForTable().call(() -> glueClient.deleteColumnStatisticsForTable(
                            new DeleteColumnStatisticsForTableRequest()
                                    .withCatalogId(catalogId)
                                    .withDatabaseName(table.getDatabaseName())
                                    .withTableName(table.getTableName())
                                    .withColumnName(staleColumn))),
                    writeExecutor));
        }

        getFutureValue(allOf(pendingOperations.build().toArray(CompletableFuture[]::new)));
    } catch (RuntimeException ex) {
        throw new TrinoException(HIVE_METASTORE_ERROR, ex);
    }
}
Aggregations