use of io.trino.spi.statistics.ColumnStatisticType in project trino by trinodb.
the class StatisticsAggregationPlanner method createStatisticsAggregation.
/**
 * Plans the aggregations that collect the statistics requested by {@code statisticsMetadata}.
 *
 * <p>Every grouping column is registered with its symbol, each requested table statistic gets a
 * {@code count} aggregation, and each requested column statistic gets the aggregation produced by
 * {@code createColumnAggregation}. The descriptor records which output symbol carries which statistic.
 *
 * @param statisticsMetadata grouping columns, table statistics and column statistics to collect
 * @param columnToSymbolMap lookup from column name to the symbol carrying that column
 * @return the planned aggregations together with the descriptor of their output symbols
 * @throws TrinoException with {@code NOT_SUPPORTED} when a table statistic other than {@code ROW_COUNT} is requested
 */
public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, Symbol> columnToSymbolMap) {
    StatisticAggregationsDescriptor.Builder<Symbol> descriptorBuilder = StatisticAggregationsDescriptor.builder();

    // Resolve all grouping symbols first, then register them pairwise with their column names.
    List<String> groupingColumnNames = statisticsMetadata.getGroupingColumns();
    List<Symbol> groupingSymbols = groupingColumnNames.stream()
            .map(columnToSymbolMap::get)
            .collect(toImmutableList());
    int position = 0;
    for (String groupingColumnName : groupingColumnNames) {
        descriptorBuilder.addGrouping(groupingColumnName, groupingSymbols.get(position));
        position++;
    }

    ImmutableMap.Builder<Symbol, AggregationNode.Aggregation> aggregationsBySymbol = ImmutableMap.builder();

    // Table-wide statistics: only the row count is accepted.
    for (TableStatisticType tableStatistic : statisticsMetadata.getTableStatistics()) {
        if (tableStatistic != ROW_COUNT) {
            throw new TrinoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + tableStatistic);
        }
        AggregationNode.Aggregation countAggregation = new AggregationNode.Aggregation(
                metadata.resolveFunction(session, QualifiedName.of("count"), ImmutableList.of()),
                ImmutableList.of(),
                false,
                Optional.empty(),
                Optional.empty(),
                Optional.empty());
        Symbol rowCountSymbol = symbolAllocator.newSymbol("rowCount", BIGINT);
        aggregationsBySymbol.put(rowCountSymbol, countAggregation);
        descriptorBuilder.addTableStatistic(ROW_COUNT, rowCountSymbol);
    }

    // Per-column statistics: one aggregation and one freshly allocated output symbol each.
    for (ColumnStatisticMetadata requestedStatistic : statisticsMetadata.getColumnStatistics()) {
        String column = requestedStatistic.getColumnName();
        ColumnStatisticType kind = requestedStatistic.getStatisticType();

        Symbol sourceSymbol = columnToSymbolMap.get(column);
        verifyNotNull(sourceSymbol, "inputSymbol is null");
        Type sourceType = symbolAllocator.getTypes().get(sourceSymbol);
        verifyNotNull(sourceType, "inputType is null for symbol: %s", sourceSymbol);

        ColumnStatisticsAggregation columnAggregation = createColumnAggregation(kind, sourceSymbol, sourceType);
        Symbol outputSymbol = symbolAllocator.newSymbol(kind + ":" + column, columnAggregation.getOutputType());
        aggregationsBySymbol.put(outputSymbol, columnAggregation.getAggregation());
        descriptorBuilder.addColumnStatistic(requestedStatistic, outputSymbol);
    }

    StatisticAggregations statisticAggregations = new StatisticAggregations(aggregationsBySymbol.buildOrThrow(), groupingSymbols);
    return new TableStatisticAggregation(statisticAggregations, descriptorBuilder.build());
}
use of io.trino.spi.statistics.ColumnStatisticType in project trino by trinodb.
the class Statistics method createColumnStatisticsForEmptyPartition.
/**
 * Builds the {@code HiveColumnStatistics} used for a column of an empty partition.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes statistic types to fill in for the column
 * @return the statistics built after {@code setColumnStatisticsForEmptyPartition} has been applied
 *         once per requested statistic type
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder statisticsBuilder = HiveColumnStatistics.builder();
    columnStatisticTypes.forEach(statisticType -> setColumnStatisticsForEmptyPartition(columnType, statisticsBuilder, statisticType));
    return statisticsBuilder.build();
}
use of io.trino.spi.statistics.ColumnStatisticType in project trino by trinodb.
the class TestStatisticAggregationsDescriptor method testColumnStatisticMetadataKeySerializationRoundTrip.
/**
 * Checks that a {@code ColumnStatisticMetadata} built from every combination of test column and
 * statistic type is equal to itself after being serialized and deserialized again.
 */
@Test
public void testColumnStatisticMetadataKeySerializationRoundTrip() {
    for (String columnName : COLUMNS) {
        for (ColumnStatisticType statisticType : ColumnStatisticType.values()) {
            ColumnStatisticMetadata original = new ColumnStatisticMetadata(columnName, statisticType);
            assertEquals(
                    deserialize(serialize(original)),
                    original);
        }
    }
}
use of io.trino.spi.statistics.ColumnStatisticType in project trino by trinodb.
the class TestStatisticAggregationsDescriptor method createTestDescriptor.
/**
 * Builds a descriptor fixture: for each test column, one column statistic per
 * {@code ColumnStatisticType} followed by a grouping entry, and finally a single
 * {@code ROW_COUNT} table statistic. Every entry receives its own symbol from {@code testSymbol}.
 *
 * @return the populated descriptor
 */
private static StatisticAggregationsDescriptor<Symbol> createTestDescriptor() {
    StatisticAggregationsDescriptor.Builder<Symbol> descriptor = StatisticAggregationsDescriptor.builder();
    SymbolAllocator allocator = new SymbolAllocator();
    for (String columnName : COLUMNS) {
        for (ColumnStatisticType statisticType : ColumnStatisticType.values()) {
            ColumnStatisticMetadata statistic = new ColumnStatisticMetadata(columnName, statisticType);
            Symbol statisticSymbol = testSymbol(allocator);
            descriptor.addColumnStatistic(statistic, statisticSymbol);
        }
        // The grouping symbol is allocated only after all statistic symbols of this column.
        Symbol groupingSymbol = testSymbol(allocator);
        descriptor.addGrouping(columnName, groupingSymbol);
    }
    Symbol rowCountSymbol = testSymbol(allocator);
    descriptor.addTableStatistic(ROW_COUNT, rowCountSymbol);
    return descriptor.build();
}
use of io.trino.spi.statistics.ColumnStatisticType in project trino by trinodb.
the class HiveMetadata method finishStatisticsCollection.
/**
 * Stores the statistics computed for a table in the metastore.
 *
 * <p>For a table without partition columns, the statistics mapped to the empty partition-value
 * list are written as table statistics. Otherwise statistics are written per partition: the
 * partitions are the analyze partition values carried by the handle when present, else all
 * partition names known to the metastore. Partitions with no computed statistics receive
 * the shared "empty partition" statistics.
 *
 * @param session session used to look up the timestamp precision
 * @param tableHandle handle of the table; cast to {@code HiveTableHandle}
 * @param computedStatistics statistics computed for the table, each of which must match a partition being updated
 * @throws TableNotFoundException if the metastore does not return the table or its partition names
 */
@Override
public void finishStatisticsCollection(ConnectorSession session, ConnectorTableHandle tableHandle, Collection<ComputedStatistics> computedStatistics) {
    HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
    SchemaTableName schemaTableName = hiveTableHandle.getSchemaTableName();
    Table table = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
            .orElseThrow(() -> new TableNotFoundException(hiveTableHandle.getSchemaTableName()));

    List<Column> partitionColumns = table.getPartitionColumns();
    List<String> partitionColumnNames = partitionColumns.stream()
            .map(Column::getName)
            .collect(toImmutableList());

    HiveTimestampPrecision precision = getTimestampPrecision(session);
    List<HiveColumnHandle> allColumns = hiveColumnHandles(table, typeManager, precision);
    // Types of all non-hidden columns, keyed by column name.
    Map<String, Type> columnTypes = allColumns.stream()
            .filter(column -> !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager, precision)));
    Map<List<String>, ComputedStatistics> statisticsByPartition = createComputedStatisticsToPartitionMap(computedStatistics, partitionColumnNames, columnTypes);

    if (partitionColumns.isEmpty()) {
        // Unpartitioned table: the statistics are looked up under the empty partition-value list.
        metastore.setTableStatistics(table, createPartitionStatistics(columnTypes, statisticsByPartition.get(ImmutableList.<String>of())));
        return;
    }

    // Partitioned table: decide which partitions receive statistics.
    List<List<String>> partitionsToUpdate;
    if (hiveTableHandle.getAnalyzePartitionValues().isPresent()) {
        partitionsToUpdate = hiveTableHandle.getAnalyzePartitionValues().get();
    } else {
        partitionsToUpdate = metastore.getPartitionNames(hiveTableHandle.getSchemaName(), hiveTableHandle.getTableName())
                .orElseThrow(() -> new TableNotFoundException(hiveTableHandle.getSchemaTableName()))
                .stream()
                .map(HiveUtil::toPartitionValues)
                .collect(toImmutableList());
    }

    // Statistic types the metastore supports for each non-partition, non-hidden column.
    Map<String, Set<ColumnStatisticType>> supportedStatisticTypes = allColumns.stream()
            .filter(column -> !partitionColumnNames.contains(column.getName()) && !column.isHidden())
            .collect(toImmutableMap(HiveColumnHandle::getName, column -> ImmutableSet.copyOf(metastore.getSupportedColumnStatistics(column.getType()))));
    // Memoized so the empty statistics are built at most once, and only if some partition needs them.
    Supplier<PartitionStatistics> emptyStatistics = Suppliers.memoize(() -> createEmptyPartitionStatistics(columnTypes, supportedStatisticTypes));

    ImmutableMap.Builder<List<String>, PartitionStatistics> statisticsUpdates = ImmutableMap.builder();
    int matchedStatistics = 0;
    for (List<String> partitionValues : partitionsToUpdate) {
        ComputedStatistics collected = statisticsByPartition.get(partitionValues);
        if (collected != null) {
            matchedStatistics++;
            statisticsUpdates.put(partitionValues, createPartitionStatistics(columnTypes, collected));
        } else {
            statisticsUpdates.put(partitionValues, emptyStatistics.get());
        }
    }
    // Every computed statistics entry has to correspond to one of the partitions being updated.
    verify(matchedStatistics == computedStatistics.size(), "All computed statistics must be used");
    metastore.setPartitionStatistics(table, statisticsUpdates.buildOrThrow());
}
Aggregations