use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.
the class TestHiveGlueMetastore method testUpdatePartitionedStatisticsOnCreate.
@Test
public void testUpdatePartitionedStatisticsOnCreate() {
SchemaTableName tableName = temporaryTable("update_partitioned_statistics_create");
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT), new ColumnMetadata("part_column", BigintType.BIGINT));
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE, ImmutableList.of("part_column")));
ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
// write data
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT, BigintType.BIGINT).row(1L, 1L).row(2L, 1L).row(3L, 1L).row(4L, 2L).row(5L, 2L).build();
sink.appendPage(data.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
// prepare statistics
ComputedStatistics statistics1 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(1))).addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(3)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(3)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(3)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(3)).build();
ComputedStatistics statistics2 = ComputedStatistics.builder(ImmutableList.of("part_column"), ImmutableList.of(singleValueBlock(2))).addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(2)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(4)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(2)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(2)).build();
// finish CTAS
metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics1, statistics2));
transaction.commit();
} finally {
dropTable(tableName);
}
}
use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.
the class TestHiveGlueMetastore method testUpdateStatisticsOnCreate.
@Test
public void testUpdateStatisticsOnCreate() {
SchemaTableName tableName = temporaryTable("update_statistics_create");
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
List<ColumnMetadata> columns = ImmutableList.of(new ColumnMetadata("a_column", BigintType.BIGINT));
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE));
ConnectorOutputTableHandle createTableHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
// write data
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, createTableHandle);
MaterializedResult data = MaterializedResult.resultBuilder(session, BigintType.BIGINT).row(1L).row(2L).row(3L).row(4L).row(5L).build();
sink.appendPage(data.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
// prepare statistics
ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of()).addTableStatistic(TableStatisticType.ROW_COUNT, singleValueBlock(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleValueBlock(1)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleValueBlock(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleValueBlock(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleValueBlock(5)).build();
// finish CTAS
metadata.finishCreateTable(session, createTableHandle, fragments, ImmutableList.of(statistics));
transaction.commit();
} finally {
dropTable(tableName);
}
}
use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.
the class DeltaLakeMetadata method getStatisticsCollectionMetadata.
@Override
public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
ImmutableSet.Builder<ColumnStatisticMetadata> columnStatistics = ImmutableSet.builder();
Optional<Set<String>> analyzeColumnNames = DeltaLakeTableProperties.getAnalyzeColumns(tableMetadata.getProperties());
tableMetadata.getColumns().stream().filter(DeltaLakeMetadata::shouldCollectExtendedStatistics).filter(columnMetadata -> analyzeColumnNames.map(columnNames -> columnNames.contains(columnMetadata.getName())).orElse(true)).map(columnMetadata -> new ColumnStatisticMetadata(columnMetadata.getName(), NUMBER_OF_DISTINCT_VALUES_SUMMARY)).forEach(columnStatistics::add);
// collect max(file modification time) for sake of incremental ANALYZE
columnStatistics.add(new ColumnStatisticMetadata(FILE_MODIFIED_TIME_COLUMN_NAME, MAX_VALUE));
return new TableStatisticsMetadata(columnStatistics.build(), ImmutableSet.of(), ImmutableList.of());
}
use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.
the class StatisticsAggregationPlanner method createStatisticsAggregation.
public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map<String, Symbol> columnToSymbolMap) {
StatisticAggregationsDescriptor.Builder<Symbol> descriptor = StatisticAggregationsDescriptor.builder();
List<String> groupingColumns = statisticsMetadata.getGroupingColumns();
List<Symbol> groupingSymbols = groupingColumns.stream().map(columnToSymbolMap::get).collect(toImmutableList());
for (int i = 0; i < groupingSymbols.size(); i++) {
descriptor.addGrouping(groupingColumns.get(i), groupingSymbols.get(i));
}
ImmutableMap.Builder<Symbol, AggregationNode.Aggregation> aggregations = ImmutableMap.builder();
for (TableStatisticType type : statisticsMetadata.getTableStatistics()) {
if (type != ROW_COUNT) {
throw new TrinoException(NOT_SUPPORTED, "Table-wide statistic type not supported: " + type);
}
AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation(metadata.resolveFunction(session, QualifiedName.of("count"), ImmutableList.of()), ImmutableList.of(), false, Optional.empty(), Optional.empty(), Optional.empty());
Symbol symbol = symbolAllocator.newSymbol("rowCount", BIGINT);
aggregations.put(symbol, aggregation);
descriptor.addTableStatistic(ROW_COUNT, symbol);
}
for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) {
String columnName = columnStatisticMetadata.getColumnName();
ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType();
Symbol inputSymbol = columnToSymbolMap.get(columnName);
verifyNotNull(inputSymbol, "inputSymbol is null");
Type inputType = symbolAllocator.getTypes().get(inputSymbol);
verifyNotNull(inputType, "inputType is null for symbol: %s", inputSymbol);
ColumnStatisticsAggregation aggregation = createColumnAggregation(statisticType, inputSymbol, inputType);
Symbol symbol = symbolAllocator.newSymbol(statisticType + ":" + columnName, aggregation.getOutputType());
aggregations.put(symbol, aggregation.getAggregation());
descriptor.addColumnStatistic(columnStatisticMetadata, symbol);
}
StatisticAggregations aggregation = new StatisticAggregations(aggregations.buildOrThrow(), groupingSymbols);
return new TableStatisticAggregation(aggregation, descriptor.build());
}
use of io.trino.spi.statistics.ColumnStatisticMetadata in project trino by trinodb.
the class TestStatistics method testFromComputedStatistics.
@Test
public void testFromComputedStatistics() {
Function<Integer, Block> singleIntegerValueBlock = value -> BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();
ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of()).addTableStatistic(TableStatisticType.ROW_COUNT, singleIntegerValueBlock.apply(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleIntegerValueBlock.apply(1)).addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleIntegerValueBlock.apply(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleIntegerValueBlock.apply(5)).addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(5)).addColumnStatistic(new ColumnStatisticMetadata("b_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(4)).build();
Map<String, Type> columnTypes = ImmutableMap.of("a_column", INTEGER, "b_column", VARCHAR);
Map<String, HiveColumnStatistics> columnStatistics = Statistics.fromComputedStatistics(statistics.getColumnStatistics(), columnTypes, 5);
assertThat(columnStatistics).hasSize(2);
assertThat(columnStatistics.keySet()).contains("a_column", "b_column");
assertThat(columnStatistics.get("a_column")).isEqualTo(HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(OptionalLong.of(1), OptionalLong.of(5))).setNullsCount(0).setDistinctValuesCount(5).build());
assertThat(columnStatistics.get("b_column")).isEqualTo(HiveColumnStatistics.builder().setNullsCount(1).build());
}
Aggregations