Use of io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE in project trino by trinodb.
From the class DeltaLakeMetadata, method getStatisticsCollectionMetadata:
@Override
public TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) {
    ImmutableSet.Builder<ColumnStatisticMetadata> columnStatistics = ImmutableSet.builder();
    Optional<Set<String>> analyzeColumnNames = DeltaLakeTableProperties.getAnalyzeColumns(tableMetadata.getProperties());
    tableMetadata.getColumns().stream()
            .filter(DeltaLakeMetadata::shouldCollectExtendedStatistics)
            .filter(columnMetadata -> analyzeColumnNames.map(columnNames -> columnNames.contains(columnMetadata.getName())).orElse(true))
            .map(columnMetadata -> new ColumnStatisticMetadata(columnMetadata.getName(), NUMBER_OF_DISTINCT_VALUES_SUMMARY))
            .forEach(columnStatistics::add);
    // collect max(file modification time) for sake of incremental ANALYZE
    columnStatistics.add(new ColumnStatisticMetadata(FILE_MODIFIED_TIME_COLUMN_NAME, MAX_VALUE));
    return new TableStatisticsMetadata(columnStatistics.build(), ImmutableSet.of(), ImmutableList.of());
}
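The analyzeColumnNames filter above decides which columns get statistics: an empty Optional means ANALYZE was run without a column list, so every eligible column is collected, while a present set restricts collection to the listed columns. The following is a minimal, self-contained sketch of that same filtering idiom in plain Java, with no Trino runtime; the column names and the filter helper are made up for illustration.

import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

public class AnalyzeColumnFilterSketch {
    public static void main(String[] args) {
        List<String> allColumns = List.of("order_key", "ship_date", "comment"); // hypothetical columns

        // Optional.empty() -> ANALYZE without a column list: keep every column
        System.out.println(filter(allColumns, Optional.empty()));                  // [order_key, ship_date, comment]

        // A present set -> only the listed columns are analyzed
        System.out.println(filter(allColumns, Optional.of(Set.of("ship_date"))));  // [ship_date]
    }

    static List<String> filter(List<String> columns, Optional<Set<String>> analyzeColumnNames) {
        return columns.stream()
                .filter(column -> analyzeColumnNames.map(names -> names.contains(column)).orElse(true))
                .collect(Collectors.toList());
    }
}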
Use of io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE in project trino by trinodb.
From the class TestStatistics, method testFromComputedStatistics:
@Test
public void testFromComputedStatistics() {
    Function<Integer, Block> singleIntegerValueBlock = value -> BigintType.BIGINT.createBlockBuilder(null, 1).writeLong(value).build();
    ComputedStatistics statistics = ComputedStatistics.builder(ImmutableList.of(), ImmutableList.of())
            .addTableStatistic(TableStatisticType.ROW_COUNT, singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MIN_VALUE), singleIntegerValueBlock.apply(1))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", MAX_VALUE), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_DISTINCT_VALUES), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("a_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(5))
            .addColumnStatistic(new ColumnStatisticMetadata("b_column", NUMBER_OF_NON_NULL_VALUES), singleIntegerValueBlock.apply(4))
            .build();
    Map<String, Type> columnTypes = ImmutableMap.of("a_column", INTEGER, "b_column", VARCHAR);
    Map<String, HiveColumnStatistics> columnStatistics = Statistics.fromComputedStatistics(statistics.getColumnStatistics(), columnTypes, 5);
    assertThat(columnStatistics).hasSize(2);
    assertThat(columnStatistics.keySet()).contains("a_column", "b_column");
    assertThat(columnStatistics.get("a_column")).isEqualTo(HiveColumnStatistics.builder()
            .setIntegerStatistics(new IntegerStatistics(OptionalLong.of(1), OptionalLong.of(5)))
            .setNullsCount(0)
            .setDistinctValuesCount(5)
            .build());
    assertThat(columnStatistics.get("b_column")).isEqualTo(HiveColumnStatistics.builder().setNullsCount(1).build());
}
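The single-position blocks built by singleIntegerValueBlock are how MIN_VALUE and MAX_VALUE results are carried as ComputedStatistics values. Below is a minimal sketch, assuming a trino-spi dependency on the classpath and the same SPI version as the test above (which still exposes BlockBuilder.writeLong), of writing one long into a BIGINT block and reading it back through the type.

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.BigintType;

public class SingleValueBlockSketch {
    public static void main(String[] args) {
        // Write a single long into a one-position BIGINT block, as the test's singleIntegerValueBlock lambda does
        BlockBuilder builder = BigintType.BIGINT.createBlockBuilder(null, 1);
        builder.writeLong(5);
        Block block = builder.build();

        // Read the value back through the type at position 0
        long maxValue = BigintType.BIGINT.getLong(block, 0);
        System.out.println(maxValue); // 5
    }
}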