Use of io.trino.spi.statistics.ColumnStatistics in the trino project by trinodb.
From the class MetastoreHiveStatisticsProvider, method calculateDataSize.
/**
 * Estimates the data size of {@code column} across {@code totalRowCount} rows.
 *
 * <p>Only partitions that report a row count and a total size in bytes for the
 * column contribute. Their summed size divided by their summed row count gives an
 * average size per row, which is then multiplied by {@code totalRowCount}.
 *
 * @param column name of the column to look up in each partition's column statistics
 * @param partitionStatistics statistics of the partitions to sample
 * @param totalRowCount row count the average size per row is scaled to
 * @return {@code Estimate.unknown()} when no partition has both values or when the
 *         contributing partitions hold zero rows in total; {@code Estimate.zero()}
 *         when {@code totalRowCount} is zero; otherwise the scaled size
 * @throws VerifyException if a contributing partition has a negative row count or data size
 */
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    // Keep only the partitions for which both the row count and the column's byte size are known.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(candidate -> candidate.getBasicStatistics().getRowCount().isPresent())
            .filter(candidate -> {
                HiveColumnStatistics candidateColumn = candidate.getColumnStatistics().get(column);
                return candidateColumn != null && candidateColumn.getTotalSizeInBytes().isPresent();
            })
            .collect(toImmutableList());

    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long rowsSeen = 0;
    long bytesSeen = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRows = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRows >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics partitionColumn = partition.getColumnStatistics().get(column);
        verifyNotNull(partitionColumn, "columnStatistics is null");

        long partitionBytes = partitionColumn.getTotalSizeInBytes()
                .orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(partitionBytes >= 0, "dataSize must be greater than or equal to zero");

        rowsSeen += partitionRows;
        bytesSeen += partitionBytes;
    }

    // The zero-total check intentionally precedes the zero-sample check.
    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    // No sampled rows: an average size per row cannot be derived.
    if (rowsSeen == 0) {
        return Estimate.unknown();
    }

    double bytesPerRow = ((double) bytesSeen) / rowsSeen;
    return Estimate.of(bytesPerRow * totalRowCount);
}
Use of io.trino.spi.statistics.ColumnStatistics in the trino project by trinodb.
From the class MetastoreHiveStatisticsProvider, method createZeroStatistics.
/**
 * Builds table statistics with a row count of zero and, for every column, a nulls
 * fraction and distinct values count of zero. Columns whose type satisfies
 * {@code hasDataSize} additionally get a data size of zero.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; must contain every key of {@code columns}
 * @return the assembled statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes) {
    TableStatistics.Builder tableBuilder = TableStatistics.builder();
    tableBuilder.setRowCount(Estimate.of(0));

    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String columnName = column.getKey();
        Type type = columnTypes.get(columnName);
        verifyNotNull(type, "columnType is missing for column: %s", columnName);

        ColumnStatistics.Builder columnBuilder = ColumnStatistics.builder();
        columnBuilder.setNullsFraction(Estimate.of(0));
        columnBuilder.setDistinctValuesCount(Estimate.of(0));
        // Data size is only set for types that hasDataSize accepts.
        if (hasDataSize(type)) {
            columnBuilder.setDataSize(Estimate.of(0));
        }

        tableBuilder.setColumnStatistics(column.getValue(), columnBuilder.build());
    }

    return tableBuilder.build();
}
Use of io.trino.spi.statistics.ColumnStatistics in the trino project by trinodb.
From the class TestDeltaLakeMetastoreStatistics, method testStatisticsNaNWithMultipleFiles.
@Test
public void testStatisticsNaNWithMultipleFiles() {
    // NaN statistics are unusable: this transaction mixes a file whose min/max are NaN
    // with a file whose min/max are 0.0, so no range is expected for the column.
    DeltaLakeTableHandle table = registerTable("nan_multi_file");
    ColumnStatistics statisticsForColumn = deltaLakeMetastore
            .getTableStatistics(SESSION, table, Constraint.alwaysTrue())
            .getColumnStatistics()
            .get(COLUMN_HANDLE);
    assertEquals(statisticsForColumn.getRange(), Optional.empty());
}
Use of io.trino.spi.statistics.ColumnStatistics in the trino project by trinodb.
From the class TestDeltaLakeMetastoreStatistics, method testStatisticsZeroAndNegativeInfinity.
@Test
public void testStatisticsZeroAndNegativeInfinity() {
    // The column range is expected to run from negative infinity up to 0.0.
    DeltaLakeTableHandle table = registerTable("zero_negative_infinity");
    ColumnStatistics statisticsForColumn = deltaLakeMetastore
            .getTableStatistics(SESSION, table, Constraint.alwaysTrue())
            .getColumnStatistics()
            .get(COLUMN_HANDLE);
    assertEquals(statisticsForColumn.getRange().get().getMin(), NEGATIVE_INFINITY);
    assertEquals(statisticsForColumn.getRange().get().getMax(), 0.0);
}
Use of io.trino.spi.statistics.ColumnStatistics in the trino project by trinodb.
From the class TestDeltaLakeMetastoreStatistics, method testStatisticsZeroAndNaN.
@Test
public void testStatisticsZeroAndNaN() {
    // NaN statistics cannot be used; the range asserted here is 0.0 to positive infinity.
    DeltaLakeTableHandle table = registerTable("zero_nan");
    ColumnStatistics statisticsForColumn = deltaLakeMetastore
            .getTableStatistics(SESSION, table, Constraint.alwaysTrue())
            .getColumnStatistics()
            .get(COLUMN_HANDLE);
    assertEquals(statisticsForColumn.getRange().get().getMin(), 0.0);
    assertEquals(statisticsForColumn.getRange().get().getMax(), POSITIVE_INFINITY);
}
Aggregations