Use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb — from class HiveMetastoreBackedDeltaLakeMetastore, method createZeroStatistics.
/**
 * Builds a {@link TableStatistics} describing an empty table: a row count of
 * zero, plus per-column statistics reporting zero nulls fraction and zero
 * distinct values for each of the given column handles.
 */
private TableStatistics createZeroStatistics(List<DeltaLakeColumnHandle> columns) {
    TableStatistics.Builder tableStats = new TableStatistics.Builder()
            .setRowCount(Estimate.of(0));
    columns.forEach(columnHandle -> tableStats.setColumnStatistics(
            columnHandle,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(0))
                    .build()));
    return tableStats.build();
}
Use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb — from class BenchmarkDeltaLakeStatistics, method benchmark.
@Benchmark
/**
 * Exercises min/max/null-count lookups against pre-built file statistics.
 * For every file's statistics, performs {@code queries} lookups on randomly
 * chosen columns and folds the results into a running checksum so the JIT
 * cannot eliminate the calls as dead code.
 */
@Benchmark
public long benchmark(BenchmarkData benchmarkData) {
    long checksum = 1;
    for (DeltaLakeFileStatistics fileStats : benchmarkData.fileStatistics) {
        for (int query = 0; query < benchmarkData.queries; query++) {
            int columnIndex = benchmarkData.random.nextInt(benchmarkData.columnsCount);
            DeltaLakeColumnHandle handle = benchmarkData.columns.get(columnIndex);
            // Benchmark fixture guarantees these Optionals are present.
            checksum += (long) fileStats.getMaxColumnValue(handle).get();
            checksum += (long) fileStats.getMinColumnValue(handle).get();
            checksum += fileStats.getNullCount(handle.getName()).get();
        }
    }
    return checksum;
}
Use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb — from class TestDeltaLakeFileStatistics, method testStatisticsValues.
// Asserts the expected per-column minimum values parsed from a fixture file's
// statistics, covering every supported Delta/Trino type mapping. Note that
// min values are reported in Trino's internal representation (e.g. REAL as
// intBits packed in a long, short DECIMAL as an unscaled long, VARCHAR as a
// Slice), and that complex types (row/array/map) plus BOOLEAN and VARBINARY
// yield no min statistics (Optional.empty()).
private static void testStatisticsValues(DeltaLakeFileStatistics fileStatistics) {
assertEquals(fileStatistics.getNumRecords(), Optional.of(1L));
// Integral types are widened to long internally.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("byt", TINYINT, REGULAR)), Optional.of(42L));
// DATE is represented as epoch days.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("dat", DATE, REGULAR)), Optional.of(LocalDate.parse("5000-01-01").toEpochDay()));
// Long decimal (precision > 18) uses a scaled-value Slice/Int128 encoding.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("dec_long", DecimalType.createDecimalType(25, 3), REGULAR)), Optional.of(encodeScaledValue(new BigDecimal("999999999999.123"), 3)));
// Short decimal is stored as its unscaled long value.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("dec_short", DecimalType.createDecimalType(5, 1), REGULAR)), Optional.of(new BigDecimal("10.1").unscaledValue().longValueExact()));
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("dou", DoubleType.DOUBLE, REGULAR)), Optional.of(0.321));
// REAL is represented as the float's bit pattern widened to long.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("fl", REAL, REGULAR)), Optional.of((long) floatToIntBits(0.123f)));
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("in", INTEGER, REGULAR)), Optional.of(20000000L));
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("l", BIGINT, REGULAR)), Optional.of(10000000L));
// Complex types carry no min/max statistics.
Type rowType = RowType.rowType(RowType.field("s1", INTEGER), RowType.field("s3", VarcharType.createUnboundedVarcharType()));
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("row", rowType, REGULAR)), Optional.empty());
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("arr", new ArrayType(INTEGER), REGULAR)), Optional.empty());
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("m", new MapType(INTEGER, VarcharType.createUnboundedVarcharType(), new TypeOperators()), REGULAR)), Optional.empty());
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("sh", SMALLINT, REGULAR)), Optional.of(123L));
// VARCHAR min is a Slice.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("str", VarcharType.createUnboundedVarcharType(), REGULAR)), Optional.of(utf8Slice("a")));
// TIMESTAMP WITH TIME ZONE is packed millis + zone key.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("ts", TIMESTAMP_TZ_MILLIS, REGULAR)), Optional.of(packDateTimeWithZone(LocalDateTime.parse("2960-10-31T01:00:00.000").toInstant(UTC).toEpochMilli(), UTC_KEY)));
// BOOLEAN and VARBINARY also carry no min/max statistics.
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("bool", BOOLEAN, REGULAR)), Optional.empty());
assertEquals(fileStatistics.getMinColumnValue(new DeltaLakeColumnHandle("bin", VARBINARY, REGULAR)), Optional.empty());
}
Use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb — from class TestDeltaLakeMetastoreStatistics, method testStatisticsParquetParsedStatisticsNullCount.
@Test
public void testStatisticsParquetParsedStatisticsNullCount() {
    // The transaction log for this table was written so that checkpoints carry
    // only struct-form statistics, never json statistics.
    // The table's single INTEGER column 'i' has 3 nulls among its 9 values.
    DeltaLakeTableHandle tableHandle = registerTable("parquet_struct_statistics_null_count");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    assertEquals(tableStatistics.getRowCount(), Estimate.of(9));
    ColumnStatistics statsForColumnI = tableStatistics.getColumnStatistics().get(new DeltaLakeColumnHandle("i", INTEGER, REGULAR));
    assertEquals(statsForColumnI.getNullsFraction(), Estimate.of(3.0 / 9.0));
}
Aggregations