Search in sources :

Example 6 with DeltaLakeColumnHandle

use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb.

the class HiveMetastoreBackedDeltaLakeMetastore method createZeroStatistics.

/**
 * Builds table statistics describing an empty table.
 *
 * <p>The row count is set to zero and every supplied column is given column
 * statistics whose nulls fraction and distinct values count are both zero.
 *
 * @param columns the columns to attach zeroed column statistics to
 * @return table statistics with a zero row count and one zeroed entry per column
 */
private TableStatistics createZeroStatistics(List<DeltaLakeColumnHandle> columns) {
    TableStatistics.Builder tableStatistics = new TableStatistics.Builder();
    tableStatistics.setRowCount(Estimate.of(0));
    for (DeltaLakeColumnHandle columnHandle : columns) {
        // A fresh ColumnStatistics instance is built for each column.
        tableStatistics.setColumnStatistics(
                columnHandle,
                ColumnStatistics.builder()
                        .setNullsFraction(Estimate.of(0))
                        .setDistinctValuesCount(Estimate.of(0))
                        .build());
    }
    return tableStatistics.build();
}
Also used : DeltaLakeColumnStatistics(io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle)

Example 7 with DeltaLakeColumnHandle

use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb.

the class BenchmarkDeltaLakeStatistics method benchmark.

/**
 * Looks up the maximum value, minimum value and null count of randomly chosen
 * columns in every file statistics entry of the benchmark data.
 *
 * <p>For each file statistics entry, {@code benchmarkData.queries} lookups are
 * performed; each lookup picks a column index from {@code benchmarkData.random}.
 * All looked-up values are folded into a running sum seeded with 1
 * (presumably so the lookups cannot be optimized away - confirm against JMH guidance).
 *
 * @param benchmarkData the benchmark state supplying file statistics, columns and the random source
 * @return the accumulated sum of all looked-up values
 */
@Benchmark
public long benchmark(BenchmarkData benchmarkData) {
    long checksum = 1;
    for (DeltaLakeFileStatistics fileStatistics : benchmarkData.fileStatistics) {
        for (int query = 0; query < benchmarkData.queries; query++) {
            int columnIndex = benchmarkData.random.nextInt(benchmarkData.columnsCount);
            DeltaLakeColumnHandle handle = benchmarkData.columns.get(columnIndex);

            // The Optional results are unwrapped unconditionally, exactly as the lookups are issued.
            long maxValue = (long) fileStatistics.getMaxColumnValue(handle).get();
            long minValue = (long) fileStatistics.getMinColumnValue(handle).get();
            long nullCount = fileStatistics.getNullCount(handle.getName()).get();

            checksum += maxValue;
            checksum += minValue;
            checksum += nullCount;
        }
    }
    return checksum;
}
Also used : DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Example 8 with DeltaLakeColumnHandle

use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb.

the class TestDeltaLakeFileStatistics method testStatisticsValues.

/**
 * Asserts the record count and the per-column minimum values reported by
 * {@code fileStatistics} against fixed expected values.
 *
 * <p>Row, array, map, boolean and varbinary columns are expected to report no
 * minimum value ({@code Optional.empty()}).
 *
 * @param fileStatistics the parsed file statistics under test
 */
private static void testStatisticsValues(DeltaLakeFileStatistics fileStatistics) {
    assertEquals(fileStatistics.getNumRecords(), Optional.of(1L));

    // Numeric and date columns.
    DeltaLakeColumnHandle tinyintColumn = new DeltaLakeColumnHandle("byt", TINYINT, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(tinyintColumn), Optional.of(42L));

    DeltaLakeColumnHandle dateColumn = new DeltaLakeColumnHandle("dat", DATE, REGULAR);
    assertEquals(
            fileStatistics.getMinColumnValue(dateColumn),
            Optional.of(LocalDate.parse("5000-01-01").toEpochDay()));

    DeltaLakeColumnHandle longDecimalColumn = new DeltaLakeColumnHandle("dec_long", DecimalType.createDecimalType(25, 3), REGULAR);
    assertEquals(
            fileStatistics.getMinColumnValue(longDecimalColumn),
            Optional.of(encodeScaledValue(new BigDecimal("999999999999.123"), 3)));

    DeltaLakeColumnHandle shortDecimalColumn = new DeltaLakeColumnHandle("dec_short", DecimalType.createDecimalType(5, 1), REGULAR);
    assertEquals(
            fileStatistics.getMinColumnValue(shortDecimalColumn),
            Optional.of(new BigDecimal("10.1").unscaledValue().longValueExact()));

    DeltaLakeColumnHandle doubleColumn = new DeltaLakeColumnHandle("dou", DoubleType.DOUBLE, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(doubleColumn), Optional.of(0.321));

    DeltaLakeColumnHandle realColumn = new DeltaLakeColumnHandle("fl", REAL, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(realColumn), Optional.of((long) floatToIntBits(0.123f)));

    DeltaLakeColumnHandle integerColumn = new DeltaLakeColumnHandle("in", INTEGER, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(integerColumn), Optional.of(20000000L));

    DeltaLakeColumnHandle bigintColumn = new DeltaLakeColumnHandle("l", BIGINT, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(bigintColumn), Optional.of(10000000L));

    // Structural columns: no minimum value is expected.
    Type unboundedVarchar = VarcharType.createUnboundedVarcharType();
    Type rowType = RowType.rowType(RowType.field("s1", INTEGER), RowType.field("s3", unboundedVarchar));
    DeltaLakeColumnHandle rowColumn = new DeltaLakeColumnHandle("row", rowType, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(rowColumn), Optional.empty());

    DeltaLakeColumnHandle arrayColumn = new DeltaLakeColumnHandle("arr", new ArrayType(INTEGER), REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(arrayColumn), Optional.empty());

    Type mapType = new MapType(INTEGER, unboundedVarchar, new TypeOperators());
    DeltaLakeColumnHandle mapColumn = new DeltaLakeColumnHandle("m", mapType, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(mapColumn), Optional.empty());

    // Remaining scalar columns.
    DeltaLakeColumnHandle smallintColumn = new DeltaLakeColumnHandle("sh", SMALLINT, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(smallintColumn), Optional.of(123L));

    DeltaLakeColumnHandle varcharColumn = new DeltaLakeColumnHandle("str", unboundedVarchar, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(varcharColumn), Optional.of(utf8Slice("a")));

    DeltaLakeColumnHandle timestampColumn = new DeltaLakeColumnHandle("ts", TIMESTAMP_TZ_MILLIS, REGULAR);
    assertEquals(
            fileStatistics.getMinColumnValue(timestampColumn),
            Optional.of(packDateTimeWithZone(LocalDateTime.parse("2960-10-31T01:00:00.000").toInstant(UTC).toEpochMilli(), UTC_KEY)));

    // Boolean and binary columns: no minimum value is expected.
    DeltaLakeColumnHandle booleanColumn = new DeltaLakeColumnHandle("bool", BOOLEAN, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(booleanColumn), Optional.empty());

    DeltaLakeColumnHandle varbinaryColumn = new DeltaLakeColumnHandle("bin", VARBINARY, REGULAR);
    assertEquals(fileStatistics.getMinColumnValue(varbinaryColumn), Optional.empty());
}
Also used : ArrayType(io.trino.spi.type.ArrayType) RowType(io.trino.spi.type.RowType) DecimalType(io.trino.spi.type.DecimalType) DoubleType(io.trino.spi.type.DoubleType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) MapType(io.trino.spi.type.MapType) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) BigDecimal(java.math.BigDecimal) TypeOperators(io.trino.spi.type.TypeOperators)

Example 9 with DeltaLakeColumnHandle

use of io.trino.plugin.deltalake.DeltaLakeColumnHandle in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsParquetParsedStatisticsNullCount.

/**
 * Checks the row count and the nulls fraction of column {@code i} reported for the
 * {@code parquet_struct_statistics_null_count} table.
 */
@Test
public void testStatisticsParquetParsedStatisticsNullCount() {
    // The transaction log for this table was created so that the checkpoints only write struct statistics, not json statistics
    // The table has one INTEGER column 'i' where 3 of the 9 values are null
    DeltaLakeTableHandle table = registerTable("parquet_struct_statistics_null_count");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, table, Constraint.alwaysTrue());
    assertEquals(tableStatistics.getRowCount(), Estimate.of(9));

    DeltaLakeColumnHandle integerColumn = new DeltaLakeColumnHandle("i", INTEGER, REGULAR);
    Map<ColumnHandle, ColumnStatistics> statisticsByColumn = tableStatistics.getColumnStatistics();
    ColumnStatistics integerColumnStatistics = statisticsByColumn.get(integerColumn);
    assertEquals(integerColumnStatistics.getNullsFraction(), Estimate.of(3.0 / 9.0));
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Aggregations

DeltaLakeColumnHandle (io.trino.plugin.deltalake.DeltaLakeColumnHandle) 9, ColumnStatistics (io.trino.spi.statistics.ColumnStatistics) 5, TableStatistics (io.trino.spi.statistics.TableStatistics) 5, Test (org.testng.annotations.Test) 5, DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle) 4, ColumnHandle (io.trino.spi.connector.ColumnHandle) 3, ArrayType (io.trino.spi.type.ArrayType) 3, MapType (io.trino.spi.type.MapType) 3, RowType (io.trino.spi.type.RowType) 3, TypeOperators (io.trino.spi.type.TypeOperators) 3, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 2, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 2, REGULAR (io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) 2, DeltaLakeColumnStatistics (io.trino.plugin.deltalake.statistics.DeltaLakeColumnStatistics) 2, ColumnMetadata (io.trino.spi.connector.ColumnMetadata) 2, IOException (java.io.IOException) 2, URL (java.net.URL) 2, List (java.util.List) 2, Optional (java.util.Optional) 2, JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException) 1