Search in sources :

Example 6 with ColumnStatistics

use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsZeroAndInfinity.

/**
 * Registers the "zero_infinity" table and checks that the range reported for
 * {@code COLUMN_HANDLE} has a minimum of 0.0 and a maximum of {@code POSITIVE_INFINITY}.
 */
@Test
public void testStatisticsZeroAndInfinity() {
    DeltaLakeTableHandle handle = registerTable("zero_infinity");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, handle, Constraint.alwaysTrue());
    ColumnStatistics statsForColumn = tableStatistics.getColumnStatistics().get(COLUMN_HANDLE);

    // Pull both bounds out first so the assertions read as plain expected/actual pairs.
    double lowerBound = statsForColumn.getRange().get().getMin();
    double upperBound = statsForColumn.getRange().get().getMax();

    assertEquals(lowerBound, 0.0);
    assertEquals(upperBound, POSITIVE_INFINITY);
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 7 with ColumnStatistics

use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsMultipleFiles.

/**
 * Registers the "basic_multi_file" table and checks the range reported for
 * {@code COLUMN_HANDLE} twice: first with the handle as registered (expected -42.0 to 42.0),
 * then with a rebuilt handle whose second {@code TupleDomain} argument restricts
 * {@code COLUMN_HANDLE} to the single DOUBLE value 42.0 (expected 0.0 to 42.0).
 */
@Test
public void testStatisticsMultipleFiles() {
    DeltaLakeTableHandle registered = registerTable("basic_multi_file");

    // Statistics for the handle exactly as registered.
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, registered, Constraint.alwaysTrue());
    ColumnStatistics statsForColumn = tableStatistics.getColumnStatistics().get(COLUMN_HANDLE);
    double lowerBound = statsForColumn.getRange().get().getMin();
    double upperBound = statsForColumn.getRange().get().getMax();
    assertEquals(lowerBound, -42.0);
    assertEquals(upperBound, 42.0);

    // Copy every property from the registered handle, swapping in the two TupleDomain arguments.
    // NOTE(review): the second TupleDomain is presumably the unenforced constraint - confirm
    // against the DeltaLakeTableHandle constructor.
    DeltaLakeTableHandle withUnenforcedConstraint = new DeltaLakeTableHandle(
            registered.getSchemaName(),
            registered.getTableName(),
            registered.getLocation(),
            Optional.of(registered.getMetadataEntry()),
            TupleDomain.all(),
            TupleDomain.withColumnDomains(ImmutableMap.of((DeltaLakeColumnHandle) COLUMN_HANDLE, Domain.singleValue(DOUBLE, 42.0))),
            registered.getWriteType(),
            registered.getProjectedColumns(),
            registered.getUpdatedColumns(),
            registered.getUpdateRowIdColumns(),
            registered.getAnalyzeHandle(),
            0);

    // Statistics for the rebuilt handle.
    tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, withUnenforcedConstraint, Constraint.alwaysTrue());
    statsForColumn = tableStatistics.getColumnStatistics().get(COLUMN_HANDLE);
    lowerBound = statsForColumn.getRange().get().getMin();
    upperBound = statsForColumn.getRange().get().getMax();
    assertEquals(lowerBound, 0.0);
    assertEquals(upperBound, 42.0);
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 8 with ColumnStatistics

use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsParquetParsedStatistics.

/**
 * Registers the "parquet_struct_statistics" table, checks that its row count estimate is 9,
 * and then verifies for each listed column that the nulls fraction is zero and that the
 * reported min/max range matches the expected values.
 */
@Test
public void testStatisticsParquetParsedStatistics() {
    // This table's transaction log was produced so that its checkpoints carry only struct statistics, not json statistics
    DeltaLakeTableHandle handle = registerTable("parquet_struct_statistics");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, handle, Constraint.alwaysTrue());
    assertEquals(tableStatistics.getRowCount(), Estimate.of(9));
    Map<ColumnHandle, ColumnStatistics> byColumn = tableStatistics.getColumnStatistics();

    // dec_short
    ColumnStatistics decShort = byColumn.get(new DeltaLakeColumnHandle("dec_short", DecimalType.createDecimalType(5, 1), REGULAR));
    assertEquals(decShort.getNullsFraction(), Estimate.zero());
    assertEquals(decShort.getRange().get().getMin(), -10.1);
    assertEquals(decShort.getRange().get().getMax(), 10.1);

    // dec_long
    ColumnStatistics decLong = byColumn.get(new DeltaLakeColumnHandle("dec_long", DecimalType.createDecimalType(25, 3), REGULAR));
    assertEquals(decLong.getNullsFraction(), Estimate.zero());
    assertEquals(decLong.getRange().get().getMin(), -999999999999.123);
    assertEquals(decLong.getRange().get().getMax(), 999999999999.123);

    // l
    ColumnStatistics bigintStats = byColumn.get(new DeltaLakeColumnHandle("l", BIGINT, REGULAR));
    assertEquals(bigintStats.getNullsFraction(), Estimate.zero());
    assertEquals(bigintStats.getRange().get().getMin(), -10000000.0);
    assertEquals(bigintStats.getRange().get().getMax(), 10000000.0);

    // in
    ColumnStatistics integerStats = byColumn.get(new DeltaLakeColumnHandle("in", INTEGER, REGULAR));
    assertEquals(integerStats.getNullsFraction(), Estimate.zero());
    assertEquals(integerStats.getRange().get().getMin(), -20000000.0);
    assertEquals(integerStats.getRange().get().getMax(), 20000000.0);

    // sh
    ColumnStatistics smallintStats = byColumn.get(new DeltaLakeColumnHandle("sh", SMALLINT, REGULAR));
    assertEquals(smallintStats.getNullsFraction(), Estimate.zero());
    assertEquals(smallintStats.getRange().get().getMin(), -123.0);
    assertEquals(smallintStats.getRange().get().getMax(), 123.0);

    // byt
    ColumnStatistics tinyintStats = byColumn.get(new DeltaLakeColumnHandle("byt", TINYINT, REGULAR));
    assertEquals(tinyintStats.getNullsFraction(), Estimate.zero());
    assertEquals(tinyintStats.getRange().get().getMin(), -42.0);
    assertEquals(tinyintStats.getRange().get().getMax(), 42.0);

    // fl: bounds are narrowed to float before comparing against the float literals
    ColumnStatistics realStats = byColumn.get(new DeltaLakeColumnHandle("fl", REAL, REGULAR));
    assertEquals(realStats.getNullsFraction(), Estimate.zero());
    assertEquals((float) realStats.getRange().get().getMin(), -0.123f);
    assertEquals((float) realStats.getRange().get().getMax(), 0.123f);

    // dou
    ColumnStatistics doubleStats = byColumn.get(new DeltaLakeColumnHandle("dou", DOUBLE, REGULAR));
    assertEquals(doubleStats.getNullsFraction(), Estimate.zero());
    assertEquals(doubleStats.getRange().get().getMin(), -0.321);
    assertEquals(doubleStats.getRange().get().getMax(), 0.321);

    // dat: expected bounds are the epoch-day numbers of the two dates, widened to double
    ColumnStatistics dateStats = byColumn.get(new DeltaLakeColumnHandle("dat", DATE, REGULAR));
    assertEquals(dateStats.getNullsFraction(), Estimate.zero());
    assertEquals(dateStats.getRange().get().getMin(), (double) LocalDate.parse("1900-01-01").toEpochDay());
    assertEquals(dateStats.getRange().get().getMax(), (double) LocalDate.parse("5000-01-01").toEpochDay());
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 9 with ColumnStatistics

use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsInf.

/**
 * Registers the "positive_infinity" table and checks that both ends of the range reported
 * for {@code COLUMN_HANDLE} equal {@code POSITIVE_INFINITY}.
 */
@Test
public void testStatisticsInf() {
    DeltaLakeTableHandle handle = registerTable("positive_infinity");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, handle, Constraint.alwaysTrue());
    ColumnStatistics statsForColumn = tableStatistics.getColumnStatistics().get(COLUMN_HANDLE);

    // Pull both bounds out first so the assertions read as plain expected/actual pairs.
    double lowerBound = statsForColumn.getRange().get().getMin();
    double upperBound = statsForColumn.getRange().get().getMax();

    assertEquals(lowerBound, POSITIVE_INFINITY);
    assertEquals(upperBound, POSITIVE_INFINITY);
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 10 with ColumnStatistics

use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsNaN.

/**
 * Registers the "nan" table and checks that its statistics report a row count estimate of 1,
 * exactly one column-statistics entry, and no range for {@code COLUMN_HANDLE}.
 */
@Test
public void testStatisticsNaN() {
    DeltaLakeTableHandle handle = registerTable("nan");
    TableStatistics tableStatistics = deltaLakeMetastore.getTableStatistics(SESSION, handle, Constraint.alwaysTrue());

    // Table-level expectations: a single row and a single column entry.
    assertEquals(tableStatistics.getRowCount(), Estimate.of(1));
    assertEquals(tableStatistics.getColumnStatistics().size(), 1);

    // Column-level expectation: the range is absent.
    ColumnStatistics statsForColumn = tableStatistics.getColumnStatistics().get(COLUMN_HANDLE);
    assertEquals(statsForColumn.getRange(), Optional.empty());
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Aggregations

ColumnStatistics (io.trino.spi.statistics.ColumnStatistics) 24, TableStatistics (io.trino.spi.statistics.TableStatistics) 23, Test (org.testng.annotations.Test) 15, DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle) 14, ColumnHandle (io.trino.spi.connector.ColumnHandle) 10, Type (io.trino.spi.type.Type) 6, ImmutableMap (com.google.common.collect.ImmutableMap) 5, HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics) 5, CharType (io.trino.spi.type.CharType) 5, VarcharType (io.trino.spi.type.VarcharType) 5, Map (java.util.Map) 5, SchemaTableName (io.trino.spi.connector.SchemaTableName) 4, DecimalType (io.trino.spi.type.DecimalType) 4, Objects.requireNonNull (java.util.Objects.requireNonNull) 4, MoreObjects.toStringHelper (com.google.common.base.MoreObjects.toStringHelper) 3, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument) 3, Preconditions.checkState (com.google.common.base.Preconditions.checkState) 3, Verify.verify (com.google.common.base.Verify.verify) 3, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 3, ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet) 3