Search in sources :

Example 1 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class MetastoreHiveStatisticsProvider method createZeroStatistics.

/**
 * Builds table statistics describing an empty table: the row count is zero and, for every
 * entry of {@code columns}, the nulls fraction and distinct values count are zero. A zero
 * data size is additionally recorded for columns whose type is accepted by
 * {@code hasDataSize}.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; each name in {@code columns} is
 *        looked up here and checked with {@code verifyNotNull}
 * @return the assembled statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes) {
    TableStatistics.Builder tableBuilder = TableStatistics.builder();
    tableBuilder.setRowCount(Estimate.of(0));
    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String name = column.getKey();
        Type type = columnTypes.get(name);
        // The two maps are expected to agree on column names; a missing type is reported by name.
        verifyNotNull(type, "columnType is missing for column: %s", name);
        ColumnStatistics.Builder zeroed = ColumnStatistics.builder();
        zeroed.setNullsFraction(Estimate.of(0));
        zeroed.setDistinctValuesCount(Estimate.of(0));
        if (hasDataSize(type)) {
            zeroed.setDataSize(Estimate.of(0));
        }
        tableBuilder.setColumnStatistics(column.getValue(), zeroed.build());
    }
    return tableBuilder.build();
}
Also used : HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DecimalType(io.trino.spi.type.DecimalType) Type(io.trino.spi.type.Type) VarcharType(io.trino.spi.type.VarcharType) CharType(io.trino.spi.type.CharType) TableStatistics(io.trino.spi.statistics.TableStatistics)

Example 2 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class AbstractTestHive method testPartitionStatisticsSampling.

/**
 * Writes the same {@code statistics} to the partitions {@code ds=2016-01-01} and
 * {@code ds=2016-01-02} of a temporary partitioned table, then asserts that the table
 * statistics computed with a sample size of 1 equal those computed with a sample size of 2.
 * The temporary table is dropped in a {@code finally} block.
 *
 * @param columns columns used to create the temporary partitioned table
 * @param statistics statistics stored for each of the two partitions
 */
protected void testPartitionStatisticsSampling(List<ColumnMetadata> columns, PartitionStatistics statistics) throws Exception {
    SchemaTableName table = temporaryTable("test_partition_statistics_sampling");
    try {
        createDummyPartitionedTable(table, columns);
        HiveMetastoreClosure metastore = new HiveMetastoreClosure(getMetastoreClient());
        // Whatever statistics a partition currently has are replaced by the supplied ones.
        for (String partitionName : new String[] {"ds=2016-01-01", "ds=2016-01-02"}) {
            metastore.updatePartitionStatistics(table.getSchemaName(), table.getTableName(), partitionName, ignored -> statistics);
        }
        try (Transaction tx = newTransaction()) {
            ConnectorSession lookupSession = newSession();
            ConnectorMetadata connectorMetadata = tx.getMetadata();
            ConnectorTableHandle handle = connectorMetadata.getTableHandle(lookupSession, table);
            TableStatistics fromBothPartitions = connectorMetadata.getTableStatistics(sampleSize(2), handle, Constraint.alwaysTrue());
            TableStatistics fromOnePartition = connectorMetadata.getTableStatistics(sampleSize(1), handle, Constraint.alwaysTrue());
            assertEquals(fromOnePartition, fromBothPartitions);
        }
    } finally {
        dropTable(table);
    }
}
Also used : ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) TableStatistics(io.trino.spi.statistics.TableStatistics) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)

Example 3 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsNaNWithMultipleFiles.

/**
 * Asserts that no value range is reported for {@code COLUMN_HANDLE} of the table registered
 * as {@code nan_multi_file}.
 */
@Test
public void testStatisticsNaNWithMultipleFiles() {
    // NaN min/max values make the statistics unusable. Here one file carries NaN min/max
    // values and another carries 0.0 min/max values within the same transaction.
    DeltaLakeTableHandle nanMultiFileTable = registerTable("nan_multi_file");
    ColumnStatistics columnStats = deltaLakeMetastore
            .getTableStatistics(SESSION, nanMultiFileTable, Constraint.alwaysTrue())
            .getColumnStatistics()
            .get(COLUMN_HANDLE);
    assertEquals(columnStats.getRange(), Optional.empty());
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 4 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsZeroAndNegativeInfinity.

/**
 * Asserts that the range reported for {@code COLUMN_HANDLE} of the table registered as
 * {@code zero_negative_infinity} spans from {@code NEGATIVE_INFINITY} to {@code 0.0}.
 */
@Test
public void testStatisticsZeroAndNegativeInfinity() {
    DeltaLakeTableHandle zeroNegativeInfinityTable = registerTable("zero_negative_infinity");
    ColumnStatistics columnStats = deltaLakeMetastore
            .getTableStatistics(SESSION, zeroNegativeInfinityTable, Constraint.alwaysTrue())
            .getColumnStatistics()
            .get(COLUMN_HANDLE);
    // Lower bound first, then upper bound.
    assertEquals(columnStats.getRange().get().getMin(), NEGATIVE_INFINITY);
    assertEquals(columnStats.getRange().get().getMax(), 0.0);
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Example 5 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsZeroAndNaN.

/**
 * Asserts that no value range is reported for {@code COLUMN_HANDLE} of the table registered
 * as {@code zero_nan}.
 *
 * <p>The expectation matches {@code testStatisticsNaNWithMultipleFiles}: statistics that
 * contain NaN values cannot be used, so the range must be absent rather than populated.
 */
@Test
public void testStatisticsZeroAndNaN() {
    // Stats with NaN values cannot be used
    DeltaLakeTableHandle tableHandle = registerTable("zero_nan");
    TableStatistics stats = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    ColumnStatistics columnStatistics = stats.getColumnStatistics().get(COLUMN_HANDLE);
    // A populated range (e.g. 0.0 .. POSITIVE_INFINITY) would contradict the rule above.
    assertEquals(columnStatistics.getRange(), Optional.empty());
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Aggregations

TableStatistics (io.trino.spi.statistics.TableStatistics) 35, ColumnStatistics (io.trino.spi.statistics.ColumnStatistics) 23, Test (org.testng.annotations.Test) 20, DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle) 15, ColumnHandle (io.trino.spi.connector.ColumnHandle) 13, SchemaTableName (io.trino.spi.connector.SchemaTableName) 9, Map (java.util.Map) 7, Constraint (io.trino.spi.connector.Constraint) 6, ImmutableMap (com.google.common.collect.ImmutableMap) 5, ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle) 5, TupleDomain (io.trino.spi.predicate.TupleDomain) 5, Type (io.trino.spi.type.Type) 5, IOException (java.io.IOException) 5, Objects.requireNonNull (java.util.Objects.requireNonNull) 5, Optional (java.util.Optional) 5, ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList) 4, DeltaLakeColumnHandle (io.trino.plugin.deltalake.DeltaLakeColumnHandle) 4, DoubleRange (io.trino.spi.statistics.DoubleRange) 4, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument) 3, ImmutableList (com.google.common.collect.ImmutableList) 3