Search in sources :

Example 26 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.

@Test
public void testGetTableStatisticsUnpartitioned() {
    PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());
    TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
    assertEquals(statisticsProvider.getTableStatistics(SESSION, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))), expected);
}
Also used : DoubleRange(io.trino.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.trino.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HivePartition(io.trino.plugin.hive.HivePartition) Test(org.testng.annotations.Test)

Example 27 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class MetadataManager method getTableStatistics.

@Override
public TableStatistics getTableStatistics(Session session, TableHandle tableHandle, Constraint constraint) {
    CatalogName catalogName = tableHandle.getCatalogName();
    ConnectorMetadata metadata = getMetadata(session, catalogName);
    TableStatistics tableStatistics = metadata.getTableStatistics(session.toConnectorSession(catalogName), tableHandle.getConnectorHandle(), constraint);
    verifyNotNull(tableStatistics, "%s returned null tableStatistics for %s", metadata, tableHandle);
    return tableStatistics;
}
Also used : CatalogName(io.trino.connector.CatalogName) TableStatistics(io.trino.spi.statistics.TableStatistics) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata)

Example 28 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class BaseIcebergConnectorTest method testStatisticsConstraints.

@Test
public void testStatisticsConstraints() {
    String tableName = "iceberg.tpch.test_simple_partitioned_table_statistics";
    assertUpdate("CREATE TABLE iceberg.tpch.test_simple_partitioned_table_statistics (col1 BIGINT, col2 BIGINT) WITH (partitioning = ARRAY['col1'])");
    String insertStart = "INSERT INTO iceberg.tpch.test_simple_partitioned_table_statistics";
    assertUpdate(insertStart + " VALUES (1, 101), (2, 102), (3, 103), (4, 104)", 4);
    TableStatistics tableStatistics = getTableStatistics(tableName, new Constraint(TupleDomain.all()));
    IcebergColumnHandle col1Handle = getColumnHandleFromStatistics(tableStatistics, "col1");
    IcebergColumnHandle col2Handle = getColumnHandleFromStatistics(tableStatistics, "col2");
    // Constraint.predicate is currently not supported, because it's never provided by the engine.
    // TODO add (restore) test coverage when this changes.
    // predicate on a partition column
    assertThatThrownBy(() -> getTableStatistics(tableName, new Constraint(TupleDomain.all(), new TestRelationalNumberPredicate("col1", 3, i1 -> i1 >= 0), Set.of(col1Handle)))).isInstanceOf(VerifyException.class).hasMessage("Unexpected Constraint predicate");
    // predicate on a non-partition column
    assertThatThrownBy(() -> getTableStatistics(tableName, new Constraint(TupleDomain.all(), new TestRelationalNumberPredicate("col2", 102, i -> i >= 0), Set.of(col2Handle)))).isInstanceOf(VerifyException.class).hasMessage("Unexpected Constraint predicate");
    dropTable(tableName);
}
Also used : SkipException(org.testng.SkipException) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) TestTable(io.trino.testing.sql.TestTable) BROADCAST(io.trino.sql.planner.OptimizerConfig.JoinDistributionType.BROADCAST) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Matcher(java.util.regex.Matcher) Map(java.util.Map) MaterializedRow(io.trino.testing.MaterializedRow) Path(java.nio.file.Path) Assert.assertFalse(org.testng.Assert.assertFalse) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) Schema(org.apache.avro.Schema) DataProviders(io.trino.testing.DataProviders) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) DataFileWriter(org.apache.avro.file.DataFileWriter) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) Collectors.joining(java.util.stream.Collectors.joining) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) TestingSession.testSessionBuilder(io.trino.testing.TestingSession.testSessionBuilder) Stream(java.util.stream.Stream) ORC(io.trino.plugin.iceberg.IcebergFileFormat.ORC) Session(io.trino.Session) NullableValue(io.trino.spi.predicate.NullableValue) GenericData(org.apache.avro.generic.GenericData) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) String.join(java.lang.String.join) ColumnHandle(io.trino.spi.connector.ColumnHandle) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) LongStream(java.util.stream.LongStream) OperatorStats(io.trino.operator.OperatorStats) Language(org.intellij.lang.annotations.Language) Files(java.nio.file.Files) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) File(java.io.File) BaseConnectorTest(io.trino.testing.BaseConnectorTest) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) TableHandle(io.trino.metadata.TableHandle) QualifiedObjectName(io.trino.metadata.QualifiedObjectName) Paths(java.nio.file.Paths) QueryRunner(io.trino.testing.QueryRunner) Domain.multipleValues(io.trino.spi.predicate.Domain.multipleValues) QueryId(io.trino.spi.QueryId) TransactionBuilder.transaction(io.trino.transaction.TransactionBuilder.transaction) IntStream.range(java.util.stream.IntStream.range) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS(io.trino.SystemSessionProperties.PREFERRED_WRITE_PARTITIONING_MIN_NUMBER_OF_PARTITIONS) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) ICEBERG_DOMAIN_COMPACTION_THRESHOLD(io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) QueryAssertions.assertEqualsIgnoreOrder(io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder) Locale(java.util.Locale) Iterables.concat(com.google.common.collect.Iterables.concat) TestingConnectorBehavior(io.trino.testing.TestingConnectorBehavior) TestTable.randomTableSuffix(io.trino.testing.sql.TestTable.randomTableSuffix) TpchTable(io.trino.tpch.TpchTable) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Collections.nCopies(java.util.Collections.nCopies) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) ICEBERG_CATALOG(io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG) DataSize(io.airlift.units.DataSize) List(java.util.List) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) BIGINT(io.trino.spi.type.BigintType.BIGINT) Domain.singleValue(io.trino.spi.predicate.Domain.singleValue) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) DataFileReader(org.apache.avro.file.DataFileReader) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IcebergQueryRunner.createIcebergQueryRunner(io.trino.plugin.iceberg.IcebergQueryRunner.createIcebergQueryRunner) LINE_ITEM(io.trino.tpch.TpchTable.LINE_ITEM) IntStream(java.util.stream.IntStream) Constraint(io.trino.spi.connector.Constraint) DataProvider(org.testng.annotations.DataProvider) PARQUET(io.trino.plugin.iceberg.IcebergFileFormat.PARQUET) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Objects.requireNonNull(java.util.Objects.requireNonNull) TableStatistics(io.trino.spi.statistics.TableStatistics) NoSuchElementException(java.util.NoSuchElementException) VerifyException(com.google.common.base.VerifyException) OutputStream(java.io.OutputStream) ResultWithQueryId(io.trino.testing.ResultWithQueryId) TupleDomain(io.trino.spi.predicate.TupleDomain) Consumer(java.util.function.Consumer) Assert.assertEventually(io.trino.testing.assertions.Assert.assertEventually) JOIN_DISTRIBUTION_TYPE(io.trino.SystemSessionProperties.JOIN_DISTRIBUTION_TYPE) Metadata(io.trino.metadata.Metadata) Assert.assertTrue(org.testng.Assert.assertTrue) MaterializedResult.resultBuilder(io.trino.testing.MaterializedResult.resultBuilder) Constraint(io.trino.spi.connector.Constraint) VerifyException(com.google.common.base.VerifyException) TableStatistics(io.trino.spi.statistics.TableStatistics) Test(org.testng.annotations.Test) BaseConnectorTest(io.trino.testing.BaseConnectorTest)

Example 29 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsParquetParsedStatisticsNullCount.

@Test
public void testStatisticsParquetParsedStatisticsNullCount() {
    // The transaction log for this table was created so that the checkpoints only write struct statistics, not json statistics
    // The table has one INTEGER column 'i' where 3 of the 9 values are null
    DeltaLakeTableHandle tableHandle = registerTable("parquet_struct_statistics_null_count");
    TableStatistics stats = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    assertEquals(stats.getRowCount(), Estimate.of(9));
    Map<ColumnHandle, ColumnStatistics> statisticsMap = stats.getColumnStatistics();
    ColumnStatistics columnStats = statisticsMap.get(new DeltaLakeColumnHandle("i", INTEGER, REGULAR));
    assertEquals(columnStats.getNullsFraction(), Estimate.of(3.0 / 9.0));
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) DeltaLakeColumnHandle(io.trino.plugin.deltalake.DeltaLakeColumnHandle) Test(org.testng.annotations.Test)

Example 30 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestDeltaLakeMetastoreStatistics method testStatisticsNegativeInfinityAndNaN.

@Test
public void testStatisticsNegativeInfinityAndNaN() {
    // Stats with NaN values cannot be used
    DeltaLakeTableHandle tableHandle = registerTable("negative_infinity_nan");
    TableStatistics stats = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    ColumnStatistics columnStatistics = stats.getColumnStatistics().get(COLUMN_HANDLE);
    assertEquals(columnStatistics.getRange().get().getMin(), NEGATIVE_INFINITY);
    assertEquals(columnStatistics.getRange().get().getMax(), POSITIVE_INFINITY);
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) DeltaLakeTableHandle(io.trino.plugin.deltalake.DeltaLakeTableHandle) Test(org.testng.annotations.Test)

Aggregations

TableStatistics (io.trino.spi.statistics.TableStatistics)35 ColumnStatistics (io.trino.spi.statistics.ColumnStatistics)23 Test (org.testng.annotations.Test)20 DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle)15 ColumnHandle (io.trino.spi.connector.ColumnHandle)13 SchemaTableName (io.trino.spi.connector.SchemaTableName)9 Map (java.util.Map)7 Constraint (io.trino.spi.connector.Constraint)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)5 TupleDomain (io.trino.spi.predicate.TupleDomain)5 Type (io.trino.spi.type.Type)5 IOException (java.io.IOException)5 Objects.requireNonNull (java.util.Objects.requireNonNull)5 Optional (java.util.Optional)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 DeltaLakeColumnHandle (io.trino.plugin.deltalake.DeltaLakeColumnHandle)4 DoubleRange (io.trino.spi.statistics.DoubleRange)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 ImmutableList (com.google.common.collect.ImmutableList)3