Search in sources :

Example 16 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class CachingJdbcClient method getTableStatistics.

/**
 * Returns table statistics for the given handle and predicate, consulting
 * {@code statisticsCache} first.
 *
 * <p>A cached entry is returned as-is unless it equals {@link TableStatistics#empty()}
 * while {@code cacheMissing} is off; such an entry is invalidated and the lookup falls
 * through to {@code get(...)} with the delegate as the value supplier.
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, TupleDomain<ColumnHandle> tupleDomain) {
    TableStatisticsCacheKey cacheKey = new TableStatisticsCacheKey(handle, tupleDomain);
    TableStatistics cached = statisticsCache.getIfPresent(cacheKey);
    if (cached != null) {
        // An empty entry is only worth serving when missing statistics are meant to be cached.
        boolean discardEmptyEntry = !cacheMissing && cached.equals(TableStatistics.empty());
        if (!discardEmptyEntry) {
            return cached;
        }
        statisticsCache.invalidate(cacheKey);
    }
    return get(
            statisticsCache,
            cacheKey,
            () -> delegate.getTableStatistics(session, handle, tupleDomain));
}
Also used : TableStatistics(io.trino.spi.statistics.TableStatistics)

Example 17 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class BlackHoleMetadata method getTableStatistics.

/**
 * Produces synthetic statistics for a black hole table.
 *
 * <p>The row count is splitCount * pagesPerSplit * rowsPerPage. Every column is reported
 * with one distinct value and a nulls fraction of zero; columns for which
 * {@code isNumericType} holds additionally get the range [0, 0].
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint) {
    BlackHoleTableHandle blackHoleTable = (BlackHoleTableHandle) tableHandle;

    // Widen to double before multiplying, exactly as the product is defined for the row estimate.
    double totalRows = (double) blackHoleTable.getSplitCount() * blackHoleTable.getPagesPerSplit() * blackHoleTable.getRowsPerPage();

    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(totalRows));

    for (BlackHoleColumnHandle columnHandle : blackHoleTable.getColumnHandles()) {
        ColumnStatistics.Builder columnStats = ColumnStatistics.builder();
        columnStats.setDistinctValuesCount(Estimate.of(1));
        columnStats.setNullsFraction(Estimate.of(0));
        if (isNumericType(columnHandle.getColumnType())) {
            columnStats.setRange(new DoubleRange(0, 0));
        }
        result.setColumnStatistics(columnHandle, columnStats.build());
    }
    return result.build();
}
Also used : ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) DoubleRange(io.trino.spi.statistics.DoubleRange) TableStatistics(io.trino.spi.statistics.TableStatistics)

Example 18 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.

/**
 * Checks that a single partition's metastore statistics are turned into the expected
 * table-level statistics for two partition columns and one regular BIGINT column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // Statistics the fake metastore hands back for the one partition.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));

    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());

    // Expected per-column statistics.
    ColumnStatistics firstPartitionColumnStats = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics secondPartitionColumnStats = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics regularColumnStats = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, firstPartitionColumnStats)
            .setColumnStatistics(PARTITION_COLUMN_2, secondPartitionColumnStats)
            .setColumnStatistics(columnHandle, regularColumnStats)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            SESSION,
            TABLE,
            ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
            ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
            ImmutableList.of(partition(partitionName)));
    assertEquals(actual, expected);
}
Also used : DoubleRange(io.trino.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.trino.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) Test(org.testng.annotations.Test)

Example 19 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TableStatisticsMaker method makeTableStatistics.

/**
 * Builds table statistics for an Iceberg table by aggregating the column metrics of the
 * data files selected by the given constraint.
 *
 * <p>Returns {@link TableStatistics#empty()} when the handle carries no snapshot id, when
 * the constraint summary (or its intersection with the handle's enforced predicate) is
 * "none", or when no data file passes both the scan filter and {@code dataFileMatches}.
 *
 * @param tableHandle handle providing the snapshot id and the enforced predicate
 * @param constraint constraint whose summary narrows the scan and which is also handed to
 *        {@code dataFileMatches} for per-file filtering
 * @return the aggregated record count plus, per column, whichever of nulls fraction, data
 *         size and value range the aggregated summary provides
 * @throws UncheckedIOException if an {@link IOException} is raised while scanning data files
 */
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (tableHandle.getSnapshotId().isEmpty() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary().transformKeys(IcebergColumnHandle.class::cast).intersect(tableHandle.getEnforcedPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }

    // Schema- and partition-level lookups used while filtering data files.
    Schema icebergTableSchema = icebergTable.schema();
    List<Types.NestedField> columns = icebergTableSchema.columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = primitiveFieldTypes(icebergTableSchema);
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTableSchema, typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream().collect(toUnmodifiableMap(IcebergColumnHandle::getId, identity()));
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.fieldId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toTrinoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.buildOrThrow();

    // Aggregate the metrics of every data file that survives both the scan filter and the constraint.
    TableScan tableScan = icebergTable.newScan().filter(toIcebergExpression(intersection)).useSnapshot(tableHandle.getSnapshotId().get()).includeColumnStats();
    IcebergStatistics.Builder icebergStatisticsBuilder = new IcebergStatistics.Builder(columns, typeManager);
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, partitionFields, idToDetails)) {
                continue;
            }
            icebergStatisticsBuilder.acceptDataFile(dataFile, fileScanTask.spec());
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    IcebergStatistics summary = icebergStatisticsBuilder.build();
    if (summary.getFileCount() == 0) {
        return TableStatistics.empty();
    }

    // Translate the aggregated summary into per-column statistics.
    ImmutableMap.Builder<ColumnHandle, ColumnStatistics> columnHandleBuilder = ImmutableMap.builder();
    double recordCount = summary.getRecordCount();
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        // With zero records the ratio would be NaN or infinite, which Estimate.of rejects;
        // leave the nulls fraction unset in that case instead of failing the whole call.
        if (nullCount != null && recordCount > 0) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        // A range is only reported when both bounds are known.
        if (min != null && max != null) {
            columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), min, max));
        }
        columnHandleBuilder.put(columnHandle, columnBuilder.build());
    }
    return new TableStatistics(Estimate.of(recordCount), columnHandleBuilder.buildOrThrow());
}
Also used : Schema(org.apache.iceberg.Schema) UncheckedIOException(java.io.UncheckedIOException) DataFile(org.apache.iceberg.DataFile) PartitionField(org.apache.iceberg.PartitionField) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) TableScan(org.apache.iceberg.TableScan) ColumnHandle(io.trino.spi.connector.ColumnHandle) IOException(java.io.IOException) ImmutableMap(com.google.common.collect.ImmutableMap) Constraint(io.trino.spi.connector.Constraint) TypeConverter.toTrinoType(io.trino.plugin.iceberg.TypeConverter.toTrinoType) Type(org.apache.iceberg.types.Type) TableStatistics(io.trino.spi.statistics.TableStatistics) FileScanTask(org.apache.iceberg.FileScanTask)

Example 20 with TableStatistics

use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.

the class TestTpcdsMetadataStatistics method testTableStatsDetails.

/**
 * Verifies the statistics reported for the sf1 CALL_CENTER table: the row count, the
 * presence of statistics for every column, and the exact statistics of one column per
 * representative type.
 */
@Test
public void testTableStatsDetails() {
    ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.CALL_CENTER.getName()));
    TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());
    estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");

    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    Map<ColumnHandle, ColumnStatistics> statsByColumn = tableStatistics.getColumnStatistics();

    // all columns have stats
    for (ColumnHandle column : columnHandles.values()) {
        assertTrue(statsByColumn.containsKey(column));
        assertNotNull(statsByColumn.get(column));
    }

    // identifier
    ColumnHandle callCenterSk = columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName());
    assertColumnStatistics(
            statsByColumn.get(callCenterSk),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(6)).setRange(new DoubleRange(1, 6)).build());

    // varchar
    ColumnHandle callCenterId = columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName());
    assertColumnStatistics(
            statsByColumn.get(callCenterId),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(3)).setDataSize(Estimate.of(48.0)).build());

    // char
    ColumnHandle zip = columnHandles.get(CallCenterColumn.CC_ZIP.getName());
    assertColumnStatistics(
            statsByColumn.get(zip),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).setDataSize(Estimate.of(5.0)).build());

    // decimal
    ColumnHandle gmtOffset = columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName());
    assertColumnStatistics(
            statsByColumn.get(gmtOffset),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).setRange(new DoubleRange(-5, -5)).build());

    // date
    ColumnHandle recStartDate = columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName());
    assertColumnStatistics(
            statsByColumn.get(recStartDate),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(4)).setRange(new DoubleRange(10227L, 11688L)).build());

    // only null values
    ColumnHandle closedDateSk = columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName());
    assertColumnStatistics(
            statsByColumn.get(closedDateSk),
            ColumnStatistics.builder().setNullsFraction(Estimate.of(1)).setDistinctValuesCount(Estimate.of(0)).build());
}
Also used : ColumnHandle(io.trino.spi.connector.ColumnHandle) DoubleRange(io.trino.spi.statistics.DoubleRange) TableStatistics(io.trino.spi.statistics.TableStatistics) SchemaTableName(io.trino.spi.connector.SchemaTableName) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Aggregations

TableStatistics (io.trino.spi.statistics.TableStatistics)35 ColumnStatistics (io.trino.spi.statistics.ColumnStatistics)23 Test (org.testng.annotations.Test)20 DeltaLakeTableHandle (io.trino.plugin.deltalake.DeltaLakeTableHandle)15 ColumnHandle (io.trino.spi.connector.ColumnHandle)13 SchemaTableName (io.trino.spi.connector.SchemaTableName)9 Map (java.util.Map)7 Constraint (io.trino.spi.connector.Constraint)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 ConnectorTableHandle (io.trino.spi.connector.ConnectorTableHandle)5 TupleDomain (io.trino.spi.predicate.TupleDomain)5 Type (io.trino.spi.type.Type)5 IOException (java.io.IOException)5 Objects.requireNonNull (java.util.Objects.requireNonNull)5 Optional (java.util.Optional)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 DeltaLakeColumnHandle (io.trino.plugin.deltalake.DeltaLakeColumnHandle)4 DoubleRange (io.trino.spi.statistics.DoubleRange)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 ImmutableList (com.google.common.collect.ImmutableList)3