use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class CachingJdbcClient method getTableStatistics.
/**
 * Returns table statistics for the given handle and tuple domain, consulting
 * {@code statisticsCache} first.
 *
 * <p>A cached entry is returned directly unless it equals
 * {@link TableStatistics#empty()} while {@code cacheMissing} is false; in that
 * case the entry is invalidated and the statistics are requested again from
 * the delegate through the {@code get} helper.
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, TupleDomain<ColumnHandle> tupleDomain) {
    TableStatisticsCacheKey cacheKey = new TableStatisticsCacheKey(handle, tupleDomain);
    TableStatistics cached = statisticsCache.getIfPresent(cacheKey);

    if (cached != null) {
        // Empty statistics are only served from the cache when caching of missing values is enabled.
        boolean reusable = cacheMissing || !cached.equals(TableStatistics.empty());
        if (reusable) {
            return cached;
        }
        // Drop the stale "empty" entry so the lookup below goes back to the delegate.
        statisticsCache.invalidate(cacheKey);
    }

    return get(statisticsCache, cacheKey, () -> delegate.getTableStatistics(session, handle, tupleDomain));
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class BlackHoleMetadata method getTableStatistics.
/**
 * Builds synthetic statistics for a black hole table.
 *
 * <p>The row count is the product of the handle's split count, pages per split
 * and rows per page. Every column reports one distinct value and a nulls
 * fraction of zero; numeric columns additionally report the range [0, 0].
 * The {@code constraint} argument is not consulted.
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint constraint) {
    BlackHoleTableHandle blackHoleTable = (BlackHoleTableHandle) tableHandle;

    // Cast the first factor so the whole product is evaluated in double arithmetic.
    double rowCount = (double) blackHoleTable.getSplitCount() * blackHoleTable.getPagesPerSplit() * blackHoleTable.getRowsPerPage();

    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(rowCount));

    for (BlackHoleColumnHandle columnHandle : blackHoleTable.getColumnHandles()) {
        ColumnStatistics.Builder columnStats = ColumnStatistics.builder();
        columnStats.setDistinctValuesCount(Estimate.of(1));
        columnStats.setNullsFraction(Estimate.of(0));
        if (isNumericType(columnHandle.getColumnType())) {
            columnStats.setRange(new DoubleRange(0, 0));
        }
        result.setColumnStatistics(columnHandle, columnStats.build());
    }

    return result.build();
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
/**
 * Verifies that statistics of a single partition are translated into table
 * statistics covering both partition columns and the regular data column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // Metastore-side statistics for the single partition: only the row count (1000) is known.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(
            OptionalLong.empty(),
            OptionalLong.of(1000),
            OptionalLong.empty(),
            OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // Provider that always answers with the statistics prepared above.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
    HiveColumnHandle columnHandle = createBaseColumn(COLUMN, 2, HIVE_LONG, BIGINT, REGULAR, Optional.empty());

    ColumnStatistics firstPartitionColumnStats = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics secondPartitionColumnStats = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics dataColumnStats = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, firstPartitionColumnStats)
            .setColumnStatistics(PARTITION_COLUMN_2, secondPartitionColumnStats)
            .setColumnStatistics(columnHandle, dataColumnStats)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            SESSION,
            TABLE,
            ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
            ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
            ImmutableList.of(partition(partitionName)));

    assertEquals(actual, expected);
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TableStatisticsMaker method makeTableStatistics.
/**
 * Computes table statistics by aggregating the per-file metrics of the data
 * files planned for the handle's snapshot.
 *
 * <p>Returns {@link TableStatistics#empty()} when the handle has no snapshot,
 * when the constraint summary (alone or intersected with the handle's enforced
 * predicate) is "none", or when no data file survives filtering.
 *
 * @param tableHandle handle supplying the snapshot id and the enforced predicate
 * @param constraint constraint whose summary narrows the file scan and which is
 *        also passed to {@code dataFileMatches} for per-file filtering
 * @return row count plus per-column nulls fraction, data size and value range,
 *         each set only when the aggregated metrics provide it
 * @throws UncheckedIOException if closing the planned-files iterable fails
 */
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (tableHandle.getSnapshotId().isEmpty() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }

    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .intersect(tableHandle.getEnforcedPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }

    Schema icebergTableSchema = icebergTable.schema();
    List<Types.NestedField> columns = icebergTableSchema.columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = primitiveFieldTypes(icebergTableSchema);
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTableSchema, typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream()
            .collect(toUnmodifiableMap(IcebergColumnHandle::getId, identity()));

    // Describe every partition field (keyed by its field id) for the per-file matching below.
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(
                field.fieldId(),
                new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toTrinoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.buildOrThrow();

    TableScan tableScan = icebergTable.newScan()
            .filter(toIcebergExpression(intersection))
            .useSnapshot(tableHandle.getSnapshotId().get())
            .includeColumnStats();

    IcebergStatistics.Builder icebergStatisticsBuilder = new IcebergStatistics.Builder(columns, typeManager);
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, partitionFields, idToDetails)) {
                continue;
            }
            icebergStatisticsBuilder.acceptDataFile(dataFile, fileScanTask.spec());
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }

    IcebergStatistics summary = icebergStatisticsBuilder.build();
    if (summary.getFileCount() == 0) {
        return TableStatistics.empty();
    }

    ImmutableMap.Builder<ColumnHandle, ColumnStatistics> columnHandleBuilder = ImmutableMap.builder();
    // Held as a double so the nulls-fraction division below is floating point.
    double recordCount = summary.getRecordCount();
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();

        Long nullCount = summary.getNullCounts().get(fieldId);
        // Matching files may contain zero records in total; dividing by 0.0 would
        // produce NaN/Infinity, which is not a valid estimate. Leave the nulls
        // fraction unset in that case instead of failing the whole computation.
        if (nullCount != null && recordCount > 0) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }

        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }

        // A range is only reported when both bounds are known.
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min != null && max != null) {
            columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), min, max));
        }

        columnHandleBuilder.put(columnHandle, columnBuilder.build());
    }
    return new TableStatistics(Estimate.of(recordCount), columnHandleBuilder.buildOrThrow());
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TestTpcdsMetadataStatistics method testTableStatsDetails.
/**
 * Checks the row count and representative per-column statistics reported for
 * the {@code sf1} call center table.
 */
@Test
public void testTableStatsDetails() {
    SchemaTableName callCenterTable = new SchemaTableName("sf1", Table.CALL_CENTER.getName());
    ConnectorTableHandle tableHandle = metadata.getTableHandle(session, callCenterTable);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

    estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");

    // all columns have stats
    Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
    Map<ColumnHandle, ColumnStatistics> statsByColumn = tableStatistics.getColumnStatistics();
    for (ColumnHandle column : columnHandles.values()) {
        assertTrue(statsByColumn.containsKey(column));
        assertNotNull(statsByColumn.get(column));
    }

    // identifier
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(6))
                    .setRange(new DoubleRange(1, 6))
                    .build());

    // varchar
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setDataSize(Estimate.of(48.0))
                    .build());

    // char
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_ZIP.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setDataSize(Estimate.of(5.0))
                    .build());

    // decimal
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setRange(new DoubleRange(-5, -5))
                    .build());

    // date
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(4))
                    .setRange(new DoubleRange(10227L, 11688L))
                    .build());

    // only null values
    assertColumnStatistics(
            statsByColumn.get(columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(1))
                    .setDistinctValuesCount(Estimate.of(0))
                    .build());
}
Aggregations