use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TpcdsTableStatisticsFactory method toTableStatistics.
/**
 * Converts the given statistics data into a {@link TableStatistics} instance.
 * <p>
 * The row count is always recorded. Per-column statistics are added only when
 * the row count is positive; in that case every entry of {@code columnHandles}
 * gets statistics derived from the column data registered under the same key.
 *
 * @param columnHandles column handles keyed by the name used to look up column data;
 *        each value is cast to {@code TpcdsColumnHandle}
 * @param statisticsData source of the row count and the per-column data
 * @return the assembled table statistics
 */
private TableStatistics toTableStatistics(Map<String, ColumnHandle> columnHandles, TableStatisticsData statisticsData) {
    long rows = statisticsData.getRowCount();
    TableStatistics.Builder builder = TableStatistics.builder();
    builder.setRowCount(Estimate.of(rows));

    if (rows <= 0) {
        // Nothing beyond the row count is reported when there are no rows.
        return builder.build();
    }

    Map<String, ColumnStatisticsData> dataByColumn = statisticsData.getColumns();
    for (Map.Entry<String, ColumnHandle> column : columnHandles.entrySet()) {
        ColumnHandle handle = column.getValue();
        TpcdsColumnHandle tpcdsHandle = (TpcdsColumnHandle) handle;
        ColumnStatisticsData columnData = dataByColumn.get(column.getKey());
        builder.setColumnStatistics(handle, toColumnStatistics(columnData, tpcdsHandle.getType(), rows));
    }
    return builder.build();
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TestTpchMetadata method testTableStats.
/**
 * Fetches the statistics of {@code table} in {@code schema} under the given
 * constraint and checks the reported row count against {@code expectedRowCount},
 * allowing a deviation of {@code expectedRowCount * TOLERANCE}.
 *
 * @param schema schema name used to resolve the table handle
 * @param table table whose statistics are checked
 * @param constraint constraint passed to the statistics lookup
 * @param expectedRowCount row count the statistics are expected to report
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint constraint, double expectedRowCount) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint);

    double reportedRows = stats.getRowCount().getValue();
    // The estimate must compare equal to one rebuilt from its own value.
    assertEquals(stats.getRowCount(), Estimate.of(reportedRows));

    double allowedDelta = expectedRowCount * TOLERANCE;
    assertEquals(reportedRows, expectedRowCount, allowedDelta);
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class TestTpchMetadata method testColumnStats.
/**
 * Fetches the statistics of one column of {@code table} under the given
 * constraint and compares them with {@code expected} using an
 * {@code EstimateAssertion} built with {@code TOLERANCE}.
 * <p>
 * Compared in order: distinct values count, data size, nulls fraction, range.
 *
 * @param schema schema name used to resolve the table handle
 * @param table table that owns the column
 * @param column column whose statistics are checked; looked up by its simplified name
 * @param constraint constraint passed to the statistics lookup
 * @param expected statistics the column is expected to have
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint constraint, ColumnStatistics expected) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint);

    ColumnHandle targetColumn = tpchMetadata.getColumnHandles(session, handle).get(column.getSimplifiedColumnName());
    ColumnStatistics observed = stats.getColumnStatistics().get(targetColumn);

    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    closeTo.assertClose(observed.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(observed.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(observed.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(observed.getRange(), expected.getRange(), "range");
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class AbstractTestHive method assertTableStatsComputed.
/**
 * Asserts that statistics are present for {@code tableName}.
 * <p>
 * Checks, inside a fresh transaction, that:
 * <ul>
 * <li>the table row count is known;</li>
 * <li>the set of column names carrying statistics equals {@code expectedColumnStatsColumns};</li>
 * <li>each of those columns has a known nulls fraction and distinct values count;</li>
 * <li>data size is known for {@code VarcharType} columns and unknown for every other type.</li>
 * </ul>
 *
 * @param tableName table whose statistics are inspected
 * @param expectedColumnStatsColumns names of the columns expected to have statistics
 */
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns) {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue());
        assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");

        // Re-key the statistics by column name so they can be compared with the expected names.
        Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream()
                .collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
        assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");

        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        columnsStatistics.forEach((columnName, columnStatistics) -> {
            ColumnHandle columnHandle = columnHandles.get(columnName);
            Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();
            assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
            assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);
            if (columnType instanceof VarcharType) {
                assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            } else {
                // This branch fails when the data size IS known, so the message must say so
                // (the previous text claimed "unknown" and was missing the separating space).
                assertTrue(columnStatistics.getDataSize().isUnknown(), "unexpected known data size for " + columnName);
            }
        });
    }
}
use of io.trino.spi.statistics.TableStatistics in project trino by trinodb.
the class MetastoreHiveStatisticsProvider method getTableStatistics.
/**
 * Builds table-level statistics from per-partition statistics.
 * <p>
 * Returns {@link TableStatistics#empty()} when {@code statistics} is empty or when
 * {@code calculatePartitionsRowCount} yields no row count. Otherwise the result
 * carries the calculated row count plus one statistics entry per column:
 * partition-key columns go through {@code createPartitionColumnStatistics},
 * all other columns through {@code createDataColumnStatistics}.
 *
 * @param columns column handles keyed by column name; each value is cast to {@code HiveColumnHandle}
 * @param columnTypes column types keyed by column name
 * @param partitions partitions the statistics refer to; must pass the non-empty
 *        {@code checkArgument} precondition once {@code statistics} is non-empty
 * @param statistics partition statistics keyed by a string identifier
 * @return the assembled table statistics, or empty statistics as described above
 */
private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
    if (statistics.isEmpty()) {
        return TableStatistics.empty();
    }
    checkArgument(!partitions.isEmpty(), "partitions is empty");

    Optional<PartitionsRowCount> maybeRowCounts = calculatePartitionsRowCount(statistics.values(), partitions.size());
    if (maybeRowCounts.isEmpty()) {
        return TableStatistics.empty();
    }
    PartitionsRowCount rowCounts = maybeRowCounts.get();
    double totalRows = rowCounts.getRowCount();

    TableStatistics.Builder builder = TableStatistics.builder().setRowCount(Estimate.of(totalRows));
    for (Map.Entry<String, ColumnHandle> entry : columns.entrySet()) {
        String name = entry.getKey();
        HiveColumnHandle handle = (HiveColumnHandle) entry.getValue();
        Type type = columnTypes.get(name);
        // Partition keys and data columns are estimated by different helpers.
        ColumnStatistics perColumn = handle.isPartitionKey()
                ? createPartitionColumnStatistics(handle, type, partitions, statistics, rowCounts.getAverageRowsPerPartition(), totalRows)
                : createDataColumnStatistics(name, type, totalRows, statistics.values());
        builder.setColumnStatistics(handle, perColumn);
    }
    return builder.build();
}
Aggregations