Use of com.facebook.presto.spi.statistics.ColumnStatistics in project presto by prestodb.
Class TestConnectorFilterStatsCalculatorService, method testTableStatisticsAfterFilter.
/**
 * Checks the table statistics produced by applying a filter predicate to
 * zero-valued, empty, and populated input statistics.
 */
@Test
public void testTableStatisticsAfterFilter() {
    // Zero statistics in, zero statistics out: totalSize is always zero here.
    for (String predicate : new String[] {"true", "x < 3e0", "false"}) {
        assertPredicate(predicate, zeroTableStatistics, zeroTableStatistics);
    }

    // rowCount and totalSize are both NaN and stay that way under "true".
    assertPredicate("true", TableStatistics.empty(), TableStatistics.empty());

    // Under "false", rowCount and totalSize go from NaN to 0.0.
    TableStatistics emptyFilteredToZero = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .build();
    assertPredicate("false", TableStatistics.empty(), emptyFilteredToZero);

    // "false" over populated statistics: zero rows and zero size at table level.
    ColumnStatistics zeroedColumn = new ColumnStatistics(
            Estimate.of(1.0),
            Estimate.zero(),
            Estimate.zero(),
            Optional.empty());
    TableStatistics filteredToZeroStatistics = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .setColumnStatistics(xColumn, zeroedColumn)
            .build();
    assertPredicate("false", originalTableStatistics, filteredToZeroStatistics);

    // Expected statistics of column x after "x < 0"; used by both remaining cases.
    ColumnStatistics negativeRangeColumn = new ColumnStatistics(
            Estimate.zero(),
            Estimate.of(20),
            Estimate.unknown(),
            Optional.of(new DoubleRange(-10, 0)));

    TableStatistics filteredStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setTotalSize(Estimate.of(300))
            .setColumnStatistics(xColumn, negativeRangeColumn)
            .build();
    assertPredicate("x < 0", originalTableStatistics, filteredStatistics);

    // Same filter, but neither the input nor the expected output sets a totalSize.
    TableStatistics filteredStatisticsWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setColumnStatistics(xColumn, negativeRangeColumn)
            .build();
    assertPredicate("x < 0", originalTableStatisticsWithoutTotalSize, filteredStatisticsWithoutTotalSize);
}
Use of com.facebook.presto.spi.statistics.ColumnStatistics in project presto by prestodb.
Class AbstractTestHiveClient, method assertTableStatsComputed.
/**
 * Asserts that table statistics are available for {@code tableName}.
 *
 * <p>The row count must be known, the set of columns carrying statistics must
 * equal {@code expectedColumnStatsColumns}, and every such column must have a
 * known nulls fraction and distinct values count. Data size must be known for
 * varchar columns and unknown for all other column types.
 *
 * @param tableName table whose statistics are checked
 * @param expectedColumnStatsColumns names of the columns expected to carry statistics
 */
private void assertTableStatsComputed(SchemaTableName tableName, Set<String> expectedColumnStatsColumns) {
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);

        // Resolve the column handles once; they are used both to request the
        // statistics and to look up each column's type below.
        Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
        List<ColumnHandle> allColumnHandles = ImmutableList.copyOf(columnHandles.values());

        TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Optional.empty(), allColumnHandles, Constraint.alwaysTrue());
        assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");

        // Re-key the per-column statistics by column name so they can be compared with the expected set.
        Map<String, ColumnStatistics> columnsStatistics = tableStatistics.getColumnStatistics().entrySet().stream()
                .collect(toImmutableMap(entry -> ((HiveColumnHandle) entry.getKey()).getName(), Map.Entry::getValue));
        assertEquals(columnsStatistics.keySet(), expectedColumnStatsColumns, "columns with statistics");

        columnsStatistics.forEach((columnName, columnStatistics) -> {
            ColumnHandle columnHandle = columnHandles.get(columnName);
            Type columnType = metadata.getColumnMetadata(session, tableHandle, columnHandle).getType();

            assertFalse(columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
            assertFalse(columnStatistics.getDistinctValuesCount().isUnknown(), "unknown distinct values count for " + columnName);

            if (isVarcharType(columnType)) {
                assertFalse(columnStatistics.getDataSize().isUnknown(), "unknown data size for " + columnName);
            }
            else {
                // This assertion fails when the data size IS known, so the message says so.
                assertTrue(columnStatistics.getDataSize().isUnknown(), "unexpected known data size for " + columnName);
            }
        });
    }
}
Use of com.facebook.presto.spi.statistics.ColumnStatistics in project presto by prestodb.
Class MetastoreHiveStatisticsProvider, method calculateDataSize.
/**
 * Estimates the total data size of {@code column} across all rows.
 *
 * <p>Only partitions that report a row count and a total size in bytes for the
 * column contribute. The average size per row over those partitions is then
 * multiplied by {@code totalRowCount}.
 *
 * @param column name of the column to estimate
 * @param partitionStatistics statistics of the partitions under consideration
 * @param totalRowCount row count the average per-row size is scaled to
 * @return the estimated size; unknown when no partition provides both figures
 *         or the contributing partitions hold zero rows in total; zero when
 *         {@code totalRowCount} is zero and at least one partition contributes
 */
@VisibleForTesting
static Estimate calculateDataSize(String column, Collection<PartitionStatistics> partitionStatistics, double totalRowCount) {
    // Keep only the partitions that expose both a row count and a size for this column.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
                return stats != null && stats.getTotalSizeInBytes().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long rowCountSum = 0;
    long dataSizeSum = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRowCount = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRowCount >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
        verify(stats != null, "columnStatistics is null");

        long partitionDataSize = stats.getTotalSizeInBytes()
                .orElseThrow(() -> new VerifyException("totalSizeInBytes is not present"));
        verify(partitionDataSize >= 0, "dataSize must be greater than or equal to zero");

        rowCountSum += partitionRowCount;
        dataSizeSum += partitionDataSize;
    }

    if (totalRowCount == 0) {
        return Estimate.zero();
    }
    // No contributing rows means no per-row average can be computed.
    if (rowCountSum == 0) {
        return Estimate.unknown();
    }

    double bytesPerRow = ((double) dataSizeSum) / rowCountSum;
    return Estimate.of(bytesPerRow * totalRowCount);
}
Use of com.facebook.presto.spi.statistics.ColumnStatistics in project presto by prestodb.
Class MetastoreHiveStatisticsProvider, method calculateNullsFraction.
/**
 * Estimates the fraction of null values in {@code column}.
 *
 * <p>Only partitions that report a row count and a nulls count for the column
 * contribute. The result is the sum of their nulls counts divided by the sum
 * of their row counts.
 *
 * @param column name of the column to estimate
 * @param partitionStatistics statistics of the partitions under consideration
 * @return the estimated nulls fraction; unknown when no partition provides
 *         both figures; zero when the contributing partitions hold zero rows
 */
@VisibleForTesting
static Estimate calculateNullsFraction(String column, Collection<PartitionStatistics> partitionStatistics) {
    // Keep only the partitions that expose both a row count and a nulls count for this column.
    List<PartitionStatistics> usablePartitions = partitionStatistics.stream()
            .filter(partition -> partition.getBasicStatistics().getRowCount().isPresent())
            .filter(partition -> {
                HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
                return stats != null && stats.getNullsCount().isPresent();
            })
            .collect(toImmutableList());
    if (usablePartitions.isEmpty()) {
        return Estimate.unknown();
    }

    long nullsSum = 0;
    long rowsSum = 0;
    for (PartitionStatistics partition : usablePartitions) {
        long partitionRowCount = partition.getBasicStatistics().getRowCount()
                .orElseThrow(() -> new VerifyException("rowCount is not present"));
        verify(partitionRowCount >= 0, "rowCount must be greater than or equal to zero");

        HiveColumnStatistics stats = partition.getColumnStatistics().get(column);
        verify(stats != null, "columnStatistics is null");

        long partitionNullsCount = stats.getNullsCount()
                .orElseThrow(() -> new VerifyException("nullsCount is not present"));
        verify(partitionNullsCount >= 0, "nullsCount must be greater than or equal to zero");
        verify(partitionNullsCount <= partitionRowCount, "nullsCount must be less than or equal to rowCount. nullsCount: %s. rowCount: %s.", partitionNullsCount, partitionRowCount);

        nullsSum += partitionNullsCount;
        rowsSum += partitionRowCount;
    }

    // Return before dividing when the contributing partitions hold no rows.
    if (rowsSum == 0) {
        return Estimate.zero();
    }

    verify(nullsSum <= rowsSum, "totalNullsCount must be less than or equal to totalRowCount. totalNullsCount: %s. totalRowCount: %s.", nullsSum, rowsSum);
    return Estimate.of(((double) nullsSum) / rowsSum);
}
Use of com.facebook.presto.spi.statistics.ColumnStatistics in project presto by prestodb.
Class TestTpcdsMetadataStatistics, method testTableStatisticsSerialization.
/**
 * Serializes a {@link TableStatistics} holding the row count of the
 * {@code web_site} table and the first column statistics entry returned for
 * it, and compares the resulting JSON with the expected text.
 */
@Test
public void testTableStatisticsSerialization() {
    SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.WEB_SITE.getName());
    ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName);
    List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
    TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Optional.empty(), columnHandles, alwaysTrue());

    // Only the first column entry is kept so the expected JSON stays small.
    Entry<ColumnHandle, ColumnStatistics> entry = tableStatistics.getColumnStatistics().entrySet().iterator().next();

    // builder() is a static factory: call it on the class, not on an instance,
    // so it does not look as if the builder were pre-populated from tableStatistics.
    TableStatistics expectedTableStatistics = TableStatistics.builder()
            .setRowCount(tableStatistics.getRowCount())
            .setColumnStatistics(entry.getKey(), entry.getValue())
            .build();

    JsonCodec<TableStatistics> codec = JsonCodec.jsonCodec(TableStatistics.class);
    String json = codec.toJson(expectedTableStatistics);

    // NOTE(review): every literal below carries a single leading space; confirm
    // this matches the indentation the codec actually emits.
    assertEquals(json, "{\n"
            + " \"rowCount\" : {\n"
            + " \"value\" : 30.0\n"
            + " },\n"
            + " \"totalSize\" : {\n"
            + " \"value\" : \"NaN\"\n"
            + " },\n"
            + " \"columnStatistics\" : {\n"
            + " \"tpcds:web_site_sk\" : {\n"
            + " \"nullsFraction\" : {\n"
            + " \"value\" : 0.0\n"
            + " },\n"
            + " \"distinctValuesCount\" : {\n"
            + " \"value\" : 30.0\n"
            + " },\n"
            + " \"dataSize\" : {\n"
            + " \"value\" : \"NaN\"\n"
            + " },\n"
            + " \"range\" : {\n"
            + " \"min\" : 1.0,\n"
            + " \"max\" : 30.0\n"
            + " }\n"
            + " }\n"
            + " }\n"
            + "}");
}
Aggregations