Use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.
Class TestTpcdsMetadataStatistics, method testNullFraction.
/**
 * Checks the column statistics reported for the {@code WEB_REC_END_DATE} column of the
 * {@code sf1} {@code WEB_SITE} table: a nulls fraction of 0.5, 3 distinct values and a
 * value range of 10819 to 11549.
 */
@Test
public void testNullFraction() {
    SchemaTableName webSite = new SchemaTableName("sf1", Table.WEB_SITE.getName());
    ConnectorTableHandle handle = metadata.getTableHandle(session, webSite);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, handle);
    TableStatistics reported = metadata.getTableStatistics(
            session,
            handle,
            Optional.empty(),
            ImmutableList.copyOf(handlesByName.values()),
            alwaysTrue());

    // some null values
    ColumnStatistics actual = reported.getColumnStatistics()
            .get(handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName()));
    ColumnStatistics expected = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(3))
            .setRange(new DoubleRange(10819L, 11549L))
            .build();
    assertColumnStatistics(actual, expected);
}
Use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.
Class AbstractTestHiveClient, method testPartitionStatisticsSampling.
/**
 * Asserts that table statistics fetched with {@code sampleSize(1)} equal those fetched with
 * {@code sampleSize(2)} when both partitions of the table carry the same statistics.
 *
 * <p>A temporary partitioned table is created, {@code statistics} is written to the partitions
 * {@code ds=2016-01-01} and {@code ds=2016-01-02}, and the table is dropped in the
 * {@code finally} block whether or not the assertion passes.
 *
 * @param columns columns used to create the temporary partitioned table
 * @param statistics statistics stored for each of the two partitions
 * @throws Exception if any of the helper calls fails
 */
protected void testPartitionStatisticsSampling(List<ColumnMetadata> columns, PartitionStatistics statistics) throws Exception {
    SchemaTableName table = temporaryTable("test_partition_statistics_sampling");
    try {
        createDummyPartitionedTable(table, columns);

        // Overwrite whatever statistics each partition currently has with the supplied ones.
        ExtendedHiveMetastore metastore = getMetastoreClient();
        for (String partitionName : ImmutableList.of("ds=2016-01-01", "ds=2016-01-02")) {
            metastore.updatePartitionStatistics(
                    METASTORE_CONTEXT,
                    table.getSchemaName(),
                    table.getTableName(),
                    partitionName,
                    current -> statistics);
        }

        try (Transaction transaction = newTransaction()) {
            ConnectorSession connectorSession = newSession();
            ConnectorMetadata connectorMetadata = transaction.getMetadata();
            ConnectorTableHandle handle = connectorMetadata.getTableHandle(connectorSession, table);
            List<ColumnHandle> everyColumn = ImmutableList.copyOf(
                    connectorMetadata.getColumnHandles(connectorSession, handle).values());

            TableStatistics withSampleOfTwo = connectorMetadata.getTableStatistics(
                    sampleSize(2), handle, Optional.empty(), everyColumn, Constraint.alwaysTrue());
            TableStatistics withSampleOfOne = connectorMetadata.getTableStatistics(
                    sampleSize(1), handle, Optional.empty(), everyColumn, Constraint.alwaysTrue());

            assertEquals(withSampleOfOne, withSampleOfTwo);
        }
    }
    finally {
        dropTable(table);
    }
}
Use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.
Class MetastoreHiveStatisticsProvider, method createZeroStatistics.
/**
 * Builds table statistics in which every estimate is zero.
 *
 * <p>Row count and total size are set to zero. For each entry of {@code columns} the nulls
 * fraction and distinct values count are set to zero, and the data size is set to zero only
 * when {@code hasDataSize} accepts the column's type.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; must have an entry for every key of
 *        {@code columns}, otherwise the {@code verify} check fails
 * @return the all-zero statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes) {
    TableStatistics.Builder zeroTable = TableStatistics.builder();
    zeroTable.setRowCount(Estimate.of(0));
    zeroTable.setTotalSize(Estimate.of(0));

    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String name = column.getKey();
        Type type = columnTypes.get(name);
        verify(type != null, "columnType is missing for column: %s", name);

        ColumnStatistics.Builder zeroColumn = ColumnStatistics.builder()
                .setNullsFraction(Estimate.of(0))
                .setDistinctValuesCount(Estimate.of(0));
        if (hasDataSize(type)) {
            zeroColumn.setDataSize(Estimate.of(0));
        }
        zeroTable.setColumnStatistics(column.getValue(), zeroColumn.build());
    }

    return zeroTable.build();
}
Use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.
Class MetastoreHiveStatisticsProvider, method getTableStatistics.
/**
 * Builds table statistics by scaling per-partition averages up to the number of queried
 * partitions.
 *
 * <p>Returns {@code TableStatistics.empty()} when {@code statistics} is empty or when no average
 * row count per partition can be calculated. Otherwise the row count is the average rows per
 * partition multiplied by {@code partitions.size()}; the total size is set the same way, but
 * only when an average size per partition is available. Column statistics are produced by
 * {@code createPartitionColumnStatistics} for partition-key columns and by
 * {@code createDataColumnStatistics} for all other columns.
 *
 * @param columns column handles keyed by column name; each value is cast to {@code HiveColumnHandle}
 * @param columnTypes column types keyed by column name
 * @param partitions queried partitions; must not be empty when {@code statistics} is non-empty
 * @param statistics partition statistics; the keys are not read here (presumably partition names)
 * @return the table statistics, or empty statistics as described above
 */
private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
    if (statistics.isEmpty()) {
        return TableStatistics.empty();
    }
    checkArgument(!partitions.isEmpty(), "partitions is empty");

    OptionalDouble averageRows = calculateAverageRowsPerPartition(statistics.values());
    if (!averageRows.isPresent()) {
        return TableStatistics.empty();
    }
    double rowsPerPartition = averageRows.getAsDouble();
    verify(rowsPerPartition >= 0, "averageRowsPerPartition must be greater than or equal to zero");

    int partitionCount = partitions.size();
    double estimatedRows = rowsPerPartition * partitionCount;

    TableStatistics.Builder builder = TableStatistics.builder();
    builder.setRowCount(Estimate.of(estimatedRows));

    // Total size is optional: it stays unset when no average size is available.
    OptionalDouble averageSize = calculateAverageSizePerPartition(statistics.values());
    if (averageSize.isPresent()) {
        double sizePerPartition = averageSize.getAsDouble();
        verify(sizePerPartition >= 0, "averageSizePerPartition must be greater than or equal to zero: %s", sizePerPartition);
        double estimatedSize = sizePerPartition * partitionCount;
        builder.setTotalSize(Estimate.of(estimatedSize));
    }

    for (Map.Entry<String, ColumnHandle> entry : columns.entrySet()) {
        String name = entry.getKey();
        HiveColumnHandle handle = (HiveColumnHandle) entry.getValue();
        Type type = columnTypes.get(name);

        ColumnStatistics perColumn = handle.isPartitionKey()
                ? createPartitionColumnStatistics(handle, type, partitions, statistics, rowsPerPartition, estimatedRows)
                : createDataColumnStatistics(name, type, estimatedRows, statistics.values());
        builder.setColumnStatistics(handle, perColumn);
    }

    return builder.build();
}
Use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.
Class TableScanStatsRule, method doCalculate.
/**
 * Produces a stats estimate for a table scan from the table statistics returned by
 * {@code metadata.getTableStatistics} for the node's table, assigned columns and current
 * constraint.
 *
 * <p>Each assigned variable gets an estimate converted from its column's statistics, or
 * {@code VariableStatsEstimate.unknown()} when the table statistics have no entry for that
 * column. The result is always present and always marked confident. The {@code sourceStats},
 * {@code lookup} and {@code types} arguments are not read by this method.
 *
 * @param node table scan node to estimate
 * @param session session passed through to the table statistics lookup
 * @return the estimate, wrapped in a non-empty {@code Optional}
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    Constraint<ColumnHandle> scanConstraint = new Constraint<>(node.getCurrentConstraint());

    TableStatistics scanStatistics = metadata.getTableStatistics(
            session,
            node.getTable(),
            ImmutableList.copyOf(node.getAssignments().values()),
            scanConstraint);

    Map<VariableReferenceExpression, VariableStatsEstimate> variableEstimates = new HashMap<>();
    node.getAssignments().forEach((variable, column) -> {
        Optional<ColumnStatistics> known = Optional.ofNullable(scanStatistics.getColumnStatistics().get(column));
        VariableStatsEstimate estimate = known
                .map(columnStats -> StatsUtil.toVariableStatsEstimate(scanStatistics, columnStats))
                .orElse(VariableStatsEstimate.unknown());
        variableEstimates.put(variable, estimate);
    });

    PlanNodeStatsEstimate scanEstimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(scanStatistics.getRowCount().getValue())
            .setTotalSize(scanStatistics.getTotalSize().getValue())
            .setConfident(true)
            .addVariableStatistics(variableEstimates)
            .build();
    return Optional.of(scanEstimate);
}
Aggregations