Use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
Class TestConnectorFilterStatsCalculatorService, method setUp.
/**
 * Builds the fixtures shared by the tests in this class: the session, the
 * filter-stats calculator service under test, column statistics for the
 * DOUBLE variable {@code x}, several table-statistics inputs (zero rows,
 * with total size, without total size), the type provider and the
 * expression translator.
 */
@BeforeClass
public void setUp() {
    session = testSessionBuilder().build();

    // Service under test, wired on top of a test metadata manager.
    MetadataManager metadata = MetadataManager.createTestMetadataManager();
    ScalarStatsCalculator scalarStatsCalculator = new ScalarStatsCalculator(metadata);
    FilterStatsCalculator statsCalculator = new FilterStatsCalculator(metadata, scalarStatsCalculator, new StatsNormalizer());
    statsCalculatorService = new ConnectorFilterStatsCalculatorService(statsCalculator);

    // Column x: 40 distinct values in [-10, 10], 25% nulls.
    xStats = ColumnStatistics.builder()
            .setDistinctValuesCount(Estimate.of(40))
            .setRange(new DoubleRange(-10, 10))
            .setNullsFraction(Estimate.of(0.25))
            .build();

    // Table with zero rows and zero total size.
    zeroTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .build();

    // 100-row table carrying the x statistics, with a total size set.
    originalTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(100))
            .setTotalSize(Estimate.of(800))
            .setColumnStatistics(xColumn, xStats)
            .build();

    // Same table, but with the total size left unset.
    originalTableStatisticsWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(100))
            .setColumnStatistics(xColumn, xStats)
            .build();

    // Only one variable is known to the type provider: x of type DOUBLE.
    VariableReferenceExpression xVariable = new VariableReferenceExpression(Optional.empty(), "x", DOUBLE);
    standardTypes = TypeProvider.fromVariables(ImmutableList.of(xVariable));

    // The translator gets its own, separately created test metadata manager.
    translator = new TestingRowExpressionTranslator(MetadataManager.createTestMetadataManager());
}
Use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
Class ConnectorFilterStatsCalculatorService, method toColumnStatistics.
/**
 * Converts a planner-side {@link VariableStatsEstimate} into connector-facing
 * {@link ColumnStatistics}.
 *
 * <p>Each statistic is copied only when its value is usable; anything that is
 * NaN (unknown) is left unset on the builder. The data size is derived as
 * {@code averageRowSize * rowCount * (1 - nullsFraction)} and is therefore
 * only set when that whole product is a finite number.
 *
 * @param variableStatsEstimate estimate for a single variable
 * @param rowCount row count of the relation the estimate belongs to; may be NaN
 * @return the converted statistics, or {@link ColumnStatistics#empty()} when
 *         the estimate is entirely unknown
 */
private static ColumnStatistics toColumnStatistics(VariableStatsEstimate variableStatsEstimate, double rowCount) {
    if (variableStatsEstimate.isUnknown()) {
        return ColumnStatistics.empty();
    }

    Builder builder = ColumnStatistics.builder();

    double nullsFraction = variableStatsEstimate.getNullsFraction();
    if (!Double.isNaN(nullsFraction)) {
        builder.setNullsFraction(Estimate.of(nullsFraction));
    }

    double distinctValuesCount = variableStatsEstimate.getDistinctValuesCount();
    if (!Double.isNaN(distinctValuesCount)) {
        builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    }

    // The data size depends on three inputs, any of which may be NaN. Checking
    // only averageRowSize is not enough: an unknown nullsFraction or rowCount
    // makes the product NaN, and Estimate.of rejects NaN and infinite values
    // with an IllegalArgumentException. Validate the final product instead.
    double nonNullRowsCount = rowCount * (1.0 - nullsFraction);
    double dataSize = variableStatsEstimate.getAverageRowSize() * nonNullRowsCount;
    if (Double.isFinite(dataSize)) {
        builder.setDataSize(Estimate.of(dataSize));
    }

    double lowValue = variableStatsEstimate.getLowValue();
    double highValue = variableStatsEstimate.getHighValue();
    if (!Double.isNaN(lowValue) && !Double.isNaN(highValue)) {
        builder.setRange(new DoubleRange(lowValue, highValue));
    }

    return builder.build();
}
Use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
Class TestConnectorFilterStatsCalculatorService, method testTableStatisticsAfterFilter.
/**
 * Checks the table statistics returned after applying a filter predicate,
 * covering zero-row input, completely unknown input, a predicate that
 * removes every row, and the selective predicate {@code x < 0} with and
 * without a total size on the input.
 */
@Test
public void testTableStatisticsAfterFilter() {
    // Zero-row input: rowCount and totalSize stay zero for any predicate.
    assertPredicate("true", zeroTableStatistics, zeroTableStatistics);
    assertPredicate("x < 3e0", zeroTableStatistics, zeroTableStatistics);
    assertPredicate("false", zeroTableStatistics, zeroTableStatistics);

    // Unknown input (rowCount and totalSize both NaN) stays unknown under "true".
    assertPredicate("true", TableStatistics.empty(), TableStatistics.empty());

    // "false" turns unknown rowCount and totalSize into 0.0.
    TableStatistics expectedZeroFromUnknown = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .build();
    assertPredicate("false", TableStatistics.empty(), expectedZeroFromUnknown);

    // "false" over known statistics: zero rows, zero size, and column x statistics as constructed below.
    ColumnStatistics expectedXStatsAllFiltered = new ColumnStatistics(Estimate.of(1.0), Estimate.zero(), Estimate.zero(), Optional.empty());
    TableStatistics expectedAllFiltered = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .setColumnStatistics(xColumn, expectedXStatsAllFiltered)
            .build();
    assertPredicate("false", originalTableStatistics, expectedAllFiltered);

    // "x < 0": the expected column statistics for x are the same whether or not the input has a total size.
    ColumnStatistics expectedXStatsBelowZero = new ColumnStatistics(Estimate.zero(), Estimate.of(20), Estimate.unknown(), Optional.of(new DoubleRange(-10, 0)));

    TableStatistics expectedBelowZero = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setTotalSize(Estimate.of(300))
            .setColumnStatistics(xColumn, expectedXStatsBelowZero)
            .build();
    assertPredicate("x < 0", originalTableStatistics, expectedBelowZero);

    // Without a total size on the input, none is expected on the output either.
    TableStatistics expectedBelowZeroWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setColumnStatistics(xColumn, expectedXStatsBelowZero)
            .build();
    assertPredicate("x < 0", originalTableStatisticsWithoutTotalSize, expectedBelowZeroWithoutTotalSize);
}
Use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
Class TableStatisticsMaker, method makeTableStatistics.
/**
 * Computes table statistics for an Iceberg table by aggregating the
 * per-file metrics of every data file that matches the given constraint.
 *
 * <p>Returns {@link TableStatistics#empty()} when the handle has no snapshot,
 * when the constraint (alone or intersected with the handle's predicate)
 * is {@code none}, or when no data file matches. Otherwise the result
 * carries the summed record count and file size, plus per-column nulls
 * fraction, data size and (for numeric bounds) value range where the
 * aggregated metrics provide them.
 *
 * @param tableHandle handle supplying the snapshot id and pushed-down predicate
 * @param constraint additional constraint used to prune data files
 * @return the aggregated statistics, or empty statistics as described above
 * @throws UncheckedIOException if closing the file-scan iterable fails
 */
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (!tableHandle.getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }

    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary()
            .transform(IcebergColumnHandle.class::cast)
            .intersect(tableHandle.getPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }

    // Schema-derived lookups: primitive type per field id, and the primitive columns that are not identity partitions.
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = columns.stream()
            .filter(column -> column.type().isPrimitiveType())
            .collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream()
            .map(PartitionField::sourceId)
            .collect(toSet());
    List<Types.NestedField> nonPartitionPrimitiveColumns = columns.stream()
            .filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType())
            .collect(toImmutableList());
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTable.schema(), typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream()
            .collect(toImmutableMap(IcebergColumnHandle::getId, identity()));

    // Per-partition-field details, keyed by the source column id; passed to dataFileMatches below.
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.sourceId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toPrestoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.build();

    TableScan tableScan = icebergTable.newScan()
            .filter(toIcebergExpression(intersection))
            .useSnapshot(tableHandle.getSnapshotId().get())
            .includeColumnStats();

    // Fold every matching data file into a single summary; it stays null when nothing matches.
    Partition summary = null;
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, idToTypeMapping, partitionFields, idToDetails)) {
                continue;
            }
            if (summary == null) {
                // The first matching file seeds the summary.
                summary = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, dataFile.partition(), dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(idToTypeMapping, dataFile.lowerBounds()), toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
            } else {
                summary.incrementFileCount();
                summary.incrementRecordCount(dataFile.recordCount());
                summary.incrementSize(dataFile.fileSizeInBytes());
                updateSummaryMin(summary, partitionFields, toMap(idToTypeMapping, dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                updateSummaryMax(summary, partitionFields, toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                summary.updateNullCount(dataFile.nullValueCounts());
                updateColumnSizes(summary, dataFile.columnSizes());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }

    if (summary == null) {
        return TableStatistics.empty();
    }

    double recordCount = summary.getRecordCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(recordCount));
    result.setTotalSize(Estimate.of(summary.getSize()));

    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();

        Long nullCount = summary.getNullCounts().get(fieldId);
        // When the matching files hold no records, recordCount is 0 and the
        // division would yield NaN or Infinity, which Estimate.of rejects with
        // an IllegalArgumentException. Leave the nulls fraction unset then.
        if (nullCount != null && recordCount > 0) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }

        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }

        // A range is only reported when both aggregated bounds are numeric.
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min instanceof Number && max instanceof Number) {
            columnBuilder.setRange(Optional.of(new DoubleRange(((Number) min).doubleValue(), ((Number) max).doubleValue())));
        }

        result.setColumnStatistics(columnHandle, columnBuilder.build());
    }
    return result.build();
}
Aggregations