Use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.
From class TestDeltaLakeMetastoreStatistics, method testStatisticsInfinityAndNaN.
@Test
public void testStatisticsInfinityAndNaN()
{
    // Stats with NaN values cannot be used
    DeltaLakeTableHandle tableHandle = registerTable("infinity_nan");
    TableStatistics stats = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    ColumnStatistics columnStatistics = stats.getColumnStatistics().get(COLUMN_HANDLE);
    assertEquals(columnStatistics.getRange().get().getMin(), POSITIVE_INFINITY);
    assertEquals(columnStatistics.getRange().get().getMax(), POSITIVE_INFINITY);
}
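The assertions above go through ColumnStatistics.getRange(), which returns an Optional<DoubleRange>. A minimal sketch of the same round trip using the builder API (the values below are illustrative, not from the Trino sources):

// Sketch: build ColumnStatistics by hand and read the range back,
// mirroring what the test asserts for the "infinity_nan" table.
ColumnStatistics sketch = ColumnStatistics.builder()
        .setNullsFraction(Estimate.of(0.0))
        .setRange(new DoubleRange(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY))
        .build();
DoubleRange range = sketch.getRange().orElseThrow();
assert range.getMin() == Double.POSITIVE_INFINITY;
assert range.getMax() == Double.POSITIVE_INFINITY;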
Use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.
From class TestDeltaLakeMetastoreStatistics, method testStatisticsParquetParsedStatisticsNaNValues.
@Test
public void testStatisticsParquetParsedStatisticsNaNValues()
{
    // The transaction log for this table was created so that the checkpoints only write struct statistics, not JSON statistics
    // The table has a REAL column and a DOUBLE column, each with 9 values, one of them being NaN
    DeltaLakeTableHandle tableHandle = registerTable("parquet_struct_statistics_nan");
    TableStatistics stats = deltaLakeMetastore.getTableStatistics(SESSION, tableHandle, Constraint.alwaysTrue());
    assertEquals(stats.getRowCount(), Estimate.of(9));

    Map<ColumnHandle, ColumnStatistics> statisticsMap = stats.getColumnStatistics();

    // NaN invalidates the min/max, so no range is published for either column
    ColumnStatistics columnStats = statisticsMap.get(new DeltaLakeColumnHandle("fl", REAL, REGULAR));
    assertEquals(columnStats.getNullsFraction(), Estimate.zero());
    assertThat(columnStats.getRange()).isEmpty();

    columnStats = statisticsMap.get(new DeltaLakeColumnHandle("dou", DOUBLE, REGULAR));
    assertEquals(columnStats.getNullsFraction(), Estimate.zero());
    assertThat(columnStats.getRange()).isEmpty();
}
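The empty-range assertions lean on Estimate, Trino's wrapper for possibly-unknown double statistics. A quick illustrative sketch of the contract relied on above:

// Estimate distinguishes a known zero from an unknown value.
Estimate zero = Estimate.zero();
assert !zero.isUnknown() && zero.getValue() == 0.0;
Estimate unknown = Estimate.unknown();
assert unknown.isUnknown();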
Use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.
From class TableScanStatsRule, method doCalculate.
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    if (isStatisticsPrecalculationForPushdownEnabled(session) && node.getStatistics().isPresent()) {
        return node.getStatistics();
    }

    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    Constraint constraint = new Constraint(TupleDomain.all());

    TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint);
    Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>();
    for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) {
        Symbol symbol = entry.getKey();
        Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue()));
        SymbolStatsEstimate symbolStatistics = columnStatistics
                .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
                .orElse(SymbolStatsEstimate.unknown());
        outputSymbolStats.put(symbol, symbolStatistics);
    }

    return Optional.of(PlanNodeStatsEstimate.builder()
            .setOutputRowCount(tableStatistics.getRowCount().getValue())
            .addSymbolStatistics(outputSymbolStats)
            .build());
}
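The per-column conversion happens in toSymbolStatistics, which this excerpt does not include. A hedged sketch of such a mapping, derived only from the ColumnStatistics and SymbolStatsEstimate accessors visible above (the average-row-size handling is an assumption):

// Sketch only, not the actual Trino helper; the real implementation may
// differ, e.g. in how average row size is computed for fixed-width types.
// The type parameter matches the call site; the real helper uses it for sizing.
private static SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics, Type type)
{
    double nullsFraction = columnStatistics.getNullsFraction().getValue();
    double nonNullRows = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction);

    SymbolStatsEstimate.Builder result = SymbolStatsEstimate.builder()
            .setNullsFraction(nullsFraction)
            .setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
    if (nonNullRows > 0) {
        // Spread the column's total data size evenly over its non-null rows
        result.setAverageRowSize(columnStatistics.getDataSize().getValue() / nonNullRows);
    }
    columnStatistics.getRange().ifPresent(range -> {
        result.setLowValue(range.getMin());
        result.setHighValue(range.getMax());
    });
    return result.build();
}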
Use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.
From class HiveMetastoreBackedDeltaLakeMetastore, method createZeroStatistics.
private TableStatistics createZeroStatistics(List<DeltaLakeColumnHandle> columns)
{
    TableStatistics.Builder statsBuilder = new TableStatistics.Builder().setRowCount(Estimate.of(0));
    for (DeltaLakeColumnHandle column : columns) {
        ColumnStatistics.Builder columnStatistics = ColumnStatistics.builder();
        columnStatistics.setNullsFraction(Estimate.of(0));
        columnStatistics.setDistinctValuesCount(Estimate.of(0));
        statsBuilder.setColumnStatistics(column, columnStatistics.build());
    }
    return statsBuilder.build();
}
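A usage sketch with hypothetical column handles: returning concrete zero statistics for an empty table gives the optimizer a definite row count of 0 rather than unknown estimates.

// Hypothetical caller; idColumn and nameColumn stand in for real handles.
TableStatistics empty = createZeroStatistics(List.of(idColumn, nameColumn));
assert empty.getRowCount().equals(Estimate.of(0));
// Every column reports zero nulls and zero distinct values.
empty.getColumnStatistics().values()
        .forEach(stats -> { assert stats.getNullsFraction().equals(Estimate.of(0)); });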
Use of io.trino.spi.statistics.ColumnStatistics in project trino by trinodb.
From class TableStatisticsMaker, method makeTableStatistics.
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint)
{
    // Bail out early when there is no snapshot to read or the constraint is unsatisfiable
    if (tableHandle.getSnapshotId().isEmpty() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }

    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary()
            .transformKeys(IcebergColumnHandle.class::cast)
            .intersect(tableHandle.getEnforcedPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }

    Schema icebergTableSchema = icebergTable.schema();
    List<Types.NestedField> columns = icebergTableSchema.columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = primitiveFieldTypes(icebergTableSchema);
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTableSchema, typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream()
            .collect(toUnmodifiableMap(IcebergColumnHandle::getId, identity()));

    // Resolve each partition field to its source column handle and Trino type
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.fieldId(), new ColumnFieldDetails(
                field,
                idToColumnHandle.get(field.sourceId()),
                type,
                toTrinoType(type, typeManager),
                type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.buildOrThrow();

    // Aggregate per-file statistics over every data file matching the constraint
    TableScan tableScan = icebergTable.newScan()
            .filter(toIcebergExpression(intersection))
            .useSnapshot(tableHandle.getSnapshotId().get())
            .includeColumnStats();
    IcebergStatistics.Builder icebergStatisticsBuilder = new IcebergStatistics.Builder(columns, typeManager);
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, partitionFields, idToDetails)) {
                continue;
            }
            icebergStatisticsBuilder.acceptDataFile(dataFile, fileScanTask.spec());
        }
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }

    IcebergStatistics summary = icebergStatisticsBuilder.build();
    if (summary.getFileCount() == 0) {
        return TableStatistics.empty();
    }

    // Convert the aggregated Iceberg statistics into Trino ColumnStatistics
    ImmutableMap.Builder<ColumnHandle, ColumnStatistics> columnHandleBuilder = ImmutableMap.builder();
    double recordCount = summary.getRecordCount();
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        if (nullCount != null) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min != null && max != null) {
            columnBuilder.setRange(DoubleRange.from(columnHandle.getType(), min, max));
        }
        columnHandleBuilder.put(columnHandle, columnBuilder.build());
    }
    return new TableStatistics(Estimate.of(recordCount), columnHandleBuilder.buildOrThrow());
}
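Only nulls fraction, data size, and range are populated above; Iceberg data-file metadata carries no distinct-value counts, so getDistinctValuesCount() on the resulting ColumnStatistics remains Estimate.unknown(). A short sketch of a caller guarding for that (someColumnHandle is hypothetical):

// Hypothetical consumer of the statistics built above.
ColumnStatistics columnStats = tableStatistics.getColumnStatistics().get(someColumnHandle);
if (columnStats != null) {
    assert columnStats.getDistinctValuesCount().isUnknown();  // no NDV in Iceberg file metadata
    columnStats.getRange().ifPresent(range ->
            System.out.printf("min=%s max=%s%n", range.getMin(), range.getMax()));
}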