Search in sources :

Example 6 with DoubleRange

use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.

the class TestConnectorFilterStatsCalculatorService method setUp.

/**
 * Prepares the fixtures shared by the tests in this class: the session, the
 * statistics calculator service under test, the column/table statistics used
 * as inputs, the variable types, and the expression translator.
 */
@BeforeClass
public void setUp() {
    session = testSessionBuilder().build();

    // Service under test, wired on top of a test metadata manager.
    MetadataManager metadata = MetadataManager.createTestMetadataManager();
    ScalarStatsCalculator scalarStatsCalculator = new ScalarStatsCalculator(metadata);
    FilterStatsCalculator statsCalculator = new FilterStatsCalculator(metadata, scalarStatsCalculator, new StatsNormalizer());
    statsCalculatorService = new ConnectorFilterStatsCalculatorService(statsCalculator);

    // Column "x": 40 distinct values in [-10, 10], a quarter of the rows are null.
    xStats = ColumnStatistics.builder()
            .setDistinctValuesCount(Estimate.of(40))
            .setRange(new DoubleRange(-10, 10))
            .setNullsFraction(Estimate.of(0.25))
            .build();

    // Table-level inputs: an empty table, and a 100-row table with and without a total size.
    zeroTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .build();
    originalTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(100))
            .setTotalSize(Estimate.of(800))
            .setColumnStatistics(xColumn, xStats)
            .build();
    originalTableStatisticsWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(100))
            .setColumnStatistics(xColumn, xStats)
            .build();

    // The only variable known to the type provider is the DOUBLE variable "x".
    VariableReferenceExpression xVariable = new VariableReferenceExpression(Optional.empty(), "x", DOUBLE);
    standardTypes = TypeProvider.fromVariables(ImmutableList.of(xVariable));

    // The translator gets its own, separately created test metadata manager.
    translator = new TestingRowExpressionTranslator(MetadataManager.createTestMetadataManager());
}
Also used : DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) MetadataManager(com.facebook.presto.metadata.MetadataManager) TestingRowExpressionTranslator(com.facebook.presto.sql.TestingRowExpressionTranslator) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) BeforeClass(org.testng.annotations.BeforeClass)

Example 7 with DoubleRange

use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.

the class ConnectorFilterStatsCalculatorService method toColumnStatistics.

/**
 * Converts a {@code VariableStatsEstimate} into connector-facing {@code ColumnStatistics}.
 *
 * <p>Each statistic is copied only when its value is not NaN; statistics whose value is
 * NaN are left unset on the builder. The data size is derived as
 * {@code averageRowSize * rowCount * (1 - nullsFraction)}.
 *
 * @param variableStatsEstimate the estimate to convert
 * @param rowCount the row count used to scale the average row size into a total data size
 * @return {@code ColumnStatistics.empty()} when the estimate is unknown, otherwise the
 *         statistics built from the non-NaN parts of the estimate
 */
private static ColumnStatistics toColumnStatistics(VariableStatsEstimate variableStatsEstimate, double rowCount) {
    if (variableStatsEstimate.isUnknown()) {
        return ColumnStatistics.empty();
    }
    Builder builder = ColumnStatistics.builder();

    double nullsFraction = variableStatsEstimate.getNullsFraction();
    if (!Double.isNaN(nullsFraction)) {
        builder.setNullsFraction(Estimate.of(nullsFraction));
    }

    double distinctValuesCount = variableStatsEstimate.getDistinctValuesCount();
    if (!Double.isNaN(distinctValuesCount)) {
        builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    }

    // Guard the computed product rather than only the average row size: the product is
    // also NaN when rowCount or the nulls fraction is NaN (or for 0 * Infinity), and a
    // NaN must not reach Estimate.of any more than it may for the other statistics.
    double nonNullRowsCount = rowCount * (1.0 - nullsFraction);
    double dataSize = variableStatsEstimate.getAverageRowSize() * nonNullRowsCount;
    if (!Double.isNaN(dataSize)) {
        builder.setDataSize(Estimate.of(dataSize));
    }

    // A range is only meaningful when both bounds are known.
    double lowValue = variableStatsEstimate.getLowValue();
    double highValue = variableStatsEstimate.getHighValue();
    if (!Double.isNaN(lowValue) && !Double.isNaN(highValue)) {
        builder.setRange(new DoubleRange(lowValue, highValue));
    }
    return builder.build();
}
Also used : DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) Builder(com.facebook.presto.spi.statistics.ColumnStatistics.Builder)

Example 8 with DoubleRange

use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.

the class TestConnectorFilterStatsCalculatorService method testTableStatisticsAfterFilter.

/**
 * Checks the table statistics produced after applying a predicate, for zero,
 * empty (unknown), and populated input statistics.
 */
@Test
public void testTableStatisticsAfterFilter() {
    // Zero-row input: the statistics stay zero whatever the predicate is.
    assertPredicate("true", zeroTableStatistics, zeroTableStatistics);
    assertPredicate("x < 3e0", zeroTableStatistics, zeroTableStatistics);
    assertPredicate("false", zeroTableStatistics, zeroTableStatistics);

    // Unknown input with a pass-through predicate: rowCount and totalSize remain unknown.
    assertPredicate("true", TableStatistics.empty(), TableStatistics.empty());

    // Unknown input with an always-false predicate: rowCount and totalSize become 0.0.
    TableStatistics emptyFilteredToZero = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .build();
    assertPredicate("false", TableStatistics.empty(), emptyFilteredToZero);

    // Populated input with an always-false predicate.
    ColumnStatistics xStatsFilteredToZero = new ColumnStatistics(Estimate.of(1.0), Estimate.zero(), Estimate.zero(), Optional.empty());
    TableStatistics filteredToZeroStatistics = TableStatistics.builder()
            .setRowCount(Estimate.zero())
            .setTotalSize(Estimate.zero())
            .setColumnStatistics(xColumn, xStatsFilteredToZero)
            .build();
    assertPredicate("false", originalTableStatistics, filteredToZeroStatistics);

    // Populated input with a range predicate, total size known.
    ColumnStatistics xStatsBelowZero = new ColumnStatistics(Estimate.zero(), Estimate.of(20), Estimate.unknown(), Optional.of(new DoubleRange(-10, 0)));
    TableStatistics filteredStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setTotalSize(Estimate.of(300))
            .setColumnStatistics(xColumn, xStatsBelowZero)
            .build();
    assertPredicate("x < 0", originalTableStatistics, filteredStatistics);

    // Same range predicate, but the input carries no total size.
    ColumnStatistics xStatsBelowZeroWithoutTotalSize = new ColumnStatistics(Estimate.zero(), Estimate.of(20), Estimate.unknown(), Optional.of(new DoubleRange(-10, 0)));
    TableStatistics filteredStatisticsWithoutTotalSize = TableStatistics.builder()
            .setRowCount(Estimate.of(37.5))
            .setColumnStatistics(xColumn, xStatsBelowZeroWithoutTotalSize)
            .build();
    assertPredicate("x < 0", originalTableStatisticsWithoutTotalSize, filteredStatisticsWithoutTotalSize);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) Test(org.testng.annotations.Test)

Example 9 with DoubleRange

use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.

the class TableStatisticsMaker method makeTableStatistics.

/**
 * Computes table statistics for an Iceberg table by aggregating the per-file metrics of
 * every data file that matches the given constraint.
 *
 * <p>Returns {@code TableStatistics.empty()} when the handle carries no snapshot id, when
 * the constraint summary (or its intersection with the handle's predicate) is none, or
 * when no data file matches.
 *
 * @param tableHandle the table handle supplying the snapshot id and the pushed-down predicate
 * @param constraint the constraint used to filter the scanned data files
 * @return the aggregated row count, total size and per-column statistics
 * @throws UncheckedIOException if closing the planned file scan fails with an {@code IOException}
 */
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (!tableHandle.getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary().transform(IcebergColumnHandle.class::cast).intersect(tableHandle.getPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }

    // Schema-derived lookup structures: primitive column types by field id, partition
    // fields, and the primitive columns that are not identity partitions.
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = columns.stream().filter(column -> column.type().isPrimitiveType()).collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream().map(PartitionField::sourceId).collect(toSet());
    List<Types.NestedField> nonPartitionPrimitiveColumns = columns.stream().filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType()).collect(toImmutableList());
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTable.schema(), typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream().collect(toImmutableMap(IcebergColumnHandle::getId, identity()));

    // Per-partition-field details, keyed by the source column id of the partition field.
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.sourceId(), new ColumnFieldDetails(field, idToColumnHandle.get(field.sourceId()), type, toPrestoType(type, typeManager), type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.build();

    TableScan tableScan = icebergTable.newScan().filter(toIcebergExpression(intersection)).useSnapshot(tableHandle.getSnapshotId().get()).includeColumnStats();

    // Fold every matching data file into a single summary; it stays null until the first match.
    Partition summary = null;
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, idToTypeMapping, partitionFields, idToDetails)) {
                continue;
            }
            if (summary == null) {
                // First matching file seeds the summary with its own metrics.
                summary = new Partition(idToTypeMapping, nonPartitionPrimitiveColumns, dataFile.partition(), dataFile.recordCount(), dataFile.fileSizeInBytes(), toMap(idToTypeMapping, dataFile.lowerBounds()), toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.columnSizes());
            } else {
                // Subsequent files are merged into the running summary.
                summary.incrementFileCount();
                summary.incrementRecordCount(dataFile.recordCount());
                summary.incrementSize(dataFile.fileSizeInBytes());
                updateSummaryMin(summary, partitionFields, toMap(idToTypeMapping, dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                updateSummaryMax(summary, partitionFields, toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                summary.updateNullCount(dataFile.nullValueCounts());
                updateColumnSizes(summary, dataFile.columnSizes());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    if (summary == null) {
        return TableStatistics.empty();
    }

    double recordCount = summary.getRecordCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(recordCount));
    result.setTotalSize(Estimate.of(summary.getSize()));
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        // Only derive a nulls fraction when there are rows to divide by: with a zero
        // record count the quotient would be NaN (0 / 0.0) or infinite, which is not a
        // usable estimate. The fraction is left unset in that case.
        if (nullCount != null && recordCount > 0) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        // A value range is reported only when both bounds are numeric.
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min instanceof Number && max instanceof Number) {
            columnBuilder.setRange(Optional.of(new DoubleRange(((Number) min).doubleValue(), ((Number) max).doubleValue())));
        }
        result.setColumnStatistics(columnHandle, columnBuilder.build());
    }
    return result.build();
}
Also used : Types(org.apache.iceberg.types.Types) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) PartitionField(org.apache.iceberg.PartitionField) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) ImmutableList(com.google.common.collect.ImmutableList) Partition.toMap(com.facebook.presto.iceberg.Partition.toMap) TypeManager(com.facebook.presto.common.type.TypeManager) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) IcebergUtil.getIdentityPartitions(com.facebook.presto.iceberg.IcebergUtil.getIdentityPartitions) FileScanTask(org.apache.iceberg.FileScanTask) DataFile(org.apache.iceberg.DataFile) ExpressionConverter.toIcebergExpression(com.facebook.presto.iceberg.ExpressionConverter.toIcebergExpression) IcebergUtil.getColumns(com.facebook.presto.iceberg.IcebergUtil.getColumns) Collectors.toSet(java.util.stream.Collectors.toSet) Comparators(org.apache.iceberg.types.Comparators) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) CloseableIterable(org.apache.iceberg.io.CloseableIterable) NullableValue(com.facebook.presto.common.predicate.NullableValue) ImmutableMap(com.google.common.collect.ImmutableMap) Table(org.apache.iceberg.Table) Predicate(java.util.function.Predicate) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Constraint(com.facebook.presto.spi.Constraint) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Type(org.apache.iceberg.types.Type) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Estimate(com.facebook.presto.spi.statistics.Estimate) Function.identity(java.util.function.Function.identity) 
Optional(java.util.Optional) Comparator(java.util.Comparator) Types(org.apache.iceberg.types.Types) UncheckedIOException(java.io.UncheckedIOException) DataFile(org.apache.iceberg.DataFile) PartitionField(org.apache.iceberg.PartitionField) ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) TableScan(org.apache.iceberg.TableScan) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Constraint(com.facebook.presto.spi.Constraint) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) TypeConverter.toPrestoType(com.facebook.presto.iceberg.TypeConverter.toPrestoType) Type(org.apache.iceberg.types.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) FileScanTask(org.apache.iceberg.FileScanTask)

Aggregations

DoubleRange (com.facebook.presto.spi.statistics.DoubleRange)9 TableStatistics (com.facebook.presto.spi.statistics.TableStatistics)7 Test (org.testng.annotations.Test)5 HiveBasicStatistics (com.facebook.presto.hive.HiveBasicStatistics)3 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)3 PartitionStatistics (com.facebook.presto.hive.metastore.PartitionStatistics)3 ColumnHandle (com.facebook.presto.spi.ColumnHandle)3 SchemaTableName (com.facebook.presto.spi.SchemaTableName)3 CacheConfig (com.facebook.presto.cache.CacheConfig)2 NullableValue (com.facebook.presto.common.predicate.NullableValue)2 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)2 HivePartition (com.facebook.presto.hive.HivePartition)2 HiveSessionProperties (com.facebook.presto.hive.HiveSessionProperties)2 OrcFileWriterConfig (com.facebook.presto.hive.OrcFileWriterConfig)2 ParquetFileWriterConfig (com.facebook.presto.hive.ParquetFileWriterConfig)2 MetastoreHiveStatisticsProvider.validatePartitionStatistics (com.facebook.presto.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics)2 ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle)2 ColumnStatistics (com.facebook.presto.spi.statistics.ColumnStatistics)2 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)2 Logger (com.facebook.airlift.log.Logger)1