Search in sources:

Example 6 with TableStatistics

use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.

the class TestTpcdsMetadataStatistics method testNullFraction.

/**
 * Checks the column statistics reported for the WEB_REC_END_DATE column of the
 * "sf1" WEB_SITE table: nulls fraction, distinct values count and value range.
 */
@Test
public void testNullFraction() {
    // Resolve the table and every column handle it exposes.
    SchemaTableName webSiteName = new SchemaTableName("sf1", Table.WEB_SITE.getName());
    ConnectorTableHandle webSiteTable = metadata.getTableHandle(session, webSiteName);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, webSiteTable);

    // Request statistics for all columns with an always-true constraint.
    TableStatistics stats = metadata.getTableStatistics(
            session,
            webSiteTable,
            Optional.empty(),
            ImmutableList.copyOf(handlesByName.values()),
            alwaysTrue());

    // some null values
    ColumnHandle recEndDateHandle = handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName());
    ColumnStatistics actual = stats.getColumnStatistics().get(recEndDateHandle);
    ColumnStatistics expected = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(3))
            .setRange(new DoubleRange(10819L, 11549L))
            .build();
    assertColumnStatistics(actual, expected);
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) DoubleRange(com.facebook.presto.spi.statistics.DoubleRange) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 7 with TableStatistics

use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.

the class AbstractTestHiveClient method testPartitionStatisticsSampling.

/**
 * Creates a temporary partitioned table, stores the same {@code statistics} on the
 * partitions "ds=2016-01-01" and "ds=2016-01-02", and asserts that the table statistics
 * obtained with the session from {@code sampleSize(1)} equal those obtained with the
 * session from {@code sampleSize(2)}. The temporary table is always dropped afterwards.
 *
 * @param columns columns used to create the dummy partitioned table
 * @param statistics statistics written to each of the two partitions
 * @throws Exception if any step of the test fails
 */
protected void testPartitionStatisticsSampling(List<ColumnMetadata> columns, PartitionStatistics statistics) throws Exception {
    SchemaTableName tableName = temporaryTable("test_partition_statistics_sampling");
    try {
        createDummyPartitionedTable(tableName, columns);

        // Overwrite the statistics of both partitions with the supplied value,
        // regardless of what the metastore currently holds for them.
        ExtendedHiveMetastore metastore = getMetastoreClient();
        for (String partitionName : new String[] {"ds=2016-01-01", "ds=2016-01-02"}) {
            metastore.updatePartitionStatistics(
                    METASTORE_CONTEXT,
                    tableName.getSchemaName(),
                    tableName.getTableName(),
                    partitionName,
                    ignored -> statistics);
        }

        try (Transaction transaction = newTransaction()) {
            ConnectorSession session = newSession();
            ConnectorMetadata metadata = transaction.getMetadata();
            ConnectorTableHandle table = metadata.getTableHandle(session, tableName);
            List<ColumnHandle> everyColumn = ImmutableList.copyOf(metadata.getColumnHandles(session, table).values());

            TableStatistics withSampleSizeTwo = metadata.getTableStatistics(
                    sampleSize(2), table, Optional.empty(), everyColumn, Constraint.alwaysTrue());
            TableStatistics withSampleSizeOne = metadata.getTableStatistics(
                    sampleSize(1), table, Optional.empty(), everyColumn, Constraint.alwaysTrue());

            assertEquals(withSampleSizeOne, withSampleSizeTwo);
        }
    }
    finally {
        dropTable(tableName);
    }
}
Also used : HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle)

Example 8 with TableStatistics

use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.

the class MetastoreHiveStatisticsProvider method createZeroStatistics.

/**
 * Builds table statistics in which the row count, the total size and every
 * per-column estimate are zero.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; must contain every key of {@code columns}
 * @return statistics with zero row count, zero total size and zeroed column statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes) {
    TableStatistics.Builder tableBuilder = TableStatistics.builder();
    tableBuilder.setRowCount(Estimate.of(0));
    tableBuilder.setTotalSize(Estimate.of(0));

    for (Map.Entry<String, ColumnHandle> entry : columns.entrySet()) {
        String name = entry.getKey();
        Type type = columnTypes.get(name);
        verify(type != null, "columnType is missing for column: %s", name);

        ColumnStatistics.Builder columnBuilder = ColumnStatistics.builder()
                .setNullsFraction(Estimate.of(0))
                .setDistinctValuesCount(Estimate.of(0));
        // Data size is only reported for types that hasDataSize accepts.
        if (hasDataSize(type)) {
            columnBuilder.setDataSize(Estimate.of(0));
        }
        tableBuilder.setColumnStatistics(entry.getValue(), columnBuilder.build());
    }

    return tableBuilder.build();
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) Type(com.facebook.presto.common.type.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics)

Example 9 with TableStatistics

use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.

the class MetastoreHiveStatisticsProvider method getTableStatistics.

/**
 * Derives table-level statistics from per-partition statistics.
 *
 * <p>The row count is the average rows per partition multiplied by the number of
 * entries in {@code partitions}; the total size, when an average size is available,
 * is computed the same way. Column statistics are built separately for partition-key
 * columns and for data columns.
 *
 * @param columns column handles keyed by column name; each value is cast to {@code HiveColumnHandle}
 * @param columnTypes column types keyed by column name
 * @param partitions the partitions being queried; must be non-empty when {@code statistics} is non-empty
 * @param statistics partition statistics keyed by a string (presumably the partition name — confirm with callers)
 * @return the derived statistics, or {@code TableStatistics.empty()} when {@code statistics}
 *         is empty or no average row count can be calculated
 */
private static TableStatistics getTableStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes, List<HivePartition> partitions, Map<String, PartitionStatistics> statistics) {
    // Without any partition statistics there is nothing to estimate from.
    if (statistics.isEmpty()) {
        return TableStatistics.empty();
    }
    checkArgument(!partitions.isEmpty(), "partitions is empty");

    // Row count: average rows per partition times the number of partitions.
    OptionalDouble averageRows = calculateAverageRowsPerPartition(statistics.values());
    if (!averageRows.isPresent()) {
        return TableStatistics.empty();
    }
    double rowsPerPartition = averageRows.getAsDouble();
    verify(rowsPerPartition >= 0, "averageRowsPerPartition must be greater than or equal to zero");
    int partitionCount = partitions.size();
    double estimatedRowCount = rowsPerPartition * partitionCount;

    TableStatistics.Builder builder = TableStatistics.builder();
    builder.setRowCount(Estimate.of(estimatedRowCount));

    // Total size is optional: only set when an average size could be calculated.
    OptionalDouble averageSize = calculateAverageSizePerPartition(statistics.values());
    if (averageSize.isPresent()) {
        double sizePerPartition = averageSize.getAsDouble();
        verify(sizePerPartition >= 0, "averageSizePerPartition must be greater than or equal to zero: %s", sizePerPartition);
        builder.setTotalSize(Estimate.of(sizePerPartition * partitionCount));
    }

    // Per-column statistics: partition keys and data columns are computed differently.
    for (Map.Entry<String, ColumnHandle> entry : columns.entrySet()) {
        String name = entry.getKey();
        HiveColumnHandle handle = (HiveColumnHandle) entry.getValue();
        Type type = columnTypes.get(name);
        ColumnStatistics perColumn = handle.isPartitionKey()
                ? createPartitionColumnStatistics(handle, type, partitions, statistics, rowsPerPartition, estimatedRowCount)
                : createDataColumnStatistics(name, type, estimatedRowCount, statistics.values());
        builder.setColumnStatistics(handle, perColumn);
    }

    return builder.build();
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) DecimalType(com.facebook.presto.common.type.DecimalType) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) Type(com.facebook.presto.common.type.Type) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) OptionalDouble(java.util.OptionalDouble)

Example 10 with TableStatistics

use of com.facebook.presto.spi.statistics.TableStatistics in project presto by prestodb.

the class TableScanStatsRule method doCalculate.

/**
 * Estimates the output statistics of a table scan from the table statistics that
 * {@code metadata} reports for the scanned table, its assigned columns and the
 * node's current constraint.
 *
 * <p>Each output variable is mapped to the estimate derived from its column's
 * statistics, or to {@code VariableStatsEstimate.unknown()} when no statistics
 * are available for that column. The result is always present and marked confident.
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    Constraint<ColumnHandle> constraint = new Constraint<>(node.getCurrentConstraint());

    TableStatistics tableStatistics = metadata.getTableStatistics(
            session,
            node.getTable(),
            ImmutableList.copyOf(node.getAssignments().values()),
            constraint);

    Map<VariableReferenceExpression, VariableStatsEstimate> statsByVariable = new HashMap<>();
    for (Map.Entry<VariableReferenceExpression, ColumnHandle> assignment : node.getAssignments().entrySet()) {
        VariableReferenceExpression variable = assignment.getKey();
        ColumnHandle column = assignment.getValue();
        // Columns without reported statistics fall back to an unknown estimate.
        Optional<VariableStatsEstimate> estimate = Optional.ofNullable(tableStatistics.getColumnStatistics().get(column))
                .map(columnStats -> StatsUtil.toVariableStatsEstimate(tableStatistics, columnStats));
        statsByVariable.put(variable, estimate.orElse(VariableStatsEstimate.unknown()));
    }

    PlanNodeStatsEstimate scanEstimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(tableStatistics.getRowCount().getValue())
            .setTotalSize(tableStatistics.getTotalSize().getValue())
            .setConfident(true)
            .addVariableStatistics(statsByVariable)
            .build();
    return Optional.of(scanEstimate);
}
Also used : ColumnStatistics(com.facebook.presto.spi.statistics.ColumnStatistics) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Constraint(com.facebook.presto.spi.Constraint) HashMap(java.util.HashMap) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) TableStatistics(com.facebook.presto.spi.statistics.TableStatistics) Map(java.util.Map)

Aggregations

TableStatistics (com.facebook.presto.spi.statistics.TableStatistics)20 ColumnHandle (com.facebook.presto.spi.ColumnHandle)14 SchemaTableName (com.facebook.presto.spi.SchemaTableName)10 ColumnStatistics (com.facebook.presto.spi.statistics.ColumnStatistics)9 ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle)7 DoubleRange (com.facebook.presto.spi.statistics.DoubleRange)7 Map (java.util.Map)7 Test (org.testng.annotations.Test)7 Type (com.facebook.presto.common.type.Type)6 Chars.isCharType (com.facebook.presto.common.type.Chars.isCharType)5 Varchars.isVarcharType (com.facebook.presto.common.type.Varchars.isVarcharType)5 ConnectorSession (com.facebook.presto.spi.ConnectorSession)5 Constraint (com.facebook.presto.spi.Constraint)5 RowExpression (com.facebook.presto.spi.relation.RowExpression)5 ImmutableMap (com.google.common.collect.ImmutableMap)5 NullableValue (com.facebook.presto.common.predicate.NullableValue)4 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)4 Objects.requireNonNull (java.util.Objects.requireNonNull)4 Optional (java.util.Optional)4 Subfield (com.facebook.presto.common.Subfield)3