Search in sources :

Example 1 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TestTpcdsMetadataStatistics method testTableStatsDetails.

/**
 * Verifies the statistics reported for {@code Table.CALL_CENTER} in schema {@code sf1}:
 * the row count, the presence of statistics for every column handle, and the expected
 * per-column statistics for a sample of columns (see the inline labels below).
 */
@Test
public void testTableStatsDetails() {
    ConnectorTableHandle callCenter = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.CALL_CENTER.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, callCenter, alwaysTrue(), true);
    estimateAssertion.assertClose(stats.getRowCount(), Estimate.of(6), "Row count does not match");

    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, callCenter);
    Map<ColumnHandle, ColumnStatistics> perColumn = stats.getColumnStatistics();

    // all columns have stats
    for (ColumnHandle handle : handlesByName.values()) {
        assertTrue(perColumn.containsKey(handle));
        assertNotNull(perColumn.get(handle));
    }

    // identifier
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(6))
                    .setRange(new DoubleRange(1, 6))
                    .build());

    // varchar
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setDataSize(Estimate.of(48.0))
                    .build());

    // char
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_ZIP.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setDataSize(Estimate.of(5.0))
                    .build());

    // decimal
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_GMT_OFFSET.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setRange(new DoubleRange(-5, -5))
                    .build());

    // date
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_REC_START_DATE.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(4))
                    .setRange(new DoubleRange(10227L, 11688L))
                    .build());

    // only null values
    assertColumnStatistics(
            perColumn.get(handlesByName.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(1))
                    .setDistinctValuesCount(Estimate.of(0))
                    .build());
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DoubleRange(io.prestosql.spi.statistics.DoubleRange) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 2 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TestTpcdsMetadataStatistics method testNullFraction.

/**
 * Verifies the statistics reported for the {@code WEB_REC_END_DATE} column of
 * {@code Table.WEB_SITE} in schema {@code sf1}, a column expected to report a
 * nulls fraction of 0.5.
 */
@Test
public void testNullFraction() {
    ConnectorTableHandle webSite = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.WEB_SITE.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, webSite, alwaysTrue(), true);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, webSite);

    // some null values
    ColumnHandle recEndDate = handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName());
    assertColumnStatistics(
            stats.getColumnStatistics().get(recEndDate),
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setRange(new DoubleRange(10819L, 11549L))
                    .build());
}
Also used : ColumnHandle(io.prestosql.spi.connector.ColumnHandle) DoubleRange(io.prestosql.spi.statistics.DoubleRange) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) Test(org.testng.annotations.Test)

Example 3 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TestTpchMetadata method testNoTableStats.

/**
 * Asserts that the row count in the statistics returned for the given table is unknown.
 *
 * @param schema schema name used to look up the table handle
 * @param table table whose {@code getTableName()} identifies the table within the schema
 */
private void testNoTableStats(String schema, TpchTable<?> table) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    assertTrue(tpchMetadata.getTableStatistics(session, handle, alwaysTrue(), true).getRowCount().isUnknown());
}
Also used : TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 4 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TestTpchMetadata method testColumnStats.

/**
 * Fetches the statistics of one column under the given constraint and compares each
 * estimate (distinct values count, data size, nulls fraction, range) with the expected
 * statistics, using an {@code EstimateAssertion} built with {@code TOLERANCE}.
 *
 * @param schema schema name used to look up the table handle
 * @param table table that owns the column
 * @param column column whose statistics are checked; looked up by its simplified column name
 * @param constraint constraint passed to the table statistics request
 * @param expected statistics the column is expected to report
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint constraint, ColumnStatistics expected) {
    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());

    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint, true);
    ColumnHandle target = tpchMetadata.getColumnHandles(session, handle).get(column.getSimplifiedColumnName());
    ColumnStatistics actual = stats.getColumnStatistics().get(target);

    closeTo.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(actual.getRange(), expected.getRange(), "range");
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 5 with TableStatistics

use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.

the class TableScanStatsRule method doCalculate.

/**
 * Computes the stats estimate for a table scan.
 *
 * <p>Table statistics are requested from the metadata using the predicate taken from the
 * table properties. Each output symbol of the scan is mapped to the statistics of its
 * column, or to unknown statistics when the table statistics have none for that column.
 *
 * <p>When the scan's enforced constraint is neither "all" nor "none", and the table
 * predicate is either "all" or has the same domains as the enforced constraint, the
 * enforced constraint is treated as a pushed-down predicate: it is translated to an
 * expression and applied through the filter stats calculator. Columns referenced by the
 * constraint but absent from the scan's assignments get a symbol named after the column
 * so that the translated expression can refer to them.
 *
 * @param node the table scan being estimated
 * @param sourceStats stats provider; consulted only for the enforce-default-filter-factor flag
 * @param lookup unused by this method
 * @param session current session
 * @param types symbol types, used when converting column statistics to symbol statistics
 * @return the estimate, always present
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    TupleDomain<ColumnHandle> predicate = metadata.getTableProperties(session, node.getTable()).getPredicate();
    Constraint constraint = new Constraint(predicate);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint, true);
    verify(tableStatistics != null, "tableStatistics is null for %s", node);
    Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>();
    Map<ColumnHandle, Symbol> remainingSymbols = new HashMap<>();
    Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();

    TupleDomain<ColumnHandle> enforcedConstraint = node.getEnforcedConstraint();
    // Rule out "all"/"none" first and compare the Optionals themselves: calling
    // getDomains().get() on a "none" tuple domain would throw NoSuchElementException.
    boolean enforcedConstraintIsSelective = !(enforcedConstraint.isAll() || enforcedConstraint.isNone());
    boolean isPredicatesPushDown = enforcedConstraintIsSelective
            && (predicate.isAll() || predicate.getDomains().equals(enforcedConstraint.getDomains()));
    if (isPredicatesPushDown) {
        predicate = enforcedConstraint;
        // Safe: the enforced constraint is known not to be "none" here.
        for (ColumnHandle column : predicate.getDomains().get().keySet()) {
            remainingSymbols.put(column, new Symbol(column.getColumnName()));
        }
    }

    for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) {
        Symbol symbol = entry.getKey();
        Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue()));
        SymbolStatsEstimate symbolStatistics = columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol))).orElse(SymbolStatsEstimate.unknown());
        outputSymbolStats.put(symbol, symbolStatistics);
        // Columns already produced by the scan do not need a synthetic symbol.
        remainingSymbols.remove(entry.getValue());
    }
    PlanNodeStatsEstimate tableEstimates = PlanNodeStatsEstimate.builder().setOutputRowCount(tableStatistics.getRowCount().getValue()).addSymbolStatistics(outputSymbolStats).build();
    if (!isPredicatesPushDown) {
        return Optional.of(tableEstimates);
    }

    if (!remainingSymbols.isEmpty()) {
        ImmutableBiMap.Builder<ColumnHandle, Symbol> assignmentBuilder = ImmutableBiMap.builder();
        assignments = assignmentBuilder.putAll(assignments).putAll(remainingSymbols).build();
        for (Map.Entry<ColumnHandle, Symbol> entry : remainingSymbols.entrySet()) {
            Symbol symbol = entry.getValue();
            Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getKey()));
            SymbolStatsEstimate symbolStatistics = columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol))).orElse(SymbolStatsEstimate.unknown());
            outputSymbolStats.put(symbol, symbolStatistics);
        }
        /* Refresh TableEstimates for remaining columns */
        tableEstimates = PlanNodeStatsEstimate.builder().setOutputRowCount(tableStatistics.getRowCount().getValue()).addSymbolStatistics(outputSymbolStats).build();
    }

    Expression pushDownExpression = domainTranslator.toPredicate(predicate.transform(assignments::get));
    PlanNodeStatsEstimate estimate = filterStatsCalculator.filterStats(tableEstimates, pushDownExpression, session, types);
    if ((isDefaultFilterFactorEnabled(session) || sourceStats.isEnforceDefaultFilterFactor()) && estimate.isOutputRowCountUnknown()) {
        // Fall back to a fixed fraction of the unfiltered row count when the filter estimate is unknown.
        PlanNodeStatsEstimate finalTableEstimates = tableEstimates;
        estimate = tableEstimates.mapOutputRowCount(sourceRowCount -> finalTableEstimates.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    }
    return Optional.of(estimate);
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) TableStatistics(io.prestosql.spi.statistics.TableStatistics) Lookup(io.prestosql.sql.planner.iterative.Lookup) TypeProvider(io.prestosql.sql.planner.TypeProvider) HashMap(java.util.HashMap) Pattern(io.prestosql.matching.Pattern) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) NaN(java.lang.Double.NaN) ExpressionDomainTranslator(io.prestosql.sql.planner.ExpressionDomainTranslator) Verify.verify(com.google.common.base.Verify.verify) FixedWidthType(io.prestosql.spi.type.FixedWidthType) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Session(io.prestosql.Session) Type(io.prestosql.spi.type.Type) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) Constraint(io.prestosql.spi.connector.Constraint) Symbol(io.prestosql.spi.plan.Symbol) UNKNOWN_FILTER_COEFFICIENT(io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT) TupleDomain(io.prestosql.spi.predicate.TupleDomain) TableScanNode(io.prestosql.spi.plan.TableScanNode) SystemSessionProperties.isDefaultFilterFactorEnabled(io.prestosql.SystemSessionProperties.isDefaultFilterFactorEnabled) Metadata(io.prestosql.metadata.Metadata) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) LiteralEncoder(io.prestosql.sql.planner.LiteralEncoder) Optional(java.util.Optional) Patterns.tableScan(io.prestosql.sql.planner.plan.Patterns.tableScan) Expression(io.prestosql.sql.tree.Expression) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Constraint(io.prestosql.spi.connector.Constraint) HashMap(java.util.HashMap) Symbol(io.prestosql.spi.plan.Symbol) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Expression(io.prestosql.sql.tree.Expression) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HashMap(java.util.HashMap) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Map(java.util.Map)

Aggregations

TableStatistics (io.prestosql.spi.statistics.TableStatistics)24 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)12 SchemaTableName (io.prestosql.spi.connector.SchemaTableName)9 ColumnStatistics (io.prestosql.spi.statistics.ColumnStatistics)8 Type (io.prestosql.spi.type.Type)8 Map (java.util.Map)8 ConnectorTableHandle (io.prestosql.spi.connector.ConnectorTableHandle)7 Test (org.testng.annotations.Test)6 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)5 PrestoException (io.prestosql.spi.PrestoException)5 Constraint (io.prestosql.spi.connector.Constraint)5 DoubleRange (io.prestosql.spi.statistics.DoubleRange)5 HashMap (java.util.HashMap)5 List (java.util.List)5 Optional (java.util.Optional)5 Verify.verify (com.google.common.base.Verify.verify)4 ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 TestingConnectorSession (io.prestosql.testing.TestingConnectorSession)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3