Search in sources :

Example 1 with ColumnStatistics

use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.

the class ColumnStatisticsData method toColumnStatistics.

/**
 * Converts the statistics held by this object into a {@link ColumnStatistics}.
 *
 * <p>The nulls fraction is computed as {@code nullsCount / rowCount}. The data size is
 * reported as unknown when no value is recorded, and the range is set only when both
 * the minimum and the maximum are present.
 *
 * @param rowCount number of rows used as the denominator of the nulls fraction
 * @return the populated column statistics
 */
public ColumnStatistics toColumnStatistics(long rowCount) {
    ColumnStatistics.Builder builder = ColumnStatistics.builder();
    // The ratio of null rows is a nulls fraction, not a data size. It was previously
    // passed to setDataSize and then overwritten by the real data size below, so the
    // nulls fraction was never populated.
    // NOTE(review): rowCount == 0 makes this division non-finite; confirm callers never pass 0.
    builder.setNullsFraction(Estimate.of((double) nullsCount / (double) rowCount));
    builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    builder.setDataSize(dataSize.map(Estimate::of).orElse(Estimate.unknown()));
    if (min.isPresent() && max.isPresent()) {
        builder.setRange(new DoubleRange((double) min.get(), (double) max.get()));
    }
    return builder.build();
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) DoubleRange(io.prestosql.spi.statistics.DoubleRange) Estimate(io.prestosql.spi.statistics.Estimate)

Example 2 with ColumnStatistics

use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.

the class TestTpchMetadata method testColumnStats.

/**
 * Fetches the statistics of a single column through {@code tpchMetadata} and checks
 * that each estimate is close to the expected one, within {@code TOLERANCE}.
 *
 * @param schema schema name used to resolve the table
 * @param table table whose statistics are requested
 * @param column column whose statistics are compared
 * @param constraint constraint passed to the statistics lookup
 * @param expected statistics the actual values are compared against
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint constraint, ColumnStatistics expected) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint, true);
    ColumnHandle targetColumn = tpchMetadata.getColumnHandles(session, handle).get(column.getSimplifiedColumnName());
    ColumnStatistics actual = stats.getColumnStatistics().get(targetColumn);

    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    closeTo.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(actual.getRange(), expected.getRange(), "range");
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) TableStatistics(io.prestosql.spi.statistics.TableStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName)

Example 3 with ColumnStatistics

use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.

the class TableScanStatsRule method doCalculate.

/**
 * Builds the stats estimate for a table scan from the table statistics reported by
 * {@code metadata}.
 *
 * <p>When the node's enforced constraint is neither "all" nor "none", and the table's
 * predicate is "all" or has the same domains as that constraint, the enforced constraint
 * is translated into an expression and applied through {@code filterStatsCalculator}.
 * Columns referenced by the constraint but missing from the node's assignments get
 * symbols created from their column names so the filter can refer to them.
 *
 * @return the table estimate, filtered by the enforced constraint when the conditions above hold
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    TupleDomain<ColumnHandle> predicate = metadata.getTableProperties(session, node.getTable()).getPredicate();
    TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), new Constraint(predicate), true);
    verify(tableStatistics != null, "tableStatistics is null for %s", node);

    Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>();
    Map<ColumnHandle, Symbol> remainingSymbols = new HashMap<>();
    Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();

    // The domain comparison is evaluated first, exactly as before, and only then the all/none check.
    boolean matchesEnforced = predicate.isAll() || predicate.getDomains().get().equals(node.getEnforcedConstraint().getDomains().get());
    boolean isPredicatesPushDown = matchesEnforced && !node.getEnforcedConstraint().isAll() && !node.getEnforcedConstraint().isNone();
    if (isPredicatesPushDown) {
        predicate = node.getEnforcedConstraint();
        // Start with every constrained column; those present in the assignments are removed below.
        for (ColumnHandle constrainedColumn : predicate.getDomains().get().keySet()) {
            remainingSymbols.put(constrainedColumn, new Symbol(constrainedColumn.getColumnName()));
        }
    }

    for (Map.Entry<Symbol, ColumnHandle> assignment : node.getAssignments().entrySet()) {
        Symbol symbol = assignment.getKey();
        ColumnHandle column = assignment.getValue();
        Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(column));
        outputSymbolStats.put(symbol, columnStatistics
                .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
                .orElse(SymbolStatsEstimate.unknown()));
        remainingSymbols.remove(column);
    }

    PlanNodeStatsEstimate tableEstimates = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(tableStatistics.getRowCount().getValue())
            .addSymbolStatistics(outputSymbolStats)
            .build();
    if (!isPredicatesPushDown) {
        return Optional.of(tableEstimates);
    }

    if (!remainingSymbols.isEmpty()) {
        assignments = ImmutableBiMap.<ColumnHandle, Symbol>builder()
                .putAll(assignments)
                .putAll(remainingSymbols)
                .build();
        for (Map.Entry<ColumnHandle, Symbol> remaining : remainingSymbols.entrySet()) {
            Symbol symbol = remaining.getValue();
            Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(remaining.getKey()));
            outputSymbolStats.put(symbol, columnStatistics
                    .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
                    .orElse(SymbolStatsEstimate.unknown()));
        }
        /* Refresh TableEstimates for remaining columns */
        tableEstimates = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(tableStatistics.getRowCount().getValue())
                .addSymbolStatistics(outputSymbolStats)
                .build();
    }

    Expression pushDownExpression = domainTranslator.toPredicate(predicate.transform(assignments::get));
    PlanNodeStatsEstimate filtered = filterStatsCalculator.filterStats(tableEstimates, pushDownExpression, session, types);
    boolean useDefaultFilterFactor = isDefaultFilterFactorEnabled(session) || sourceStats.isEnforceDefaultFilterFactor();
    if (useDefaultFilterFactor && filtered.isOutputRowCountUnknown()) {
        // The filter produced no row count: scale the unfiltered row count by the default coefficient.
        PlanNodeStatsEstimate unfiltered = tableEstimates;
        filtered = unfiltered.mapOutputRowCount(ignored -> unfiltered.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    }
    return Optional.of(filtered);
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) TableStatistics(io.prestosql.spi.statistics.TableStatistics) Lookup(io.prestosql.sql.planner.iterative.Lookup) TypeProvider(io.prestosql.sql.planner.TypeProvider) HashMap(java.util.HashMap) Pattern(io.prestosql.matching.Pattern) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) NaN(java.lang.Double.NaN) ExpressionDomainTranslator(io.prestosql.sql.planner.ExpressionDomainTranslator) Verify.verify(com.google.common.base.Verify.verify) FixedWidthType(io.prestosql.spi.type.FixedWidthType) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Session(io.prestosql.Session) Type(io.prestosql.spi.type.Type) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) Constraint(io.prestosql.spi.connector.Constraint) Symbol(io.prestosql.spi.plan.Symbol) UNKNOWN_FILTER_COEFFICIENT(io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT) TupleDomain(io.prestosql.spi.predicate.TupleDomain) TableScanNode(io.prestosql.spi.plan.TableScanNode) SystemSessionProperties.isDefaultFilterFactorEnabled(io.prestosql.SystemSessionProperties.isDefaultFilterFactorEnabled) Metadata(io.prestosql.metadata.Metadata) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) LiteralEncoder(io.prestosql.sql.planner.LiteralEncoder) Optional(java.util.Optional) Patterns.tableScan(io.prestosql.sql.planner.plan.Patterns.tableScan) Expression(io.prestosql.sql.tree.Expression) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Constraint(io.prestosql.spi.connector.Constraint) HashMap(java.util.HashMap) Symbol(io.prestosql.spi.plan.Symbol) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Expression(io.prestosql.sql.tree.Expression) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HashMap(java.util.HashMap) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Map(java.util.Map)

Example 4 with ColumnStatistics

use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.

the class ConnectorFilterStatsCalculatorService method toPlanNodeStats.

/**
 * Builds a {@link PlanNodeStatsEstimate} from table statistics, adding symbol statistics
 * for every column that has a name in {@code columnNames}.
 *
 * @param tableStatistics source of the row count and per-column statistics
 * @param columnNames column handle to column name; columns without a name are skipped
 * @param columnTypes column name to type, passed on to {@code toSymbolStatistics}
 * @return the estimate with the table's row count and the named columns' statistics
 */
private static PlanNodeStatsEstimate toPlanNodeStats(TableStatistics tableStatistics, Map<ColumnHandle, String> columnNames, Map<String, Type> columnTypes) {
    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.builder();
    estimate.setOutputRowCount(tableStatistics.getRowCount().getValue());
    for (Map.Entry<ColumnHandle, ColumnStatistics> columnEntry : tableStatistics.getColumnStatistics().entrySet()) {
        String name = columnNames.get(columnEntry.getKey());
        if (name != null) {
            SymbolStatsEstimate symbolStats = toSymbolStatistics(tableStatistics, columnEntry.getValue(), columnTypes.get(name));
            estimate.addSymbolStatistics(new Symbol(name), symbolStats);
        }
    }
    return estimate.build();
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Symbol(io.prestosql.spi.plan.Symbol) ImmutableBiMap(com.google.common.collect.ImmutableBiMap) Map(java.util.Map)

Example 5 with ColumnStatistics

use of io.prestosql.spi.statistics.ColumnStatistics in project hetu-core by openlookeng.

the class TestDataCenterClient method testGetTableStatistics.

/**
 * Requests statistics for {@code tpch.tiny.orders} through a {@link DataCenterClient}
 * and checks the row count plus the values of every returned column that this test knows.
 */
@Test
public void testGetTableStatistics() {
    Map<String, ColumnHandle> columnHandles = new LinkedHashMap<>();
    DataCenterClient client = new DataCenterClient(this.config, httpClient, typeManager);
    columnHandles.put("orderkey", new DataCenterColumnHandle("orderkey", DOUBLE, 0));
    columnHandles.put("custkey", new DataCenterColumnHandle("custkey", DOUBLE, 1));
    columnHandles.put("orderstatus", new DataCenterColumnHandle("orderstatus", createVarcharType(1), 2));
    columnHandles.put("totalprice", new DataCenterColumnHandle("totalprice", DOUBLE, 3));
    columnHandles.put("orderdate", new DataCenterColumnHandle("orderdate", DATE, 4));
    columnHandles.put("orderpriority", new DataCenterColumnHandle("orderpriority", createVarcharType(15), 5));
    columnHandles.put("clerk", new DataCenterColumnHandle("clerk", createUnboundedVarcharType(), 6));
    columnHandles.put("shippriority", new DataCenterColumnHandle("shippriority", DOUBLE, 7));
    columnHandles.put("comment", new DataCenterColumnHandle("comment", createVarcharType(79), 8));

    TableStatistics tableStatistics = client.getTableStatistics("tpch.tiny.orders", columnHandles);
    assertEquals(tableStatistics.getRowCount().getValue(), 15000.0);

    // Only columns present in the returned statistics are checked; unknown names are ignored.
    for (Map.Entry<ColumnHandle, ColumnStatistics> entry : tableStatistics.getColumnStatistics().entrySet()) {
        ColumnStatistics stats = entry.getValue();
        switch (entry.getKey().getColumnName()) {
            case "orderkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 15000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 60000.0);
                break;
            case "custkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 1499.0);
                break;
            case "orderstatus":
                assertEquals(stats.getDataSize().getValue(), 3.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 3.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "totalprice":
                assertEquals(stats.getDistinctValuesCount().getValue(), 14996.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 874.89);
                assertEquals(stats.getRange().get().getMax(), 466001.28);
                break;
            case "orderdate":
                assertEquals(stats.getDistinctValuesCount().getValue(), 2401.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 8035.0);
                assertEquals(stats.getRange().get().getMax(), 10440.0);
                break;
            case "orderpriority":
                assertEquals(stats.getDataSize().getValue(), 42.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 5.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "clerk":
                assertEquals(stats.getDataSize().getValue(), 15000.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "shippriority":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 0.0);
                assertEquals(stats.getRange().get().getMax(), 0.0);
                break;
            case "comment":
                assertEquals(stats.getDataSize().getValue(), 727249.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 14995.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            default:
                break;
        }
    }
}
Also used : ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) TableStatistics(io.prestosql.spi.statistics.TableStatistics) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) DataCenterClient(io.hetu.core.plugin.datacenter.client.DataCenterClient) Test(org.testng.annotations.Test)

Aggregations

ColumnStatistics (io.prestosql.spi.statistics.ColumnStatistics)13 ColumnHandle (io.prestosql.spi.connector.ColumnHandle)9 TableStatistics (io.prestosql.spi.statistics.TableStatistics)9 Type (io.prestosql.spi.type.Type)6 HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics)5 Chars.isCharType (io.prestosql.spi.type.Chars.isCharType)5 Varchars.isVarcharType (io.prestosql.spi.type.Varchars.isVarcharType)5 Map (java.util.Map)5 Verify.verify (com.google.common.base.Verify.verify)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 DoubleRange (io.prestosql.spi.statistics.DoubleRange)4 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ImmutableSet.toImmutableSet (com.google.common.collect.ImmutableSet.toImmutableSet)3 DecimalType (io.prestosql.spi.type.DecimalType)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 VerifyException (com.google.common.base.VerifyException)2 ImmutableBiMap (com.google.common.collect.ImmutableBiMap)2 Maps.immutableEntry (com.google.common.collect.Maps.immutableEntry)2 HashFunction (com.google.common.hash.HashFunction)2