Search in sources:

Example 6 with Stats

use of io.crate.statistics.Stats in project crate by crate.

the class SelectivityFunctionsTest method test_eq_not_in_mcv_is_based_on_approx_distinct.

/**
 * Builds column statistics for {@code x} from the sorted values 1..20000 and checks that
 * the row estimate for {@code x = 10} over a 20000-row table is exactly one row.
 */
@Test
public void test_eq_not_in_mcv_is_based_on_approx_distinct() {
    // 20_000 sorted values, each occurring exactly once.
    var sortedValues = IntStream.rangeClosed(1, 20_000).boxed().collect(Collectors.toList());
    var statsByColumn = new HashMap<ColumnIdent, ColumnStats>();
    statsByColumn.put(
        new ColumnIdent("x"),
        ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, 20_000L));
    Stats stats = new Stats(20_000, 16, statsByColumn);

    SqlExpressions expressions = new SqlExpressions(T3.sources(clusterService));
    Symbol query = expressions.asSymbol("x = 10");

    long estimatedRows = SelectivityFunctions.estimateNumRows(stats, query, null);
    assertThat(estimatedRows, Matchers.is(1L));
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) HashMap(java.util.HashMap) Symbol(io.crate.expression.symbol.Symbol) ColumnStats(io.crate.statistics.ColumnStats) Stats(io.crate.statistics.Stats) SqlExpressions(io.crate.testing.SqlExpressions) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Example 7 with Stats

use of io.crate.statistics.Stats in project crate by crate.

the class GroupHashAggregate method approximateDistinctValues.

/**
 * Estimates the number of distinct groups produced by grouping {@code numSourceRows} rows
 * on {@code groupKeys}.
 *
 * <p>For every key that is a {@link Reference} or a {@link ScopedSymbol} the column statistics
 * are looked up in {@code tableStats}. Keys without usable column statistics are assumed to be
 * unique per row. The per-key estimates are multiplied and the product is capped at
 * {@code numSourceRows}. If any key is neither a {@link Reference} nor a {@link ScopedSymbol},
 * {@code numSourceRows} is returned unchanged.
 *
 * <p>The running product saturates instead of wrapping around, so many group keys or a large
 * {@code numSourceRows} cannot produce a wrapped or negative estimate.
 *
 * @param numSourceRows estimated number of rows delivered by the source operator
 * @param tableStats    statistics lookup, queried per group key
 * @param groupKeys     the symbols the rows are grouped by
 * @return the estimated number of groups
 */
static long approximateDistinctValues(long numSourceRows, TableStats tableStats, List<Symbol> groupKeys) {
    long distinctValues = 1;
    // Counts keys whose relation could be resolved; such a key may still lack column stats.
    int numResolvableKeys = 0;
    for (Symbol groupKey : groupKeys) {
        Stats stats = null;
        ColumnStats columnStats = null;
        if (groupKey instanceof Reference) {
            Reference ref = (Reference) groupKey;
            stats = tableStats.getStats(ref.ident().tableIdent());
            columnStats = stats.statsByColumn().get(ref.column());
            numResolvableKeys++;
        } else if (groupKey instanceof ScopedSymbol) {
            ScopedSymbol scopedSymbol = (ScopedSymbol) groupKey;
            stats = tableStats.getStats(scopedSymbol.relation());
            columnStats = stats.statsByColumn().get(scopedSymbol.column());
            numResolvableKeys++;
        }
        // `stats` is always assigned together with `columnStats`, so it is non-null whenever
        // `columnStats` is. A non-positive document count would turn the ratio below into
        // NaN, Infinity or a negative number, so such stats are treated as unusable.
        if (columnStats == null || stats.numDocs() <= 0) {
            // Assume worst case: Every value is unique
            distinctValues = saturatedMultiply(distinctValues, numSourceRows);
        } else {
            // `approxDistinct` is the number of distinct values in relation to `stats.numDocs()`, not in
            // relation to `numSourceRows`, which is based on the estimates of a source operator.
            // That is why we calculate the cardinality ratio and calculate the new distinct
            // values based on `numSourceRows` to account for changes in the number of rows in source operators
            //
            // e.g. SELECT x, count(*) FROM tbl GROUP BY x
            // and  SELECT x, count(*) FROM tbl WHERE pk = 1 GROUP BY x
            //
            // have a different number of groups
            double cardinalityRatio = columnStats.approxDistinct() / stats.numDocs();
            distinctValues = saturatedMultiply(distinctValues, (long) (numSourceRows * cardinalityRatio));
        }
    }
    if (numResolvableKeys == groupKeys.size()) {
        return Math.min(distinctValues, numSourceRows);
    } else {
        return numSourceRows;
    }
}

/**
 * Multiplies two longs, clamping to {@link Long#MAX_VALUE} or {@link Long#MIN_VALUE}
 * instead of silently wrapping around on overflow.
 */
private static long saturatedMultiply(long a, long b) {
    long high = Math.multiplyHigh(a, b);
    long low = a * b;
    // The 128-bit product fits into a long iff the high word is the sign extension of the low word.
    if ((high == 0 && low >= 0) || (high == -1 && low < 0)) {
        return low;
    }
    return ((a ^ b) < 0) ? Long.MIN_VALUE : Long.MAX_VALUE;
}
Also used : ScopedSymbol(io.crate.expression.symbol.ScopedSymbol) Symbol(io.crate.expression.symbol.Symbol) ColumnStats(io.crate.statistics.ColumnStats) Reference(io.crate.metadata.Reference) TableStats(io.crate.statistics.TableStats) Stats(io.crate.statistics.Stats)

Example 8 with Stats

use of io.crate.statistics.Stats in project crate by crate.

the class MergeFilterAndCollect method apply.

/**
 * Replaces a {@code Filter} and the captured {@code Collect} with a single new {@code Collect}.
 *
 * <p>The filter's query is added to the collect's existing where clause. The expected row count
 * is re-estimated from the table's statistics using the combined query, and the average row
 * size is taken from the same statistics.
 */
@Override
public LogicalPlan apply(Filter filter, Captures captures, TableStats tableStats, TransactionContext txnCtx, NodeContext nodeCtx) {
    Collect source = captures.get(collectCapture);
    Stats relationStats = tableStats.getStats(source.relation().tableInfo().ident());
    WhereClause mergedWhere = source.where().add(filter.query());
    long estimatedRows = SelectivityFunctions.estimateNumRows(relationStats, mergedWhere.queryOrFallback(), null);
    return new Collect(
        source.relation(),
        source.outputs(),
        mergedWhere,
        estimatedRows,
        relationStats.averageSizePerRowInBytes()
    );
}
Also used : Collect(io.crate.planner.operators.Collect) Stats(io.crate.statistics.Stats) TableStats(io.crate.statistics.TableStats) WhereClause(io.crate.analyze.WhereClause)

Example 9 with Stats

use of io.crate.statistics.Stats in project crate by crate.

the class GroupHashAggregateTest method setUpStatsAndExpressions.

/**
 * Prepares the fixtures for each test: table statistics for {@code doc.t1} whose columns
 * {@code x} and {@code i} share the same column statistics, plus the SQL expression helper.
 */
@Before
public void setUpStatsAndExpressions() throws Exception {
    // 100 sorted samples: fifty 10s followed by fifty 20s.
    var samples = IntStream.range(0, 100)
        .map(idx -> idx < 50 ? 10 : 20)
        .boxed()
        .collect(Collectors.toList());
    long numDocs = 2_000L;
    ColumnStats<Integer> sharedColumnStats = ColumnStats.fromSortedValues(samples, DataTypes.INTEGER, 0, numDocs);
    Stats t1Stats = new Stats(
        numDocs,
        DataTypes.INTEGER.fixedSize(),
        Map.of(
            new ColumnIdent("x"), sharedColumnStats,
            new ColumnIdent("i"), sharedColumnStats
        )
    );

    tableStats = new TableStats();
    tableStats.updateTableStats(Map.of(new RelationName("doc", "t1"), t1Stats));
    expressions = new SqlExpressions(T3.sources(clusterService));
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) ColumnStats(io.crate.statistics.ColumnStats) Stats(io.crate.statistics.Stats) TableStats(io.crate.statistics.TableStats) RelationName(io.crate.metadata.RelationName) SqlExpressions(io.crate.testing.SqlExpressions) Before(org.junit.Before)

Example 10 with Stats

use of io.crate.statistics.Stats in project crate by crate.

the class SelectivityFunctionsCalculationTest method test_group_operator_adapt_expected_row_count_based_on_column_stats.

/**
 * Registers statistics for {@code doc.tbl} (2000 docs, column {@code x} sampled with only the
 * values 10 and 20) and checks that the plan for a GROUP BY on {@code x} expects two rows.
 */
@Test
public void test_group_operator_adapt_expected_row_count_based_on_column_stats() throws Throwable {
    // 100 sorted samples: fifty 10s followed by fifty 20s.
    var samples = IntStream.range(0, 100)
        .map(idx -> idx < 50 ? 10 : 20)
        .boxed()
        .collect(Collectors.toList());
    long numDocs = 2_000L;
    Stats stats = new Stats(
        numDocs,
        DataTypes.INTEGER.fixedSize(),
        Map.of(new ColumnIdent("x"), ColumnStats.fromSortedValues(samples, DataTypes.INTEGER, 0, numDocs))
    );
    TableStats tableStats = new TableStats();
    tableStats.updateTableStats(Map.of(new RelationName("doc", "tbl"), stats));

    SQLExecutor e = SQLExecutor.builder(clusterService)
        .setTableStats(tableStats)
        .addTable("create table doc.tbl (x int)")
        .build();
    LogicalPlan plan = e.logicalPlan("select x, count(*) from doc.tbl group by x");

    assertThat(plan.numExpectedRows(), Matchers.is(2L));
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) SQLExecutor(io.crate.testing.SQLExecutor) ColumnStats(io.crate.statistics.ColumnStats) Stats(io.crate.statistics.Stats) TableStats(io.crate.statistics.TableStats) RelationName(io.crate.metadata.RelationName) Test(org.junit.Test) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest)

Aggregations

Stats (io.crate.statistics.Stats): 12, ColumnStats (io.crate.statistics.ColumnStats): 10, ColumnIdent (io.crate.metadata.ColumnIdent): 9, Test (org.junit.Test): 9, CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest): 8, Symbol (io.crate.expression.symbol.Symbol): 7, SqlExpressions (io.crate.testing.SqlExpressions): 7, TableStats (io.crate.statistics.TableStats): 6, RelationName (io.crate.metadata.RelationName): 4, HashMap (java.util.HashMap): 4, SQLExecutor (io.crate.testing.SQLExecutor): 2, WhereClause (io.crate.analyze.WhereClause): 1, Row1 (io.crate.data.Row1): 1, ScopedSymbol (io.crate.expression.symbol.ScopedSymbol): 1, Reference (io.crate.metadata.Reference): 1, Collect (io.crate.planner.operators.Collect): 1, Before (org.junit.Before): 1