Search in sources :

Example 11 with TableStats

use of io.crate.statistics.TableStats in project crate by crate.

the class BatchPortalTest method testEachStatementReceivesCorrectParams.

/**
 * Binds and executes two insert statements on one session before syncing and checks
 * that each result receiver sees the parameters bound for its own statement.
 */
@Test
public void testEachStatementReceivesCorrectParams() throws Throwable {
    SQLExecutor sqlExecutor = SQLExecutor.builder(clusterService)
        .addTable("create table t1 (x int)")
        .build();

    // Stub plan: reports itself as an INSERT and hands the received params to the consumer as the only row.
    Plan echoParamsPlan = new Plan() {

        @Override
        public StatementType type() {
            return StatementType.INSERT;
        }

        @Override
        public void executeOrFail(DependencyCarrier executor,
                                  PlannerContext plannerContext,
                                  RowConsumer consumer,
                                  Row params,
                                  SubQueryResults subQueryResults) {
            consumer.accept(InMemoryBatchIterator.of(params, null), null);
        }
    };

    // Planner that ignores the analyzed statement and always returns the stub plan above.
    Planner planner = new Planner(
        Settings.EMPTY,
        clusterService,
        sqlExecutor.nodeCtx,
        new TableStats(),
        null,
        null,
        sqlExecutor.schemas(),
        new StubUserManager(),
        mock(SessionSettingRegistry.class)
    ) {

        @Override
        public Plan plan(AnalyzedStatement analyzedStatement, PlannerContext plannerContext) {
            return echoParamsPlan;
        }
    };

    DependencyCarrier executor = mock(DependencyCarrier.class, Answers.RETURNS_MOCKS);
    Session session = new Session(
        sqlExecutor.nodeCtx,
        sqlExecutor.analyzer,
        planner,
        new JobsLogs(() -> false),
        false,
        executor,
        AccessControl.DISABLED,
        SessionContext.systemSessionContext()
    );

    // First statement: no bind parameters.
    session.parse("S_1", "insert into t1(x) values(1)", Collections.emptyList());
    session.bind("Portal", "S_1", Collections.emptyList(), null);
    final ArrayList<Object[]> rowsOfFirstStatement = new ArrayList<>();
    session.execute("Portal", 0, new BaseResultReceiver() {

        @Override
        public void setNextRow(Row row) {
            rowsOfFirstStatement.add(row.materialize());
        }
    });

    // Second statement: a single bind parameter with the value 2, bound under the same portal name.
    session.parse("S_2", "insert into t1(x) values(?)", Collections.emptyList());
    session.bind("Portal", "S_2", Collections.singletonList(2), null);
    final ArrayList<Object[]> rowsOfSecondStatement = new ArrayList<>();
    session.execute("Portal", 0, new BaseResultReceiver() {

        @Override
        public void setNextRow(Row row) {
            rowsOfSecondStatement.add(row.materialize());
        }
    });

    session.sync().get(5, TimeUnit.SECONDS);

    assertThat(rowsOfFirstStatement, contains(emptyArray()));
    assertThat(rowsOfSecondStatement, contains(arrayContaining(is(2))));
}
Also used : DependencyCarrier(io.crate.planner.DependencyCarrier) SubQueryResults(io.crate.planner.operators.SubQueryResults) ArrayList(java.util.ArrayList) Plan(io.crate.planner.Plan) TableStats(io.crate.statistics.TableStats) StubUserManager(io.crate.user.StubUserManager) SessionSettingRegistry(io.crate.metadata.settings.session.SessionSettingRegistry) PlannerContext(io.crate.planner.PlannerContext) SQLExecutor(io.crate.testing.SQLExecutor) BaseResultReceiver(io.crate.action.sql.BaseResultReceiver) Planner(io.crate.planner.Planner) AnalyzedStatement(io.crate.analyze.AnalyzedStatement) RowConsumer(io.crate.data.RowConsumer) Row(io.crate.data.Row) JobsLogs(io.crate.execution.engine.collect.stats.JobsLogs) Session(io.crate.action.sql.Session) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 12 with TableStats

use of io.crate.statistics.TableStats in project crate by crate.

the class GroupHashAggregate method approximateDistinctValues.

/**
 * Estimates how many distinct groups a GROUP BY over {@code groupKeys} produces.
 *
 * <p>For every group key a factor is multiplied into the estimate: {@code numSourceRows}
 * if no column statistics are found for the key, otherwise {@code numSourceRows} scaled by
 * the column's cardinality ratio. The multiplication saturates instead of silently
 * overflowing, so a large number of keys or rows cannot wrap the estimate around into a
 * negative or arbitrary value.
 *
 * @param numSourceRows estimated number of rows produced by the source operator
 * @param tableStats    statistics used to look up per-table and per-column stats
 * @param groupKeys     symbols the rows are grouped by
 * @return the estimate, capped at {@code numSourceRows}; {@code numSourceRows} itself if
 *         any group key is neither a {@link Reference} nor a {@link ScopedSymbol}
 */
static long approximateDistinctValues(long numSourceRows, TableStats tableStats, List<Symbol> groupKeys) {
    long distinctValues = 1;
    // Counts the keys whose table stats could be looked up (references and scoped symbols).
    int numKeysWithStats = 0;
    for (Symbol groupKey : groupKeys) {
        Stats stats = null;
        ColumnStats columnStats = null;
        if (groupKey instanceof Reference) {
            Reference ref = (Reference) groupKey;
            stats = tableStats.getStats(ref.ident().tableIdent());
            columnStats = stats.statsByColumn().get(ref.column());
            numKeysWithStats++;
        } else if (groupKey instanceof ScopedSymbol) {
            ScopedSymbol scopedSymbol = (ScopedSymbol) groupKey;
            stats = tableStats.getStats(scopedSymbol.relation());
            columnStats = stats.statsByColumn().get(scopedSymbol.column());
            numKeysWithStats++;
        }
        if (columnStats == null) {
            // Assume worst case: Every value is unique
            distinctValues = saturatedMultiply(distinctValues, numSourceRows);
        } else {
            // `approxDistinct` is the number of distinct values in relation to `stats.numDocs()`, not in
            // relation to `numSourceRows`, which is based on the estimates of a source operator.
            // That is why we calculate the cardinality ratio and calculate the new distinct
            // values based on `numSourceRows` to account for changes in the number of rows in source operators
            //
            // e.g. SELECT x, count(*) FROM tbl GROUP BY x
            // and  SELECT x, count(*) FROM tbl WHERE pk = 1 GROUP BY x
            //
            // have a different number of groups
            double cardinalityRatio = columnStats.approxDistinct() / stats.numDocs();
            distinctValues = saturatedMultiply(distinctValues, (long) (numSourceRows * cardinalityRatio));
        }
    }
    if (numKeysWithStats == groupKeys.size()) {
        return Math.min(distinctValues, numSourceRows);
    } else {
        return numSourceRows;
    }
}

/**
 * Multiplies two longs, clamping to {@link Long#MAX_VALUE} or {@link Long#MIN_VALUE}
 * (depending on the sign of the mathematical result) instead of wrapping on overflow.
 */
private static long saturatedMultiply(long a, long b) {
    try {
        return Math.multiplyExact(a, b);
    } catch (ArithmeticException overflow) {
        // Operands with differing signs overflow towards negative infinity.
        return ((a ^ b) < 0) ? Long.MIN_VALUE : Long.MAX_VALUE;
    }
}
Also used : ScopedSymbol(io.crate.expression.symbol.ScopedSymbol) Symbol(io.crate.expression.symbol.Symbol) ColumnStats(io.crate.statistics.ColumnStats) Reference(io.crate.metadata.Reference) ColumnStats(io.crate.statistics.ColumnStats) TableStats(io.crate.statistics.TableStats) Stats(io.crate.statistics.Stats) ScopedSymbol(io.crate.expression.symbol.ScopedSymbol)

Example 13 with TableStats

use of io.crate.statistics.TableStats in project crate by crate.

the class MergeFilterAndCollect method apply.

/**
 * Folds the query of {@code filter} into the where clause of the captured {@link Collect}
 * and returns a new {@link Collect} whose expected row count is re-estimated for the
 * combined where clause.
 */
@Override
public LogicalPlan apply(Filter filter,
                         Captures captures,
                         TableStats tableStats,
                         TransactionContext txnCtx,
                         NodeContext nodeCtx) {
    Collect source = captures.get(collectCapture);
    Stats relationStats = tableStats.getStats(source.relation().tableInfo().ident());
    WhereClause mergedWhere = source.where().add(filter.query());
    return new Collect(
        source.relation(),
        source.outputs(),
        mergedWhere,
        SelectivityFunctions.estimateNumRows(relationStats, mergedWhere.queryOrFallback(), null),
        relationStats.averageSizePerRowInBytes()
    );
}
Also used : Collect(io.crate.planner.operators.Collect) Stats(io.crate.statistics.Stats) TableStats(io.crate.statistics.TableStats) WhereClause(io.crate.analyze.WhereClause)

Example 14 with TableStats

use of io.crate.statistics.TableStats in project crate by crate.

the class TableStatsServiceIntegrationTest method testStatsUpdated.

/**
 * Inserts five rows into a fresh table and waits (up to 5 seconds) until the
 * {@link TableStats} instance of a data node reports five documents for it.
 */
@Test
public void testStatsUpdated() throws Exception {
    execute("create table t1(a int) with (number_of_replicas = 1)");
    ensureGreen();
    execute("insert into t1(a) values(1), (2), (3), (4), (5)");
    execute("refresh table t1");
    assertBusy(() -> {
        TableStats statsOnDataNode = internalCluster().getDataNodeInstance(TableStats.class);
        RelationName t1 = new RelationName(sqlExecutor.getCurrentSchema(), "t1");
        assertThat(statsOnDataNode.numDocs(t1), is(5L));
        // The estimated size per row is deliberately not asserted: it is derived from the
        // sys.shards size column, which is cached for 10 secs in ShardSizeExpression and
        // would therefore increase the time needed to run this test.
    }, 5, TimeUnit.SECONDS);
}
Also used : RelationName(io.crate.metadata.RelationName) TableStats(io.crate.statistics.TableStats) Test(org.junit.Test)

Example 15 with TableStats

use of io.crate.statistics.TableStats in project crate by crate.

the class GroupHashAggregateTest method setUpStatsAndExpressions.

/**
 * Prepares the fixtures: table stats for {@code doc.t1} with identical column stats for
 * the columns {@code x} and {@code i}, plus the SQL expression helper.
 */
@Before
public void setUpStatsAndExpressions() throws Exception {
    // 100 sorted samples: 50 times the value 10 followed by 50 times the value 20.
    IntStream fiftyTens = IntStream.generate(() -> 10).limit(50);
    IntStream fiftyTwenties = IntStream.generate(() -> 20).limit(50);
    var samples = IntStream.concat(fiftyTens, fiftyTwenties)
        .boxed()
        .collect(Collectors.toList());

    long numDocs = 2_000L;
    ColumnStats<Integer> columnStats = ColumnStats.fromSortedValues(samples, DataTypes.INTEGER, 0, numDocs);
    Stats stats = new Stats(
        numDocs,
        DataTypes.INTEGER.fixedSize(),
        Map.of(
            new ColumnIdent("x"), columnStats,
            new ColumnIdent("i"), columnStats
        )
    );

    tableStats = new TableStats();
    tableStats.updateTableStats(Map.of(new RelationName("doc", "t1"), stats));
    expressions = new SqlExpressions(T3.sources(clusterService));
}
Also used : ColumnIdent(io.crate.metadata.ColumnIdent) ColumnStats(io.crate.statistics.ColumnStats) Stats(io.crate.statistics.Stats) TableStats(io.crate.statistics.TableStats) RelationName(io.crate.metadata.RelationName) TableStats(io.crate.statistics.TableStats) SqlExpressions(io.crate.testing.SqlExpressions) Before(org.junit.Before)

Aggregations

TableStats (io.crate.statistics.TableStats) 23, Test (org.junit.Test) 17, CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest) 15, Symbol (io.crate.expression.symbol.Symbol) 11, RelationName (io.crate.metadata.RelationName) 8, Filter (io.crate.planner.operators.Filter) 7, LogicalPlan (io.crate.planner.operators.LogicalPlan) 7, SQLExecutor (io.crate.testing.SQLExecutor) 7, WindowFunction (io.crate.expression.symbol.WindowFunction) 6, WindowAgg (io.crate.planner.operators.WindowAgg) 6, Stats (io.crate.statistics.Stats) 6, DocTableRelation (io.crate.analyze.relations.DocTableRelation) 4, PlannerContext (io.crate.planner.PlannerContext) 4, ProjectionBuilder (io.crate.execution.dsl.projection.builder.ProjectionBuilder) 3, ScopedSymbol (io.crate.expression.symbol.ScopedSymbol) 3, ColumnIdent (io.crate.metadata.ColumnIdent) 3, Reference (io.crate.metadata.Reference) 3, DocTableInfo (io.crate.metadata.doc.DocTableInfo) 3, ColumnStats (io.crate.statistics.ColumnStats) 3, ArrayList (java.util.ArrayList) 3