Use of io.crate.statistics.TableStats in project crate by crate.
Class BatchPortalTest, method testEachStatementReceivesCorrectParams:
@Test
public void testEachStatementReceivesCorrectParams() throws Throwable {
    SQLExecutor sqlExecutor = SQLExecutor.builder(clusterService)
        .addTable("create table t1 (x int)")
        .build();

    // Stub plan that echoes the bound parameters back as a single result row.
    Plan insertPlan = new Plan() {

        @Override
        public StatementType type() {
            return StatementType.INSERT;
        }

        @Override
        public void executeOrFail(DependencyCarrier executor,
                                  PlannerContext plannerContext,
                                  RowConsumer consumer,
                                  Row params,
                                  SubQueryResults subQueryResults) {
            consumer.accept(InMemoryBatchIterator.of(params, null), null);
        }
    };
    // Planner stub that always returns the echo plan above.
    Planner planner = new Planner(
        Settings.EMPTY,
        clusterService,
        sqlExecutor.nodeCtx,
        new TableStats(),
        null,
        null,
        sqlExecutor.schemas(),
        new StubUserManager(),
        mock(SessionSettingRegistry.class)) {

        @Override
        public Plan plan(AnalyzedStatement analyzedStatement, PlannerContext plannerContext) {
            return insertPlan;
        }
    };
    DependencyCarrier executor = mock(DependencyCarrier.class, Answers.RETURNS_MOCKS);
    Session session = new Session(
        sqlExecutor.nodeCtx,
        sqlExecutor.analyzer,
        planner,
        new JobsLogs(() -> false),
        false,
        executor,
        AccessControl.DISABLED,
        SessionContext.systemSessionContext());

    session.parse("S_1", "insert into t1(x) values(1)", Collections.emptyList());
    session.bind("Portal", "S_1", Collections.emptyList(), null);
    final ArrayList<Object[]> s1Rows = new ArrayList<>();
    session.execute("Portal", 0, new BaseResultReceiver() {

        @Override
        public void setNextRow(Row row) {
            s1Rows.add(row.materialize());
        }
    });

    session.parse("S_2", "insert into t1(x) values(?)", Collections.emptyList());
    session.bind("Portal", "S_2", Collections.singletonList(2), null);
    final ArrayList<Object[]> s2Rows = new ArrayList<>();
    session.execute("Portal", 0, new BaseResultReceiver() {

        @Override
        public void setNextRow(Row row) {
            s2Rows.add(row.materialize());
        }
    });

    session.sync().get(5, TimeUnit.SECONDS);

    // S_1 was bound with no parameters, S_2 with the single parameter 2
    assertThat(s1Rows, contains(emptyArray()));
    assertThat(s2Rows, contains(arrayContaining(is(2))));
}
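The stub plan simply echoes whatever parameters were bound back as a result row, which is what the final assertions rely on: S_1 was bound with no parameters (an empty array), S_2 with the single value 2. As a minimal, hypothetical continuation of the same test (not part of the original), a third statement bound with two parameters would be echoed back as one row containing both values:

    // Hypothetical continuation under the same stub plan; List.of and the expected
    // values [3, 4] are assumptions for illustration only.
    session.parse("S_3", "insert into t1(x) values(?), (?)", Collections.emptyList());
    session.bind("Portal", "S_3", List.of(3, 4), null);
    final ArrayList<Object[]> s3Rows = new ArrayList<>();
    session.execute("Portal", 0, new BaseResultReceiver() {

        @Override
        public void setNextRow(Row row) {
            s3Rows.add(row.materialize());
        }
    });
    session.sync().get(5, TimeUnit.SECONDS);
    // the stub plan would echo both bound parameters back as a single row
    assertThat(s3Rows, contains(arrayContaining(is(3), is(4))));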
Use of io.crate.statistics.TableStats in project crate by crate.
Class GroupHashAggregate, method approximateDistinctValues:
static long approximateDistinctValues(long numSourceRows, TableStats tableStats, List<Symbol> groupKeys) {
    long distinctValues = 1;
    int numKeysWithStats = 0;
    for (Symbol groupKey : groupKeys) {
        Stats stats = null;
        ColumnStats columnStats = null;
        if (groupKey instanceof Reference) {
            Reference ref = (Reference) groupKey;
            stats = tableStats.getStats(ref.ident().tableIdent());
            columnStats = stats.statsByColumn().get(ref.column());
            numKeysWithStats++;
        } else if (groupKey instanceof ScopedSymbol) {
            ScopedSymbol scopedSymbol = (ScopedSymbol) groupKey;
            stats = tableStats.getStats(scopedSymbol.relation());
            columnStats = stats.statsByColumn().get(scopedSymbol.column());
            numKeysWithStats++;
        }
        if (columnStats == null) {
            // Assume the worst case: every value is unique
            distinctValues *= numSourceRows;
        } else {
            // `approxDistinct` is the number of distinct values in relation to `stats.numDocs()`, not in
            // relation to `numSourceRows`, which is based on the estimates of a source operator.
            // That is why we calculate the cardinality ratio and derive the new distinct
            // values from `numSourceRows`, to account for changes in the number of rows in source operators.
            //
            // e.g. SELECT x, count(*) FROM tbl GROUP BY x
            // and  SELECT x, count(*) FROM tbl WHERE pk = 1 GROUP BY x
            //
            // have a different number of groups
            double cardinalityRatio = columnStats.approxDistinct() / stats.numDocs();
            distinctValues *= (long) (numSourceRows * cardinalityRatio);
        }
    }
    if (numKeysWithStats == groupKeys.size()) {
        return Math.min(distinctValues, numSourceRows);
    } else {
        return numSourceRows;
    }
}
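A small worked example of the scaling step, using assumed numbers (not taken from the source): suppose the column statistics report approxDistinct = 100 over stats.numDocs() = 2,000, and the source operator estimates numSourceRows = 500. The plain-Java sketch below reproduces the arithmetic with those assumed values:

    public class DistinctEstimateExample {

        public static void main(String[] args) {
            double approxDistinct = 100.0;   // assumed: distinct values across the whole table
            double numDocs = 2_000.0;        // assumed: total docs the column stats are based on
            long numSourceRows = 500L;       // assumed: row estimate of the source operator

            // same formula as in approximateDistinctValues above
            double cardinalityRatio = approxDistinct / numDocs;              // 0.05
            long distinctValues = (long) (numSourceRows * cardinalityRatio); // 25

            // with a single group key that has stats, the result is capped by numSourceRows
            System.out.println(Math.min(distinctValues, numSourceRows));     // prints 25
        }
    }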
Use of io.crate.statistics.TableStats in project crate by crate.
Class MergeFilterAndCollect, method apply:
@Override
public LogicalPlan apply(Filter filter, Captures captures, TableStats tableStats, TransactionContext txnCtx, NodeContext nodeCtx) {
    Collect collect = captures.get(collectCapture);
    Stats stats = tableStats.getStats(collect.relation().tableInfo().ident());
    WhereClause newWhere = collect.where().add(filter.query());
    return new Collect(
        collect.relation(),
        collect.outputs(),
        newWhere,
        SelectivityFunctions.estimateNumRows(stats, newWhere.queryOrFallback(), null),
        stats.averageSizePerRowInBytes());
}
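The rule fuses a Filter sitting directly on top of a Collect into a single Collect with a tightened WHERE clause, so the predicate is evaluated during collection and the row estimate is recomputed from the table stats. A deliberately simplified, hypothetical model of that merge (plain Java stand-ins, not CrateDB's operator classes) could look like this:

    // Simplified stand-ins for the real Filter/Collect operators (hypothetical types):
    record SimpleCollect(String table, String where, long estimatedRows) {}
    record SimpleFilter(String query) {}

    static SimpleCollect mergeFilterIntoCollect(SimpleFilter filter, SimpleCollect collect, long newRowEstimate) {
        // mirrors collect.where().add(filter.query()): AND the filter predicate into the existing clause
        String mergedWhere = "true".equals(collect.where())
            ? filter.query()
            : "(" + collect.where() + ") AND (" + filter.query() + ")";
        // the real rule recomputes the estimate via SelectivityFunctions.estimateNumRows(stats, ...)
        return new SimpleCollect(collect.table(), mergedWhere, newRowEstimate);
    }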
Use of io.crate.statistics.TableStats in project crate by crate.
Class TableStatsServiceIntegrationTest, method testStatsUpdated:
@Test
public void testStatsUpdated() throws Exception {
    execute("create table t1(a int) with (number_of_replicas = 1)");
    ensureGreen();
    execute("insert into t1(a) values(1), (2), (3), (4), (5)");
    execute("refresh table t1");
    assertBusy(() -> {
        TableStats tableStats = internalCluster().getDataNodeInstance(TableStats.class);
        assertThat(tableStats.numDocs(new RelationName(sqlExecutor.getCurrentSchema(), "t1")), is(5L));
        // tableStats.estimatedSizePerRow() is not tested because it is based on the sys.shards size
        // column, which is cached for 10 seconds in ShardSizeExpression and would increase the time
        // needed to run this test.
    }, 5, TimeUnit.SECONDS);
}
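For reference, a hypothetical version of the assertion the comment skips might look like the sketch below. The method name estimatedSizePerRow comes from the comment above, but the exact signature, matcher, and timeout are assumptions, and the roughly 10 second sys.shards cache is precisely why the original test omits it:

    // Hypothetical, slower variant that also waits for the size-based estimate (assumed signature):
    assertBusy(() -> {
        TableStats tableStats = internalCluster().getDataNodeInstance(TableStats.class);
        RelationName t1 = new RelationName(sqlExecutor.getCurrentSchema(), "t1");
        assertThat(tableStats.estimatedSizePerRow(t1), greaterThan(0L));
    }, 15, TimeUnit.SECONDS);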
Use of io.crate.statistics.TableStats in project crate by crate.
Class GroupHashAggregateTest, method setUpStatsAndExpressions:
@Before
public void setUpStatsAndExpressions() throws Exception {
    var samples = IntStream.concat(
        IntStream.generate(() -> 10).limit(50),
        IntStream.generate(() -> 20).limit(50)
    ).boxed().collect(Collectors.toList());
    long numDocs = 2_000L;
    ColumnStats<Integer> columnStats = ColumnStats.fromSortedValues(samples, DataTypes.INTEGER, 0, numDocs);
    Stats stats = new Stats(
        numDocs,
        DataTypes.INTEGER.fixedSize(),
        Map.of(new ColumnIdent("x"), columnStats, new ColumnIdent("i"), columnStats));
    tableStats = new TableStats();
    tableStats.updateTableStats(Map.of(new RelationName("doc", "t1"), stats));
    expressions = new SqlExpressions(T3.sources(clusterService));
}
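With this setup, the registered statistics can be read back through the same TableStats accessors shown in the snippets above; for example (illustrative only, with no assertion on the exact distinct-count estimate):

    RelationName t1 = new RelationName("doc", "t1");
    Stats t1Stats = tableStats.getStats(t1);
    // both columns share the same ColumnStats built from 100 samples containing only 10s and 20s,
    // so the approximate distinct count should be far below numDocs (2,000)
    ColumnStats<?> xStats = t1Stats.statsByColumn().get(new ColumnIdent("x"));
    System.out.println(xStats.approxDistinct());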