use of io.crate.statistics.ColumnStats in project crate by crate.
the class SelectivityFunctionsCalculationTest method test_collect_operator_adapts_expected_row_count_based_on_selectivity_calculation.
/**
 * Registers table statistics for {@code doc.tbl} in which column {@code x} holds the
 * integers 1..20000 (no nulls), builds an executor on top of those statistics and
 * checks that the logical plan for {@code x = 10} reports exactly one expected row.
 */
@Test
public void test_collect_operator_adapts_expected_row_count_based_on_selectivity_calculation() throws Throwable {
    long rowCount = 20000;
    var sortedValues = IntStream.rangeClosed(1, 20000).boxed().collect(Collectors.toList());

    // Per-column statistics: only "x" is described.
    var statsByColumn = new HashMap<ColumnIdent, ColumnStats>();
    statsByColumn.put(
        new ColumnIdent("x"),
        ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, rowCount)
    );
    Stats tblStats = new Stats(rowCount, DataTypes.INTEGER.fixedSize(), statsByColumn);

    // Publish the statistics under the relation the query below selects from.
    TableStats tableStats = new TableStats();
    tableStats.updateTableStats(Map.of(new RelationName("doc", "tbl"), tblStats));

    SQLExecutor executor = SQLExecutor.builder(clusterService)
        .setTableStats(tableStats)
        .addTable("create table doc.tbl (x int)")
        .build();

    LogicalPlan logicalPlan = executor.logicalPlan("select * from doc.tbl where x = 10");
    long expectedRows = logicalPlan.numExpectedRows();
    assertThat(expectedRows, Matchers.is(1L));
}
use of io.crate.statistics.ColumnStats in project crate by crate.
the class SelectivityFunctionsTest method test_eq_value_that_is_present_in_mcv_uses_mcv_frequency_as_selectivity.
/**
 * Estimates the row count for {@code x = ?} with the parameter bound to 10, using
 * statistics built from a skewed value list (seven 1s, three 5s, eight 10s, then 11..14).
 * The expected estimate is the first most-common-value frequency multiplied by the
 * number of values, truncated to a {@code long}.
 */
@Test
public void test_eq_value_that_is_present_in_mcv_uses_mcv_frequency_as_selectivity() {
    var sortedValues = Lists2.concat(
        List.of(1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10),
        IntStream.rangeClosed(11, 14).boxed().collect(Collectors.toList())
    );
    int rowCount = sortedValues.size();

    var xStats = ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, rowCount);
    Stats stats = new Stats(
        rowCount,
        16,
        Map.<ColumnIdent, ColumnStats>of(new ColumnIdent("x"), xStats)
    );

    // NOTE(review): index 0 is presumably the frequency of 10, the most frequent value
    // in the list above -- confirm the ordering of mostCommonValues().
    double topFrequency = xStats.mostCommonValues().frequencies()[0];
    long expectedRows = (long) (topFrequency * rowCount);

    Symbol query = new SqlExpressions(T3.sources(clusterService)).asSymbol("x = ?");
    long estimatedRows = SelectivityFunctions.estimateNumRows(stats, query, new Row1(10));

    assertThat(estimatedRows, Matchers.is(expectedRows));
}
use of io.crate.statistics.ColumnStats in project crate by crate.
the class SelectivityFunctionsTest method test_not_reverses_selectivity_of_inner_function.
/**
 * With column {@code x} holding the integers 1..20000, checks that the estimate for
 * {@code NOT (x = 10)} is 19999 rows.
 */
@Test
public void test_not_reverses_selectivity_of_inner_function() {
    var sortedValues = IntStream.rangeClosed(1, 20_000).boxed().collect(Collectors.toList());
    var xStats = ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, 20_000L);
    Stats stats = new Stats(20_000, 16, Map.of(new ColumnIdent("x"), xStats));

    Symbol negatedEq = new SqlExpressions(T3.sources(clusterService)).asSymbol("NOT (x = 10)");

    // No parameter row is supplied; the query contains only literals.
    long estimatedRows = SelectivityFunctions.estimateNumRows(stats, negatedEq, null);

    assertThat(estimatedRows, Matchers.is(19999L));
}
use of io.crate.statistics.ColumnStats in project crate by crate.
the class SelectivityFunctionsTest method test_column_eq_column_uses_approx_distinct_for_selectivity_approximation.
/**
 * Estimates the row count for the column-to-column comparison {@code x = y} when
 * statistics exist for {@code x} only (seven 1s, three 5s, eight 10s, then 11..14),
 * and expects 3 rows.
 */
@Test
public void test_column_eq_column_uses_approx_distinct_for_selectivity_approximation() {
    var sortedValues = Lists2.concat(
        List.of(1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10),
        IntStream.rangeClosed(11, 14).boxed().collect(Collectors.toList())
    );
    int rowCount = sortedValues.size();

    var xStats = ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, rowCount);
    Stats stats = new Stats(
        rowCount,
        16,
        Map.<ColumnIdent, ColumnStats>of(new ColumnIdent("x"), xStats)
    );

    Symbol columnComparison = new SqlExpressions(T3.sources(clusterService)).asSymbol("x = y");
    long estimatedRows = SelectivityFunctions.estimateNumRows(stats, columnComparison, null);

    assertThat(estimatedRows, Matchers.is(3L));
}
use of io.crate.statistics.ColumnStats in project crate by crate.
the class SelectivityFunctionsTest method test_eq_not_in_mcv_is_based_on_approx_distinct.
/**
 * With column {@code x} holding the integers 1..20000 (every value occurs once),
 * checks that the estimate for {@code x = 10} is a single row.
 */
@Test
public void test_eq_not_in_mcv_is_based_on_approx_distinct() {
    var sortedValues = IntStream.rangeClosed(1, 20_000).boxed().collect(Collectors.toList());

    var statsByColumn = new HashMap<ColumnIdent, ColumnStats>();
    statsByColumn.put(
        new ColumnIdent("x"),
        ColumnStats.fromSortedValues(sortedValues, DataTypes.INTEGER, 0, 20_000L)
    );
    Stats stats = new Stats(20_000, 16, statsByColumn);

    Symbol eqLiteral = new SqlExpressions(T3.sources(clusterService)).asSymbol("x = 10");
    long estimatedRows = SelectivityFunctions.estimateNumRows(stats, eqLiteral, null);

    assertThat(estimatedRows, Matchers.is(1L));
}
Aggregations