use of io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT in project hetu-core by openlookeng.
the class TableScanStatsRule method doCalculate.
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
// TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
TupleDomain<ColumnHandle> predicate = metadata.getTableProperties(session, node.getTable()).getPredicate();
Constraint constraint = new Constraint(predicate);
TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint, true);
verify(tableStatistics != null, "tableStatistics is null for %s", node);
Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>();
Map<ColumnHandle, Symbol> remainingSymbols = new HashMap<>();
Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();
boolean isPredicatesPushDown = false;
if ((predicate.isAll() || predicate.getDomains().get().equals(node.getEnforcedConstraint().getDomains().get())) && !(node.getEnforcedConstraint().isAll() || node.getEnforcedConstraint().isNone())) {
predicate = node.getEnforcedConstraint();
isPredicatesPushDown = true;
predicate.getDomains().get().entrySet().stream().forEach(e -> {
remainingSymbols.put(e.getKey(), new Symbol(e.getKey().getColumnName()));
});
}
for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) {
Symbol symbol = entry.getKey();
Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue()));
SymbolStatsEstimate symbolStatistics = columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol))).orElse(SymbolStatsEstimate.unknown());
outputSymbolStats.put(symbol, symbolStatistics);
remainingSymbols.remove(entry.getValue());
}
PlanNodeStatsEstimate tableEstimates = PlanNodeStatsEstimate.builder().setOutputRowCount(tableStatistics.getRowCount().getValue()).addSymbolStatistics(outputSymbolStats).build();
if (isPredicatesPushDown) {
if (remainingSymbols.size() > 0) {
ImmutableBiMap.Builder<ColumnHandle, Symbol> assignmentBuilder = ImmutableBiMap.builder();
assignments = assignmentBuilder.putAll(assignments).putAll(remainingSymbols).build();
for (Map.Entry<ColumnHandle, Symbol> entry : remainingSymbols.entrySet()) {
Symbol symbol = entry.getValue();
Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getKey()));
SymbolStatsEstimate symbolStatistics = columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol))).orElse(SymbolStatsEstimate.unknown());
outputSymbolStats.put(symbol, symbolStatistics);
}
/* Refresh TableEstimates for remaining columns */
tableEstimates = PlanNodeStatsEstimate.builder().setOutputRowCount(tableStatistics.getRowCount().getValue()).addSymbolStatistics(outputSymbolStats).build();
}
Expression pushDownExpression = domainTranslator.toPredicate(predicate.transform(assignments::get));
PlanNodeStatsEstimate estimate = filterStatsCalculator.filterStats(tableEstimates, pushDownExpression, session, types);
if ((isDefaultFilterFactorEnabled(session) || sourceStats.isEnforceDefaultFilterFactor()) && estimate.isOutputRowCountUnknown()) {
PlanNodeStatsEstimate finalTableEstimates = tableEstimates;
estimate = tableEstimates.mapOutputRowCount(sourceRowCount -> finalTableEstimates.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
}
return Optional.of(estimate);
}
return Optional.of(tableEstimates);
}
use of io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT in project hetu-core by openlookeng.
the class FilterStatsRule method doCalculate.
@Override
public Optional<PlanNodeStatsEstimate> doCalculate(FilterNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types) {
PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource());
PlanNodeStatsEstimate estimate;
if (isExpression(node.getPredicate())) {
estimate = filterStatsCalculator.filterStats(sourceStats, castToExpression(node.getPredicate()), session, types);
} else {
Map<Integer, Symbol> layout = new HashMap<>();
int channel = 0;
for (Symbol symbol : node.getSource().getOutputSymbols()) {
layout.put(channel++, symbol);
}
estimate = filterStatsCalculator.filterStats(sourceStats, node.getPredicate(), session, types, layout);
}
if ((isDefaultFilterFactorEnabled(session) || statsProvider.isEnforceDefaultFilterFactor()) && estimate.isOutputRowCountUnknown()) {
estimate = sourceStats.mapOutputRowCount(sourceRowCount -> sourceStats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
}
return Optional.of(estimate);
}
use of io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT in project hetu-core by openlookeng.
the class GroupIdStatsRule method groupBy.
public static PlanNodeStatsEstimate groupBy(PlanNodeStatsEstimate sourceStats, List<List<Symbol>> groupingSets, Map<Symbol, Symbol> groupingColumns) {
PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
double rowsCount;
double finalRowCount = 0;
boolean knowsStatsFound = false;
for (List<Symbol> groupingSet : groupingSets) {
if (groupingSet.size() == 0) {
finalRowCount = 1;
continue;
}
rowsCount = 1;
for (Symbol groupBySymbol : groupingSet) {
Symbol currentCol = groupingColumns.get(groupBySymbol);
SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(currentCol);
result.addSymbolStatistics(currentCol, symbolStatistics.mapNullsFraction(nullsFraction -> {
if (nullsFraction == 0.0) {
return 0.0;
}
return 1.0 / (symbolStatistics.getDistinctValuesCount() + 1);
}));
}
for (Symbol groupBySymbol : groupingSet) {
SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupingColumns.get(groupBySymbol));
if (symbolStatistics.isUnknown()) {
// so for now we dont consider the same for overall stats calculation.
continue;
}
knowsStatsFound = true;
int nullRow = (symbolStatistics.getNullsFraction() == 0.0) ? 0 : 1;
rowsCount *= symbolStatistics.getDistinctValuesCount() + nullRow;
}
rowsCount = min(rowsCount, sourceStats.getOutputRowCount());
// corresponding to each multi col group one NULL value (i.e. corresponding to all) is added, so adjust for the same.
finalRowCount += rowsCount + (groupingSet.size() > 1 ? (sourceStats.getOutputRowCount() / rowsCount) : 0);
}
if (knowsStatsFound) {
result.setOutputRowCount(finalRowCount + 1);
} else {
result.setOutputRowCount(sourceStats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
}
return result.build();
}
use of io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT in project hetu-core by openlookeng.
the class AggregationStatsRule method groupBy.
public static PlanNodeStatsEstimate groupBy(PlanNodeStatsEstimate sourceStats, Collection<Symbol> groupBySymbols, Map<Symbol, Aggregation> aggregations) {
PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
for (Symbol groupBySymbol : groupBySymbols) {
SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupBySymbol);
result.addSymbolStatistics(groupBySymbol, symbolStatistics.mapNullsFraction(nullsFraction -> {
if (nullsFraction == 0.0) {
return 0.0;
}
return 1.0 / (symbolStatistics.getDistinctValuesCount() + 1);
}));
}
double rowsCount = 1;
boolean knowsStatsFound = false;
for (Symbol groupBySymbol : groupBySymbols) {
SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupBySymbol);
if (symbolStatistics.isUnknown()) {
// so for now we dont consider the same for overall stats calculation.
continue;
}
knowsStatsFound = true;
int nullRow = (symbolStatistics.getNullsFraction() == 0.0) ? 0 : 1;
rowsCount *= symbolStatistics.getDistinctValuesCount() + nullRow;
}
if (knowsStatsFound) {
result.setOutputRowCount(min(rowsCount, sourceStats.getOutputRowCount()));
} else {
// If there was no proper stats in any of the grouping symbols, we just take fixed % of source.
result.setOutputRowCount(sourceStats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
}
for (Map.Entry<Symbol, Aggregation> aggregationEntry : aggregations.entrySet()) {
result.addSymbolStatistics(aggregationEntry.getKey(), estimateAggregationStats(aggregationEntry.getValue(), sourceStats));
}
return result.build();
}
use of io.prestosql.cost.FilterStatsCalculator.UNKNOWN_FILTER_COEFFICIENT in project hetu-core by openlookeng.
the class TestJoinStatsRule method testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction.
@Test
public void testStatsForInnerJoinWithTwoEquiClausesAndNonEqualityFunction() {
double innerJoinRowCount = // driver join clause
LEFT_ROWS_COUNT * RIGHT_ROWS_COUNT / LEFT_JOIN_COLUMN_2_NDV * LEFT_JOIN_COLUMN_2_NON_NULLS * RIGHT_JOIN_COLUMN_2_NON_NULLS * // auxiliary join clause
UNKNOWN_FILTER_COEFFICIENT * // LEFT_JOIN_COLUMN < 10 non equality filter
0.3333333333;
PlanNodeStatsEstimate innerJoinStats = planNodeStats(innerJoinRowCount, symbolStatistics(LEFT_JOIN_COLUMN, 5.0, 10.0, 0.0, RIGHT_JOIN_COLUMN_NDV * 0.3333333333), symbolStatistics(RIGHT_JOIN_COLUMN, 5.0, 20.0, 0.0, RIGHT_JOIN_COLUMN_NDV), symbolStatistics(LEFT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV), symbolStatistics(RIGHT_JOIN_COLUMN_2, 100.0, 200.0, 0.0, RIGHT_JOIN_COLUMN_2_NDV));
tester().assertStatsFor(pb -> {
Symbol leftJoinColumnSymbol = pb.symbol(LEFT_JOIN_COLUMN, BIGINT);
Symbol rightJoinColumnSymbol = pb.symbol(RIGHT_JOIN_COLUMN, DOUBLE);
Symbol leftJoinColumnSymbol2 = pb.symbol(LEFT_JOIN_COLUMN_2, BIGINT);
Symbol rightJoinColumnSymbol2 = pb.symbol(RIGHT_JOIN_COLUMN_2, DOUBLE);
ComparisonExpression leftJoinColumnLessThanTen = new ComparisonExpression(ComparisonExpression.Operator.LESS_THAN, toSymbolReference(leftJoinColumnSymbol), new LongLiteral("10"));
return pb.join(INNER, pb.values(leftJoinColumnSymbol, leftJoinColumnSymbol2), pb.values(rightJoinColumnSymbol, rightJoinColumnSymbol2), ImmutableList.of(new EquiJoinClause(leftJoinColumnSymbol2, rightJoinColumnSymbol2), new EquiJoinClause(leftJoinColumnSymbol, rightJoinColumnSymbol)), ImmutableList.of(leftJoinColumnSymbol, leftJoinColumnSymbol2, rightJoinColumnSymbol, rightJoinColumnSymbol2), Optional.of(castToRowExpression(leftJoinColumnLessThanTen)));
}).withSourceStats(0, planNodeStats(LEFT_ROWS_COUNT, LEFT_JOIN_COLUMN_STATS, LEFT_JOIN_COLUMN_2_STATS)).withSourceStats(1, planNodeStats(RIGHT_ROWS_COUNT, RIGHT_JOIN_COLUMN_STATS, RIGHT_JOIN_COLUMN_2_STATS)).check(stats -> stats.equalTo(innerJoinStats));
}
Aggregations