Use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
Class TestTpcdsMetadataStatistics, method testTableStatsDetails.
/**
 * Checks the statistics reported for the TPC-DS {@code call_center} table in schema {@code sf1}:
 * the row count, the presence of a statistics entry for every column handle, and the detailed
 * statistics of one representative column per data kind.
 */
@Test
public void testTableStatsDetails() {
    ConnectorTableHandle callCenterHandle = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.CALL_CENTER.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, callCenterHandle, alwaysTrue(), true);
    estimateAssertion.assertClose(stats.getRowCount(), Estimate.of(6), "Row count does not match");

    // all columns have stats
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, callCenterHandle);
    Map<ColumnHandle, ColumnStatistics> statsByHandle = stats.getColumnStatistics();
    handlesByName.values().forEach(handle -> {
        assertTrue(statsByHandle.containsKey(handle));
        assertNotNull(statsByHandle.get(handle));
    });

    // identifier
    ColumnStatistics identifierStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_SK.getName()));
    assertColumnStatistics(
            identifierStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(6))
                    .setRange(new DoubleRange(1, 6))
                    .build());

    // varchar
    ColumnStatistics varcharStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_ID.getName()));
    assertColumnStatistics(
            varcharStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setDataSize(Estimate.of(48.0))
                    .build());

    // char
    ColumnStatistics charStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_ZIP.getName()));
    assertColumnStatistics(
            charStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setDataSize(Estimate.of(5.0))
                    .build());

    // decimal
    ColumnStatistics decimalStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_GMT_OFFSET.getName()));
    assertColumnStatistics(
            decimalStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(1))
                    .setRange(new DoubleRange(-5, -5))
                    .build());

    // date
    ColumnStatistics dateStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_REC_START_DATE.getName()));
    assertColumnStatistics(
            dateStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0))
                    .setDistinctValuesCount(Estimate.of(4))
                    .setRange(new DoubleRange(10227L, 11688L))
                    .build());

    // only null values
    ColumnStatistics allNullStats = statsByHandle.get(handlesByName.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName()));
    assertColumnStatistics(
            allNullStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(1))
                    .setDistinctValuesCount(Estimate.of(0))
                    .build());
}
Use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
Class TestTpcdsMetadataStatistics, method testNullFraction.
/**
 * Checks the statistics of a column that contains some (but not only) null values, using
 * {@code WEB_REC_END_DATE} of the TPC-DS {@code web_site} table in schema {@code sf1}.
 */
@Test
public void testNullFraction() {
    ConnectorTableHandle webSiteHandle = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.WEB_SITE.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, webSiteHandle, alwaysTrue(), true);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, webSiteHandle);

    // some null values
    ColumnStatistics recEndDateStats = stats.getColumnStatistics().get(handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName()));
    assertColumnStatistics(
            recEndDateStats,
            ColumnStatistics.builder()
                    .setNullsFraction(Estimate.of(0.5))
                    .setDistinctValuesCount(Estimate.of(3))
                    .setRange(new DoubleRange(10819L, 11549L))
                    .build());
}
Use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
Class TestTpchMetadata, method testNoTableStats.
/**
 * Asserts that the row count reported for the given table is unknown.
 *
 * @param schema name of the schema containing the table
 * @param table table whose statistics are requested
 */
private void testNoTableStats(String schema, TpchTable<?> table) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    assertTrue(tpchMetadata.getTableStatistics(session, handle, alwaysTrue(), true).getRowCount().isUnknown());
}
Use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
Class TestTpchMetadata, method testColumnStats.
/**
 * Fetches the statistics of one column under the given constraint and compares each component
 * (distinct values count, data size, nulls fraction, range) with the expected statistics, using
 * an {@code EstimateAssertion} built with {@code TOLERANCE}.
 *
 * @param schema name of the schema containing the table
 * @param table table that owns the column
 * @param column column whose statistics are checked
 * @param constraint constraint passed to the statistics lookup
 * @param expected statistics the column is expected to report
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint constraint, ColumnStatistics expected) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint, true);

    // Column handles are keyed by the simplified column name.
    ColumnHandle target = tpchMetadata.getColumnHandles(session, handle).get(column.getSimplifiedColumnName());
    ColumnStatistics actual = stats.getColumnStatistics().get(target);

    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    closeTo.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(actual.getRange(), expected.getRange(), "range");
}
Use of io.prestosql.spi.statistics.TableStatistics in project hetu-core by openlookeng.
Class TableScanStatsRule, method doCalculate.
/**
 * Estimates the output statistics of a table scan.
 *
 * <p>Table statistics are requested from the metadata using the predicate of the table's
 * properties as constraint, and every assigned output symbol receives the statistics of its
 * column (or unknown statistics when the connector reports none for that column).
 *
 * <p>When the node's enforced constraint is neither "all" nor "none", and the table predicate
 * is either "all" or has the same domains as the enforced constraint, the enforced constraint
 * is treated as a pushed-down predicate: it is translated to an expression and applied to the
 * table estimate through the filter stats calculator. Columns that appear only in the enforced
 * constraint get a symbol named after the column so the expression can reference them. If the
 * filtered row count is unknown and the default filter factor is enabled (by session or by
 * {@code sourceStats}), the table row count scaled by {@code UNKNOWN_FILTER_COEFFICIENT} is
 * used instead.
 *
 * @param node table scan being estimated
 * @param sourceStats stats provider; consulted here only for the default-filter-factor flag
 * @param lookup unused by this rule
 * @param session current session
 * @param types types of the plan symbols
 * @return the estimate; this implementation never returns an empty optional
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    TupleDomain<ColumnHandle> predicate = metadata.getTableProperties(session, node.getTable()).getPredicate();
    Constraint constraint = new Constraint(predicate);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint, true);
    verify(tableStatistics != null, "tableStatistics is null for %s", node);

    Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>();
    Map<ColumnHandle, Symbol> remainingSymbols = new HashMap<>();
    Map<ColumnHandle, Symbol> assignments = ImmutableBiMap.copyOf(node.getAssignments()).inverse();

    // The enforced constraint is checked for all/none BEFORE any domains are compared, and the
    // domains are compared as Optionals. Calling Optional.get() on either side up front threw
    // NoSuchElementException whenever the predicate or the enforced constraint was "none".
    TupleDomain<ColumnHandle> enforcedConstraint = node.getEnforcedConstraint();
    boolean isPredicatesPushDown = !(enforcedConstraint.isAll() || enforcedConstraint.isNone())
            && (predicate.isAll() || predicate.getDomains().equals(enforcedConstraint.getDomains()));
    if (isPredicatesPushDown) {
        // Domains are present here because the enforced constraint is not "none".
        for (ColumnHandle column : enforcedConstraint.getDomains().get().keySet()) {
            remainingSymbols.put(column, new Symbol(column.getColumnName()));
        }
    }

    for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) {
        Symbol symbol = entry.getKey();
        Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue()));
        SymbolStatsEstimate symbolStatistics = columnStatistics
                .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
                .orElse(SymbolStatsEstimate.unknown());
        outputSymbolStats.put(symbol, symbolStatistics);
        // Columns already produced by the scan need no synthetic symbol.
        remainingSymbols.remove(entry.getValue());
    }

    if (!remainingSymbols.isEmpty()) {
        // Only reachable on the push-down path: constrained columns that the scan does not output.
        assignments = ImmutableBiMap.<ColumnHandle, Symbol>builder()
                .putAll(assignments)
                .putAll(remainingSymbols)
                .build();
        for (Map.Entry<ColumnHandle, Symbol> entry : remainingSymbols.entrySet()) {
            Symbol symbol = entry.getValue();
            Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getKey()));
            SymbolStatsEstimate symbolStatistics = columnStatistics
                    .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
                    .orElse(SymbolStatsEstimate.unknown());
            outputSymbolStats.put(symbol, symbolStatistics);
        }
    }

    // Built once, after all symbol statistics (including the remaining columns) are collected.
    PlanNodeStatsEstimate tableEstimates = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(tableStatistics.getRowCount().getValue())
            .addSymbolStatistics(outputSymbolStats)
            .build();
    if (!isPredicatesPushDown) {
        return Optional.of(tableEstimates);
    }

    Expression pushDownExpression = domainTranslator.toPredicate(enforcedConstraint.transform(assignments::get));
    PlanNodeStatsEstimate estimate = filterStatsCalculator.filterStats(tableEstimates, pushDownExpression, session, types);
    if ((isDefaultFilterFactorEnabled(session) || sourceStats.isEnforceDefaultFilterFactor()) && estimate.isOutputRowCountUnknown()) {
        // Fall back to a fixed fraction of the unfiltered row count.
        estimate = tableEstimates.mapOutputRowCount(sourceRowCount -> tableEstimates.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    }
    return Optional.of(estimate);
}
Aggregations