Use of io.prestosql.spi.statistics.ColumnStatistics in the hetu-core project by openlookeng.
In class ColumnStatisticsData, method toColumnStatistics.
/**
 * Builds a {@link ColumnStatistics} from the counters held by this object.
 *
 * <p>The nulls fraction is {@code nullsCount / rowCount}. The data size is reported as unknown when
 * {@code dataSize} is absent, and a value range is set only when both {@code min} and {@code max}
 * are present.
 *
 * @param rowCount number of rows in the table; used as the denominator of the nulls fraction
 * @return the assembled column statistics
 */
public ColumnStatistics toColumnStatistics(long rowCount) {
    ColumnStatistics.Builder builder = ColumnStatistics.builder();
    // The ratio of null rows is the column's nulls fraction; data size is set separately below.
    // NOTE(review): a rowCount of 0 makes this ratio NaN or infinite; confirm callers never pass 0.
    builder.setNullsFraction(Estimate.of((double) nullsCount / (double) rowCount));
    builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    builder.setDataSize(dataSize.map(Estimate::of).orElse(Estimate.unknown()));
    if (min.isPresent() && max.isPresent()) {
        builder.setRange(new DoubleRange((double) min.get(), (double) max.get()));
    }
    return builder.build();
}
Use of io.prestosql.spi.statistics.ColumnStatistics in the hetu-core project by openlookeng.
In class TestTpchMetadata, method testColumnStats.
/**
 * Looks up the statistics that {@code tpchMetadata} reports for one column of a table under the
 * given constraint and compares each estimate against the expected statistics within
 * {@code TOLERANCE}.
 *
 * @param schema schema name of the table
 * @param table table whose name is used for the lookup
 * @param column column whose simplified name selects the column handle
 * @param constraint constraint passed to the table statistics lookup
 * @param expected statistics the reported values are compared against
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint constraint, ColumnStatistics expected) {
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics reported = tpchMetadata.getTableStatistics(session, handle, constraint, true);
    ColumnHandle targetColumn = tpchMetadata.getColumnHandles(session, handle).get(column.getSimplifiedColumnName());
    ColumnStatistics actual = reported.getColumnStatistics().get(targetColumn);

    // Compare field by field so a failure names the estimate that diverged.
    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    closeTo.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(actual.getRange(), expected.getRange(), "range");
}
Use of io.prestosql.spi.statistics.ColumnStatistics in the hetu-core project by openlookeng.
In class TableScanStatsRule, method doCalculate.
/**
 * Derives the stats estimate of a table scan from the table statistics returned by {@code metadata}.
 *
 * <p>Every assigned symbol gets the statistics of its column, or an unknown estimate when the table
 * statistics carry no entry for that column. When the node's enforced constraint is treated as a
 * pushed-down predicate (see the condition below), columns that appear only in that constraint are
 * added as extra symbols, the constraint is translated into an expression, and the table estimate is
 * run through {@code filterStatsCalculator}. If the filtered row count is unknown and the default
 * filter factor is enabled for the session or enforced by {@code sourceStats}, the unfiltered row
 * count scaled by {@code UNKNOWN_FILTER_COEFFICIENT} is used instead.
 *
 * @return the estimate for the scan; always present
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) {
    // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator
    TupleDomain<ColumnHandle> predicate = metadata.getTableProperties(session, node.getTable()).getPredicate();
    Constraint constraint = new Constraint(predicate);
    TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint, true);
    verify(tableStatistics != null, "tableStatistics is null for %s", node);

    Map<Symbol, SymbolStatsEstimate> statsBySymbol = new HashMap<>();
    Map<ColumnHandle, Symbol> predicateOnlyColumns = new HashMap<>();
    Map<ColumnHandle, Symbol> symbolByColumn = ImmutableBiMap.copyOf(node.getAssignments()).inverse();

    // The enforced constraint is used as the predicate when the table predicate is "all" or has the
    // same domains as the enforced constraint, and the enforced constraint is neither "all" nor "none".
    TupleDomain<ColumnHandle> enforced = node.getEnforcedConstraint();
    boolean predicatesPushedDown = (predicate.isAll() || predicate.getDomains().get().equals(enforced.getDomains().get()))
            && !enforced.isAll()
            && !enforced.isNone();
    if (predicatesPushedDown) {
        predicate = enforced;
        for (ColumnHandle column : predicate.getDomains().get().keySet()) {
            predicateOnlyColumns.put(column, new Symbol(column.getColumnName()));
        }
    }

    for (Map.Entry<Symbol, ColumnHandle> assignment : node.getAssignments().entrySet()) {
        Symbol symbol = assignment.getKey();
        ColumnHandle column = assignment.getValue();
        statsBySymbol.put(symbol, symbolStatisticsFor(tableStatistics, column, symbol, types));
        // Columns already produced by the scan are not "predicate only".
        predicateOnlyColumns.remove(column);
    }

    PlanNodeStatsEstimate tableEstimates = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(tableStatistics.getRowCount().getValue())
            .addSymbolStatistics(statsBySymbol)
            .build();
    if (!predicatesPushedDown) {
        return Optional.of(tableEstimates);
    }

    if (!predicateOnlyColumns.isEmpty()) {
        symbolByColumn = ImmutableBiMap.<ColumnHandle, Symbol>builder()
                .putAll(symbolByColumn)
                .putAll(predicateOnlyColumns)
                .build();
        for (Map.Entry<ColumnHandle, Symbol> extra : predicateOnlyColumns.entrySet()) {
            Symbol symbol = extra.getValue();
            statsBySymbol.put(symbol, symbolStatisticsFor(tableStatistics, extra.getKey(), symbol, types));
        }
        /* Refresh TableEstimates for remaining columns */
        tableEstimates = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(tableStatistics.getRowCount().getValue())
                .addSymbolStatistics(statsBySymbol)
                .build();
    }

    Expression pushDownExpression = domainTranslator.toPredicate(predicate.transform(symbolByColumn::get));
    PlanNodeStatsEstimate filtered = filterStatsCalculator.filterStats(tableEstimates, pushDownExpression, session, types);
    if ((isDefaultFilterFactorEnabled(session) || sourceStats.isEnforceDefaultFilterFactor()) && filtered.isOutputRowCountUnknown()) {
        // Snapshot for the lambda: tableEstimates itself is reassigned above.
        PlanNodeStatsEstimate unfiltered = tableEstimates;
        filtered = tableEstimates.mapOutputRowCount(sourceRowCount -> unfiltered.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    }
    return Optional.of(filtered);
}

/**
 * Returns the symbol statistics for {@code column}, or an unknown estimate when
 * {@code tableStatistics} has no column statistics for it.
 */
private SymbolStatsEstimate symbolStatisticsFor(TableStatistics tableStatistics, ColumnHandle column, Symbol symbol, TypeProvider types) {
    return Optional.ofNullable(tableStatistics.getColumnStatistics().get(column))
            .map(statistics -> toSymbolStatistics(tableStatistics, statistics, types.get(symbol)))
            .orElse(SymbolStatsEstimate.unknown());
}
Use of io.prestosql.spi.statistics.ColumnStatistics in the hetu-core project by openlookeng.
In class ConnectorFilterStatsCalculatorService, method toPlanNodeStats.
/**
 * Converts table statistics into a {@link PlanNodeStatsEstimate} keyed by symbols named after the
 * columns.
 *
 * <p>The output row count is taken from the table's row count. Columns with no entry in
 * {@code columnNames} are left out of the estimate.
 *
 * @param tableStatistics statistics to convert
 * @param columnNames column handle to column name; the name becomes the symbol name
 * @param columnTypes column name to type, passed on to the per-column conversion
 * @return the estimate holding the row count and per-symbol statistics
 */
private static PlanNodeStatsEstimate toPlanNodeStats(TableStatistics tableStatistics, Map<ColumnHandle, String> columnNames, Map<String, Type> columnTypes) {
    double rowCount = tableStatistics.getRowCount().getValue();
    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.builder().setOutputRowCount(rowCount);
    for (Map.Entry<ColumnHandle, ColumnStatistics> column : tableStatistics.getColumnStatistics().entrySet()) {
        String name = columnNames.get(column.getKey());
        if (name != null) {
            SymbolStatsEstimate symbolStats = toSymbolStatistics(tableStatistics, column.getValue(), columnTypes.get(name));
            estimate.addSymbolStatistics(new Symbol(name), symbolStats);
        }
    }
    return estimate.build();
}
Use of io.prestosql.spi.statistics.ColumnStatistics in the hetu-core project by openlookeng.
In class TestDataCenterClient, method testGetTableStatistics.
/**
 * Requests the statistics of {@code tpch.tiny.orders} through a {@code DataCenterClient} and checks
 * the row count plus the per-column estimates for every column that comes back.
 */
@Test
public void testGetTableStatistics() {
    DataCenterClient client = new DataCenterClient(this.config, httpClient, typeManager);

    Map<String, ColumnHandle> columnHandles = new LinkedHashMap<>();
    columnHandles.put("orderkey", new DataCenterColumnHandle("orderkey", DOUBLE, 0));
    columnHandles.put("custkey", new DataCenterColumnHandle("custkey", DOUBLE, 1));
    columnHandles.put("orderstatus", new DataCenterColumnHandle("orderstatus", createVarcharType(1), 2));
    columnHandles.put("totalprice", new DataCenterColumnHandle("totalprice", DOUBLE, 3));
    columnHandles.put("orderdate", new DataCenterColumnHandle("orderdate", DATE, 4));
    columnHandles.put("orderpriority", new DataCenterColumnHandle("orderpriority", createVarcharType(15), 5));
    columnHandles.put("clerk", new DataCenterColumnHandle("clerk", createUnboundedVarcharType(), 6));
    columnHandles.put("shippriority", new DataCenterColumnHandle("shippriority", DOUBLE, 7));
    columnHandles.put("comment", new DataCenterColumnHandle("comment", createVarcharType(79), 8));

    TableStatistics tableStatistics = client.getTableStatistics("tpch.tiny.orders", columnHandles);
    assertEquals(tableStatistics.getRowCount().getValue(), 15000.0);

    for (Map.Entry<ColumnHandle, ColumnStatistics> entry : tableStatistics.getColumnStatistics().entrySet()) {
        ColumnStatistics stats = entry.getValue();
        // Only columns present in the returned statistics are checked.
        switch (entry.getKey().getColumnName()) {
            case "orderkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 15000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 60000.0);
                break;
            case "custkey":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 1.0);
                assertEquals(stats.getRange().get().getMax(), 1499.0);
                break;
            case "orderstatus":
                assertEquals(stats.getDataSize().getValue(), 3.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 3.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "totalprice":
                assertEquals(stats.getDistinctValuesCount().getValue(), 14996.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 874.89);
                assertEquals(stats.getRange().get().getMax(), 466001.28);
                break;
            case "orderdate":
                assertEquals(stats.getDistinctValuesCount().getValue(), 2401.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 8035.0);
                assertEquals(stats.getRange().get().getMax(), 10440.0);
                break;
            case "orderpriority":
                assertEquals(stats.getDataSize().getValue(), 42.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 5.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "clerk":
                assertEquals(stats.getDataSize().getValue(), 15000.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 1000.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            case "shippriority":
                assertEquals(stats.getDistinctValuesCount().getValue(), 1.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                assertEquals(stats.getRange().get().getMin(), 0.0);
                assertEquals(stats.getRange().get().getMax(), 0.0);
                break;
            case "comment":
                assertEquals(stats.getDataSize().getValue(), 727249.0);
                assertEquals(stats.getDistinctValuesCount().getValue(), 14995.0);
                assertEquals(stats.getNullsFraction().getValue(), 0.0);
                break;
            default:
                break;
        }
    }
}
Aggregations