use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.
the class ColumnStatisticsData method toColumnStatistics.
/**
 * Builds a {@link ColumnStatistics} from the values held by this object.
 *
 * @param rowCount number of rows the null count is divided by to obtain the nulls fraction
 * @return statistics carrying the nulls fraction, the distinct values count, the data size
 *         (unknown when absent) and, when both bounds are present, the value range
 */
public ColumnStatistics toColumnStatistics(long rowCount) {
    ColumnStatistics.Builder builder = ColumnStatistics.builder();
    // nullsCount / rowCount is a ratio of null values, so it belongs in the nulls fraction.
    // It used to be written to the data size, where the setDataSize call below overwrote it.
    builder.setNullsFraction(Estimate.of((double) nullsCount / (double) rowCount));
    builder.setDistinctValuesCount(Estimate.of(distinctValuesCount));
    builder.setDataSize(dataSize.map(Estimate::of).orElse(Estimate.unknown()));
    // A range is only reported when both the lower and the upper bound are known.
    if (min.isPresent() && max.isPresent()) {
        builder.setRange(new DoubleRange((double) min.get(), (double) max.get()));
    }
    return builder.build();
}
use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.
the class TestTpcdsMetadataStatistics method testTableStatsDetails.
/**
 * Checks the row count of the sf1 CALL_CENTER table and the statistics reported for a
 * representative column of each kind (identifier, varchar, char, decimal, date, all-null).
 */
@Test
public void testTableStatsDetails() {
    ConnectorTableHandle callCenterHandle = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.CALL_CENTER.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, callCenterHandle, alwaysTrue(), true);
    estimateAssertion.assertClose(stats.getRowCount(), Estimate.of(6), "Row count does not match");

    // every column handle must have a non-null statistics entry
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, callCenterHandle);
    handlesByName.values().forEach(handle -> {
        assertTrue(stats.getColumnStatistics().containsKey(handle));
        assertNotNull(stats.getColumnStatistics().get(handle));
    });

    // identifier
    ColumnStatistics actualIdentifier = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_SK.getName()));
    ColumnStatistics expectedIdentifier = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(6))
            .setRange(new DoubleRange(1, 6))
            .build();
    assertColumnStatistics(actualIdentifier, expectedIdentifier);

    // varchar
    ColumnStatistics actualVarchar = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_CALL_CENTER_ID.getName()));
    ColumnStatistics expectedVarchar = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(3))
            .setDataSize(Estimate.of(48.0))
            .build();
    assertColumnStatistics(actualVarchar, expectedVarchar);

    // char
    ColumnStatistics actualChar = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_ZIP.getName()));
    ColumnStatistics expectedChar = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .setDataSize(Estimate.of(5.0))
            .build();
    assertColumnStatistics(actualChar, expectedChar);

    // decimal
    ColumnStatistics actualDecimal = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_GMT_OFFSET.getName()));
    ColumnStatistics expectedDecimal = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .setRange(new DoubleRange(-5, -5))
            .build();
    assertColumnStatistics(actualDecimal, expectedDecimal);

    // date
    ColumnStatistics actualDate = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_REC_START_DATE.getName()));
    ColumnStatistics expectedDate = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(4))
            .setRange(new DoubleRange(10227L, 11688L))
            .build();
    assertColumnStatistics(actualDate, expectedDate);

    // only null values
    ColumnStatistics actualAllNull = stats.getColumnStatistics().get(handlesByName.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName()));
    ColumnStatistics expectedAllNull = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(1))
            .setDistinctValuesCount(Estimate.of(0))
            .build();
    assertColumnStatistics(actualAllNull, expectedAllNull);
}
use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.
the class TestTpcdsMetadataStatistics method testNullFraction.
/**
 * Checks the statistics of a sf1 WEB_SITE column in which only some of the values are null.
 */
@Test
public void testNullFraction() {
    ConnectorTableHandle webSiteHandle = metadata.getTableHandle(session, new SchemaTableName("sf1", Table.WEB_SITE.getName()));
    TableStatistics stats = metadata.getTableStatistics(session, webSiteHandle, alwaysTrue(), true);
    Map<String, ColumnHandle> handlesByName = metadata.getColumnHandles(session, webSiteHandle);

    // some null values
    ColumnStatistics actualEndDate = stats.getColumnStatistics().get(handlesByName.get(WebSiteColumn.WEB_REC_END_DATE.getName()));
    ColumnStatistics expectedEndDate = ColumnStatistics.builder()
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(3))
            .setRange(new DoubleRange(10819L, 11549L))
            .build();
    assertColumnStatistics(actualEndDate, expectedEndDate);
}
use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.
the class DataCenterClient method getTableStatistics.
/**
 * Get remote table statistics.
 *
 * <p>Runs {@code SHOW STATS FOR <table>} through {@code getResults} and converts every returned
 * row whose column name matches an entry of {@code columnHandles} into column statistics.
 * The row count is read from the last returned row.
 *
 * @param tableFullName the fully qualified table name
 * @param columnHandles data center column handles
 * @return the table statistics
 * @throws PrestoTransportException if running the statistics query fails with a {@link SQLException}
 */
public TableStatistics getTableStatistics(String tableFullName, Map<String, ColumnHandle> columnHandles) {
    String query = "SHOW STATS FOR " + tableFullName;
    Iterable<List<Object>> data;
    try {
        data = getResults(clientSession, query);
    } catch (SQLException ex) {
        // Keep the SQLException as the cause so the underlying failure stays visible to callers.
        throw new PrestoTransportException(REMOTE_TASK_ERROR, HostAddress.fromUri(this.serverUri.uri()), "could not connect to the remote data center", ex);
    }
    TableStatistics.Builder builder = TableStatistics.builder();
    List<Object> lastRow = null;
    for (List<Object> row : data) {
        // Remember every row, including skipped ones, because the row count is taken from the last row.
        lastRow = row;
        if (row.get(0) == null) {
            // Only the last row can have the first column (column name) null
            continue;
        }
        // row[0] is column_name
        DataCenterColumnHandle columnHandle = (DataCenterColumnHandle) columnHandles.get(row.get(0).toString());
        if (columnHandle == null) {
            // Unknown column found
            continue;
        }
        ColumnStatistics.Builder columnStatisticBuilder = ColumnStatistics.builder();
        // row[1] is data_size
        if (row.get(1) != null) {
            columnStatisticBuilder.setDataSize(Estimate.of(Double.parseDouble(row.get(1).toString())));
        }
        // row[2] is distinct_values_count
        if (row.get(2) != null) {
            columnStatisticBuilder.setDistinctValuesCount(Estimate.of(Double.parseDouble(row.get(2).toString())));
        }
        // row[3] is nulls_fraction
        if (row.get(3) != null) {
            columnStatisticBuilder.setNullsFraction(Estimate.of(Double.parseDouble(row.get(3).toString())));
        }
        // row[5] is low_value and row[6] is high_value
        if (row.get(5) != null && row.get(6) != null) {
            String minStr = row.get(5).toString();
            String maxStr = row.get(6).toString();
            Type columnType = columnHandle.getColumnType();
            if (columnType.equals(DATE)) {
                // DATE bounds are parsed with DATE_FORMATTER and stored as epoch days.
                LocalDate minDate = LocalDate.parse(minStr, DATE_FORMATTER);
                LocalDate maxDate = LocalDate.parse(maxStr, DATE_FORMATTER);
                columnStatisticBuilder.setRange(new DoubleRange(minDate.toEpochDay(), maxDate.toEpochDay()));
            } else {
                // Every other type is parsed as a plain double.
                columnStatisticBuilder.setRange(new DoubleRange(Double.parseDouble(minStr), Double.parseDouble(maxStr)));
            }
        }
        builder.setColumnStatistics(columnHandle, columnStatisticBuilder.build());
    }
    // Get row_count from the last row
    if (lastRow != null && lastRow.get(4) != null) {
        builder.setRowCount(Estimate.of(Double.parseDouble(lastRow.get(4).toString())));
    }
    return builder.build();
}
use of io.prestosql.spi.statistics.DoubleRange in project hetu-core by openlookeng.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
/**
 * Feeds a single partition with integer column statistics into the provider and checks the
 * table statistics it derives for both partition columns and the regular column.
 */
@Test
public void testGetTableStatistics() {
    String partitionName = "p1=string1/p2=1234";

    // input: basic statistics plus integer column statistics for one partition
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    HiveColumnStatistics integerColumnStatistics = HiveColumnStatistics.createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300));
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, integerColumnStatistics))
            .build();

    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, tableName, partitions, hiveTable) -> ImmutableMap.of(partitionName, statistics));
    TestingConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    // expected output, one ColumnStatistics per column
    ColumnStatistics expectedPartitionColumn1 = ColumnStatistics.builder()
            .setDataSize(Estimate.of(7000))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedPartitionColumn2 = ColumnStatistics.builder()
            .setRange(new DoubleRange(1234, 1234))
            .setNullsFraction(Estimate.of(0))
            .setDistinctValuesCount(Estimate.of(1))
            .build();
    ColumnStatistics expectedRegularColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(PARTITION_COLUMN_1, expectedPartitionColumn1)
            .setColumnStatistics(PARTITION_COLUMN_2, expectedPartitionColumn2)
            .setColumnStatistics(columnHandle, expectedRegularColumn)
            .build();

    assertEquals(
            statisticsProvider.getTableStatistics(
                    session,
                    TABLE,
                    ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle),
                    ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT),
                    ImmutableList.of(partition(partitionName)),
                    true,
                    table),
            expected);
}
Aggregations