use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
@Test
public void testGetTableStatistics() {
String partitionName = "p1=string1/p2=1234";
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(PARTITION_COLUMN_1, ColumnStatistics.builder().setDataSize(Estimate.of(7000)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(PARTITION_COLUMN_2, ColumnStatistics.builder().setRange(new DoubleRange(1234, 1234)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle), ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT), ImmutableList.of(partition(partitionName))), expected);
}
use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned() {
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))), expected);
}
use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
the class TestTpcdsMetadataStatistics method testTableStatsDetails.
@Test
public void testTableStatsDetails() {
SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.CALL_CENTER.getName());
ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName);
Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Optional.empty(), ImmutableList.copyOf(columnHandles.values()), alwaysTrue());
estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");
// all columns have stats
for (ColumnHandle column : columnHandles.values()) {
assertTrue(tableStatistics.getColumnStatistics().containsKey(column));
assertNotNull(tableStatistics.getColumnStatistics().get(column));
}
// identifier
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(6)).setRange(new DoubleRange(1, 6)).build());
// varchar
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(3)).setDataSize(Estimate.of(48.0)).build());
// char
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_ZIP.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).setDataSize(Estimate.of(5.0)).build());
// decimal
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).setRange(new DoubleRange(-5, -5)).build());
// date
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(4)).setRange(new DoubleRange(10227L, 11688L)).build());
// only null values
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(1)).setDistinctValuesCount(Estimate.of(0)).build());
}
use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
the class TestTpcdsMetadataStatistics method testNullFraction.
@Test
public void testNullFraction() {
SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.WEB_SITE.getName());
ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName);
Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle);
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Optional.empty(), ImmutableList.copyOf(columnHandles.values()), alwaysTrue());
// some null values
assertColumnStatistics(tableStatistics.getColumnStatistics().get(columnHandles.get(WebSiteColumn.WEB_REC_END_DATE.getName())), ColumnStatistics.builder().setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(3)).setRange(new DoubleRange(10819L, 11549L)).build());
}
use of com.facebook.presto.spi.statistics.DoubleRange in project presto by prestodb.
the class MetastoreHiveStatisticsProvider method calculateRangeForPartitioningKey.
@VisibleForTesting
static Optional<DoubleRange> calculateRangeForPartitioningKey(HiveColumnHandle column, Type type, List<HivePartition> partitions) {
if (!isRangeSupported(type)) {
return Optional.empty();
}
List<Double> values = partitions.stream().map(HivePartition::getKeys).map(keys -> keys.get(column)).filter(value -> !value.isNull()).map(NullableValue::getValue).map(value -> convertPartitionValueToDouble(type, value)).collect(toImmutableList());
if (values.isEmpty()) {
return Optional.empty();
}
double min = values.get(0);
double max = values.get(0);
for (Double value : values) {
if (value > max) {
max = value;
}
if (value < min) {
min = value;
}
}
return Optional.of(new DoubleRange(min, max));
}
Aggregations