use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.
the class Statistics method createHiveColumnStatistics.
/**
 * Assembles a {@link HiveColumnStatistics} from the statistic blocks computed for one column.
 *
 * <p>Each entry of {@code computedStatistics} is optional; only the statistics whose keys are
 * present are copied into the result. The null count, the distinct value count and the boolean
 * true/false counts are all derived from {@code NUMBER_OF_NON_NULL_VALUES}, so none of them is
 * set when that entry is missing.
 *
 * @param computedStatistics statistic blocks keyed by statistic type; counts are read from position 0
 * @param columnType type of the column, forwarded to {@code setMinMax}
 * @param rowCount total number of rows, used to derive the null count
 * @return the statistics built from the entries that were present
 */
@VisibleForTesting
static HiveColumnStatistics createHiveColumnStatistics(Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // Min and max are requested from the engine as a pair, so they must be both present or both absent.
    boolean hasMin = computedStatistics.containsKey(MIN_VALUE);
    boolean hasMax = computedStatistics.containsKey(MAX_VALUE);
    verify(hasMin == hasMax);
    if (hasMin) {
        setMinMax(columnType, computedStatistics.get(MIN_VALUE), computedStatistics.get(MAX_VALUE), builder);
    }

    // Size of the largest single value.
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        builder.setMaxValueSizeInBytes(getIntegerValue(BIGINT, computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES)));
    }

    // Combined size of all values.
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        builder.setTotalSizeInBytes(getIntegerValue(BIGINT, computedStatistics.get(TOTAL_SIZE_IN_BYTES)));
    }

    // Everything below is derived from the non-null count.
    if (computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) {
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);

        // Nulls are whatever remains of the rows once non-null values are subtracted.
        builder.setNullsCount(rowCount - nonNullCount);

        if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
            // The distinct count is an HLL estimate and may exceed the non-null count, so cap it.
            long distinctEstimate = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
            builder.setDistinctValuesCount(Math.min(distinctEstimate, nonNullCount));
        }

        if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
            // Only the true count is computed; the false count is the rest of the non-null values.
            long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
            builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(nonNullCount - trueCount)));
        }
    }

    return builder.build();
}
use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.
the class TestThriftMetastoreUtil method testEmptyBooleanStatsToColumnStatistics.
/**
 * Converts a boolean column statistics object built from a default-constructed
 * {@code BooleanColumnStatsData} and checks that every statistic comes back empty, with the
 * boolean statistics present but holding empty true/false counts.
 */
@Test
public void testEmptyBooleanStatsToColumnStatistics() {
    // Statistics object wrapping boolean stats data on which no field has been set.
    ColumnStatisticsObj emptyStatsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(new BooleanColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(emptyStatsObj, OptionalLong.empty());

    // No statistics of any other kind may appear for a boolean column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Boolean statistics are present, but both counts are empty.
    BooleanStatistics unknownCounts = new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(unknownCounts));

    // Generic column-level counters are all empty as well.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.
the class TestStatistics method testMergeBooleanColumnStatistics.
/**
 * Exercises {@code assertMergeHiveColumnStatistics} with three pairs of boolean column
 * statistics: both sides with empty counts, one side with empty counts, and both sides with
 * known counts.
 * NOTE(review): the third argument is presumably the expected merge result; confirm against the helper.
 */
@Test
public void testMergeBooleanColumnStatistics() {
    // Scenario 1: empty counts on both sides, empty counts in the third argument.
    HiveColumnStatistics bothEmptyFirst = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
            .build();
    HiveColumnStatistics bothEmptySecond = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
            .build();
    HiveColumnStatistics bothEmptyThird = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
            .build();
    assertMergeHiveColumnStatistics(bothEmptyFirst, bothEmptySecond, bothEmptyThird);

    // Scenario 2: known counts (1, 2) against empty counts, empty counts in the third argument.
    HiveColumnStatistics oneKnownFirst = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2)))
            .build();
    HiveColumnStatistics oneKnownSecond = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
            .build();
    HiveColumnStatistics oneKnownThird = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
            .build();
    assertMergeHiveColumnStatistics(oneKnownFirst, oneKnownSecond, oneKnownThird);

    // Scenario 3: known counts (1, 2) and (2, 3), with (3, 5) in the third argument.
    HiveColumnStatistics bothKnownFirst = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2)))
            .build();
    HiveColumnStatistics bothKnownSecond = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(2), OptionalLong.of(3)))
            .build();
    HiveColumnStatistics bothKnownThird = HiveColumnStatistics.builder()
            .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(3), OptionalLong.of(5)))
            .build();
    assertMergeHiveColumnStatistics(bothKnownFirst, bothKnownSecond, bothKnownThird);
}
use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.
the class TestThriftMetastoreUtil method testImpalaGeneratedBooleanStatistics.
/**
 * Converts boolean column statistics built from the values {@code (1, -1, 2)} and checks that
 * the resulting boolean statistics carry empty true/false counts while the null count is 2.
 * NOTE(review): the constructor arguments are presumably (numTrues, numFalses, numNulls), with
 * -1 standing for an unknown count as the test name suggests; confirm against the Thrift type.
 */
@Test
public void testImpalaGeneratedBooleanStatistics() {
    BooleanColumnStatsData impalaStats = new BooleanColumnStatsData(1L, -1L, 2L);
    ColumnStatisticsObj impalaStatsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStats));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(impalaStatsObj, OptionalLong.empty());

    // No statistics of any other kind may appear for a boolean column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Size and distinct-value counters stay empty; only the null count is carried over.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());

    // Both boolean counts are expected to be empty for this input.
    BooleanStatistics unknownCounts = new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(unknownCounts));
}
use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.
the class TestThriftMetastoreUtil method testBooleanStatsToColumnStatistics.
/**
 * Converts fully populated boolean column statistics (100 trues, 10 falses, 0 nulls) and checks
 * that the true/false counts and the null count are carried over while every other statistic
 * stays empty.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    // Populate all three counters on the metastore-side stats data.
    BooleanColumnStatsData populatedStats = new BooleanColumnStatsData();
    populatedStats.setNumTrues(100);
    populatedStats.setNumFalses(10);
    populatedStats.setNumNulls(0);
    ColumnStatisticsObj populatedStatsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(populatedStats));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(populatedStatsObj, OptionalLong.empty());

    // No statistics of any other kind may appear for a boolean column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // True and false counts come through unchanged.
    BooleanStatistics expectedCounts = new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10));
    assertEquals(converted.getBooleanStatistics(), Optional.of(expectedCounts));

    // Of the generic counters, only the null count is set.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Aggregations