use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.
the class Statistics method createColumnStatisticsForEmptyPartition.
/**
 * Builds the column statistics to record for a partition that contains no rows.
 *
 * <p>Every requested count or size statistic is set to zero. Min/max handling is
 * delegated to {@code setMinMaxForEmptyPartition}, which receives the column type.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes the statistics that should be populated
 * @return statistics in which only the requested fields have been set
 * @throws PrestoException with {@code HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE} when a
 *         requested statistic type is not handled here
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");

    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();
    for (ColumnStatisticType statisticType : columnStatisticTypes) {
        switch (statisticType) {
            case MIN_VALUE:
            case MAX_VALUE:
                // Either bound being requested goes through the same helper.
                setMinMaxForEmptyPartition(columnType, builder);
                break;
            case NUMBER_OF_NON_NULL_VALUES:
                // No rows at all, hence no nulls either.
                builder.setNullsCount(0);
                break;
            case NUMBER_OF_DISTINCT_VALUES:
                builder.setDistinctValuesCount(0);
                break;
            case NUMBER_OF_TRUE_VALUES:
                // Zero true values and zero false values.
                builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
                break;
            case MAX_VALUE_SIZE_IN_BYTES:
                builder.setMaxValueSizeInBytes(0);
                break;
            case TOTAL_SIZE_IN_BYTES:
                builder.setTotalSizeInBytes(0);
                break;
            default:
                throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + statisticType.name());
        }
    }
    return builder.build();
}
use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.
the class Statistics method createHiveColumnStatistics.
/**
 * Converts the statistic blocks computed by the engine for a single column into
 * {@link HiveColumnStatistics}.
 *
 * <p>Only statistics present in {@code computedStatistics} are populated. The nulls
 * count, the distinct values count and the boolean statistics are all derived from
 * {@code NUMBER_OF_NON_NULL_VALUES} and are therefore skipped when it is absent.
 *
 * @param session session passed through to the value-extraction helpers
 * @param computedStatistics computed statistic blocks keyed by statistic type
 * @param columnType type of the column, used when extracting min/max
 * @param rowCount total number of rows, used to derive the nulls count
 * @return the assembled column statistics
 */
private static HiveColumnStatistics createHiveColumnStatistics(ConnectorSession session, Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // Min and max are requested from the engine as a pair: both present or both absent.
    boolean hasMin = computedStatistics.containsKey(MIN_VALUE);
    boolean hasMax = computedStatistics.containsKey(MAX_VALUE);
    verify(hasMin == hasMax);
    if (hasMin) {
        Block minBlock = computedStatistics.get(MIN_VALUE);
        Block maxBlock = computedStatistics.get(MAX_VALUE);
        setMinMax(session, columnType, minBlock, maxBlock, builder);
    }

    // Largest single value size, in bytes.
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        Block maxSizeBlock = computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES);
        builder.setMaxValueSizeInBytes(getIntegerValue(session, BIGINT, maxSizeBlock));
    }

    // Total size of all values, in bytes.
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        Block totalSizeBlock = computedStatistics.get(TOTAL_SIZE_IN_BYTES);
        builder.setTotalSizeInBytes(getIntegerValue(session, BIGINT, totalSizeBlock));
    }

    // Everything below is derived from the non-null count.
    if (computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) {
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);

        // Nulls are whatever remains once the non-null rows are subtracted.
        builder.setNullsCount(rowCount - nonNullCount);

        if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
            // The distinct count is an HLL estimate and may overshoot the number of
            // non-null values, so it is capped at that number.
            long estimatedDistinctCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
            builder.setDistinctValuesCount(Math.min(estimatedDistinctCount, nonNullCount));
        }

        if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
            // The false count is the non-null rows that are not true.
            long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
            long falseCount = nonNullCount - trueCount;
            builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(falseCount)));
        }
    }

    return builder.build();
}
use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.
the class TestThriftMetastoreUtil method testBooleanStatsToColumnStatistics.
/**
 * Checks that boolean column statistics coming from the metastore API end up as
 * boolean statistics plus a nulls count, with every other statistic left absent.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData statsData = new BooleanColumnStatsData();
    statsData.setNumTrues(100);
    statsData.setNumFalses(10);
    statsData.setNumNulls(0);

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(
            new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(statsData)),
            OptionalLong.empty());

    // Values carried over from the boolean stats data.
    assertEquals(converted.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10))));
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));

    // Statistics belonging to other column kinds must stay empty.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Size and distinct-value statistics are not expected for this input.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.
the class TestThriftMetastoreUtil method testImpalaGeneratedBooleanStatistics.
/**
 * Checks the conversion of boolean statistics in which one of the counts is -1:
 * the nulls count is kept, while the resulting boolean statistics carry neither a
 * true count nor a false count.
 */
@Test
public void testImpalaGeneratedBooleanStatistics() {
    // NOTE(review): constructor arguments are presumably (numTrues, numFalses, numNulls);
    // the nulls-count assertion below matches the third argument. Confirm against the Thrift class.
    BooleanColumnStatsData impalaStats = new BooleanColumnStatsData(1L, -1L, 2L);

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(
            new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStats)),
            OptionalLong.empty());

    // Boolean statistics are present, but both counts are empty.
    assertEquals(converted.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));

    // Statistics belonging to other column kinds must stay empty.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Size and distinct-value statistics are not expected for this input.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project boostkit-bigdata by kunpengcompute.
the class Statistics method createColumnStatisticsForEmptyPartition.
/**
 * Builds the column statistics to record for a partition that contains no rows.
 *
 * <p>Every requested count or size statistic is set to zero. Min/max handling is
 * delegated to {@code setMinMaxForEmptyPartition}, which receives the column type.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes the statistics that should be populated
 * @return statistics in which only the requested fields have been set
 * @throws PrestoException with {@code HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE} when a
 *         requested statistic type is not handled here
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");

    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();
    for (ColumnStatisticType statisticType : columnStatisticTypes) {
        switch (statisticType) {
            case MIN_VALUE:
            case MAX_VALUE:
                // Either bound being requested goes through the same helper.
                setMinMaxForEmptyPartition(columnType, builder);
                break;
            case NUMBER_OF_NON_NULL_VALUES:
                // No rows at all, hence no nulls either.
                builder.setNullsCount(0);
                break;
            case NUMBER_OF_DISTINCT_VALUES:
                builder.setDistinctValuesCount(0);
                break;
            case NUMBER_OF_TRUE_VALUES:
                // Zero true values and zero false values.
                builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(0L), OptionalLong.of(0L)));
                break;
            case MAX_VALUE_SIZE_IN_BYTES:
                builder.setMaxValueSizeInBytes(0);
                break;
            case TOTAL_SIZE_IN_BYTES:
                builder.setTotalSizeInBytes(0);
                break;
            default:
                throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + statisticType.name());
        }
    }
    return builder.build();
}
Aggregations