Search in sources:

Example 1 with BooleanStatistics

use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.

the class Statistics method createHiveColumnStatistics.

/**
 * Assembles a {@link HiveColumnStatistics} for one column from the statistic blocks in
 * {@code computedStatistics}.
 *
 * <p>A statistic is copied into the result only when its key is present in the map. The nulls
 * count, the distinct-values count and the boolean statistics all depend on
 * {@code NUMBER_OF_NON_NULL_VALUES}; when that key is absent none of the three is set.
 *
 * @param computedStatistics statistic blocks keyed by statistic type; {@code MIN_VALUE} and
 *        {@code MAX_VALUE} must be either both present or both absent
 * @param columnType type of the column, forwarded to {@code setMinMax}
 * @param rowCount row count from which the non-null count is subtracted to obtain the nulls count
 * @return the built column statistics
 */
@VisibleForTesting
static HiveColumnStatistics createHiveColumnStatistics(Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // MIN_VALUE / MAX_VALUE: the engine is asked for both or for neither, so the keys must agree.
    boolean hasMin = computedStatistics.containsKey(MIN_VALUE);
    verify(hasMin == computedStatistics.containsKey(MAX_VALUE));
    if (hasMin) {
        Block minBlock = computedStatistics.get(MIN_VALUE);
        Block maxBlock = computedStatistics.get(MAX_VALUE);
        setMinMax(columnType, minBlock, maxBlock, builder);
    }

    // MAX_VALUE_SIZE_IN_BYTES
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        Block maxSizeBlock = computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES);
        builder.setMaxValueSizeInBytes(getIntegerValue(BIGINT, maxSizeBlock));
    }

    // TOTAL_SIZE_IN_BYTES
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        Block totalSizeBlock = computedStatistics.get(TOTAL_SIZE_IN_BYTES);
        builder.setTotalSizeInBytes(getIntegerValue(BIGINT, totalSizeBlock));
    }

    // Every remaining statistic is derived from the non-null count; without it we are done.
    if (!computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) {
        return builder.build();
    }
    long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);

    // Number of nulls
    builder.setNullsCount(rowCount - nonNullCount);

    // NDV: the distinct count is estimated using HLL and can exceed the number of non-null
    // values, so it is capped at the non-null count.
    if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
        long distinctEstimate = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
        builder.setDistinctValuesCount(Math.min(distinctEstimate, nonNullCount));
    }

    // Number of true / number of false: falses are the non-null values that are not true.
    if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
        long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
        long falseCount = nonNullCount - trueCount;
        builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(falseCount)));
    }

    return builder.build();
}
Also used : BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with BooleanStatistics

use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.

the class TestThriftMetastoreUtil method testEmptyBooleanStatsToColumnStatistics.

/**
 * Converts a boolean column-statistics object built from a default-constructed
 * {@code BooleanColumnStatsData} and checks the result: boolean statistics are present but
 * hold two empty counts, and every other statistic is absent.
 */
@Test
public void testEmptyBooleanStatsToColumnStatistics() {
    // Fixture: boolean stats payload with no field explicitly set.
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(new BooleanColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Statistics belonging to other column kinds must not be populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Boolean statistics exist, but neither count is known.
    BooleanStatistics unknownCounts = new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(unknownCounts));

    // Generic statistics are all absent as well.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 3 with BooleanStatistics

use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.

the class TestStatistics method testMergeBooleanColumnStatistics.

/**
 * Exercises {@code assertMergeHiveColumnStatistics} with boolean statistics in three cases:
 * both inputs empty, one input empty, and both inputs populated (expected counts are the
 * per-field sums, 1+2 and 2+3).
 */
@Test
public void testMergeBooleanColumnStatistics() {
    // Case 1: empty counts on both sides -> empty counts expected.
    {
        HiveColumnStatistics first = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
                .build();
        HiveColumnStatistics second = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
                .build();
        HiveColumnStatistics expected = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
                .build();
        assertMergeHiveColumnStatistics(first, second, expected);
    }

    // Case 2: populated counts on one side only -> empty counts expected.
    {
        HiveColumnStatistics first = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2)))
                .build();
        HiveColumnStatistics second = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
                .build();
        HiveColumnStatistics expected = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))
                .build();
        assertMergeHiveColumnStatistics(first, second, expected);
    }

    // Case 3: populated counts on both sides -> (1 + 2, 2 + 3) expected.
    {
        HiveColumnStatistics first = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2)))
                .build();
        HiveColumnStatistics second = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(2), OptionalLong.of(3)))
                .build();
        HiveColumnStatistics expected = HiveColumnStatistics.builder()
                .setBooleanStatistics(new BooleanStatistics(OptionalLong.of(3), OptionalLong.of(5)))
                .build();
        assertMergeHiveColumnStatistics(first, second, expected);
    }
}
Also used : BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) Test(org.testng.annotations.Test)

Example 4 with BooleanStatistics

use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.

the class TestThriftMetastoreUtil method testImpalaGeneratedBooleanStatistics.

/**
 * Converts boolean column statistics built from the values {@code (1L, -1L, 2L)} and checks
 * that the nulls count is reported as 2 while both boolean counts come back empty.
 */
@Test
public void testImpalaGeneratedBooleanStatistics() {
    // NOTE(review): presumably the arguments are (numTrues, numFalses, numNulls), with -1 as
    // the "unknown" marker written by Impala - confirm against the Thrift-generated constructor.
    BooleanColumnStatsData impalaStats = new BooleanColumnStatsData(1L, -1L, 2L);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStats));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Statistics belonging to other column kinds must not be populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // Only the nulls count survives among the generic statistics.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());

    // Boolean statistics are present, but neither count is reported.
    BooleanStatistics unknownCounts = new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(unknownCounts));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 5 with BooleanStatistics

use of io.trino.plugin.hive.metastore.BooleanStatistics in project trino by trinodb.

the class TestThriftMetastoreUtil method testBooleanStatsToColumnStatistics.

/**
 * Converts boolean column statistics with 100 trues, 10 falses and 0 nulls and checks that
 * those three values are carried over while every other statistic stays absent.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    // Fixture: fully populated boolean stats payload.
    BooleanColumnStatsData populatedStats = new BooleanColumnStatsData();
    populatedStats.setNumTrues(100);
    populatedStats.setNumFalses(10);
    populatedStats.setNumNulls(0);
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(populatedStats));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(statsObj, OptionalLong.empty());

    // Statistics belonging to other column kinds must not be populated.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    // True/false counts are carried over unchanged.
    BooleanStatistics expectedCounts = new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10));
    assertEquals(converted.getBooleanStatistics(), Optional.of(expectedCounts));

    // Of the generic statistics, only the nulls count is known.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(io.trino.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Aggregations

BooleanStatistics (io.trino.plugin.hive.metastore.BooleanStatistics): 5, HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics): 4, Test (org.testng.annotations.Test): 4, BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 3, ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 3, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1