Search in sources:

Example 1 with BooleanStatistics

use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.

the class Statistics method createColumnStatisticsForEmptyPartition.

/**
 * Builds the statistics of a single column for a partition that holds no rows.
 *
 * <p>Each requested statistic is populated with a zero value; {@code MIN_VALUE} and
 * {@code MAX_VALUE} are delegated to {@code setMinMaxForEmptyPartition}.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes statistic types that should be populated
 * @return the statistics assembled from the requested types
 * @throws PrestoException with {@code HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE} when a requested type is not handled here
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();
    for (ColumnStatisticType statisticType : columnStatisticTypes) {
        if (statisticType == ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES) {
            builder.setMaxValueSizeInBytes(0);
        } else if (statisticType == ColumnStatisticType.TOTAL_SIZE_IN_BYTES) {
            builder.setTotalSizeInBytes(0);
        } else if (statisticType == ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) {
            builder.setDistinctValuesCount(0);
        } else if (statisticType == ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) {
            // The requested statistic is the non-null count, but the builder records the nulls count.
            builder.setNullsCount(0);
        } else if (statisticType == ColumnStatisticType.NUMBER_OF_TRUE_VALUES) {
            OptionalLong zeroCount = OptionalLong.of(0L);
            builder.setBooleanStatistics(new BooleanStatistics(zeroCount, zeroCount));
        } else if (statisticType == ColumnStatisticType.MIN_VALUE || statisticType == ColumnStatisticType.MAX_VALUE) {
            // Min and max share one helper, so either request populates the range.
            setMinMaxForEmptyPartition(columnType, builder);
        } else {
            throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, "Unknown column statistics type: " + statisticType.name());
        }
    }
    return builder.build();
}
Also used : ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) PrestoException(io.prestosql.spi.PrestoException)

Example 2 with BooleanStatistics

use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.

the class Statistics method createHiveColumnStatistics.

/**
 * Converts the statistic blocks computed by the engine into {@link HiveColumnStatistics}.
 *
 * <p>Only statistics present in {@code computedStatistics} are set. The nulls count, the distinct
 * values count and the boolean statistics are all derived from {@code NUMBER_OF_NON_NULL_VALUES},
 * so they are only set when that statistic is present.
 *
 * @param session session passed through to the value-extraction helpers
 * @param computedStatistics computed statistic blocks keyed by statistic type
 * @param columnType type of the column the statistics belong to
 * @param rowCount total number of rows, used to derive the nulls count
 * @return the statistics assembled from the available blocks
 */
private static HiveColumnStatistics createHiveColumnStatistics(ConnectorSession session, Map<ColumnStatisticType, Block> computedStatistics, Type columnType, long rowCount) {
    HiveColumnStatistics.Builder builder = HiveColumnStatistics.builder();

    // The engine is asked to compute either both of MIN_VALUE / MAX_VALUE or neither of them.
    verify(computedStatistics.containsKey(MIN_VALUE) == computedStatistics.containsKey(MAX_VALUE));
    if (computedStatistics.containsKey(MIN_VALUE)) {
        Block minBlock = computedStatistics.get(MIN_VALUE);
        Block maxBlock = computedStatistics.get(MAX_VALUE);
        setMinMax(session, columnType, minBlock, maxBlock, builder);
    }

    // MAX_VALUE_SIZE_IN_BYTES
    if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) {
        Block maxSizeBlock = computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES);
        builder.setMaxValueSizeInBytes(getIntegerValue(session, BIGINT, maxSizeBlock));
    }

    // TOTAL_SIZE_IN_BYTES
    if (computedStatistics.containsKey(TOTAL_SIZE_IN_BYTES)) {
        Block totalSizeBlock = computedStatistics.get(TOTAL_SIZE_IN_BYTES);
        builder.setTotalSizeInBytes(getIntegerValue(session, BIGINT, totalSizeBlock));
    }

    boolean hasNonNullCount = computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES);

    // Number of nulls: total rows minus the non-null rows.
    if (hasNonNullCount) {
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);
        builder.setNullsCount(rowCount - nonNullCount);
    }

    // NDV
    if (hasNonNullCount && computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) {
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);
        long distinctCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0);
        // The distinct count is estimated using HLL and can exceed the non-null count, so clamp it.
        builder.setDistinctValuesCount(Math.min(distinctCount, nonNullCount));
    }

    // Number of true values; the number of false values is whatever remains of the non-null rows.
    if (hasNonNullCount && computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) {
        long trueCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0);
        long nonNullCount = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0);
        long falseCount = nonNullCount - trueCount;
        builder.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(trueCount), OptionalLong.of(falseCount)));
    }

    return builder.build();
}
Also used : BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics)

Example 3 with BooleanStatistics

use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.

the class TestThriftMetastoreUtil method testBooleanStatsToColumnStatistics.

/**
 * Checks that metastore boolean column statistics (100 trues, 10 falses, 0 nulls) are converted
 * into boolean statistics plus a nulls count, leaving every other statistic empty.
 */
@Test
public void testBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData statsData = new BooleanColumnStatsData();
    statsData.setNumTrues(100);
    statsData.setNumFalses(10);
    statsData.setNumNulls(0);
    ColumnStatisticsObj metastoreStatistics = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(statsData));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreStatistics, OptionalLong.empty());

    // Statistics belonging to other column kinds must stay unset.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());

    BooleanStatistics expectedBooleanStatistics = new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10));
    assertEquals(converted.getBooleanStatistics(), Optional.of(expectedBooleanStatistics));

    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(0));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 4 with BooleanStatistics

use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project hetu-core by openlookeng.

the class TestThriftMetastoreUtil method testImpalaGeneratedBooleanStatistics.

/**
 * Checks the conversion of boolean column statistics built from the values (1, -1, 2): the test
 * expects both boolean counts to be absent while the nulls count of 2 is kept.
 */
@Test
public void testImpalaGeneratedBooleanStatistics() {
    // NOTE(review): the -1 argument is presumably the "unknown" marker Impala writes — confirm against the converter.
    BooleanColumnStatsData impalaStatsData = new BooleanColumnStatsData(1L, -1L, 2L);
    ColumnStatisticsObj metastoreStatistics = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(impalaStatsData));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreStatistics, OptionalLong.empty());

    // Statistics belonging to other column kinds must stay unset.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());

    BooleanStatistics expectedBooleanStatistics = new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.of(expectedBooleanStatistics));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 5 with BooleanStatistics

use of io.prestosql.plugin.hive.metastore.BooleanStatistics in project boostkit-bigdata by kunpengcompute.

the class Statistics method createColumnStatisticsForEmptyPartition.

/**
 * Creates column statistics describing a partition without any rows.
 *
 * <p>All requested counters and sizes are reported as zero. A request for {@code MIN_VALUE} or
 * {@code MAX_VALUE} is forwarded to {@code setMinMaxForEmptyPartition}.
 *
 * @param columnType type of the column; must not be null
 * @param columnStatisticTypes the statistic types to fill in
 * @return statistics containing an entry for each requested type
 * @throws PrestoException with {@code HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE} for a type this method does not handle
 */
private static HiveColumnStatistics createColumnStatisticsForEmptyPartition(Type columnType, Set<ColumnStatisticType> columnStatisticTypes) {
    requireNonNull(columnType, "columnType is null");
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    for (ColumnStatisticType requested : columnStatisticTypes) {
        switch (requested) {
            case MIN_VALUE:
            case MAX_VALUE:
                // Both bounds are handled by the same helper.
                setMinMaxForEmptyPartition(columnType, statistics);
                break;
            case NUMBER_OF_TRUE_VALUES:
                OptionalLong none = OptionalLong.of(0L);
                statistics.setBooleanStatistics(new BooleanStatistics(none, none));
                break;
            case NUMBER_OF_NON_NULL_VALUES:
                // The non-null count is requested, but the builder stores the nulls count.
                statistics.setNullsCount(0);
                break;
            case NUMBER_OF_DISTINCT_VALUES:
                statistics.setDistinctValuesCount(0);
                break;
            case TOTAL_SIZE_IN_BYTES:
                statistics.setTotalSizeInBytes(0);
                break;
            case MAX_VALUE_SIZE_IN_BYTES:
                statistics.setMaxValueSizeInBytes(0);
                break;
            default:
                String message = "Unknown column statistics type: " + requested.name();
                throw new PrestoException(HiveErrorCode.HIVE_UNKNOWN_COLUMN_STATISTIC_TYPE, message);
        }
    }
    return statistics.build();
}
Also used : ColumnStatisticType(io.prestosql.spi.statistics.ColumnStatisticType) BooleanStatistics(io.prestosql.plugin.hive.metastore.BooleanStatistics) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) PrestoException(io.prestosql.spi.PrestoException)

Aggregations

BooleanStatistics (io.prestosql.plugin.hive.metastore.BooleanStatistics): 12, HiveColumnStatistics (io.prestosql.plugin.hive.metastore.HiveColumnStatistics): 10, Test (org.testng.annotations.Test): 8, BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 6, ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 6, PrestoException (io.prestosql.spi.PrestoException): 2, ColumnStatisticType (io.prestosql.spi.statistics.ColumnStatisticType): 2