Search in sources :

Example 61 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testEmptyBooleanStatsToColumnStatistics.

@Test
public void testEmptyBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData emptyBooleanColumnStatsData = new BooleanColumnStatsData();
    ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(emptyBooleanColumnStatsData));
    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty());
    assertEquals(actual.getIntegerStatistics(), Optional.empty());
    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.empty());
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())));
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getNullsCount(), OptionalLong.empty());
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(com.facebook.presto.hive.metastore.BooleanStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 62 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testDecimalStatsToColumnStatistics.

@Test
public void testDecimalStatsToColumnStatistics() {
    DecimalColumnStatsData decimalColumnStatsData = new DecimalColumnStatsData();
    BigDecimal low = new BigDecimal("0");
    decimalColumnStatsData.setLowValue(toMetastoreDecimal(low));
    BigDecimal high = new BigDecimal("100");
    decimalColumnStatsData.setHighValue(toMetastoreDecimal(high));
    decimalColumnStatsData.setNumNulls(1);
    decimalColumnStatsData.setNumDVs(20);
    ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData));
    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(1000));
    assertEquals(actual.getIntegerStatistics(), Optional.empty());
    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.of(low), Optional.of(high))));
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.empty());
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getNullsCount(), OptionalLong.of(1));
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19));
}
Also used : DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalStatistics(com.facebook.presto.hive.metastore.DecimalStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) BigDecimal(java.math.BigDecimal) Test(org.testng.annotations.Test)

Example 63 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testSingleDistinctValue.

@Test
public void testSingleDistinctValue() {
    DoubleColumnStatsData doubleColumnStatsData = new DoubleColumnStatsData();
    doubleColumnStatsData.setNumNulls(10);
    doubleColumnStatsData.setNumDVs(1);
    ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData));
    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(10));
    assertEquals(actual.getNullsCount(), OptionalLong.of(10));
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(0));
    doubleColumnStatsData = new DoubleColumnStatsData();
    doubleColumnStatsData.setNumNulls(10);
    doubleColumnStatsData.setNumDVs(1);
    columnStatisticsObj = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(doubleColumnStatsData));
    actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(11));
    assertEquals(actual.getNullsCount(), OptionalLong.of(10));
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(1));
}
Also used : DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 64 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testBooleanStatsToColumnStatistics.

@Test
public void testBooleanStatsToColumnStatistics() {
    BooleanColumnStatsData booleanColumnStatsData = new BooleanColumnStatsData();
    booleanColumnStatsData.setNumTrues(100);
    booleanColumnStatsData.setNumFalses(10);
    booleanColumnStatsData.setNumNulls(0);
    ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanColumnStatsData));
    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty());
    assertEquals(actual.getIntegerStatistics(), Optional.empty());
    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.empty());
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10))));
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getNullsCount(), OptionalLong.of(0));
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) BooleanStatistics(com.facebook.presto.hive.metastore.BooleanStatistics) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) Test(org.testng.annotations.Test)

Example 65 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project flink by apache.

the class HiveStatsUtil method createCatalogColumnStats.

/**
 * Create a map of Flink column stats from the given Hive column stats.
 */
public static Map<String, CatalogColumnStatisticsDataBase> createCatalogColumnStats(@Nonnull List<ColumnStatisticsObj> hiveColStats, String hiveVersion) {
    checkNotNull(hiveColStats, "hiveColStats can not be null");
    Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
    for (ColumnStatisticsObj colStatsObj : hiveColStats) {
        CatalogColumnStatisticsDataBase columnStats = createTableColumnStats(HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(colStatsObj.getColType())), colStatsObj.getStatsData(), hiveVersion);
        colStats.put(colStatsObj.getColName(), columnStats);
    }
    return colStats;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString)

Aggregations

ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)219 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)104 ArrayList (java.util.ArrayList)98 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)82 Test (org.junit.Test)79 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)68 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)43 Table (org.apache.hadoop.hive.metastore.api.Table)43 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)35 List (java.util.List)34 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)29 HashMap (java.util.HashMap)28 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22