Search in sources :

Example 41 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project presto by prestodb.

the class TestThriftHiveMetastoreUtil method testBinaryStatsToColumnStatistics.

/**
 * Checks the conversion of Hive binary column statistics: only the size and null-count
 * figures are expected to be populated, every type-specific statistic must be absent.
 */
@Test
public void testBinaryStatsToColumnStatistics() {
    // Source statistics as reported by the metastore.
    BinaryColumnStatsData statsData = new BinaryColumnStatsData();
    statsData.setNumNulls(2);
    statsData.setAvgColLen(22.2);
    statsData.setMaxColLen(100);

    // Second argument is presumably the row count used to derive the total size -- confirm
    // against fromMetastoreApiColumnStatistics.
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(
            new ColumnStatisticsObj("my_col", BINARY_TYPE_NAME, binaryStats(statsData)),
            OptionalLong.of(4));

    // No type-specific statistics for a binary column.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // Size and null-count figures.
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.of(100));
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.of(44));
    assertEquals(converted.getNullsCount(), OptionalLong.of(2));
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) HiveColumnStatistics(com.facebook.presto.hive.metastore.HiveColumnStatistics) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Test(org.testng.annotations.Test)

Example 42 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project flink by apache.

the class HiveStatsUtil method createTableColumnStats.

/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>A field is only read from the Hive statistics when its {@code isSetXxx()} flag is true;
 * otherwise {@code null} is passed to the Flink statistics object for that field.
 *
 * @param colType column type, only used in the warning logged for unsupported statistics
 * @param stats Hive column statistics to convert
 * @param hiveVersion Hive version used to load the HiveShim that handles date statistics
 * @return the converted statistics, or {@code null} when the kind of statistics is not supported
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(
                binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(
                booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
                booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
                booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        // Date statistics are delegated to the version-specific shim.
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(
                doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
                doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
                doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
                doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(
                longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
                longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
                longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
                longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(
                stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
                stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
                stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
                // The null count must be guarded by its own flag, not by the NDV flag.
                stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)

Example 43 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project alluxio by Alluxio.

the class HiveUtilsTest method verifyColumnStats.

/**
 * Converts the given Hive column statistics with {@code HiveUtils.toProto} and asserts that
 * the result matches the Hive source: name, type, and, when statistics data is present, the
 * binary, boolean, date, decimal, double, long and string sections in that order.
 *
 * @param hiveColStats the Hive column statistics to convert and compare against
 */
private void verifyColumnStats(ColumnStatisticsObj hiveColStats) {
    ColumnStatisticsInfo converted = HiveUtils.toProto(hiveColStats);
    assertEquals(hiveColStats.getColName(), converted.getColName());
    assertEquals(hiveColStats.getColType(), converted.getColType());
    assertEquals(hiveColStats.isSetStatsData(), converted.hasData());
    if (!hiveColStats.isSetStatsData()) {
        // No statistics payload, so there is nothing further to compare.
        return;
    }

    ColumnStatisticsData hiveData = hiveColStats.getStatsData();
    alluxio.grpc.table.ColumnStatisticsData protoData = converted.getData();

    // verify binary
    assertEquals(hiveData.isSetBinaryStats(), protoData.hasBinaryStats());
    if (hiveData.isSetBinaryStats()) {
        BinaryColumnStatsData srcBinary = hiveData.getBinaryStats();
        alluxio.grpc.table.BinaryColumnStatsData protoBinary = protoData.getBinaryStats();
        assertEquals(srcBinary.isSetBitVectors(), protoBinary.hasBitVectors());
        if (srcBinary.isSetBitVectors()) {
            assertEquals(srcBinary.getBitVectors(), protoBinary.getBitVectors());
        }
        assertEquals(srcBinary.getAvgColLen(), protoBinary.getAvgColLen(), 0.01);
        assertEquals(srcBinary.getMaxColLen(), protoBinary.getMaxColLen());
        assertEquals(srcBinary.getNumNulls(), protoBinary.getNumNulls());
    }

    // verify boolean
    assertEquals(hiveData.isSetBooleanStats(), protoData.hasBooleanStats());
    if (hiveData.isSetBooleanStats()) {
        BooleanColumnStatsData srcBoolean = hiveData.getBooleanStats();
        alluxio.grpc.table.BooleanColumnStatsData protoBoolean = protoData.getBooleanStats();
        assertEquals(srcBoolean.isSetBitVectors(), protoBoolean.hasBitVectors());
        if (srcBoolean.isSetBitVectors()) {
            assertEquals(srcBoolean.getBitVectors(), protoBoolean.getBitVectors());
        }
        assertEquals(srcBoolean.getNumFalses(), protoBoolean.getNumFalses());
        assertEquals(srcBoolean.getNumTrues(), protoBoolean.getNumTrues());
        assertEquals(srcBoolean.getNumNulls(), protoBoolean.getNumNulls());
    }

    // verify date
    assertEquals(hiveData.isSetDateStats(), protoData.hasDateStats());
    if (hiveData.isSetDateStats()) {
        DateColumnStatsData srcDate = hiveData.getDateStats();
        alluxio.grpc.table.DateColumnStatsData protoDate = protoData.getDateStats();
        assertEquals(srcDate.isSetBitVectors(), protoDate.hasBitVectors());
        if (srcDate.isSetBitVectors()) {
            assertEquals(srcDate.getBitVectors(), protoDate.getBitVectors());
        }
        assertEquals(srcDate.getNumNulls(), protoDate.getNumNulls());
        assertEquals(srcDate.getNumDVs(), protoDate.getNumDistincts());
        assertEquals(srcDate.isSetHighValue(), protoDate.hasHighValue());
        if (srcDate.isSetHighValue()) {
            assertEquals(srcDate.getHighValue().getDaysSinceEpoch(), protoDate.getHighValue().getDaysSinceEpoch());
        }
        assertEquals(srcDate.isSetLowValue(), protoDate.hasLowValue());
        if (srcDate.isSetLowValue()) {
            assertEquals(srcDate.getLowValue().getDaysSinceEpoch(), protoDate.getLowValue().getDaysSinceEpoch());
        }
    }

    // verify decimal
    assertEquals(hiveData.isSetDecimalStats(), protoData.hasDecimalStats());
    if (hiveData.isSetDecimalStats()) {
        DecimalColumnStatsData srcDecimal = hiveData.getDecimalStats();
        alluxio.grpc.table.DecimalColumnStatsData protoDecimal = protoData.getDecimalStats();
        assertEquals(srcDecimal.isSetBitVectors(), protoDecimal.hasBitVectors());
        if (srcDecimal.isSetBitVectors()) {
            assertEquals(srcDecimal.getBitVectors(), protoDecimal.getBitVectors());
        }
        assertEquals(srcDecimal.getNumNulls(), protoDecimal.getNumNulls());
        assertEquals(srcDecimal.getNumDVs(), protoDecimal.getNumDistincts());
        assertEquals(srcDecimal.isSetHighValue(), protoDecimal.hasHighValue());
        if (srcDecimal.isSetHighValue()) {
            assertEquals(srcDecimal.getHighValue().getScale(), protoDecimal.getHighValue().getScale());
            assertArrayEquals(srcDecimal.getHighValue().getUnscaled(), protoDecimal.getHighValue().getUnscaled().toByteArray());
        }
        assertEquals(srcDecimal.isSetLowValue(), protoDecimal.hasLowValue());
        if (srcDecimal.isSetLowValue()) {
            assertEquals(srcDecimal.getLowValue().getScale(), protoDecimal.getLowValue().getScale());
            assertArrayEquals(srcDecimal.getLowValue().getUnscaled(), protoDecimal.getLowValue().getUnscaled().toByteArray());
        }
    }

    // verify double
    assertEquals(hiveData.isSetDoubleStats(), protoData.hasDoubleStats());
    if (hiveData.isSetDoubleStats()) {
        DoubleColumnStatsData srcDouble = hiveData.getDoubleStats();
        alluxio.grpc.table.DoubleColumnStatsData protoDouble = protoData.getDoubleStats();
        assertEquals(srcDouble.isSetBitVectors(), protoDouble.hasBitVectors());
        if (srcDouble.isSetBitVectors()) {
            assertEquals(srcDouble.getBitVectors(), protoDouble.getBitVectors());
        }
        assertEquals(srcDouble.getNumNulls(), protoDouble.getNumNulls());
        assertEquals(srcDouble.getNumDVs(), protoDouble.getNumDistincts());
        assertEquals(srcDouble.isSetHighValue(), protoDouble.hasHighValue());
        if (srcDouble.isSetHighValue()) {
            assertEquals(srcDouble.getHighValue(), protoDouble.getHighValue(), 0.01);
        }
        assertEquals(srcDouble.isSetLowValue(), protoDouble.hasLowValue());
        if (srcDouble.isSetLowValue()) {
            assertEquals(srcDouble.getLowValue(), protoDouble.getLowValue(), 0.01);
        }
    }

    // verify long
    assertEquals(hiveData.isSetLongStats(), protoData.hasLongStats());
    if (hiveData.isSetLongStats()) {
        LongColumnStatsData srcLong = hiveData.getLongStats();
        alluxio.grpc.table.LongColumnStatsData protoLong = protoData.getLongStats();
        assertEquals(srcLong.isSetBitVectors(), protoLong.hasBitVectors());
        if (srcLong.isSetBitVectors()) {
            assertEquals(srcLong.getBitVectors(), protoLong.getBitVectors());
        }
        assertEquals(srcLong.getNumNulls(), protoLong.getNumNulls());
        assertEquals(srcLong.getNumDVs(), protoLong.getNumDistincts());
        assertEquals(srcLong.isSetHighValue(), protoLong.hasHighValue());
        if (srcLong.isSetHighValue()) {
            assertEquals(srcLong.getHighValue(), protoLong.getHighValue());
        }
        assertEquals(srcLong.isSetLowValue(), protoLong.hasLowValue());
        if (srcLong.isSetLowValue()) {
            assertEquals(srcLong.getLowValue(), protoLong.getLowValue());
        }
    }

    // verify string
    assertEquals(hiveData.isSetStringStats(), protoData.hasStringStats());
    if (hiveData.isSetStringStats()) {
        StringColumnStatsData srcString = hiveData.getStringStats();
        alluxio.grpc.table.StringColumnStatsData protoString = protoData.getStringStats();
        assertEquals(srcString.isSetBitVectors(), protoString.hasBitVectors());
        if (srcString.isSetBitVectors()) {
            assertEquals(srcString.getBitVectors(), protoString.getBitVectors());
        }
        assertEquals(srcString.getAvgColLen(), protoString.getAvgColLen(), 0.01);
        assertEquals(srcString.getMaxColLen(), protoString.getMaxColLen());
        assertEquals(srcString.getNumNulls(), protoString.getNumNulls());
        assertEquals(srcString.getNumDVs(), protoString.getNumDistincts());
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsInfo(alluxio.grpc.table.ColumnStatisticsInfo) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Aggregations

BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 43
BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 31
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 26
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 22
DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 22
DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 22
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 22
StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 22
DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData): 15
DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector): 9
DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector): 9
DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector): 9
LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector): 9
StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector): 9
TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector): 8
Date (org.apache.hadoop.hive.metastore.api.Date): 7
BigDecimal (java.math.BigDecimal): 5
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 5
ArrayList (java.util.ArrayList): 4
ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 4