use of org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData in project presto by prestodb.
the class ThriftMetastoreUtil method fromMetastoreApiColumnStatistics.
/**
 * Translates a metastore {@code ColumnStatisticsObj} into a {@code HiveColumnStatistics}.
 *
 * <p>The statistics payload is probed in the order long, double, decimal, date, boolean,
 * string, binary; the first kind found to be set selects the factory method used to build
 * the result. Any individual field the metastore did not set is forwarded as an empty optional.
 *
 * @param columnStatistics metastore statistics for a single column
 * @param rowCount row count handed to the distinct-value and total-size helpers
 * @return the converted column statistics
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} when none of the kinds above is set
 */
public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatisticsObj columnStatistics, OptionalLong rowCount) {
    if (columnStatistics.getStatsData().isSetLongStats()) {
        LongColumnStatsData stats = columnStatistics.getStatsData().getLongStats();
        OptionalLong low = stats.isSetLowValue() ? OptionalLong.of(stats.getLowValue()) : OptionalLong.empty();
        OptionalLong high = stats.isSetHighValue() ? OptionalLong.of(stats.getHighValue()) : OptionalLong.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong rawDistinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createIntegerColumnStatistics(low, high, nulls, fromMetastoreDistinctValuesCount(rawDistinct, nulls, rowCount));
    }

    if (columnStatistics.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData stats = columnStatistics.getStatsData().getDoubleStats();
        OptionalDouble low = stats.isSetLowValue() ? OptionalDouble.of(stats.getLowValue()) : OptionalDouble.empty();
        OptionalDouble high = stats.isSetHighValue() ? OptionalDouble.of(stats.getHighValue()) : OptionalDouble.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong rawDistinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDoubleColumnStatistics(low, high, nulls, fromMetastoreDistinctValuesCount(rawDistinct, nulls, rowCount));
    }

    if (columnStatistics.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData stats = columnStatistics.getStatsData().getDecimalStats();
        Optional<BigDecimal> low = stats.isSetLowValue() ? fromMetastoreDecimal(stats.getLowValue()) : Optional.empty();
        Optional<BigDecimal> high = stats.isSetHighValue() ? fromMetastoreDecimal(stats.getHighValue()) : Optional.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong rawDistinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDecimalColumnStatistics(low, high, nulls, fromMetastoreDistinctValuesCount(rawDistinct, nulls, rowCount));
    }

    if (columnStatistics.getStatsData().isSetDateStats()) {
        DateColumnStatsData stats = columnStatistics.getStatsData().getDateStats();
        Optional<LocalDate> low = stats.isSetLowValue() ? fromMetastoreDate(stats.getLowValue()) : Optional.empty();
        Optional<LocalDate> high = stats.isSetHighValue() ? fromMetastoreDate(stats.getHighValue()) : Optional.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong rawDistinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDateColumnStatistics(low, high, nulls, fromMetastoreDistinctValuesCount(rawDistinct, nulls, rowCount));
    }

    if (columnStatistics.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData stats = columnStatistics.getStatsData().getBooleanStats();
        // Boolean columns carry true/false/null tallies only; there is no distinct-value count.
        OptionalLong trues = stats.isSetNumTrues() ? OptionalLong.of(stats.getNumTrues()) : OptionalLong.empty();
        OptionalLong falses = stats.isSetNumFalses() ? OptionalLong.of(stats.getNumFalses()) : OptionalLong.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        return createBooleanColumnStatistics(trues, falses, nulls);
    }

    if (columnStatistics.getStatsData().isSetStringStats()) {
        StringColumnStatsData stats = columnStatistics.getStatsData().getStringStats();
        OptionalLong maxLength = stats.isSetMaxColLen() ? OptionalLong.of(stats.getMaxColLen()) : OptionalLong.empty();
        OptionalDouble avgLength = stats.isSetAvgColLen() ? OptionalDouble.of(stats.getAvgColLen()) : OptionalDouble.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong rawDistinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createStringColumnStatistics(
                maxLength,
                getTotalSizeInBytes(avgLength, rowCount, nulls),
                nulls,
                fromMetastoreDistinctValuesCount(rawDistinct, nulls, rowCount));
    }

    // Binary is the last recognised kind; anything else is reported as invalid metadata.
    if (!columnStatistics.getStatsData().isSetBinaryStats()) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics);
    }

    BinaryColumnStatsData stats = columnStatistics.getStatsData().getBinaryStats();
    OptionalLong maxLength = stats.isSetMaxColLen() ? OptionalLong.of(stats.getMaxColLen()) : OptionalLong.empty();
    OptionalDouble avgLength = stats.isSetAvgColLen() ? OptionalDouble.of(stats.getAvgColLen()) : OptionalDouble.empty();
    OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
    return createBinaryColumnStatistics(maxLength, getTotalSizeInBytes(avgLength, rowCount, nulls), nulls);
}
use of org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData in project presto by prestodb.
the class ThriftMetastoreUtil method createDoubleStatistics.
/**
 * Builds the metastore {@code ColumnStatisticsObj} holding double statistics for one column.
 *
 * <p>Only values that are present in {@code statistics} are written to the Thrift payload;
 * absent ones are left unset.
 *
 * @param columnName name recorded on the resulting object
 * @param columnType column type; its {@code toString()} is recorded as the type name
 * @param statistics source statistics to copy min/max, nulls count and distinct count from
 * @return the populated metastore statistics object
 */
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    DoubleColumnStatsData statsData = new DoubleColumnStatsData();

    // Low/high bounds exist only when the double-specific statistics are present.
    statistics.getDoubleStatistics().ifPresent(range -> {
        range.getMin().ifPresent(low -> statsData.setLowValue(low));
        range.getMax().ifPresent(high -> statsData.setHighValue(high));
    });

    statistics.getNullsCount().ifPresent(nulls -> statsData.setNumNulls(nulls));

    // The distinct count goes through toMetastoreDistinctValuesCount together with the nulls count.
    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> statsData.setNumDVs(distinct));

    String typeName = columnType.toString();
    return new ColumnStatisticsObj(columnName, typeName, doubleStats(statsData));
}
use of org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData in project presto by prestodb.
the class TestThriftHiveMetastoreUtil method testDoubleStatsToColumnStatistics.
/**
 * Checks that double statistics from the metastore populate only the double-specific,
 * nulls-count and distinct-count parts of the converted {@code HiveColumnStatistics}.
 */
@Test
public void testDoubleStatsToColumnStatistics() {
    // Metastore-side input: range [0, 100], one null, 20 distinct values.
    DoubleColumnStatsData statsData = new DoubleColumnStatsData();
    statsData.setNumDVs(20);
    statsData.setNumNulls(1);
    statsData.setHighValue(100);
    statsData.setLowValue(0);
    ColumnStatisticsObj metastoreStats = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(statsData));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreStats, OptionalLong.of(1000));

    // Statistics belonging to other column kinds must stay empty.
    assertEquals(converted.getIntegerStatistics(), Optional.empty());

    OptionalDouble expectedMin = OptionalDouble.of(0);
    OptionalDouble expectedMax = OptionalDouble.of(100);
    assertEquals(converted.getDoubleStatistics(), Optional.of(new DoubleStatistics(expectedMin, expectedMax)));

    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());

    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // 20 distinct values in the metastore come back as 19.
    // NOTE(review): presumably the conversion discounts the null entry - confirm against fromMetastoreDistinctValuesCount.
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
use of org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData in project flink by apache.
the class HiveStatsUtil method createTableColumnStats.
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>Each field is copied only when Hive reports it as set; unset fields are passed on as
 * {@code null}. Decimal statistics are currently mapped to
 * {@code CatalogColumnStatisticsDataDouble}.
 *
 * @param colType Flink data type of the column, used only in the warning for unsupported stats
 * @param stats Hive column statistics to convert
 * @param hiveVersion Hive version used to load the shim that handles date statistics
 * @return the converted statistics, or {@code null} when the kind of {@code stats} is not supported
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(
                binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(
                booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
                booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
                booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        // Date statistics are delegated to the version-specific shim.
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(
                doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
                doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
                doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
                doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(
                longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
                longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
                longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
                longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(
                stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
                stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
                stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
                // The null count must be guarded by its own "is set" flag, not the NDV flag.
                stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
use of org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData in project alluxio by Alluxio.
the class HiveUtilsTest method verifyColumnStats.
/**
 * Converts the given Hive column statistics with {@code HiveUtils.toProto} and asserts that
 * the resulting proto mirrors the input: column name and type, presence of the stats data,
 * and for every statistics kind (binary, boolean, date, decimal, double, long, string) the
 * presence flags and the individual values.
 *
 * @param hiveColStats the Hive statistics used both as conversion input and as expectation
 */
private void verifyColumnStats(ColumnStatisticsObj hiveColStats) {
    ColumnStatisticsInfo proto = HiveUtils.toProto(hiveColStats);
    assertEquals(hiveColStats.getColName(), proto.getColName());
    assertEquals(hiveColStats.getColType(), proto.getColType());
    assertEquals(hiveColStats.isSetStatsData(), proto.hasData());

    // Without stats data there is nothing further to compare.
    if (!hiveColStats.isSetStatsData()) {
        return;
    }

    ColumnStatisticsData expectedData = hiveColStats.getStatsData();
    alluxio.grpc.table.ColumnStatisticsData actualData = proto.getData();

    // verify binary
    assertEquals(expectedData.isSetBinaryStats(), actualData.hasBinaryStats());
    if (expectedData.isSetBinaryStats()) {
        BinaryColumnStatsData expected = expectedData.getBinaryStats();
        alluxio.grpc.table.BinaryColumnStatsData actual = actualData.getBinaryStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
        assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
    }

    // verify boolean
    assertEquals(expectedData.isSetBooleanStats(), actualData.hasBooleanStats());
    if (expectedData.isSetBooleanStats()) {
        BooleanColumnStatsData expected = expectedData.getBooleanStats();
        alluxio.grpc.table.BooleanColumnStatsData actual = actualData.getBooleanStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getNumFalses(), actual.getNumFalses());
        assertEquals(expected.getNumTrues(), actual.getNumTrues());
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
    }

    // verify date
    assertEquals(expectedData.isSetDateStats(), actualData.hasDateStats());
    if (expectedData.isSetDateStats()) {
        DateColumnStatsData expected = expectedData.getDateStats();
        alluxio.grpc.table.DateColumnStatsData actual = actualData.getDateStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
        assertEquals(expected.getNumDVs(), actual.getNumDistincts());
        assertEquals(expected.isSetHighValue(), actual.hasHighValue());
        if (expected.isSetHighValue()) {
            assertEquals(expected.getHighValue().getDaysSinceEpoch(), actual.getHighValue().getDaysSinceEpoch());
        }
        assertEquals(expected.isSetLowValue(), actual.hasLowValue());
        if (expected.isSetLowValue()) {
            assertEquals(expected.getLowValue().getDaysSinceEpoch(), actual.getLowValue().getDaysSinceEpoch());
        }
    }

    // verify decimal
    assertEquals(expectedData.isSetDecimalStats(), actualData.hasDecimalStats());
    if (expectedData.isSetDecimalStats()) {
        DecimalColumnStatsData expected = expectedData.getDecimalStats();
        alluxio.grpc.table.DecimalColumnStatsData actual = actualData.getDecimalStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
        assertEquals(expected.getNumDVs(), actual.getNumDistincts());
        assertEquals(expected.isSetHighValue(), actual.hasHighValue());
        if (expected.isSetHighValue()) {
            assertEquals(expected.getHighValue().getScale(), actual.getHighValue().getScale());
            assertArrayEquals(expected.getHighValue().getUnscaled(), actual.getHighValue().getUnscaled().toByteArray());
        }
        assertEquals(expected.isSetLowValue(), actual.hasLowValue());
        if (expected.isSetLowValue()) {
            assertEquals(expected.getLowValue().getScale(), actual.getLowValue().getScale());
            assertArrayEquals(expected.getLowValue().getUnscaled(), actual.getLowValue().getUnscaled().toByteArray());
        }
    }

    // verify double
    assertEquals(expectedData.isSetDoubleStats(), actualData.hasDoubleStats());
    if (expectedData.isSetDoubleStats()) {
        DoubleColumnStatsData expected = expectedData.getDoubleStats();
        alluxio.grpc.table.DoubleColumnStatsData actual = actualData.getDoubleStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
        assertEquals(expected.getNumDVs(), actual.getNumDistincts());
        assertEquals(expected.isSetHighValue(), actual.hasHighValue());
        if (expected.isSetHighValue()) {
            assertEquals(expected.getHighValue(), actual.getHighValue(), 0.01);
        }
        assertEquals(expected.isSetLowValue(), actual.hasLowValue());
        if (expected.isSetLowValue()) {
            assertEquals(expected.getLowValue(), actual.getLowValue(), 0.01);
        }
    }

    // verify long
    assertEquals(expectedData.isSetLongStats(), actualData.hasLongStats());
    if (expectedData.isSetLongStats()) {
        LongColumnStatsData expected = expectedData.getLongStats();
        alluxio.grpc.table.LongColumnStatsData actual = actualData.getLongStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
        assertEquals(expected.getNumDVs(), actual.getNumDistincts());
        assertEquals(expected.isSetHighValue(), actual.hasHighValue());
        if (expected.isSetHighValue()) {
            assertEquals(expected.getHighValue(), actual.getHighValue());
        }
        assertEquals(expected.isSetLowValue(), actual.hasLowValue());
        if (expected.isSetLowValue()) {
            assertEquals(expected.getLowValue(), actual.getLowValue());
        }
    }

    // verify string
    assertEquals(expectedData.isSetStringStats(), actualData.hasStringStats());
    if (expectedData.isSetStringStats()) {
        StringColumnStatsData expected = expectedData.getStringStats();
        alluxio.grpc.table.StringColumnStatsData actual = actualData.getStringStats();
        assertEquals(expected.isSetBitVectors(), actual.hasBitVectors());
        if (expected.isSetBitVectors()) {
            assertEquals(expected.getBitVectors(), actual.getBitVectors());
        }
        assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
        assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
        assertEquals(expected.getNumNulls(), actual.getNumNulls());
        assertEquals(expected.getNumDVs(), actual.getNumDistincts());
    }
}
Aggregations