use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project presto by prestodb.
the class ThriftMetastoreUtil method createLongStatistics.
/**
 * Builds the metastore column statistics object for a long-typed column.
 *
 * <p>Minimum and maximum are copied from the integer statistics when those are present;
 * the null count and the distinct-values count (as computed by
 * {@code toMetastoreDistinctValuesCount}) are copied when available. Values that are
 * absent are simply left unset on the resulting {@link LongColumnStatsData}.
 *
 * @param columnName name recorded on the returned statistics object
 * @param columnType column type; its {@code toString()} form is recorded as the type name
 * @param statistics source statistics to translate
 * @return the statistics object wrapping the populated long statistics
 */
private static ColumnStatisticsObj createLongStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) {
    LongColumnStatsData longData = new LongColumnStatsData();

    // Range information only exists when integer statistics were collected.
    statistics.getIntegerStatistics().ifPresent(range -> {
        range.getMin().ifPresent(low -> longData.setLowValue(low));
        range.getMax().ifPresent(high -> longData.setHighValue(high));
    });

    statistics.getNullsCount().ifPresent(nulls -> longData.setNumNulls(nulls));

    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinct -> longData.setNumDVs(distinct));

    return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(longData));
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project flink by apache.
the class HiveStatsUtil method createTableColumnStats.
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>Every individual statistic is copied only when the corresponding Hive field is
 * set; otherwise {@code null} is passed to the Flink statistics object. Decimal
 * statistics are currently represented as {@link CatalogColumnStatisticsDataDouble}.
 *
 * @param colType Flink type of the column; only used in the warning that is logged for
 *     unsupported statistics
 * @param stats Hive column statistics to convert
 * @param hiveVersion Hive version used to load the {@link HiveShim} that handles date
 *     statistics
 * @return the converted statistics, or {@code null} if the kind of statistics held by
 *     {@code stats} is not supported
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(
                binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(
                booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
                booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
                booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        // Date statistics are delegated to the version-specific shim.
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(
                doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
                doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
                doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
                doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(
                longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
                longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
                longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
                longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(
                stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
                stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
                stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
                // The null count must be guarded by its own "is set" flag, not the NDV flag.
                stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project alluxio by Alluxio.
the class HiveUtilsTest method protoColStatsWithLongData.
/**
 * Checks the proto conversion of column statistics carrying long data, first with an
 * empty {@link LongColumnStatsData} and then with every field populated.
 */
@Test
public void protoColStatsWithLongData() {
    LongColumnStatsData longStats = new LongColumnStatsData();
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    ColumnStatisticsObj columnStats = new ColumnStatisticsObj();
    columnStats.setColName("colName");
    columnStats.setColType("colType");

    // Round 1: long statistics with nothing set.
    statsData.setLongStats(longStats);
    columnStats.setStatsData(statsData);
    verifyColumnStats(columnStats);

    // Round 2: long statistics with all fields filled with random values.
    longStats.setBitVectors(CommonUtils.randomAlphaNumString(5));
    longStats.setNumNulls(mRandom.nextLong());
    longStats.setHighValue(mRandom.nextLong());
    longStats.setLowValue(mRandom.nextLong());
    longStats.setNumDVs(mRandom.nextLong());
    statsData.setLongStats(longStats);
    columnStats.setStatsData(statsData);
    verifyColumnStats(columnStats);
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project alluxio by Alluxio.
the class HiveUtilsTest method verifyColumnStats.
/**
 * Converts the given Hive column statistics with {@code HiveUtils.toProto} and asserts
 * that the result mirrors the input: column name, column type, presence of the stats
 * data and, for each kind of statistics (binary, boolean, date, decimal, double, long,
 * string), both the presence flag and the individual values.
 *
 * @param hiveColStats the Hive column statistics to convert and compare against
 */
private void verifyColumnStats(ColumnStatisticsObj hiveColStats) {
    ColumnStatisticsInfo protoStats = HiveUtils.toProto(hiveColStats);
    assertEquals(hiveColStats.getColName(), protoStats.getColName());
    assertEquals(hiveColStats.getColType(), protoStats.getColType());
    assertEquals(hiveColStats.isSetStatsData(), protoStats.hasData());
    if (!hiveColStats.isSetStatsData()) {
        // Nothing further to compare when no stats data is attached.
        return;
    }

    ColumnStatisticsData hiveStats = hiveColStats.getStatsData();
    alluxio.grpc.table.ColumnStatisticsData protoData = protoStats.getData();

    // binary statistics
    assertEquals(hiveStats.isSetBinaryStats(), protoData.hasBinaryStats());
    if (hiveStats.isSetBinaryStats()) {
        BinaryColumnStatsData expectedBinary = hiveStats.getBinaryStats();
        alluxio.grpc.table.BinaryColumnStatsData actualBinary = protoData.getBinaryStats();
        assertEquals(expectedBinary.isSetBitVectors(), actualBinary.hasBitVectors());
        if (expectedBinary.isSetBitVectors()) {
            assertEquals(expectedBinary.getBitVectors(), actualBinary.getBitVectors());
        }
        assertEquals(expectedBinary.getAvgColLen(), actualBinary.getAvgColLen(), 0.01);
        assertEquals(expectedBinary.getMaxColLen(), actualBinary.getMaxColLen());
        assertEquals(expectedBinary.getNumNulls(), actualBinary.getNumNulls());
    }

    // boolean statistics
    assertEquals(hiveStats.isSetBooleanStats(), protoData.hasBooleanStats());
    if (hiveStats.isSetBooleanStats()) {
        BooleanColumnStatsData expectedBoolean = hiveStats.getBooleanStats();
        alluxio.grpc.table.BooleanColumnStatsData actualBoolean = protoData.getBooleanStats();
        assertEquals(expectedBoolean.isSetBitVectors(), actualBoolean.hasBitVectors());
        if (expectedBoolean.isSetBitVectors()) {
            assertEquals(expectedBoolean.getBitVectors(), actualBoolean.getBitVectors());
        }
        assertEquals(expectedBoolean.getNumFalses(), actualBoolean.getNumFalses());
        assertEquals(expectedBoolean.getNumTrues(), actualBoolean.getNumTrues());
        assertEquals(expectedBoolean.getNumNulls(), actualBoolean.getNumNulls());
    }

    // date statistics
    assertEquals(hiveStats.isSetDateStats(), protoData.hasDateStats());
    if (hiveStats.isSetDateStats()) {
        DateColumnStatsData expectedDate = hiveStats.getDateStats();
        alluxio.grpc.table.DateColumnStatsData actualDate = protoData.getDateStats();
        assertEquals(expectedDate.isSetBitVectors(), actualDate.hasBitVectors());
        if (expectedDate.isSetBitVectors()) {
            assertEquals(expectedDate.getBitVectors(), actualDate.getBitVectors());
        }
        assertEquals(expectedDate.getNumNulls(), actualDate.getNumNulls());
        assertEquals(expectedDate.getNumDVs(), actualDate.getNumDistincts());
        assertEquals(expectedDate.isSetHighValue(), actualDate.hasHighValue());
        if (expectedDate.isSetHighValue()) {
            assertEquals(expectedDate.getHighValue().getDaysSinceEpoch(), actualDate.getHighValue().getDaysSinceEpoch());
        }
        assertEquals(expectedDate.isSetLowValue(), actualDate.hasLowValue());
        if (expectedDate.isSetLowValue()) {
            assertEquals(expectedDate.getLowValue().getDaysSinceEpoch(), actualDate.getLowValue().getDaysSinceEpoch());
        }
    }

    // decimal statistics
    assertEquals(hiveStats.isSetDecimalStats(), protoData.hasDecimalStats());
    if (hiveStats.isSetDecimalStats()) {
        DecimalColumnStatsData expectedDecimal = hiveStats.getDecimalStats();
        alluxio.grpc.table.DecimalColumnStatsData actualDecimal = protoData.getDecimalStats();
        assertEquals(expectedDecimal.isSetBitVectors(), actualDecimal.hasBitVectors());
        if (expectedDecimal.isSetBitVectors()) {
            assertEquals(expectedDecimal.getBitVectors(), actualDecimal.getBitVectors());
        }
        assertEquals(expectedDecimal.getNumNulls(), actualDecimal.getNumNulls());
        assertEquals(expectedDecimal.getNumDVs(), actualDecimal.getNumDistincts());
        assertEquals(expectedDecimal.isSetHighValue(), actualDecimal.hasHighValue());
        if (expectedDecimal.isSetHighValue()) {
            assertEquals(expectedDecimal.getHighValue().getScale(), actualDecimal.getHighValue().getScale());
            assertArrayEquals(expectedDecimal.getHighValue().getUnscaled(), actualDecimal.getHighValue().getUnscaled().toByteArray());
        }
        assertEquals(expectedDecimal.isSetLowValue(), actualDecimal.hasLowValue());
        if (expectedDecimal.isSetLowValue()) {
            assertEquals(expectedDecimal.getLowValue().getScale(), actualDecimal.getLowValue().getScale());
            assertArrayEquals(expectedDecimal.getLowValue().getUnscaled(), actualDecimal.getLowValue().getUnscaled().toByteArray());
        }
    }

    // double statistics
    assertEquals(hiveStats.isSetDoubleStats(), protoData.hasDoubleStats());
    if (hiveStats.isSetDoubleStats()) {
        DoubleColumnStatsData expectedDouble = hiveStats.getDoubleStats();
        alluxio.grpc.table.DoubleColumnStatsData actualDouble = protoData.getDoubleStats();
        assertEquals(expectedDouble.isSetBitVectors(), actualDouble.hasBitVectors());
        if (expectedDouble.isSetBitVectors()) {
            assertEquals(expectedDouble.getBitVectors(), actualDouble.getBitVectors());
        }
        assertEquals(expectedDouble.getNumNulls(), actualDouble.getNumNulls());
        assertEquals(expectedDouble.getNumDVs(), actualDouble.getNumDistincts());
        assertEquals(expectedDouble.isSetHighValue(), actualDouble.hasHighValue());
        if (expectedDouble.isSetHighValue()) {
            assertEquals(expectedDouble.getHighValue(), actualDouble.getHighValue(), 0.01);
        }
        assertEquals(expectedDouble.isSetLowValue(), actualDouble.hasLowValue());
        if (expectedDouble.isSetLowValue()) {
            assertEquals(expectedDouble.getLowValue(), actualDouble.getLowValue(), 0.01);
        }
    }

    // long statistics
    assertEquals(hiveStats.isSetLongStats(), protoData.hasLongStats());
    if (hiveStats.isSetLongStats()) {
        LongColumnStatsData expectedLong = hiveStats.getLongStats();
        alluxio.grpc.table.LongColumnStatsData actualLong = protoData.getLongStats();
        assertEquals(expectedLong.isSetBitVectors(), actualLong.hasBitVectors());
        if (expectedLong.isSetBitVectors()) {
            assertEquals(expectedLong.getBitVectors(), actualLong.getBitVectors());
        }
        assertEquals(expectedLong.getNumNulls(), actualLong.getNumNulls());
        assertEquals(expectedLong.getNumDVs(), actualLong.getNumDistincts());
        assertEquals(expectedLong.isSetHighValue(), actualLong.hasHighValue());
        if (expectedLong.isSetHighValue()) {
            assertEquals(expectedLong.getHighValue(), actualLong.getHighValue());
        }
        assertEquals(expectedLong.isSetLowValue(), actualLong.hasLowValue());
        if (expectedLong.isSetLowValue()) {
            assertEquals(expectedLong.getLowValue(), actualLong.getLowValue());
        }
    }

    // string statistics
    assertEquals(hiveStats.isSetStringStats(), protoData.hasStringStats());
    if (hiveStats.isSetStringStats()) {
        StringColumnStatsData expectedString = hiveStats.getStringStats();
        alluxio.grpc.table.StringColumnStatsData actualString = protoData.getStringStats();
        assertEquals(expectedString.isSetBitVectors(), actualString.hasBitVectors());
        if (expectedString.isSetBitVectors()) {
            assertEquals(expectedString.getBitVectors(), actualString.getBitVectors());
        }
        assertEquals(expectedString.getAvgColLen(), actualString.getAvgColLen(), 0.01);
        assertEquals(expectedString.getMaxColLen(), actualString.getMaxColLen());
        assertEquals(expectedString.getNumNulls(), actualString.getNumNulls());
        assertEquals(expectedString.getNumDVs(), actualString.getNumDistincts());
    }
}
Aggregations