use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
the class StatObjectConverter method convertToMPartitionColumnStatistics.
/**
 * Builds an {@link MPartitionColumnStatistics} instance for one column of a partition from its
 * Thrift descriptor and statistics object.
 *
 * <p>At most one typed statistics group (boolean, long, double, decimal, string, binary or date)
 * is copied, chosen by the first member of the stats data that is set. Each individual value
 * that is not set on the Thrift side is handed over as {@code null}.
 *
 * @param partition the partition object the statistics are attached to
 * @param statsDesc supplies database name, table name, partition name and last-analyzed value
 * @param statsObj supplies column name, column type and the typed statistics to copy
 * @return the populated object, or {@code null} when {@code statsDesc} or {@code statsObj} is
 *     {@code null}
 * @throws MetaException declared by the signature; no statement here throws it directly
 * @throws NoSuchObjectException declared by the signature; no statement here throws it directly
 */
public static MPartitionColumnStatistics convertToMPartitionColumnStatistics(MPartition partition, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj) throws MetaException, NoSuchObjectException {
    if (statsDesc == null || statsObj == null) {
        return null;
    }

    // Identity of the statistics row: owning partition, names, analysis time and column.
    MPartitionColumnStatistics converted = new MPartitionColumnStatistics();
    converted.setPartition(partition);
    converted.setDbName(statsDesc.getDbName());
    converted.setTableName(statsDesc.getTableName());
    converted.setPartitionName(statsDesc.getPartName());
    converted.setLastAnalyzed(statsDesc.getLastAnalyzed());
    converted.setColName(statsObj.getColName());
    converted.setColType(statsObj.getColType());

    // Typed payload: unset values are carried as null rather than as a primitive default.
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData booleans = statsObj.getStatsData().getBooleanStats();
        Long trueCount = booleans.isSetNumTrues() ? booleans.getNumTrues() : null;
        Long falseCount = booleans.isSetNumFalses() ? booleans.getNumFalses() : null;
        Long nullCount = booleans.isSetNumNulls() ? booleans.getNumNulls() : null;
        converted.setBooleanStats(trueCount, falseCount, nullCount);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData longs = statsObj.getStatsData().getLongStats();
        Long nullCount = longs.isSetNumNulls() ? longs.getNumNulls() : null;
        Long distinctCount = longs.isSetNumDVs() ? longs.getNumDVs() : null;
        Long lowest = longs.isSetLowValue() ? longs.getLowValue() : null;
        Long highest = longs.isSetHighValue() ? longs.getHighValue() : null;
        converted.setLongStats(
                nullCount,
                distinctCount,
                longs.isSetBitVectors() ? longs.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData doubles = statsObj.getStatsData().getDoubleStats();
        Long nullCount = doubles.isSetNumNulls() ? doubles.getNumNulls() : null;
        Long distinctCount = doubles.isSetNumDVs() ? doubles.getNumDVs() : null;
        Double lowest = doubles.isSetLowValue() ? doubles.getLowValue() : null;
        Double highest = doubles.isSetHighValue() ? doubles.getHighValue() : null;
        converted.setDoubleStats(
                nullCount,
                distinctCount,
                doubles.isSetBitVectors() ? doubles.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData decimals = statsObj.getStatsData().getDecimalStats();
        // Decimal bounds are passed on in the string form produced by createJdoDecimalString.
        String lowest = decimals.isSetLowValue() ? createJdoDecimalString(decimals.getLowValue()) : null;
        String highest = decimals.isSetHighValue() ? createJdoDecimalString(decimals.getHighValue()) : null;
        Long nullCount = decimals.isSetNumNulls() ? decimals.getNumNulls() : null;
        Long distinctCount = decimals.isSetNumDVs() ? decimals.getNumDVs() : null;
        converted.setDecimalStats(
                nullCount,
                distinctCount,
                decimals.isSetBitVectors() ? decimals.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData strings = statsObj.getStatsData().getStringStats();
        Long nullCount = strings.isSetNumNulls() ? strings.getNumNulls() : null;
        Long distinctCount = strings.isSetNumDVs() ? strings.getNumDVs() : null;
        Long longestLength = strings.isSetMaxColLen() ? strings.getMaxColLen() : null;
        Double averageLength = strings.isSetAvgColLen() ? strings.getAvgColLen() : null;
        converted.setStringStats(
                nullCount,
                distinctCount,
                strings.isSetBitVectors() ? strings.getBitVectors() : null,
                longestLength,
                averageLength);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData binaries = statsObj.getStatsData().getBinaryStats();
        Long nullCount = binaries.isSetNumNulls() ? binaries.getNumNulls() : null;
        Long longestLength = binaries.isSetMaxColLen() ? binaries.getMaxColLen() : null;
        Double averageLength = binaries.isSetAvgColLen() ? binaries.getAvgColLen() : null;
        converted.setBinaryStats(nullCount, longestLength, averageLength);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData dates = statsObj.getStatsData().getDateStats();
        Long nullCount = dates.isSetNumNulls() ? dates.getNumNulls() : null;
        Long distinctCount = dates.isSetNumDVs() ? dates.getNumDVs() : null;
        // Date bounds are reduced to their days-since-epoch value.
        Long lowestDay = dates.isSetLowValue() ? dates.getLowValue().getDaysSinceEpoch() : null;
        Long highestDay = dates.isSetHighValue() ? dates.getHighValue().getDaysSinceEpoch() : null;
        converted.setDateStats(
                nullCount,
                distinctCount,
                dates.isSetBitVectors() ? dates.getBitVectors() : null,
                lowestDay,
                highestDay);
    }
    return converted;
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project presto by prestodb.
the class TestThriftHiveMetastoreUtil method testLongStatsToColumnStatistics.
/**
 * Converts fully populated long statistics and checks that only the integer range, the nulls
 * count and the distinct values count come back populated.
 */
@Test
public void testLongStatsToColumnStatistics() {
    // Input: range [0, 100], one null, twenty distinct values.
    LongColumnStatsData hiveLongStats = new LongColumnStatsData();
    hiveLongStats.setLowValue(0);
    hiveLongStats.setHighValue(100);
    hiveLongStats.setNumNulls(1);
    hiveLongStats.setNumDVs(20);
    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(hiveLongStats));

    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.of(1000));

    IntegerStatistics expectedRange = new IntegerStatistics(OptionalLong.of(0), OptionalLong.of(100));
    assertEquals(actual.getIntegerStatistics(), Optional.of(expectedRange));

    // No statistics of any other type may be produced for a long column.
    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.empty());
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.empty());
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());

    assertEquals(actual.getNullsCount(), OptionalLong.of(1));
    // 20 distinct values were supplied but 19 are expected back.
    // NOTE(review): presumably the conversion discounts the null entry - confirm against
    // fromMetastoreApiColumnStatistics.
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19));
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project presto by prestodb.
the class TestThriftHiveMetastoreUtil method testEmptyLongStatsToColumnStatistics.
/**
 * Converts long statistics on which no value was set and checks that the result carries an
 * integer-statistics entry with an empty range and no other populated value.
 */
@Test
public void testEmptyLongStatsToColumnStatistics() {
    // Input: a freshly constructed stats object with nothing set on it.
    LongColumnStatsData unsetLongStats = new LongColumnStatsData();
    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", BIGINT_TYPE_NAME, longStats(unsetLongStats));

    HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.empty());

    // The integer statistics are still present, just without bounds.
    IntegerStatistics expectedRange = new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty());
    assertEquals(actual.getIntegerStatistics(), Optional.of(expectedRange));

    assertEquals(actual.getDoubleStatistics(), Optional.empty());
    assertEquals(actual.getDecimalStatistics(), Optional.empty());
    assertEquals(actual.getDateStatistics(), Optional.empty());
    assertEquals(actual.getBooleanStatistics(), Optional.empty());
    assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(actual.getNullsCount(), OptionalLong.empty());
    assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty());
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project flink by apache.
the class HiveStatsUtil method getColumnStatisticsData.
/**
 * Translates Flink column statistics into the Hive {@link ColumnStatisticsData} variant that
 * matches the logical type root of the column.
 *
 * <p>A conversion only happens when the concrete statistics class fits the type root (for
 * example string statistics for CHAR/VARCHAR columns). Only values that are non-null on the
 * Flink side are set on the Hive object. Note that the max and min of the Flink ColumnStats are
 * currently assumed to be of the same type as the Flink column type, e.g. SHORT and LONG values
 * for SHORT and LONG columns.
 *
 * @param colType Flink data type of the column
 * @param colStat Flink statistics to convert
 * @param hiveVersion Hive version used to load the {@link HiveShim}; only read for DATE columns
 * @return the Hive statistics data built from {@code colStat}
 * @throws CatalogException if the combination of type root and statistics class is not handled
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();

    // Character types -> string statistics.
    if ((type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR))
            && colStat instanceof CatalogColumnStatisticsDataString) {
        CatalogColumnStatisticsDataString flinkStrings = (CatalogColumnStatisticsDataString) colStat;
        StringColumnStatsData hiveStrings = new StringColumnStatsData();
        hiveStrings.clear();
        if (flinkStrings.getMaxLength() != null) {
            hiveStrings.setMaxColLen(flinkStrings.getMaxLength());
        }
        if (flinkStrings.getAvgLength() != null) {
            hiveStrings.setAvgColLen(flinkStrings.getAvgLength());
        }
        if (flinkStrings.getNullCount() != null) {
            hiveStrings.setNumNulls(flinkStrings.getNullCount());
        }
        if (flinkStrings.getNdv() != null) {
            hiveStrings.setNumDVs(flinkStrings.getNdv());
        }
        return ColumnStatisticsData.stringStats(hiveStrings);
    }

    // Boolean -> boolean statistics.
    if (type.equals(LogicalTypeRoot.BOOLEAN) && colStat instanceof CatalogColumnStatisticsDataBoolean) {
        CatalogColumnStatisticsDataBoolean flinkBooleans = (CatalogColumnStatisticsDataBoolean) colStat;
        BooleanColumnStatsData hiveBooleans = new BooleanColumnStatsData();
        hiveBooleans.clear();
        if (flinkBooleans.getTrueCount() != null) {
            hiveBooleans.setNumTrues(flinkBooleans.getTrueCount());
        }
        if (flinkBooleans.getFalseCount() != null) {
            hiveBooleans.setNumFalses(flinkBooleans.getFalseCount());
        }
        if (flinkBooleans.getNullCount() != null) {
            hiveBooleans.setNumNulls(flinkBooleans.getNullCount());
        }
        return ColumnStatisticsData.booleanStats(hiveBooleans);
    }

    // Integral types and the listed time/timestamp types -> long statistics.
    boolean storedAsLong = type.equals(LogicalTypeRoot.TINYINT)
            || type.equals(LogicalTypeRoot.SMALLINT)
            || type.equals(LogicalTypeRoot.INTEGER)
            || type.equals(LogicalTypeRoot.BIGINT)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE);
    if (storedAsLong && colStat instanceof CatalogColumnStatisticsDataLong) {
        CatalogColumnStatisticsDataLong flinkLongs = (CatalogColumnStatisticsDataLong) colStat;
        LongColumnStatsData hiveLongs = new LongColumnStatsData();
        hiveLongs.clear();
        if (flinkLongs.getMax() != null) {
            hiveLongs.setHighValue(flinkLongs.getMax());
        }
        if (flinkLongs.getMin() != null) {
            hiveLongs.setLowValue(flinkLongs.getMin());
        }
        if (flinkLongs.getNdv() != null) {
            hiveLongs.setNumDVs(flinkLongs.getNdv());
        }
        if (flinkLongs.getNullCount() != null) {
            hiveLongs.setNumNulls(flinkLongs.getNullCount());
        }
        return ColumnStatisticsData.longStats(hiveLongs);
    }

    // Floating point types -> double statistics.
    if ((type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE))
            && colStat instanceof CatalogColumnStatisticsDataDouble) {
        CatalogColumnStatisticsDataDouble flinkDoubles = (CatalogColumnStatisticsDataDouble) colStat;
        DoubleColumnStatsData hiveDoubles = new DoubleColumnStatsData();
        hiveDoubles.clear();
        if (flinkDoubles.getMax() != null) {
            hiveDoubles.setHighValue(flinkDoubles.getMax());
        }
        if (flinkDoubles.getMin() != null) {
            hiveDoubles.setLowValue(flinkDoubles.getMin());
        }
        if (flinkDoubles.getNullCount() != null) {
            hiveDoubles.setNumNulls(flinkDoubles.getNullCount());
        }
        if (flinkDoubles.getNdv() != null) {
            hiveDoubles.setNumDVs(flinkDoubles.getNdv());
        }
        return ColumnStatisticsData.doubleStats(hiveDoubles);
    }

    // Date -> delegated to the shim loaded for the given Hive version.
    if (type.equals(LogicalTypeRoot.DATE) && colStat instanceof CatalogColumnStatisticsDataDate) {
        HiveShim shim = HiveShimLoader.loadHiveShim(hiveVersion);
        return shim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
    }

    // Binary types -> binary statistics.
    if ((type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY))
            && colStat instanceof CatalogColumnStatisticsDataBinary) {
        CatalogColumnStatisticsDataBinary flinkBinaries = (CatalogColumnStatisticsDataBinary) colStat;
        BinaryColumnStatsData hiveBinaries = new BinaryColumnStatsData();
        hiveBinaries.clear();
        if (flinkBinaries.getMaxLength() != null) {
            hiveBinaries.setMaxColLen(flinkBinaries.getMaxLength());
        }
        if (flinkBinaries.getAvgLength() != null) {
            hiveBinaries.setAvgColLen(flinkBinaries.getAvgLength());
        }
        if (flinkBinaries.getNullCount() != null) {
            hiveBinaries.setNumNulls(flinkBinaries.getNullCount());
        }
        return ColumnStatisticsData.binaryStats(hiveBinaries);
    }

    // Decimal columns arrive with double-typed Flink statistics -> decimal statistics.
    if (type.equals(LogicalTypeRoot.DECIMAL) && colStat instanceof CatalogColumnStatisticsDataDouble) {
        CatalogColumnStatisticsDataDouble flinkDecimals = (CatalogColumnStatisticsDataDouble) colStat;
        DecimalColumnStatsData hiveDecimals = new DecimalColumnStatsData();
        if (flinkDecimals.getMax() != null) {
            // in older versions we cannot create HiveDecimal from Double, so convert Double
            // to BigDecimal first
            BigDecimal upper = BigDecimal.valueOf(flinkDecimals.getMax());
            hiveDecimals.setHighValue(toThriftDecimal(HiveDecimal.create(upper)));
        }
        if (flinkDecimals.getMin() != null) {
            BigDecimal lower = BigDecimal.valueOf(flinkDecimals.getMin());
            hiveDecimals.setLowValue(toThriftDecimal(HiveDecimal.create(lower)));
        }
        if (flinkDecimals.getNdv() != null) {
            hiveDecimals.setNumDVs(flinkDecimals.getNdv());
        }
        if (flinkDecimals.getNullCount() != null) {
            hiveDecimals.setNumNulls(flinkDecimals.getNullCount());
        }
        return ColumnStatisticsData.decimalStats(hiveDecimals);
    }

    // Either the type root is not covered or the statistics class does not fit it.
    throw new CatalogException(
            String.format(
                    "Flink does not support converting ColumnStats '%s' for Hive column " + "type '%s' yet",
                    colStat,
                    colType));
}
use of org.apache.hadoop.hive.metastore.api.LongColumnStatsData in project hive by apache.
the class StatObjectConverter method convertToMTableColumnStatistics.
// JDO
/**
 * Builds an {@link MTableColumnStatistics} instance for one column of a table from its Thrift
 * descriptor and statistics object.
 *
 * <p>At most one typed statistics group (boolean, long, double, decimal, string, binary, date
 * or timestamp) is copied, chosen by the first member of the stats data that is set. Each
 * individual value that is not set on the Thrift side is handed over as {@code null}. When the
 * descriptor carries no catalog name, {@code DEFAULT_CATALOG_NAME} is used instead.
 *
 * @param table the table object the statistics are attached to
 * @param statsDesc supplies catalog, database and table names and the last-analyzed value
 * @param statsObj supplies column name, column type and the typed statistics to copy
 * @param engine engine value stored on the result as given
 * @return the populated object
 * @throws InvalidObjectException if {@code statsObj} or {@code statsDesc} is {@code null}
 * @throws NoSuchObjectException declared by the signature; no statement here throws it directly
 * @throws MetaException declared by the signature; no statement here throws it directly
 */
public static MTableColumnStatistics convertToMTableColumnStatistics(MTable table, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj, String engine) throws NoSuchObjectException, MetaException, InvalidObjectException {
    if (statsObj == null || statsDesc == null) {
        throw new InvalidObjectException("Invalid column stats object");
    }

    // Identity of the statistics row: owning table, names, analysis time and column.
    MTableColumnStatistics converted = new MTableColumnStatistics();
    converted.setTable(table);
    converted.setDbName(statsDesc.getDbName());
    converted.setCatName(statsDesc.isSetCatName() ? statsDesc.getCatName() : DEFAULT_CATALOG_NAME);
    converted.setTableName(statsDesc.getTableName());
    converted.setLastAnalyzed(statsDesc.getLastAnalyzed());
    converted.setColName(statsObj.getColName());
    converted.setColType(statsObj.getColType());

    // Typed payload: unset values are carried as null rather than as a primitive default.
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData booleans = statsObj.getStatsData().getBooleanStats();
        Long trueCount = booleans.isSetNumTrues() ? booleans.getNumTrues() : null;
        Long falseCount = booleans.isSetNumFalses() ? booleans.getNumFalses() : null;
        Long nullCount = booleans.isSetNumNulls() ? booleans.getNumNulls() : null;
        converted.setBooleanStats(trueCount, falseCount, nullCount);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData longs = statsObj.getStatsData().getLongStats();
        Long nullCount = longs.isSetNumNulls() ? longs.getNumNulls() : null;
        Long distinctCount = longs.isSetNumDVs() ? longs.getNumDVs() : null;
        Long lowest = longs.isSetLowValue() ? longs.getLowValue() : null;
        Long highest = longs.isSetHighValue() ? longs.getHighValue() : null;
        converted.setLongStats(
                nullCount,
                distinctCount,
                longs.isSetBitVectors() ? longs.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData doubles = statsObj.getStatsData().getDoubleStats();
        Long nullCount = doubles.isSetNumNulls() ? doubles.getNumNulls() : null;
        Long distinctCount = doubles.isSetNumDVs() ? doubles.getNumDVs() : null;
        Double lowest = doubles.isSetLowValue() ? doubles.getLowValue() : null;
        Double highest = doubles.isSetHighValue() ? doubles.getHighValue() : null;
        converted.setDoubleStats(
                nullCount,
                distinctCount,
                doubles.isSetBitVectors() ? doubles.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData decimals = statsObj.getStatsData().getDecimalStats();
        // Decimal bounds are passed on in the string form produced by DecimalUtils.
        String lowest = decimals.isSetLowValue() ? DecimalUtils.createJdoDecimalString(decimals.getLowValue()) : null;
        String highest = decimals.isSetHighValue() ? DecimalUtils.createJdoDecimalString(decimals.getHighValue()) : null;
        Long nullCount = decimals.isSetNumNulls() ? decimals.getNumNulls() : null;
        Long distinctCount = decimals.isSetNumDVs() ? decimals.getNumDVs() : null;
        converted.setDecimalStats(
                nullCount,
                distinctCount,
                decimals.isSetBitVectors() ? decimals.getBitVectors() : null,
                lowest,
                highest);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData strings = statsObj.getStatsData().getStringStats();
        Long nullCount = strings.isSetNumNulls() ? strings.getNumNulls() : null;
        Long distinctCount = strings.isSetNumDVs() ? strings.getNumDVs() : null;
        Long longestLength = strings.isSetMaxColLen() ? strings.getMaxColLen() : null;
        Double averageLength = strings.isSetAvgColLen() ? strings.getAvgColLen() : null;
        converted.setStringStats(
                nullCount,
                distinctCount,
                strings.isSetBitVectors() ? strings.getBitVectors() : null,
                longestLength,
                averageLength);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData binaries = statsObj.getStatsData().getBinaryStats();
        Long nullCount = binaries.isSetNumNulls() ? binaries.getNumNulls() : null;
        Long longestLength = binaries.isSetMaxColLen() ? binaries.getMaxColLen() : null;
        Double averageLength = binaries.isSetAvgColLen() ? binaries.getAvgColLen() : null;
        converted.setBinaryStats(nullCount, longestLength, averageLength);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData dates = statsObj.getStatsData().getDateStats();
        Long nullCount = dates.isSetNumNulls() ? dates.getNumNulls() : null;
        Long distinctCount = dates.isSetNumDVs() ? dates.getNumDVs() : null;
        // Date bounds are reduced to their days-since-epoch value.
        Long lowestDay = dates.isSetLowValue() ? dates.getLowValue().getDaysSinceEpoch() : null;
        Long highestDay = dates.isSetHighValue() ? dates.getHighValue().getDaysSinceEpoch() : null;
        converted.setDateStats(
                nullCount,
                distinctCount,
                dates.isSetBitVectors() ? dates.getBitVectors() : null,
                lowestDay,
                highestDay);
    } else if (statsObj.getStatsData().isSetTimestampStats()) {
        TimestampColumnStatsData timestamps = statsObj.getStatsData().getTimestampStats();
        Long nullCount = timestamps.isSetNumNulls() ? timestamps.getNumNulls() : null;
        Long distinctCount = timestamps.isSetNumDVs() ? timestamps.getNumDVs() : null;
        // Timestamp bounds are reduced to their seconds-since-epoch value.
        Long lowestSecond = timestamps.isSetLowValue() ? timestamps.getLowValue().getSecondsSinceEpoch() : null;
        Long highestSecond = timestamps.isSetHighValue() ? timestamps.getHighValue().getSecondsSinceEpoch() : null;
        converted.setTimestampStats(
                nullCount,
                distinctCount,
                timestamps.isSetBitVectors() ? timestamps.getBitVectors() : null,
                lowestSecond,
                highestSecond);
    }

    converted.setEngine(engine);
    return converted;
}
Aggregations