use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class TestHBaseStoreBitVector method decimalPartitionStatistics.
@Test
public void decimalPartitionStatistics() throws Exception {
createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);
// Add partition stats for: DECIMAL_COL and partition: {PART_KEY, DECIMAL_VAL} to DB
// Because of the way our mock implementation works we actually need to not create the table
// before we set statistics on it.
ColumnStatistics stats = new ColumnStatistics();
// Get a default ColumnStatisticsDesc for partition level stats
ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
stats.setStatsDesc(desc);
// Get one of the pre-created ColumnStatisticsObj
ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
// Add to DB
stats.addToStatsObj(obj);
List<String> parVals = new ArrayList<String>();
parVals.add(DECIMAL_VAL);
store.updatePartitionColumnStatistics(stats, parVals);
// Get from DB
List<String> partNames = new ArrayList<String>();
partNames.add(desc.getPartName());
List<String> colNames = new ArrayList<String>();
colNames.add(obj.getColName());
List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
// Compare ColumnStatisticsDesc
Assert.assertEquals(1, statsFromDB.size());
Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
// Compare ColumnStatisticsObj
Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
// Compare ColumnStatisticsData
Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
// Compare DecimalColumnStatsData
DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class ColumnStatsUpdateTask method constructColumnStatsFromInput.
private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
String dbName = SessionState.get().getCurrentDatabase();
ColumnStatsDesc desc = work.getColStats();
String tableName = desc.getTableName();
String partName = work.getPartName();
List<String> colName = desc.getColName();
List<String> colType = desc.getColType();
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
// grammar prohibits more than 1 column so we are guaranteed to have only 1
// element in this lists.
statsObj.setColName(colName.get(0));
statsObj.setColType(colType.get(0));
ColumnStatisticsData statsData = new ColumnStatisticsData();
String columnType = colType.get(0);
if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint")) {
LongColumnStatsData longStats = new LongColumnStatsData();
longStats.setNumNullsIsSet(false);
longStats.setNumDVsIsSet(false);
longStats.setLowValueIsSet(false);
longStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
longStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
longStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
longStats.setLowValue(Long.parseLong(value));
} else if (fName.equals("highValue")) {
longStats.setHighValue(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
doubleStats.setNumNullsIsSet(false);
doubleStats.setNumDVsIsSet(false);
doubleStats.setLowValueIsSet(false);
doubleStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
doubleStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
doubleStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
doubleStats.setLowValue(Double.parseDouble(value));
} else if (fName.equals("highValue")) {
doubleStats.setHighValue(Double.parseDouble(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDoubleStats(doubleStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("string") || columnType.toLowerCase().startsWith("char") || columnType.toLowerCase().startsWith("varchar")) {
//char(x),varchar(x) types
StringColumnStatsData stringStats = new StringColumnStatsData();
stringStats.setMaxColLenIsSet(false);
stringStats.setAvgColLenIsSet(false);
stringStats.setNumNullsIsSet(false);
stringStats.setNumDVsIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
stringStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
stringStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("avgColLen")) {
stringStats.setAvgColLen(Double.parseDouble(value));
} else if (fName.equals("maxColLen")) {
stringStats.setMaxColLen(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setStringStats(stringStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("boolean")) {
BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
booleanStats.setNumNullsIsSet(false);
booleanStats.setNumTruesIsSet(false);
booleanStats.setNumFalsesIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
booleanStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numTrues")) {
booleanStats.setNumTrues(Long.parseLong(value));
} else if (fName.equals("numFalses")) {
booleanStats.setNumFalses(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setBooleanStats(booleanStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
binaryStats.setNumNullsIsSet(false);
binaryStats.setAvgColLenIsSet(false);
binaryStats.setMaxColLenIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
binaryStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("avgColLen")) {
binaryStats.setAvgColLen(Double.parseDouble(value));
} else if (fName.equals("maxColLen")) {
binaryStats.setMaxColLen(Long.parseLong(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (columnType.toLowerCase().startsWith("decimal")) {
//decimal(a,b) type
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
decimalStats.setNumNullsIsSet(false);
decimalStats.setNumDVsIsSet(false);
decimalStats.setLowValueIsSet(false);
decimalStats.setHighValueIsSet(false);
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
decimalStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
decimalStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
BigDecimal d = new BigDecimal(value);
decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
} else if (fName.equals("highValue")) {
BigDecimal d = new BigDecimal(value);
decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (columnType.equalsIgnoreCase("date") || columnType.equalsIgnoreCase("timestamp")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
Map<String, String> mapProp = work.getMapProp();
for (Entry<String, String> entry : mapProp.entrySet()) {
String fName = entry.getKey();
String value = entry.getValue();
if (fName.equals("numNulls")) {
dateStats.setNumNulls(Long.parseLong(value));
} else if (fName.equals("numDVs")) {
dateStats.setNumDVs(Long.parseLong(value));
} else if (fName.equals("lowValue")) {
// Date high/low value is stored as long in stats DB, but allow users to set high/low
// value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
dateStats.setLowValue(readDateValue(value));
} else if (fName.equals("highValue")) {
dateStats.setHighValue(readDateValue(value));
} else {
throw new SemanticException("Unknown stat");
}
}
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
} else {
throw new SemanticException("Unsupported type");
}
String[] names = Utilities.getDbTableName(dbName, tableName);
ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, partName == null);
ColumnStatistics colStat = new ColumnStatistics();
colStat.setStatsDesc(statsDesc);
colStat.addToStatsObj(statsObj);
return colStat;
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class StatObjectConverter method convertToMPartitionColumnStatistics.
public static MPartitionColumnStatistics convertToMPartitionColumnStatistics(MPartition partition, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj) throws MetaException, NoSuchObjectException {
if (statsDesc == null || statsObj == null) {
return null;
}
MPartitionColumnStatistics mColStats = new MPartitionColumnStatistics();
mColStats.setPartition(partition);
mColStats.setDbName(statsDesc.getDbName());
mColStats.setTableName(statsDesc.getTableName());
mColStats.setPartitionName(statsDesc.getPartName());
mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
mColStats.setColName(statsObj.getColName());
mColStats.setColType(statsObj.getColType());
if (statsObj.getStatsData().isSetBooleanStats()) {
BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats();
mColStats.setBooleanStats(boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null);
} else if (statsObj.getStatsData().isSetLongStats()) {
LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
mColStats.setLongStats(longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDoubleStats()) {
DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
mColStats.setDoubleStats(doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
} else if (statsObj.getStatsData().isSetDecimalStats()) {
DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();
String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null;
String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null;
mColStats.setDecimalStats(decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, low, high);
} else if (statsObj.getStatsData().isSetStringStats()) {
StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
mColStats.setStringStats(stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats();
mColStats.setBinaryStats(binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null);
} else if (statsObj.getStatsData().isSetDateStats()) {
DateColumnStatsData dateStats = statsObj.getStatsData().getDateStats();
mColStats.setDateStats(dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
}
return mColStats;
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class StatObjectConverter method getTableColumnStatisticsObj.
public static ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj) {
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColType(mStatsObj.getColType());
statsObj.setColName(mStatsObj.getColName());
String colType = mStatsObj.getColType().toLowerCase();
ColumnStatisticsData colStatsData = new ColumnStatisticsData();
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
boolStats.setNumFalses(mStatsObj.getNumFalses());
boolStats.setNumTrues(mStatsObj.getNumTrues());
boolStats.setNumNulls(mStatsObj.getNumNulls());
colStatsData.setBooleanStats(boolStats);
} else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
StringColumnStatsData stringStats = new StringColumnStatsData();
stringStats.setNumNulls(mStatsObj.getNumNulls());
stringStats.setAvgColLen(mStatsObj.getAvgColLen());
stringStats.setMaxColLen(mStatsObj.getMaxColLen());
stringStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
binaryStats.setNumNulls(mStatsObj.getNumNulls());
binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
colStatsData.setBinaryStats(binaryStats);
} else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) {
LongColumnStatsData longStats = new LongColumnStatsData();
longStats.setNumNulls(mStatsObj.getNumNulls());
Long longHighValue = mStatsObj.getLongHighValue();
if (longHighValue != null) {
longStats.setHighValue(longHighValue);
}
Long longLowValue = mStatsObj.getLongLowValue();
if (longLowValue != null) {
longStats.setLowValue(longLowValue);
}
longStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
doubleStats.setNumNulls(mStatsObj.getNumNulls());
Double doubleHighValue = mStatsObj.getDoubleHighValue();
if (doubleHighValue != null) {
doubleStats.setHighValue(doubleHighValue);
}
Double doubleLowValue = mStatsObj.getDoubleLowValue();
if (doubleLowValue != null) {
doubleStats.setLowValue(doubleLowValue);
}
doubleStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
decimalStats.setNumNulls(mStatsObj.getNumNulls());
String decimalHighValue = mStatsObj.getDecimalHighValue();
if (decimalHighValue != null) {
decimalStats.setHighValue(createThriftDecimal(decimalHighValue));
}
String decimalLowValue = mStatsObj.getDecimalLowValue();
if (decimalLowValue != null) {
decimalStats.setLowValue(createThriftDecimal(decimalLowValue));
}
decimalStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDecimalStats(decimalStats);
} else if (colType.equals("date")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
dateStats.setNumNulls(mStatsObj.getNumNulls());
Long highValue = mStatsObj.getLongHighValue();
if (highValue != null) {
dateStats.setHighValue(new Date(highValue));
}
Long lowValue = mStatsObj.getLongLowValue();
if (lowValue != null) {
dateStats.setLowValue(new Date(lowValue));
}
dateStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDateStats(dateStats);
}
statsObj.setStatsData(colStatsData);
return statsObj;
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class StatObjectConverter method getPartitionColumnStatisticsObj.
public static ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj) {
ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
statsObj.setColType(mStatsObj.getColType());
statsObj.setColName(mStatsObj.getColName());
String colType = mStatsObj.getColType().toLowerCase();
ColumnStatisticsData colStatsData = new ColumnStatisticsData();
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
boolStats.setNumFalses(mStatsObj.getNumFalses());
boolStats.setNumTrues(mStatsObj.getNumTrues());
boolStats.setNumNulls(mStatsObj.getNumNulls());
colStatsData.setBooleanStats(boolStats);
} else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
StringColumnStatsData stringStats = new StringColumnStatsData();
stringStats.setNumNulls(mStatsObj.getNumNulls());
stringStats.setAvgColLen(mStatsObj.getAvgColLen());
stringStats.setMaxColLen(mStatsObj.getMaxColLen());
stringStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
binaryStats.setNumNulls(mStatsObj.getNumNulls());
binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
colStatsData.setBinaryStats(binaryStats);
} else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint") || colType.equals("timestamp")) {
LongColumnStatsData longStats = new LongColumnStatsData();
longStats.setNumNulls(mStatsObj.getNumNulls());
if (mStatsObj.getLongHighValue() != null) {
longStats.setHighValue(mStatsObj.getLongHighValue());
}
if (mStatsObj.getLongLowValue() != null) {
longStats.setLowValue(mStatsObj.getLongLowValue());
}
longStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
doubleStats.setNumNulls(mStatsObj.getNumNulls());
if (mStatsObj.getDoubleHighValue() != null) {
doubleStats.setHighValue(mStatsObj.getDoubleHighValue());
}
if (mStatsObj.getDoubleLowValue() != null) {
doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
}
doubleStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
decimalStats.setNumNulls(mStatsObj.getNumNulls());
if (mStatsObj.getDecimalHighValue() != null) {
decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
}
if (mStatsObj.getDecimalLowValue() != null) {
decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
}
decimalStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDecimalStats(decimalStats);
} else if (colType.equals("date")) {
DateColumnStatsData dateStats = new DateColumnStatsData();
dateStats.setNumNulls(mStatsObj.getNumNulls());
dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
dateStats.setNumDVs(mStatsObj.getNumDVs());
colStatsData.setDateStats(dateStats);
}
statsObj.setStatsData(colStatsData);
return statsObj;
}
Aggregations