use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class DecimalColumnStatsMerger method merge.
/**
 * Merges the decimal column statistics carried by {@code newColStats} into
 * {@code aggregateColStats}, updating the aggregate in place.
 *
 * <ul>
 *   <li>low value: the smaller of the two low values (a null side is ignored);</li>
 *   <li>high value: the larger of the two high values (a null side is ignored);</li>
 *   <li>null count: the sum of both null counts;</li>
 *   <li>NDV: the maximum of both NDVs when no estimator or no incoming bit vector is
 *       available, otherwise the estimate produced by {@code ndvEstimator} after both
 *       bit vectors have been merged into it.</li>
 * </ul>
 *
 * @param aggregateColStats running aggregate; its decimal stats are mutated
 * @param newColStats statistics to fold into the aggregate; only read
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
  DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
  DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();

  // Low bound: take the new value only when the aggregate has none or the new one is
  // strictly smaller. The previous "> 0" comparison kept the LARGER value as the low bound.
  // NOTE(review): ordering relies on Decimal.compareTo — confirm it is numeric ordering.
  Decimal aggregateLow = aggregateData.getLowValue();
  Decimal newLow = newData.getLowValue();
  if (aggregateLow == null || (newLow != null && newLow.compareTo(aggregateLow) < 0)) {
    aggregateData.setLowValue(newLow);
  }

  // High bound: take the new value only when the aggregate has none or the new one is
  // strictly larger. A null incoming bound no longer reaches compareTo.
  Decimal aggregateHigh = aggregateData.getHighValue();
  Decimal newHigh = newData.getHighValue();
  if (aggregateHigh == null || (newHigh != null && newHigh.compareTo(aggregateHigh) > 0)) {
    aggregateData.setHighValue(newHigh);
  }

  aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());

  if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
    // No bit vector to merge: fall back to the larger of the two NDV figures.
    aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
  } else {
    // Fold both serialized bit vectors into the estimator, then re-estimate.
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), ndvEstimator.getnumBitVectors()));
    ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors()));
    long ndv = ndvEstimator.estimateNumDistinctValues();
    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
    aggregateData.setNumDVs(ndv);
    aggregateData.setBitVectors(ndvEstimator.serialize().toString());
  }
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class ColumnStatsTask method unpackPrimitiveObject.
/**
 * Feeds one primitive field of a column-statistics row into {@code statsObj}.
 *
 * <p>A null value is ignored. The {@code "columntype"} field selects which typed stats
 * container is installed on {@code statsObj}; an unrecognized type name installs nothing.
 * Every other field is forwarded to the unpack method matching the container that is
 * currently set on {@code statsObj}.
 *
 * @param oi inspector describing {@code o}
 * @param o field value; may be null
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object being populated
 */
private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) {
  if (o == null) {
    return;
  }

  if (!fieldName.equals("columntype")) {
    // Regular stats field: dispatch on whichever typed container is already set.
    ColumnStatisticsData current = statsObj.getStatsData();
    if (current.isSetBooleanStats()) {
      unpackBooleanStats(oi, o, fieldName, statsObj);
    } else if (current.isSetLongStats()) {
      unpackLongStats(oi, o, fieldName, statsObj);
    } else if (current.isSetDoubleStats()) {
      unpackDoubleStats(oi, o, fieldName, statsObj);
    } else if (current.isSetStringStats()) {
      unpackStringStats(oi, o, fieldName, statsObj);
    } else if (current.isSetBinaryStats()) {
      unpackBinaryStats(oi, o, fieldName, statsObj);
    } else if (current.isSetDecimalStats()) {
      unpackDecimalStats(oi, o, fieldName, statsObj);
    } else if (current.isSetDateStats()) {
      unpackDateStats(oi, o, fieldName, statsObj);
    }
    return;
  }

  // "columntype" field: read the type name and install the matching empty container.
  PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
  String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
  ColumnStatisticsData typedData = new ColumnStatisticsData();
  if (typeName.equalsIgnoreCase("long")) {
    typedData.setLongStats(new LongColumnStatsData());
  } else if (typeName.equalsIgnoreCase("double")) {
    typedData.setDoubleStats(new DoubleColumnStatsData());
  } else if (typeName.equalsIgnoreCase("string")) {
    typedData.setStringStats(new StringColumnStatsData());
  } else if (typeName.equalsIgnoreCase("boolean")) {
    typedData.setBooleanStats(new BooleanColumnStatsData());
  } else if (typeName.equalsIgnoreCase("binary")) {
    typedData.setBinaryStats(new BinaryColumnStatsData());
  } else if (typeName.equalsIgnoreCase("decimal")) {
    typedData.setDecimalStats(new DecimalColumnStatsData());
  } else if (typeName.equalsIgnoreCase("date")) {
    typedData.setDateStats(new DateColumnStatsData());
  } else {
    // Unrecognized type name: leave statsObj untouched.
    return;
  }
  statsObj.setStatsData(typedData);
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class TestHBaseStoreBitVector method decimalTableStatistics.
/**
 * Stores table-level decimal column statistics and checks that the values read back
 * from the store match what was written.
 */
@Test
public void decimalTableStatistics() throws Exception {
  createMockTable(DECIMAL_COL, DECIMAL_TYPE);

  // Table-level descriptor attached to a fresh statistics container.
  ColumnStatistics toStore = new ColumnStatistics();
  ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
  toStore.setStatsDesc(expectedDesc);

  // First pre-built decimal stats object serves as the expected payload.
  ColumnStatisticsObj expectedObj = decimalColStatsObjs.get(0);
  DecimalColumnStatsData expected = expectedObj.getStatsData().getDecimalStats();

  // Write, then read back.
  toStore.addToStatsObj(expectedObj);
  store.updateTableColumnStatistics(toStore);
  ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));

  // Descriptor round-trip.
  ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
  Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetched.getStatsDesc().getDbName());
  Assert.assertEquals(TBL, fetched.getStatsDesc().getTableName());
  Assert.assertTrue(fetched.getStatsDesc().isIsTblLevel());

  // Exactly one stats object, holding decimal stats.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the decimal stats.
  DecimalColumnStatsData actual = fetchedData.getDecimalStats();
  Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
  Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
  Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
  Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
  Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class TestHBaseStoreBitVector method mockDecimalStats.
/**
 * Builds a decimal {@link ColumnStatisticsObj} fixture whose values vary with {@code i}.
 *
 * <p>High and low values use scale 3 and carry the decimal digits of
 * {@code 3876 + 100 * i} and {@code 38 + i} as their unscaled bytes; the null count is
 * {@code 13 + i}, the NDV is {@code 923947293 + 100 * i}, and the bit vector string is fixed.
 *
 * @param i index used to vary the generated values
 * @return a stats object for {@code DECIMAL_COL} populated with decimal statistics
 */
private static ColumnStatisticsObj mockDecimalStats(int i) {
  Decimal high = new Decimal();
  high.setScale((short) 3);
  String strHigh = String.valueOf(3876 + 100 * i);
  // Explicit charset: the no-arg getBytes() depends on the platform default encoding.
  high.setUnscaled(strHigh.getBytes(java.nio.charset.StandardCharsets.UTF_8));

  Decimal low = new Decimal();
  low.setScale((short) 3);
  String strLow = String.valueOf(38 + i);
  low.setUnscaled(strLow.getBytes(java.nio.charset.StandardCharsets.UTF_8));

  long nulls = 13 + i;
  // Multiply as long so the product cannot wrap in int arithmetic before widening.
  long dVs = 923947293L + 100L * i;
  String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}";

  DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
  decimalData.setHighValue(high);
  decimalData.setLowValue(low);
  decimalData.setNumNulls(nulls);
  decimalData.setNumDVs(dVs);
  decimalData.setBitVectors(bitVectors);

  ColumnStatisticsData data = new ColumnStatisticsData();
  data.setDecimalStats(decimalData);

  ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
  colStatsObj.setColName(DECIMAL_COL);
  colStatsObj.setColType(DECIMAL_TYPE);
  colStatsObj.setStatsData(data);
  return colStatsObj;
}
use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.
the class TestHBaseStoreBitVector method decimalPartitionStatistics.
/**
 * Stores partition-level decimal column statistics for partition
 * {@code {PART_KEY, DECIMAL_VAL}} and checks that the values read back from the store
 * match what was written.
 */
@Test
public void decimalPartitionStatistics() throws Exception {
  createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);

  // Partition-level descriptor attached to a fresh statistics container.
  ColumnStatistics toStore = new ColumnStatistics();
  ColumnStatisticsDesc expectedDesc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
  toStore.setStatsDesc(expectedDesc);

  // First pre-built decimal stats object serves as the expected payload.
  ColumnStatisticsObj expectedObj = decimalColStatsObjs.get(0);
  DecimalColumnStatsData expected = expectedObj.getStatsData().getDecimalStats();

  // Write the stats for the single partition value.
  toStore.addToStatsObj(expectedObj);
  List<String> partitionValues = new ArrayList<String>();
  partitionValues.add(DECIMAL_VAL);
  store.updatePartitionColumnStatistics(toStore, partitionValues);

  // Read back by partition name and column name.
  List<String> requestedPartitions = new ArrayList<String>();
  requestedPartitions.add(expectedDesc.getPartName());
  List<String> requestedColumns = new ArrayList<String>();
  requestedColumns.add(expectedObj.getColName());
  List<ColumnStatistics> fetchedList = store.getPartitionColumnStatistics(DB, TBL, requestedPartitions, requestedColumns);

  // Exactly one result; its descriptor must round-trip and be partition level.
  Assert.assertEquals(1, fetchedList.size());
  ColumnStatistics fetched = fetchedList.get(0);
  Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetched.getStatsDesc().getLastAnalyzed());
  Assert.assertEquals(DB, fetched.getStatsDesc().getDbName());
  Assert.assertEquals(TBL, fetched.getStatsDesc().getTableName());
  Assert.assertFalse(fetched.getStatsDesc().isIsTblLevel());

  // Exactly one stats object, holding decimal stats.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the decimal stats.
  DecimalColumnStatsData actual = fetchedData.getDecimalStats();
  Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
  Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
  Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
  Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
  Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Aggregations