use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class TestHBaseStore method stringTableStatistics.
/**
 * Stores table-level string column statistics and verifies that the same descriptor and
 * values are read back from the store.
 */
@Test
public void stringTableStatistics() throws Exception {
  createMockTable(STRING_TYPE);

  // Assemble table-level statistics for STRING_COL: a default table-level descriptor plus
  // the first of the pre-created string statistics objects.
  ColumnStatistics toStore = new ColumnStatistics();
  ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
  toStore.setStatsDesc(expectedDesc);
  ColumnStatisticsObj expectedObj = stringColStatsObjs.get(0);
  StringColumnStatsData expected = expectedObj.getStatsData().getStringStats();
  toStore.addToStatsObj(expectedObj);

  // Write the statistics, then read them back for the same column.
  store.updateTableColumnStatistics(toStore);
  ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL));

  // The descriptor must identify the same table and still be flagged as table level.
  ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
  Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetchedDesc.getDbName());
  Assert.assertEquals(TBL, fetchedDesc.getTableName());
  Assert.assertTrue(fetchedDesc.isIsTblLevel());

  // Exactly one statistics object is expected, and it must carry string statistics.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the string statistics.
  StringColumnStatsData actual = fetchedData.getStringStats();
  Assert.assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
  Assert.assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
  Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
  Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class TestHBaseStore method doubleTableStatistics.
/**
 * Stores table-level double column statistics and verifies that the same descriptor and
 * values are read back from the store.
 */
@Test
public void doubleTableStatistics() throws Exception {
  createMockTable(DOUBLE_TYPE);

  // Assemble table-level statistics for DOUBLE_COL: a default table-level descriptor plus
  // the first of the pre-created double statistics objects.
  ColumnStatistics toStore = new ColumnStatistics();
  ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
  toStore.setStatsDesc(expectedDesc);
  ColumnStatisticsObj expectedObj = doubleColStatsObjs.get(0);
  DoubleColumnStatsData expected = expectedObj.getStatsData().getDoubleStats();
  toStore.addToStatsObj(expectedObj);

  // Write the statistics, then read them back for the same column.
  store.updateTableColumnStatistics(toStore);
  ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));

  // The descriptor must identify the same table and still be flagged as table level.
  ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
  Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetchedDesc.getDbName());
  Assert.assertEquals(TBL, fetchedDesc.getTableName());
  Assert.assertTrue(fetchedDesc.isIsTblLevel());

  // Exactly one statistics object is expected, and it must carry double statistics.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the double statistics; bounds use a 0.01 tolerance.
  DoubleColumnStatsData actual = fetchedData.getDoubleStats();
  Assert.assertEquals(expected.getHighValue(), actual.getHighValue(), 0.01);
  Assert.assertEquals(expected.getLowValue(), actual.getLowValue(), 0.01);
  Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
  Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class SessionHiveMetaStoreClient method setPartitionColumnStatistics.
/**
 * {@inheritDoc}
 *
 * <p>A request carrying exactly one {@code ColumnStatistics} entry whose lower-cased
 * database/table name is resolved by {@code getTempTable} is applied through
 * {@code updateTempTableColumnStats}; every other request is delegated to the superclass.
 */
@Override
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException {
  // Only single-entry requests are candidates for the temp-table path.
  if (request.getColStatsSize() != 1) {
    return super.setPartitionColumnStatistics(request);
  }

  ColumnStatistics singleStats = request.getColStatsIterator().next();
  ColumnStatisticsDesc statsDesc = singleStats.getStatsDesc();
  String db = statsDesc.getDbName().toLowerCase();
  String table = statsDesc.getTableName().toLowerCase();

  if (getTempTable(db, table) == null) {
    // Not a temp table known to this client: fall back to the regular implementation.
    return super.setPartitionColumnStatistics(request);
  }
  return updateTempTableColumnStats(db, table, singleStats);
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class ColStatsProcessor method constructColumnStatsFromPackedRows.
/**
 * Reads every remaining packed row from {@code ftOp} and converts it into a
 * {@link ColumnStatistics} entry.
 *
 * <p>Each row must be a struct. Its leading fields hold the per-column statistics; for
 * partition-level collection the trailing fields hold the partition column values, which are
 * used to build the partition name. A row for which no column produced a statistics object
 * is skipped.
 *
 * <p>The fetch context of {@code ftOp} is cleared when this method exits, whether it returns
 * normally or an exception is thrown while rows are being processed.
 *
 * @param tbl1 table whose partition columns describe the trailing fields of each row
 * @return one entry per row that produced at least one statistics object; possibly empty
 * @throws HiveException if a row is not a struct, or if translating a column fails while
 *         {@code isStatsReliable} is set
 * @throws MetaException propagated from the helper calls made while building the result
 * @throws IOException propagated from the helper calls made while building the result
 */
private List<ColumnStatistics> constructColumnStatsFromPackedRows(Table tbl1) throws HiveException, MetaException, IOException {
  String partName = null;
  List<String> colName = colStatDesc.getColName();
  List<String> colType = colStatDesc.getColType();
  boolean isTblLevel = colStatDesc.isTblLevel();
  List<ColumnStatistics> stats = new ArrayList<ColumnStatistics>();
  try {
    InspectableObject packedRow;
    while ((packedRow = ftOp.getNextRow()) != null) {
      if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) {
        throw new HiveException("Unexpected object type encountered while unpacking row");
      }
      List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();
      StructObjectInspector soi = (StructObjectInspector) packedRow.oi;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      List<Object> list = soi.getStructFieldsDataAsList(packedRow.o);
      List<FieldSchema> partColSchema = tbl1.getPartCols();
      // Partition columns are appended at end, we only care about stats column
      int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size();
      assert list != null;
      for (int i = 0; i < numOfStatCols; i++) {
        StructField structField = fields.get(i);
        String columnName = colName.get(i);
        String columnType = colType.get(i);
        Object values = list.get(i);
        try {
          ColumnStatisticsObj statObj = ColumnStatisticsObjTranslator.readHiveStruct(columnName, columnType, structField, values);
          statsObjs.add(statObj);
        } catch (Exception e) {
          // A failed column is fatal only when reliable stats were requested; otherwise the
          // column is dropped from this row's statistics and processing continues.
          if (isStatsReliable) {
            throw new HiveException("Statistics collection failed while (hive.stats.reliable)", e);
          } else {
            LOG.debug("Because {} is infinite or NaN, we skip stats.", columnName, e);
          }
        }
      }
      if (!statsObjs.isEmpty()) {
        if (!isTblLevel) {
          List<String> partVals = new ArrayList<String>();
          // Iterate over partition columns to figure out partition name
          for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) {
            Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()).getPrimitiveJavaObject(list.get(i));
            partVals.add(// could be null for default partition
                partVal == null ? this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString());
          }
          partName = Warehouse.makePartName(partColSchema, partVals);
        }
        ColumnStatisticsDesc statsDesc = buildColumnStatsDesc(tbl1, partName, isTblLevel);
        ColumnStatistics colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        stats.add(colStats);
      }
    }
  } finally {
    // Clear the fetch context on every exit path; previously a failure while processing a
    // row skipped this call and left the fetch operator's context in place.
    ftOp.clearFetchContext();
  }
  return stats;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class MetaStoreUtils method aggrPartitionStats.
/**
 * Aggregates per-partition column statistics into one list of statistics objects.
 *
 * <p>Statistics objects are grouped by column name. The first object seen for a column
 * selects that column's aggregator, based on the kind of statistics data it carries. The
 * grouped objects are then handed to the map-based {@code aggrPartitionStats} overload.
 *
 * @param partStats per-partition statistics to aggregate
 * @param dbName database name recorded with every grouped statistics object
 * @param tableName table name recorded with every grouped statistics object
 * @param partNames partition names, forwarded to the map-based overload
 * @param colNames column names, used only in the debug message when nothing is found
 * @param areAllPartsFound forwarded to the map-based overload
 * @param useDensityFunctionForNDVEstimation forwarded to the aggregator factory and overload
 * @param ndvTuner forwarded to the aggregator factory and overload
 * @return the aggregated statistics, or an empty list when no statistics objects were found
 * @throws MetaException propagated from the aggregation calls
 */
public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats, String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
  Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap = new HashMap<>();
  // Column name -> aggregator chosen for that column.
  Map<String, ColumnStatsAggregator> aliasToAggregator = new HashMap<>();

  for (ColumnStatistics partitionStats : partStats) {
    for (ColumnStatisticsObj statsObj : partitionStats.getStatsObj()) {
      String partName = partitionStats.getStatsDesc().getPartName();
      String column = statsObj.getColName();

      ColumnStatsAggregator aggregator = aliasToAggregator.get(column);
      if (aggregator == null) {
        // First object for this column: pick an aggregator and start its group.
        aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(statsObj.getStatsData().getSetField(), useDensityFunctionForNDVEstimation, ndvTuner);
        aliasToAggregator.put(column, aggregator);
        colStatsMap.put(aggregator, new ArrayList<ColStatsObjWithSourceInfo>());
      }
      colStatsMap.get(aggregator).add(new ColStatsObjWithSourceInfo(statsObj, dbName, tableName, partName));
    }
  }

  if (colStatsMap.isEmpty()) {
    LOG.debug("No stats data found for: dbName= {}, tblName= {}, partNames= {}, colNames= {}", dbName, tableName, partNames, colNames);
    return new ArrayList<ColumnStatisticsObj>();
  }
  return aggrPartitionStats(colStatsMap, partNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
}
Aggregations