Use of org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator in the Apache Hive project.
Class MetaStoreServerUtils, method aggrPartitionStats.
// Aggregates a list of per-partition column statistics into one list of column stats objects.
//
// Every stats object found in partStats is bucketed by its column name. The first time a
// column name is seen, an aggregator is obtained from ColumnStatsAggregatorFactory for the
// stats-data field set on that object; all later objects with the same column name go into
// that aggregator's bucket. Each bucketed object is wrapped together with catName, dbName,
// tableName and the name of the partition it came from.
//
// Returns an empty list when no stats object was found at all; otherwise returns whatever the
// aggrPartitionStats overload taking the bucket map produces. colNames is only used in the
// debug message.
public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats, String catName, String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
  // Column name -> the aggregator chosen for that column.
  Map<String, ColumnStatsAggregator> aggregatorByColumn = new HashMap<>();
  // Aggregator -> every stats object (with its source info) collected for its column.
  Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> statsByAggregator = new HashMap<>();

  for (ColumnStatistics partitionStats : partStats) {
    for (ColumnStatisticsObj columnStats : partitionStats.getStatsObj()) {
      // Read per stats object (not once per partition) so a partition without any stats
      // objects never touches its stats descriptor.
      String sourcePartition = partitionStats.getStatsDesc().getPartName();
      String columnName = columnStats.getColName();

      ColumnStatsAggregator aggregator = aggregatorByColumn.get(columnName);
      if (aggregator == null) {
        // First sighting of this column: pick the aggregator and open its bucket.
        aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(
            columnStats.getStatsData().getSetField(), useDensityFunctionForNDVEstimation, ndvTuner);
        aggregatorByColumn.put(columnName, aggregator);
        statsByAggregator.put(aggregator, new ArrayList<>());
      }

      List<ColStatsObjWithSourceInfo> bucket = statsByAggregator.get(aggregator);
      bucket.add(new ColStatsObjWithSourceInfo(columnStats, catName, dbName, tableName, sourcePartition));
    }
  }

  if (statsByAggregator.isEmpty()) {
    LOG.debug("No stats data found for: tblName= {}, partNames= {}, colNames= {}", TableName.getQualified(catName, dbName, tableName), partNames, colNames);
    return Collections.emptyList();
  }

  return aggrPartitionStats(statsByAggregator, partNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
}
Aggregations