Search in sources :

Example 6 with ColumnStatsAggregator

Use of org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator in the Apache Hive project.

From the class MetaStoreServerUtils, method aggrPartitionStats.

/**
 * Aggregates a list of per-partition column statistics into a single list of column statistics.
 *
 * <p>Every {@link ColumnStatisticsObj} found in {@code partStats} is grouped by its column name.
 * The first time a column name is seen, an aggregator is obtained from
 * {@link ColumnStatsAggregatorFactory} using the set field of that object's stats data; all later
 * objects with the same column name are collected under that same aggregator. The grouped result
 * is then handed to the map-based {@code aggrPartitionStats} overload.
 *
 * @param partStats column statistics to aggregate, one entry per source
 * @param catName catalog name recorded with each collected stats object
 * @param dbName database name recorded with each collected stats object
 * @param tableName table name recorded with each collected stats object
 * @param partNames partition names; used in the debug message and passed on to the overload
 * @param colNames column names; only used in the debug message
 * @param areAllPartsFound passed through unchanged to the overload
 * @param useDensityFunctionForNDVEstimation passed to the aggregator factory and to the overload
 * @param ndvTuner passed to the aggregator factory and to the overload
 * @return the aggregated statistics, or an empty list when no stats objects were found
 * @throws MetaException declared by this method; not thrown directly by the code shown here
 */
public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats, String catName, String dbName, String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
    // One aggregator per column name, created lazily on first sight of the column.
    Map<String, ColumnStatsAggregator> aggregatorByColumn = new HashMap<>();
    // Stats objects (with their origin) collected under the aggregator that will merge them.
    Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> statsByAggregator = new HashMap<>();

    for (ColumnStatistics partitionStats : partStats) {
        for (ColumnStatisticsObj columnStats : partitionStats.getStatsObj()) {
            String partName = partitionStats.getStatsDesc().getPartName();
            String columnName = columnStats.getColName();

            ColumnStatsAggregator aggregator = aggregatorByColumn.get(columnName);
            if (aggregator == null) {
                aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(columnStats.getStatsData().getSetField(), useDensityFunctionForNDVEstimation, ndvTuner);
                aggregatorByColumn.put(columnName, aggregator);
                statsByAggregator.put(aggregator, new ArrayList<>());
            }

            ColStatsObjWithSourceInfo withSource = new ColStatsObjWithSourceInfo(columnStats, catName, dbName, tableName, partName);
            statsByAggregator.get(aggregator).add(withSource);
        }
    }

    if (statsByAggregator.isEmpty()) {
        LOG.debug("No stats data found for: tblName= {}, partNames= {}, colNames= {}", TableName.getQualified(catName, dbName, tableName), partNames, colNames);
        return Collections.emptyList();
    }

    return aggrPartitionStats(statsByAggregator, partNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatsAggregator(org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator) HashMap(java.util.HashMap) MachineList(org.apache.hadoop.util.MachineList) List(java.util.List) ArrayList(java.util.ArrayList)

Aggregations

ArrayList (java.util.ArrayList): 6 · List (java.util.List): 6 · ColumnStatsAggregator (org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator): 6 · HashMap (java.util.HashMap): 5 · ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 5 · MachineList (org.apache.hadoop.util.MachineList): 4 · ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 2 · LinkedList (java.util.LinkedList): 2 · Callable (java.util.concurrent.Callable): 2 · ExecutionException (java.util.concurrent.ExecutionException): 2 · ExecutorService (java.util.concurrent.ExecutorService): 2 · Future (java.util.concurrent.Future): 2 · ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 2 · MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 2 · Map (java.util.Map): 1 · SortedMap (java.util.SortedMap): 1 · TreeMap (java.util.TreeMap): 1 · ColStatsObjWithSourceInfo (org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo): 1 · ColStatsObjWithSourceInfo (org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo): 1