Search in sources :

Example 1 with ColumnStatsMerger

use of org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger in project hive by apache.

In class MetaStoreUtils, method mergeColStats.

/**
 * Merges the column statistics in {@code csOld} into {@code csNew}.
 *
 * <p>Columns are matched by name. For every stats object in {@code csNew} that has a
 * same-named counterpart in {@code csOld}, a {@link ColumnStatsMerger} obtained from
 * {@code ColumnStatsMergerFactory} is asked to merge the old object into the new one.
 * Stats objects in {@code csNew} without a counterpart are kept unchanged. Stats objects
 * that exist only in {@code csOld} are not carried over. Finally the stats list of
 * {@code csNew} is replaced by a fresh list holding the same objects in the same order.
 *
 * @param csNew the newly computed statistics; modified in place and receives the result
 * @param csOld the previously stored statistics; only read by this method
 * @throws InvalidObjectException declared by the signature; nothing in this body throws it
 *         directly (presumably it originates from the merger -- confirm against its API)
 */
public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) throws InvalidObjectException {
    if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) {
        // The two sides do not describe the same number of columns, which implies
        // the partition schema has changed. Columns present on both sides are
        // merged, columns only on the new side overwrite, and columns only on the
        // old side are left alone -- the latter may leave stale stats behind.
        LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", csNew.getStatsObj().size(), csOld.getStatsObjSize());
    }

    // Index the old stats objects by column name for lookup while walking the new ones.
    Map<String, ColumnStatisticsObj> oldByColName = new HashMap<>();
    for (ColumnStatisticsObj oldObj : csOld.getStatsObj()) {
        oldByColName.put(oldObj.getColName(), oldObj);
    }

    List<ColumnStatisticsObj> merged = new ArrayList<>();
    for (ColumnStatisticsObj incoming : csNew.getStatsObj()) {
        ColumnStatisticsObj existing = oldByColName.get(incoming.getColName());
        if (existing == null) {
            // Nothing to merge with: the new stats object is used as it is.
            merged.add(incoming);
            continue;
        }
        // Sanity check (active only when assertions are enabled): both sides are
        // expected to carry the same kind of stats data.
        assert incoming.getStatsData().getSetField() == existing.getStatsData().getSetField();
        // The merger folds the old stats into the new stats object.
        ColumnStatsMergerFactory.getColumnStatsMerger(incoming, existing).merge(incoming, existing);
        merged.add(incoming);
    }

    csNew.setStatsObj(merged);
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnStatsMerger(org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger)

Aggregations

ArrayList (java.util.ArrayList): 1, HashMap (java.util.HashMap): 1, ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 1, ColumnStatsMerger (org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger): 1