Use of org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger in the Apache Hive project.
Class MetaStoreUtils, method mergeColStats.
/**
 * Merges the column statistics in {@code csOld} into {@code csNew}.
 *
 * <p>For every column present in {@code csNew}: if {@code csOld} holds stats for a
 * column with the same name, the two are combined by the {@link ColumnStatsMerger}
 * selected for that pair; otherwise the new stats object is kept unchanged. Columns
 * that exist only in {@code csOld} are not carried over. On return, the stats-object
 * list of {@code csNew} has been replaced by a fresh list holding the resulting
 * objects in their original order.
 *
 * @param csNew freshly computed statistics; receives the merged result
 * @param csOld previously stored statistics to merge from
 * @throws InvalidObjectException if a column exists in both inputs but the two
 *         stats objects carry different kinds of stats data, so they cannot be merged
 */
public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) throws InvalidObjectException {
    List<ColumnStatisticsObj> newStats = csNew.getStatsObj();
    if (newStats.size() != csOld.getStatsObjSize()) {
        // Some of the columns' stats are missing.
        // This implies the partition schema has changed. We will merge columns
        // present in both, overwrite stats for columns absent in metastore and
        // leave alone column stats missing from the stats task. This last case may
        // leave stats in a stale state. This will be addressed later.
        LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", newStats.size(), csOld.getStatsObjSize());
    }

    // Index the old stats by column name so each new column can find its counterpart.
    Map<String, ColumnStatisticsObj> oldStatsByColName = new HashMap<>();
    for (ColumnStatisticsObj statsObjOld : csOld.getStatsObj()) {
        oldStatsByColName.put(statsObjOld.getColName(), statsObjOld);
    }

    List<ColumnStatisticsObj> merged = new ArrayList<>(newStats.size());
    for (ColumnStatisticsObj statsObjNew : newStats) {
        ColumnStatisticsObj statsObjOld = oldStatsByColName.get(statsObjNew.getColName());
        if (statsObjOld != null) {
            // Both sides must carry the same kind of stats data before they can be merged.
            // This is checked explicitly rather than with `assert`, because assertions are
            // disabled by default at runtime and a mismatch would otherwise reach the merger.
            if (statsObjNew.getStatsData().getSetField() != statsObjOld.getStatsData().getSetField()) {
                throw new InvalidObjectException("Cannot merge column stats for column "
                    + statsObjNew.getColName() + ": new stats type "
                    + statsObjNew.getStatsData().getSetField() + " differs from old stats type "
                    + statsObjOld.getStatsData().getSetField());
            }
            // The old stats were found, so merge them; the merge result is not returned,
            // so the merger is expected to update statsObjNew in place.
            ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, statsObjOld);
            merger.merge(statsObjNew, statsObjOld);
        }
        // If no old stats exist for this column, statsObjNew is used as it is.
        merged.add(statsObjNew);
    }
    csNew.setStatsObj(merged);
}
Aggregations