Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
From the class StatObjectConverter, method getPartitionColumnStatisticsObj.
public static ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj, boolean enableBitVector) {
  ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
  statsObj.setColType(mStatsObj.getColType());
  statsObj.setColName(mStatsObj.getColName());
  String colType = mStatsObj.getColType().toLowerCase();
  ColumnStatisticsData colStatsData = new ColumnStatisticsData();
  if (colType.equals("boolean")) {
    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
    boolStats.setNumFalses(mStatsObj.getNumFalses());
    boolStats.setNumTrues(mStatsObj.getNumTrues());
    boolStats.setNumNulls(mStatsObj.getNumNulls());
    colStatsData.setBooleanStats(boolStats);
  } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
    StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
    stringStats.setNumNulls(mStatsObj.getNumNulls());
    stringStats.setAvgColLen(mStatsObj.getAvgColLen());
    stringStats.setMaxColLen(mStatsObj.getMaxColLen());
    stringStats.setNumDVs(mStatsObj.getNumDVs());
    stringStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setStringStats(stringStats);
  } else if (colType.equals("binary")) {
    BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
    binaryStats.setNumNulls(mStatsObj.getNumNulls());
    binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
    binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
    colStatsData.setBinaryStats(binaryStats);
  } else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint")) {
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setNumNulls(mStatsObj.getNumNulls());
    if (mStatsObj.getLongHighValue() != null) {
      longStats.setHighValue(mStatsObj.getLongHighValue());
    }
    if (mStatsObj.getLongLowValue() != null) {
      longStats.setLowValue(mStatsObj.getLongLowValue());
    }
    longStats.setNumDVs(mStatsObj.getNumDVs());
    longStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setLongStats(longStats);
  } else if (colType.equals("double") || colType.equals("float")) {
    DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
    doubleStats.setNumNulls(mStatsObj.getNumNulls());
    if (mStatsObj.getDoubleHighValue() != null) {
      doubleStats.setHighValue(mStatsObj.getDoubleHighValue());
    }
    if (mStatsObj.getDoubleLowValue() != null) {
      doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
    }
    doubleStats.setNumDVs(mStatsObj.getNumDVs());
    doubleStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setDoubleStats(doubleStats);
  } else if (colType.startsWith("decimal")) {
    DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
    decimalStats.setNumNulls(mStatsObj.getNumNulls());
    if (mStatsObj.getDecimalHighValue() != null) {
      decimalStats.setHighValue(DecimalUtils.createThriftDecimal(mStatsObj.getDecimalHighValue()));
    }
    if (mStatsObj.getDecimalLowValue() != null) {
      decimalStats.setLowValue(DecimalUtils.createThriftDecimal(mStatsObj.getDecimalLowValue()));
    }
    decimalStats.setNumDVs(mStatsObj.getNumDVs());
    decimalStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setDecimalStats(decimalStats);
  } else if (colType.equals("date")) {
    DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
    dateStats.setNumNulls(mStatsObj.getNumNulls());
    Long highValue = mStatsObj.getLongHighValue();
    if (highValue != null) {
      dateStats.setHighValue(new Date(highValue));
    }
    Long lowValue = mStatsObj.getLongLowValue();
    if (lowValue != null) {
      dateStats.setLowValue(new Date(lowValue));
    }
    dateStats.setNumDVs(mStatsObj.getNumDVs());
    dateStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setDateStats(dateStats);
  } else if (colType.equals("timestamp")) {
    TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
    timestampStats.setNumNulls(mStatsObj.getNumNulls());
    Long highValue = mStatsObj.getLongHighValue();
    if (highValue != null) {
      timestampStats.setHighValue(new Timestamp(highValue));
    }
    Long lowValue = mStatsObj.getLongLowValue();
    if (lowValue != null) {
      timestampStats.setLowValue(new Timestamp(lowValue));
    }
    timestampStats.setNumDVs(mStatsObj.getNumDVs());
    timestampStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
    colStatsData.setTimestampStats(timestampStats);
  }
  statsObj.setStatsData(colStatsData);
  return statsObj;
}
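
The ColumnStatisticsData populated above is a Thrift union, so exactly one branch is set per column type. A minimal consumption sketch (mStats and the printing are hypothetical; the isSet/get accessors are the Thrift-generated ones already used in the converter):

ColumnStatisticsObj obj = StatObjectConverter.getPartitionColumnStatisticsObj(mStats, /* enableBitVector */ false);
ColumnStatisticsData data = obj.getStatsData();
// Only the branch matching the column type is set; check isSetX before reading.
if (data.isSetLongStats()) {
  long ndv = data.getLongStats().getNumDVs();
  long nulls = data.getLongStats().getNumNulls();
  System.out.println(obj.getColName() + ": ndv=" + ndv + ", nulls=" + nulls);
}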
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
From the class SharedCache, method populateTableInCache.
public boolean populateTableInCache(Table table, TableCacheObjects cacheObjects) {
  String catName = StringUtils.normalizeIdentifier(table.getCatName());
  String dbName = StringUtils.normalizeIdentifier(table.getDbName());
  String tableName = StringUtils.normalizeIdentifier(table.getTableName());
  SQLAllTableConstraints constraints = cacheObjects.getTableConstraints();
  // 1. Don't add tables that were deleted while we were preparing the list for prewarm
  if (tablesDeletedDuringPrewarm.contains(CacheUtils.buildTableKey(catName, dbName, tableName))) {
    return false;
  }
  TableWrapper tblWrapper = createTableWrapper(catName, dbName, tableName, table);
  if (!table.isSetPartitionKeys() && (cacheObjects.getTableColStats() != null)) {
    // Unpartitioned table: cache the table-level column stats. (The exact
    // TableWrapper call here is an assumption; it mirrors updatePartitionColStats below.)
    if (!tblWrapper.updateTableColStats(cacheObjects.getTableColStats().getStatsObj())) {
      return false;
    }
  } else {
    if (cacheObjects.getPartitions() != null) {
      // If the partitions were not added due to memory limit, return false
      if (!tblWrapper.cachePartitions(cacheObjects.getPartitions(), this, true)) {
        return false;
      }
    }
    if (cacheObjects.getPartitionColStats() != null) {
      for (ColumnStatistics cs : cacheObjects.getPartitionColStats()) {
        List<String> partVal;
        try {
          partVal = Warehouse.makeValsFromName(cs.getStatsDesc().getPartName(), null);
          List<ColumnStatisticsObj> colStats = cs.getStatsObj();
          if (!tblWrapper.updatePartitionColStats(partVal, colStats)) {
            return false;
          }
        } catch (MetaException e) {
          LOG.debug("Unable to cache partition column stats for table: " + tableName, e);
        }
      }
    }
    tblWrapper.cacheAggrPartitionColStats(cacheObjects.getAggrStatsAllPartitions(), cacheObjects.getAggrStatsAllButDefaultPartition());
  }
  tblWrapper.setMemberCacheUpdated(MemberName.PARTITION_CACHE, false);
  tblWrapper.setMemberCacheUpdated(MemberName.TABLE_COL_STATS_CACHE, false);
  tblWrapper.setMemberCacheUpdated(MemberName.PARTITION_COL_STATS_CACHE, false);
  tblWrapper.setMemberCacheUpdated(MemberName.AGGR_COL_STATS_CACHE, false);
  if (tblWrapper.cacheConstraints(constraints, true)) {
    tblWrapper.setMemberCacheUpdated(MemberName.PRIMARY_KEY_CACHE, false);
    tblWrapper.setMemberCacheUpdated(MemberName.FOREIGN_KEY_CACHE, false);
    tblWrapper.setMemberCacheUpdated(MemberName.NOTNULL_CONSTRAINT_CACHE, false);
    tblWrapper.setMemberCacheUpdated(MemberName.UNIQUE_CONSTRAINT_CACHE, false);
    tblWrapper.setMemberCacheUpdated(MemberName.DEFAULT_CONSTRAINT_CACHE, false);
    tblWrapper.setMemberCacheUpdated(MemberName.CHECK_CONSTRAINT_CACHE, false);
  } else {
    return false;
  }
  try {
    cacheLock.writeLock().lock();
    // 2. Skip overwriting an existing table object
    // (which is present because it was added after prewarm started)
    tableCache.putIfAbsent(CacheUtils.buildTableKey(catName, dbName, tableName), tblWrapper);
    return true;
  } finally {
    cacheLock.writeLock().unlock();
  }
}
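
The final insertion happens under cacheLock so a concurrent reader never observes a half-built TableWrapper. A hedged sketch of the matching read path (the method shown is illustrative, not necessarily the exact reader in SharedCache; it assumes cacheLock is the same read/write lock taken above):

public TableWrapper getTableWrapper(String catName, String dbName, String tableName) {
  // Read side of the lock whose write side guards populateTableInCache.
  cacheLock.readLock().lock();
  try {
    return tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName));
  } finally {
    cacheLock.readLock().unlock();
  }
}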
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
From the class MetaStoreServerUtils, method mergeColStats.
// This function merges csOld into csNew.
public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) throws InvalidObjectException {
  List<ColumnStatisticsObj> list = new ArrayList<>();
  if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) {
    // Some of the columns' stats are missing, which implies the partition schema
    // has changed. We merge columns present on both sides, overwrite stats for
    // columns absent in the metastore, and leave alone column stats missing from
    // the stats task. The last case may leave stats stale; this will be addressed later.
    LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", csNew.getStatsObj().size(), csOld.getStatsObjSize());
  }
  // Find out which columns can be merged: build a map from column name to the old stats object.
  Map<String, ColumnStatisticsObj> map = new HashMap<>();
  for (ColumnStatisticsObj obj : csOld.getStatsObj()) {
    map.put(obj.getColName(), obj);
  }
  for (int index = 0; index < csNew.getStatsObj().size(); index++) {
    ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index);
    ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName());
    if (statsObjOld != null) {
      // The stats are known to be accurate, so the column type cannot have
      // changed: both sides must carry the same stats-data variant.
      assert (statsObjNew.getStatsData().getSetField() == statsObjOld.getStatsData().getSetField());
      // statsObjOld was found, so the two can be merged.
      ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, statsObjOld);
      merger.merge(statsObjNew, statsObjOld);
    }
    // If statsObjOld is not found, we just use statsObjNew as it is accurate.
    list.add(statsObjNew);
  }
  // Columns without an old counterpart could not be merged and were taken as-is above.
  csNew.setStatsObj(list);
}
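
For a single matched column, the merger combines the two data objects field by field. A hedged sketch of plausible semantics for long stats follows (illustrative only; mergeLongStats is a hypothetical helper, and the authoritative rules live in the ColumnStatsMerger subclasses returned by ColumnStatsMergerFactory):

// Hypothetical single-column merge, newData absorbing oldData.
static void mergeLongStats(LongColumnStatsData newData, LongColumnStatsData oldData) {
  // Widen the value range to cover both sides.
  newData.setLowValue(Math.min(newData.getLowValue(), oldData.getLowValue()));
  newData.setHighValue(Math.max(newData.getHighValue(), oldData.getHighValue()));
  // Assumed: null counts accumulate and the distinct-value estimate cannot shrink.
  newData.setNumNulls(newData.getNumNulls() + oldData.getNumNulls());
  newData.setNumDVs(Math.max(newData.getNumDVs(), oldData.getNumDVs()));
}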
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
From the class MetaStoreServerUtils, method aggrPartitionStats.
public static List<ColumnStatisticsObj> aggrPartitionStats(Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> colStatsMap, final List<String> partNames, final boolean areAllPartsFound, final boolean useDensityFunctionForNDVEstimation, final double ndvTuner) throws MetaException {
  List<ColumnStatisticsObj> aggrColStatObjs = new ArrayList<>();
  int numProcessors = Runtime.getRuntime().availableProcessors();
  final ExecutorService pool = Executors.newFixedThreadPool(Math.min(colStatsMap.size(), numProcessors), new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build());
  final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList();
  LOG.debug("Aggregating column stats. Threads used: {}", Math.min(colStatsMap.size(), numProcessors));
  long start = System.currentTimeMillis();
  for (final Map.Entry<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> entry : colStatsMap.entrySet()) {
    futures.add(pool.submit(new Callable<ColumnStatisticsObj>() {
      @Override
      public ColumnStatisticsObj call() throws MetaException {
        List<ColStatsObjWithSourceInfo> colStatWithSourceInfo = entry.getValue();
        ColumnStatsAggregator aggregator = entry.getKey();
        try {
          return aggregator.aggregate(colStatWithSourceInfo, partNames, areAllPartsFound);
        } catch (MetaException e) {
          LOG.debug(e.getMessage());
          throw e;
        }
      }
    }));
  }
  pool.shutdown();
  for (Future<ColumnStatisticsObj> future : futures) {
    try {
      ColumnStatisticsObj statsObj = future.get();
      if (statsObj != null) {
        aggrColStatObjs.add(statsObj);
      }
    } catch (InterruptedException | ExecutionException e) {
      LOG.debug(e.getMessage());
      pool.shutdownNow();
      throw new MetaException(e.toString());
    }
  }
  LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}", (System.currentTimeMillis() - start) / 1000.0, Math.min(colStatsMap.size(), numProcessors));
  return aggrColStatObjs;
}
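
A hedged usage sketch of the method above (buildPerColumnMap is a hypothetical helper standing in for the per-column grouping a caller must perform; the parameter order matches the signature shown):

// Hypothetical caller: one ColumnStatsAggregator per column, with that column's
// stats from every partition attached as ColStatsObjWithSourceInfo entries.
Map<ColumnStatsAggregator, List<ColStatsObjWithSourceInfo>> byColumn = buildPerColumnMap();
List<String> partNames = Arrays.asList("ds=2021-01-01", "ds=2021-01-02");
List<ColumnStatisticsObj> merged = MetaStoreServerUtils.aggrPartitionStats(
    byColumn, partNames,
    /* areAllPartsFound */ true,
    /* useDensityFunctionForNDVEstimation */ false,
    /* ndvTuner */ 0.0);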
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.
From the class DateColumnStatsMergerTest, method testMergeNullMinMaxValues.
@Test
public void testMergeNullMinMaxValues() {
  ColumnStatisticsObj old = new ColumnStatisticsObj();
  createData(old, null, null);
  merger.merge(old, old);
  Assert.assertNull(old.getStatsData().getDateStats().getLowValue());
  Assert.assertNull(old.getStatsData().getDateStats().getHighValue());
}
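
A complementary sketch in the same style (hypothetical: it assumes createData(obj, lowValue, highValue) fills the date stats in that argument order and that the merger keeps the widest low/high range):

@Test
public void testMergeNonNullMinMaxValues() {
  ColumnStatisticsObj aggr = new ColumnStatisticsObj();
  createData(aggr, new Date(2), new Date(3));
  ColumnStatisticsObj newObj = new ColumnStatisticsObj();
  createData(newObj, new Date(1), new Date(4));
  merger.merge(aggr, newObj);
  // The merged range should cover both inputs.
  Assert.assertEquals(new Date(1), aggr.getStatsData().getDateStats().getLowValue());
  Assert.assertEquals(new Date(4), aggr.getStatsData().getDateStats().getHighValue());
}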