use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in project hive by apache.
the class TestHiveMetaStoreStatsMerge method testStatsMerge.
/**
 * Exercises merging of string column statistics through the metastore client.
 *
 * <p>The test creates the database and table (checking that the dummy listener recorded one
 * event for each), stores an initial set of string statistics for column {@code a}, then sends
 * a second set with {@code needMerge} enabled and checks the values read back.
 *
 * @throws Exception if any metastore client call fails
 */
public void testStatsMerge() throws Exception {
  int listSize = 0;
  List<ListenerEvent> notifyList = DummyListener.notifyList;
  // Expected value goes first so that a failure message reports the values the right way round.
  assertEquals(listSize, notifyList.size());

  msc.createDatabase(db);
  listSize++;
  assertEquals(listSize, notifyList.size());
  CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) (notifyList.get(listSize - 1));
  // Checked through the test framework rather than the `assert` keyword, which is skipped
  // whenever the JVM runs with assertions disabled.
  assertEquals(true, dbEvent.getStatus());

  msc.createTable(table);
  listSize++;
  assertEquals(listSize, notifyList.size());
  CreateTableEvent tblEvent = (CreateTableEvent) (notifyList.get(listSize - 1));
  assertEquals(true, tblEvent.getStatus());
  table = msc.getTable(dbName, tblName);

  // First round: initial string statistics for column "a".
  ColumnStatistics cs = new ColumnStatistics();
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
  cs.setStatsDesc(desc);
  ColumnStatisticsObj obj = new ColumnStatisticsObj();
  obj.setColName("a");
  obj.setColType("string");
  ColumnStatisticsData data = new ColumnStatisticsData();
  StringColumnStatsData scsd = new StringColumnStatsData();
  scsd.setAvgColLen(10);
  scsd.setMaxColLen(20);
  scsd.setNumNulls(30);
  scsd.setNumDVs(123);
  scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
  data.setStringStats(scsd);
  obj.setStatsData(data);
  cs.addToStatsObj(obj);
  List<ColumnStatistics> colStats = new ArrayList<>();
  colStats.add(cs);
  SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
  msc.setPartitionColumnStatistics(request);

  List<String> colNames = new ArrayList<>();
  colNames.add("a");
  StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
  assertEquals(123, getScsd.getNumDVs());

  // Second round: `data` and `obj` are the same instances that colStats already reaches
  // through `cs`, so replacing the string stats held by `data` is what changes the payload
  // of the second request. No new ColumnStatistics object is needed for that.
  scsd = new StringColumnStatsData();
  scsd.setAvgColLen(20);
  scsd.setMaxColLen(5);
  scsd.setNumNulls(70);
  scsd.setNumDVs(456);
  scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
  data.setStringStats(scsd);
  obj.setStatsData(data);
  request = new SetPartitionsStatsRequest(colStats);
  request.setNeedMerge(true);
  msc.setPartitionColumnStatistics(request);

  getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
  // The expected values match max(10, 20), max(20, 5) and 30 + 70 of the two rounds.
  assertEquals(20.0, getScsd.getAvgColLen());
  assertEquals(20, getScsd.getMaxColLen());
  assertEquals(100, getScsd.getNumNulls());
  // since metastore is ObjectStore, we use the max function to merge.
  assertEquals(456, getScsd.getNumDVs());
}
use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in project hive by apache.
the class ColumnStatsTask method persistColumnStats.
/**
 * Builds column statistics from the packed result rows and stores them in the metastore.
 *
 * <p>The merge flag on the request is switched on only when the work's column-stats
 * descriptor is present and reports a bit-vector count greater than zero; otherwise the flag
 * is left untouched.
 *
 * @param db handle used both to build the statistics and to persist them
 * @return always {@code 0}
 */
private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
  // Note, this function is shared for both table and partition column stats.
  final List<ColumnStatistics> packedStats = constructColumnStatsFromPackedRows(db);
  final SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest(packedStats);

  final boolean hasBitVectors =
      work.getColStats() != null && work.getColStats().getNumBitVector() > 0;
  if (hasBitVectors) {
    statsRequest.setNeedMerge(true);
  }

  db.setPartitionColumnStatistics(statsRequest);
  return 0;
}
use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in project hive by apache.
the class ColStatsProcessor method persistColumnStats.
/**
 * Builds column statistics from the packed result rows in batches and sends every non-empty
 * batch to the metastore.
 *
 * <p>When the table is transactional, each request also carries the allocated write id and,
 * if one was obtained, the table's valid write-id list. Batches are produced until
 * {@code constructColumnStatsFromPackedRows} reports completion; the time spent building and
 * updating each batch is logged.
 *
 * @param db  handle used to persist the statistics
 * @param tbl table whose column statistics are being persisted
 * @return always {@code 0}
 */
public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaException, IOException {
  // Only transactional tables get a transaction manager; everything else stays null / -1.
  HiveTxnManager txnManager = null;
  if (AcidUtils.isTransactionalTable(tbl)) {
    txnManager = SessionState.get().getTxnMgr();
  }
  long allocatedWriteId = -1;
  ValidWriteIdList validIds = null;
  if (txnManager != null) {
    allocatedWriteId = txnManager.getAllocatedTableWriteId(tbl.getDbName(), tbl.getTableName());
    validIds = AcidUtils.getTableValidWriteIdList(conf, AcidUtils.getFullTableName(tbl.getDbName(), tbl.getTableName()));
  }

  final long batchLimit = conf.getLongVar(HiveConf.ConfVars.HIVE_STATS_MAX_NUM_STATS);
  boolean finished;
  do {
    List<ColumnStatistics> batch = new ArrayList<>();
    long buildStart = System.currentTimeMillis();
    finished = constructColumnStatsFromPackedRows(tbl, batch, batchLimit);
    float buildSeconds = (System.currentTimeMillis() - buildStart) / 1000F;
    LOG.info("Time taken to build " + batch.size() + " stats desc : " + buildSeconds + " seconds.");

    // Note, this function is shared for both table and partition column stats.
    if (!batch.isEmpty()) {
      SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest(batch, Constants.HIVE_ENGINE);
      statsRequest.setNeedMerge(colStatDesc.isNeedMerge());
      if (txnManager != null) {
        statsRequest.setWriteId(allocatedWriteId);
        if (validIds != null) {
          statsRequest.setValidWriteIdList(validIds.toString());
        }
      }

      long updateStart = System.currentTimeMillis();
      db.setPartitionColumnStatistics(statsRequest);
      float updateSeconds = (System.currentTimeMillis() - updateStart) / 1000F;
      LOG.info("Time taken to update " + batch.size() + " stats : " + updateSeconds + " seconds.");
    }
  } while (!finished);
  return 0;
}
use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in project hive by apache.
the class TestStats method createMetadata.
/**
 * Creates the catalog (if needed), database, table and partitions for a test, then stores
 * column statistics for the table or for each of its partitions.
 *
 * @param catName   catalog name; no catalog is created when it equals the default catalog
 *                  name or {@code NO_CAT}
 * @param dbName    database name; the existing default database is fetched instead of
 *                  creating one when this equals the default database name
 * @param tableName name of the table to create
 * @param partKey   partition column name, or {@code null} for an unpartitioned table
 * @param partVals  partition values; must be non-empty when {@code partKey} is given
 * @param colMap    columns to add to the table and to build statistics for
 * @return the {@code partKey=partVal} names of the partitions, empty for an unpartitioned table
 * @throws TException if a metastore client call fails
 */
private List<String> createMetadata(String catName, String dbName, String tableName, String partKey, List<String> partVals, Map<String, Column> colMap) throws TException {
  final boolean builtInCatalog = DEFAULT_CATALOG_NAME.equals(catName) || NO_CAT.equals(catName);
  if (!builtInCatalog) {
    Catalog catalog = new CatalogBuilder()
        .setName(catName)
        .setLocation(MetaStoreTestUtils.getTestWarehouseDir(catName))
        .build();
    client.createCatalog(catalog);
  }

  Database database;
  if (DEFAULT_DATABASE_NAME.equals(dbName)) {
    database = client.getDatabase(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME);
  } else {
    DatabaseBuilder databaseBuilder = new DatabaseBuilder().setName(dbName);
    if (!NO_CAT.equals(catName)) {
      databaseBuilder.setCatalogName(catName);
    }
    database = databaseBuilder.create(client, conf);
  }

  final boolean partitioned = partKey != null;
  TableBuilder tableBuilder = new TableBuilder().inDb(database).setTableName(tableName);
  for (Column column : colMap.values()) {
    tableBuilder.addCol(column.colName, column.colType);
  }
  if (partitioned) {
    assert partVals != null && !partVals.isEmpty() : "Must provide partition values for partitioned table";
    tableBuilder.addPartCol(partKey, ColumnType.STRING_TYPE_NAME);
  }
  Table createdTable = tableBuilder.create(client, conf);

  // All partitions are added before any statistics are built.
  if (partitioned) {
    for (String value : partVals) {
      new PartitionBuilder().inTable(createdTable).addValue(value).addToTable(client, conf);
    }
  }

  SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest();
  List<String> partitionNames = new ArrayList<>();
  if (partitioned) {
    for (String value : partVals) {
      String partitionName = partKey + "=" + value;
      statsRequest.addToColStats(buildStatsForOneTableOrPartition(catName, dbName, tableName, partitionName, colMap.values()));
      partitionNames.add(partitionName);
    }
  } else {
    statsRequest.addToColStats(buildStatsForOneTableOrPartition(catName, dbName, tableName, null, colMap.values()));
  }
  statsRequest.setEngine(ENGINE);
  client.setPartitionColumnStatistics(statsRequest);
  return partitionNames;
}
use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in project hive by apache.
the class TestPartitionStat method updatePartColStat.
/**
 * Sends column statistics for every entry of {@code partitionStats} in a single request.
 *
 * @param partitionStats statistics data keyed by the partition's value list
 * @return the partition names built from {@code PART_COL_NAME} and each key, in the map's
 *         iteration order
 * @throws Exception if building the statistics or the metastore client call fails
 */
private List<String> updatePartColStat(Map<List<String>, ColumnStatisticsData> partitionStats) throws Exception {
  SetPartitionsStatsRequest rqst = new SetPartitionsStatsRequest();
  rqst.setEngine(HIVE_ENGINE);
  List<String> pNameList = new ArrayList<>();
  // A typed entry replaces the raw Map.Entry, so the key and value need no unchecked casts.
  for (Map.Entry<List<String>, ColumnStatisticsData> entry : partitionStats.entrySet()) {
    List<String> partVals = entry.getKey();
    ColumnStatistics colStats = createPartColStats(partVals, entry.getValue());
    String pName = FileUtils.makePartName(Collections.singletonList(PART_COL_NAME), partVals);
    rqst.addToColStats(colStats);
    pNameList.add(pName);
  }
  client.setPartitionColumnStatistics(rqst);
  return pNameList;
}
Aggregations