use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class MetaStoreDirectSql method getTableStats.
/**
 * Retrieve the column statistics for the specified columns of the table. NULL
 * is returned if the columns are not provided, and also when the query yields
 * no rows.
 * @param catName the catalog name of the table
 * @param dbName the database name of the table
 * @param tableName the table name
 * @param colNames the list of the column names
 * @param engine engine making the request
 * @param enableBitVector forwarded to getStatsList when building the select list
 *        (presumably controls whether bit-vector columns are selected; confirm there)
 * @return the column statistics for the specified columns, or null
 * @throws MetaException
 */
public ColumnStatistics getTableStats(final String catName, final String dbName, final String tableName, List<String> colNames, String engine, boolean enableBitVector) throws MetaException {
  // Nothing to look up without column names.
  if (colNames == null || colNames.isEmpty()) {
    return null;
  }

  final boolean doTrace = LOG.isDebugEnabled();
  // Fixed part of the statement; the column-name placeholders are appended per batch.
  final String selectPrefix = "select " + getStatsList(enableBitVector)
      + " from " + TAB_COL_STATS
      + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? "
      + " and \"ENGINE\" = ? and \"COLUMN_NAME\" in (";

  Batchable<String, Object[]> fetcher = new Batchable<String, Object[]>() {
    @Override
    public List<Object[]> run(List<String> input) throws MetaException {
      final int columnCount = input.size();
      final String queryText = selectPrefix + makeParams(columnCount) + ")";

      // The first four bind values are the fixed filters; the column names follow.
      final Object[] bindValues = new Object[columnCount + 4];
      bindValues[0] = catName;
      bindValues[1] = dbName;
      bindValues[2] = tableName;
      bindValues[3] = engine;
      int slot = 4;
      for (String columnName : input) {
        bindValues[slot++] = columnName;
      }

      long startNanos = 0;
      if (doTrace) {
        startNanos = System.nanoTime();
      }
      Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
      try {
        Object rawResult = executeWithArray(query, bindValues, queryText);
        long endNanos = doTrace ? System.nanoTime() : 0;
        MetastoreDirectSqlUtils.timingTrace(doTrace, selectPrefix + "...)", startNanos, endNanos);
        return (rawResult == null) ? null : MetastoreDirectSqlUtils.ensureList(rawResult);
      } finally {
        // Register the query with the Batchable so closeAllQueries() can handle it later.
        addQueryAfterUse(query);
      }
    }
  };

  List<Object[]> rows;
  try {
    List<Object[]> fetched = Batchable.runBatched(batchSize, colNames, fetcher);
    // Copy into a fresh list while the queries are still open
    // (presumably the results are not usable after closeAllQueries(); confirm).
    rows = (fetched == null) ? null : new ArrayList<Object[]>(fetched);
  } finally {
    fetcher.closeAllQueries();
  }

  if (rows == null || rows.isEmpty()) {
    return null;
  }

  // Table-level descriptor (first constructor argument is true).
  ColumnStatisticsDesc tableDesc = new ColumnStatisticsDesc(true, dbName, tableName);
  tableDesc.setCatName(catName);
  return makeColumnStats(rows, tableDesc, 0, engine);
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class StatObjectConverter method getPartitionColumnStatisticsDesc.
/**
 * Builds a partition-level {@code ColumnStatisticsDesc} from a stored
 * partition column statistics object.
 * <p>
 * The catalog, database, table and partition names and the last-analyzed
 * value are copied from {@code mStatsObj}; the table-level flag is set to
 * {@code false}.
 *
 * @param mStatsObj the stored partition column statistics to read from
 * @return a newly created descriptor populated from {@code mStatsObj}
 */
public static ColumnStatisticsDesc getPartitionColumnStatisticsDesc(MPartitionColumnStatistics mStatsObj) {
  final ColumnStatisticsDesc partitionDesc = new ColumnStatisticsDesc();

  // Identity of the partition the statistics belong to.
  partitionDesc.setCatName(mStatsObj.getCatName());
  partitionDesc.setDbName(mStatsObj.getDbName());
  partitionDesc.setTableName(mStatsObj.getTableName());
  partitionDesc.setPartName(mStatsObj.getPartitionName());

  // Mark as partition-level and carry over the last-analyzed value.
  partitionDesc.setIsTblLevel(false);
  partitionDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());

  return partitionDesc;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class SharedCache method getTableColStatsFromCache.
/**
 * Looks up table-level column statistics for the given columns in the table cache.
 * <p>
 * The lookup runs under the cache read lock. If the table is not present in
 * the cache, an info message is logged and {@code null} is returned.
 *
 * @param catName the catalog name used to build the cache key
 * @param dbName the database name of the table
 * @param tblName the table name
 * @param colNames the columns whose statistics are requested
 * @param validWriteIds passed through to the cached table's statistics lookup
 * @param areTxnStatsSupported passed through to the cached table's statistics lookup
 * @return the result of the cached table's statistics lookup, or {@code null}
 *         when the table is not cached
 * @throws MetaException declared by this method; presumably propagated from the
 *         cached-table lookup
 */
public ColumnStatistics getTableColStatsFromCache(String catName, String dbName, String tblName, List<String> colNames, String validWriteIds, boolean areTxnStatsSupported) throws MetaException {
  // Acquire the lock BEFORE the try block: if lock() itself fails, the finally
  // clause must not call unlock() on a lock this thread never obtained.
  cacheLock.readLock().lock();
  try {
    TableWrapper tblWrapper = tableCache.getIfPresent(CacheUtils.buildTableKey(catName, dbName, tblName));
    if (tblWrapper == null) {
      LOG.info("Table " + tblName + " is missing from cache.");
      return null;
    }
    // Table-level descriptor (first constructor argument is true).
    // NOTE(review): the catalog name is not set on this descriptor; confirm whether callers need it.
    ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tblName);
    return tblWrapper.getCachedTableColStats(csd, colNames, validWriteIds, areTxnStatsSupported);
  } finally {
    cacheLock.readLock().unlock();
  }
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class TestObjectStore method createPartitionedTable.
/**
 * Creates the DB1 database and the TABLE1 table, then adds 3 partitions to the table.
 * Optionally attaches privileges and column statistics to each partition.
 * @param withPrivileges Should we create privileges as well
 * @param withStatistics Should we create statistics as well
 */
private void createPartitionedTable(boolean withPrivileges, boolean withStatistics) throws Exception {
  Database database = new DatabaseBuilder()
      .setName(DB1)
      .setDescription("description")
      .setLocation("locationurl")
      .build(conf);
  try (AutoCloseable ignored = deadline()) {
    objectStore.createDatabase(database);
  }

  Table table = new TableBuilder()
      .setDbName(DB1)
      .setTableName(TABLE1)
      .addCol("test_col1", "int")
      .addCol("test_col2", "int")
      .addPartCol("test_part_col", "int")
      .addCol("test_bucket_col", "int", "test bucket col comment")
      .addCol("test_skewed_col", "int", "test skewed col comment")
      .addCol("test_sort_col", "int", "test sort col comment")
      .build(conf);
  try (AutoCloseable ignored = deadline()) {
    objectStore.createTable(table);
  }

  // Privileges are collected across all partitions and granted once at the end.
  PrivilegeBag collectedPrivileges = new PrivilegeBag();

  // Create partitions for the partitioned table
  for (int partIdx = 0; partIdx < 3; partIdx++) {
    Partition partition = new PartitionBuilder()
        .inTable(table)
        .addValue("a" + partIdx)
        .addSerdeParam("serdeParam", "serdeParamValue")
        .addStorageDescriptorParam("sdParam", "sdParamValue")
        .addBucketCol("test_bucket_col")
        .addSkewedColName("test_skewed_col")
        .addSortCol("test_sort_col", 1)
        .build(conf);
    try (AutoCloseable ignored = deadline()) {
      objectStore.addPartition(partition);
    }

    if (withPrivileges) {
      HiveObjectRef partitionRef = new HiveObjectRefBuilder().buildPartitionReference(partition);
      HiveObjectRef partitionColumnRef = new HiveObjectRefBuilder()
          .buildPartitionColumnReference(table, "test_part_col", partition.getValues());
      PrivilegeGrantInfo grantInfo = new PrivilegeGrantInfoBuilder().setPrivilege("a").build();

      // Same grant for the partition itself, then for its partition column.
      for (HiveObjectRef target : new HiveObjectRef[] { partitionRef, partitionColumnRef }) {
        HiveObjectPrivilege privilege = new HiveObjectPrivilegeBuilder()
            .setHiveObjectRef(target)
            .setPrincipleName("a")
            .setPrincipalType(PrincipalType.USER)
            .setGrantInfo(grantInfo)
            .build();
        collectedPrivileges.addToPrivileges(privilege);
      }
    }

    if (withStatistics) {
      // Long-typed statistics values for the partition column.
      LongColumnStatsData longData = new LongColumnStatsData();
      longData.setNumNulls(1);
      longData.setNumDVs(2);
      longData.setLowValue(3);
      longData.setHighValue(4);

      ColumnStatisticsData statsData = new ColumnStatisticsData();
      statsData.setLongStats(longData);

      List<ColumnStatisticsObj> columnStats = new ArrayList<>(1);
      columnStats.add(new ColumnStatisticsObj("test_part_col", "int", statsData));

      // Descriptor naming the partition the statistics belong to.
      ColumnStatisticsDesc partitionDesc = new ColumnStatisticsDesc();
      partitionDesc.setCatName(table.getCatName());
      partitionDesc.setDbName(table.getDbName());
      partitionDesc.setTableName(table.getTableName());
      partitionDesc.setPartName("test_part_col=a" + partIdx);

      ColumnStatistics partitionStats = new ColumnStatistics();
      partitionStats.setStatsDesc(partitionDesc);
      partitionStats.setStatsObj(columnStats);
      partitionStats.setEngine(ENGINE);

      try (AutoCloseable ignored = deadline()) {
        objectStore.updatePartitionColumnStatistics(partitionStats, partition.getValues(), null, -1);
      }
    }
  }

  if (withPrivileges) {
    try (AutoCloseable ignored = deadline()) {
      objectStore.grantPrivileges(collectedPrivileges);
    }
  }
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class TestHiveMetaStore method testColumnStatistics.
/**
 * Exercises table-level and partition-level column statistics through the client:
 * write, read back and compare, delete, and query for statistics that no longer exist.
 * <p>
 * Any exception is printed to stderr and rethrown; the test database/table/type
 * are cleaned up in all cases.
 */
@Test
public void testColumnStatistics() throws Throwable {
  String dbName = "columnstatstestdb";
  String tblName = "tbl";
  String typeName = "Person";
  String tblOwner = "testowner";
  int lastAccessed = 6796;
  try {
    cleanUp(dbName, tblName, typeName);
    new DatabaseBuilder().setName(dbName).create(client, conf);
    createTableForTestFilter(dbName, tblName, tblOwner, lastAccessed, true);
    // Create a ColumnStatistics Obj
    String[] colName = new String[] { "income", "name" };
    double lowValue = 50000.21;
    double highValue = 1200000.4525;
    long numNulls = 3;
    long numDVs = 22;
    double avgColLen = 50.30;
    long maxColLen = 102;
    String[] colType = new String[] { "double", "string" };
    boolean isTblLevel = true;
    String partName = null;
    List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
    statsDesc.setDbName(dbName);
    statsDesc.setTableName(tblName);
    statsDesc.setIsTblLevel(isTblLevel);
    statsDesc.setPartName(partName);
    // double-typed statistics for the first column ("income")
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColName(colName[0]);
    statsObj.setColType(colType[0]);
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
    statsData.setDoubleStats(numericStats);
    statsData.getDoubleStats().setHighValue(highValue);
    statsData.getDoubleStats().setLowValue(lowValue);
    statsData.getDoubleStats().setNumDVs(numDVs);
    statsData.getDoubleStats().setNumNulls(numNulls);
    statsObj.setStatsData(statsData);
    statsObjs.add(statsObj);
    // string-typed statistics for the second column ("name")
    statsObj = new ColumnStatisticsObj();
    statsObj.setColName(colName[1]);
    statsObj.setColType(colType[1]);
    statsData = new ColumnStatisticsData();
    StringColumnStatsData stringStats = new StringColumnStatsData();
    statsData.setStringStats(stringStats);
    statsData.getStringStats().setAvgColLen(avgColLen);
    statsData.getStringStats().setMaxColLen(maxColLen);
    statsData.getStringStats().setNumDVs(numDVs);
    statsData.getStringStats().setNumNulls(numNulls);
    statsObj.setStatsData(statsData);
    statsObjs.add(statsObj);
    ColumnStatistics colStats = new ColumnStatistics();
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    colStats.setEngine(ENGINE);
    // write stats objs persistently
    client.updateTableColumnStatistics(colStats);
    // retrieve the stats obj that was just written
    ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0]), ENGINE).get(0);
    // compare stats obj to ensure what we get is what we wrote
    // (assertEquals takes the expected value first, then the actual value)
    assertNotNull(colStats2);
    assertEquals(colName[0], colStats2.getColName());
    assertEquals(lowValue, colStats2.getStatsData().getDoubleStats().getLowValue(), 0.01);
    assertEquals(highValue, colStats2.getStatsData().getDoubleStats().getHighValue(), 0.01);
    assertEquals(numNulls, colStats2.getStatsData().getDoubleStats().getNumNulls());
    assertEquals(numDVs, colStats2.getStatsData().getDoubleStats().getNumDVs());
    // test delete column stats; if no col name is passed all column stats associated with the
    // table is deleted
    boolean status = client.deleteTableColumnStatistics(dbName, tblName, null, ENGINE);
    assertTrue(status);
    // try to query stats for a column for which stats doesn't exist
    assertTrue(client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[1]), ENGINE).isEmpty());
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    // update table level column stats
    client.updateTableColumnStatistics(colStats);
    // query column stats for column whose stats were updated in the previous call
    // (the fetched object is not inspected further here)
    colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0]), ENGINE).get(0);
    // partition level column statistics test
    // create a table with multiple partitions
    cleanUp(dbName, tblName, typeName);
    List<List<String>> values = new ArrayList<>();
    values.add(makeVals("2008-07-01 14:13:12", "14"));
    values.add(makeVals("2008-07-01 14:13:12", "15"));
    values.add(makeVals("2008-07-02 14:13:12", "15"));
    values.add(makeVals("2008-07-03 14:13:12", "151"));
    createMultiPartitionTableSchema(dbName, tblName, typeName, values);
    List<String> partitions = client.listPartitionNames(dbName, tblName, (short) -1);
    partName = partitions.get(0);
    isTblLevel = false;
    // create a new columnstatistics desc to represent partition level column stats
    statsDesc = new ColumnStatisticsDesc();
    statsDesc.setDbName(dbName);
    statsDesc.setTableName(tblName);
    statsDesc.setPartName(partName);
    statsDesc.setIsTblLevel(isTblLevel);
    colStats = new ColumnStatistics();
    colStats.setStatsDesc(statsDesc);
    colStats.setStatsObj(statsObjs);
    colStats.setEngine(ENGINE);
    client.updatePartitionColumnStatistics(colStats);
    colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1]), ENGINE).get(partName).get(0);
    // compare stats obj to ensure what we get is what we wrote
    assertNotNull(colStats2);
    assertEquals(partName, colStats.getStatsDesc().getPartName());
    assertEquals(colName[1], colStats2.getColName());
    assertEquals(maxColLen, colStats2.getStatsData().getStringStats().getMaxColLen());
    assertEquals(avgColLen, colStats2.getStatsData().getStringStats().getAvgColLen(), 0.01);
    assertEquals(numNulls, colStats2.getStatsData().getStringStats().getNumNulls());
    assertEquals(numDVs, colStats2.getStatsData().getStringStats().getNumDVs());
    // test stats deletion at partition level
    client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1], ENGINE);
    // stats for the other column are still fetched (the fetched object is not inspected further)
    colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[0]), ENGINE).get(partName).get(0);
    // test get stats on a column for which stats doesn't exist
    assertTrue(client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1]), ENGINE).isEmpty());
  } catch (Exception e) {
    // Print the failure for diagnosis, then rethrow so the test still fails.
    System.err.println(StringUtils.stringifyException(e));
    System.err.println("testColumnStatistics() failed.");
    throw e;
  } finally {
    cleanUp(dbName, tblName, typeName);
  }
}
Aggregations