Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in the Apache Hive project.
Class MetaStoreDirectSql, method getPartitionStats.
/**
 * Fetches partition-level column statistics from "PART_COL_STATS" with direct SQL and
 * groups the rows into one {@link ColumnStatistics} per run of adjacent rows that share
 * a partition name.
 *
 * <p>The lookup is batched twice: the outer batch splits {@code colNames}, the inner batch
 * splits {@code partNames}. Each inner batch issues one query ordered by "PARTITION_NAME".
 *
 * @param dbName database name bound to the "DB_NAME" predicate
 * @param tableName table name bound to the "TABLE_NAME" predicate
 * @param partNames partition names to look up; an empty list yields an empty result
 * @param colNames column names to look up; an empty list yields an empty result
 * @return the grouped statistics, each descriptor carrying its partition name
 * @throws MetaException if a query or the conversion of the rows fails
 */
public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName, final List<String> partNames, List<String> colNames) throws MetaException {
  if (colNames.isEmpty() || partNames.isEmpty()) {
    return Lists.newArrayList();
  }
  final boolean doTrace = LOG.isDebugEnabled();
  // %1$s and %2$s are filled per batch with the "?" placeholders for columns and partitions.
  final String queryText0 = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from " + " \"PART_COL_STATS\" where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\"" + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
  Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
    public List<Object[]> run(final List<String> inputColNames) throws MetaException {
      Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
        public List<Object[]> run(List<String> inputPartNames) throws MetaException {
          String queryText = String.format(queryText0, makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
          long start = doTrace ? System.nanoTime() : 0;
          Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
          Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColNames), queryText);
          timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
          if (qResult == null) {
            // Nothing to hand back, so the query can be released immediately.
            query.closeAll();
            return Lists.newArrayList();
          }
          // The returned rows are still used by the caller; defer closing the query.
          addQueryAfterUse(query);
          return ensureList(qResult);
        }
      };
      try {
        return runBatched(partNames, b2);
      } finally {
        // Hand the inner batch's open queries to the outer batch, even on failure,
        // so that a single closeAllQueries() call releases everything.
        addQueryAfterUse(b2);
      }
    }
  };
  try {
    List<Object[]> list = runBatched(colNames, b);
    List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(Math.min(list.size(), partNames.size()));
    String lastPartName = null;
    int from = 0;
    // Iterate one step past the end so the final run of rows is flushed as well.
    for (int i = 0; i <= list.size(); ++i) {
      boolean isLast = i == list.size();
      String partName = isLast ? null : (String) list.get(i)[0];
      if (!isLast && partName.equals(lastPartName)) {
        // Still inside the current partition's run of rows.
        continue;
      } else if (from != i) {
        // Partition name changed (or input ended): emit rows [from, i) as one entry.
        ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
        csd.setPartName(lastPartName);
        result.add(makeColumnStats(list.subList(from, i), csd, 1));
      }
      lastPartName = partName;
      from = i;
      Deadline.checkTimeout();
    }
    return result;
  } finally {
    // Release the deferred queries on every exit path; previously an exception thrown
    // while batching or converting rows skipped this call and leaked the open queries.
    b.closeAllQueries();
  }
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in the Apache Hive project.
Class TestHiveMetaStoreStatsMerge, method testStatsMerge.
/**
 * Verifies that column statistics sent with {@code needMerge=true} are merged with the
 * statistics already stored for the table.
 *
 * <p>Steps: create the database and table (checking the listener events), store string
 * column statistics for column "a", read them back, then send a second set of statistics
 * with merging enabled and check the combined values.
 */
public void testStatsMerge() throws Exception {
  int listSize = 0;
  List<ListenerEvent> notifyList = DummyListener.notifyList;
  assertEquals(listSize, notifyList.size());
  msc.createDatabase(db);
  listSize++;
  assertEquals(listSize, notifyList.size());
  CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) (notifyList.get(listSize - 1));
  // Java assert statement: only evaluated when the JVM runs with assertions enabled.
  assert dbEvent.getStatus();
  msc.createTable(table);
  listSize++;
  assertEquals(listSize, notifyList.size());
  CreateTableEvent tblEvent = (CreateTableEvent) (notifyList.get(listSize - 1));
  assert tblEvent.getStatus();
  table = msc.getTable(dbName, tblName);

  // First round: store the initial statistics for column "a".
  ColumnStatistics cs = new ColumnStatistics();
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
  cs.setStatsDesc(desc);
  ColumnStatisticsObj obj = new ColumnStatisticsObj();
  obj.setColName("a");
  obj.setColType("string");
  ColumnStatisticsData data = new ColumnStatisticsData();
  StringColumnStatsData scsd = new StringColumnStatsData();
  scsd.setAvgColLen(10);
  scsd.setMaxColLen(20);
  scsd.setNumNulls(30);
  scsd.setNumDVs(123);
  scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
  data.setStringStats(scsd);
  obj.setStatsData(data);
  cs.addToStatsObj(obj);
  List<ColumnStatistics> colStats = new ArrayList<>();
  colStats.add(cs);
  SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
  msc.setPartitionColumnStatistics(request);
  List<String> colNames = new ArrayList<>();
  colNames.add("a");
  StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
  assertEquals(123, getScsd.getNumDVs());

  // Second round: colStats still holds cs -> obj -> data, so swapping the string stats
  // inside data changes what the next request sends. (The original built a second
  // ColumnStatistics here that was never added to colStats; that dead object is removed.)
  scsd = new StringColumnStatsData();
  scsd.setAvgColLen(20);
  scsd.setMaxColLen(5);
  scsd.setNumNulls(70);
  scsd.setNumDVs(456);
  scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
  data.setStringStats(scsd);
  obj.setStatsData(data);
  request = new SetPartitionsStatsRequest(colStats);
  request.setNeedMerge(true);
  msc.setPartitionColumnStatistics(request);
  getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
  assertEquals(20.0, getScsd.getAvgColLen());
  assertEquals(20, getScsd.getMaxColLen());
  assertEquals(100, getScsd.getNumNulls());
  // since metastore is ObjectStore, we use the max function to merge.
  assertEquals(456, getScsd.getNumDVs());
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in the Apache Hive project.
Class HBaseReadWrite, method buildColStats.
/**
 * Creates a {@link ColumnStatistics} whose descriptor is populated from a serialized key.
 * No statistics objects are attached; only the descriptor is filled in.
 *
 * @param key serialized table key or partition key
 * @param fromTable {@code true} if {@code key} is a table key, {@code false} if it is a
 *     partition key
 * @return a ColumnStatistics carrying the database name, table name, table-level flag and,
 *     for partition keys, the partition name
 * @throws IOException declared for the key decoding and table lookup
 * @throws RuntimeException if the key is a partition key but its table cannot be found
 */
private ColumnStatistics buildColStats(byte[] key, boolean fromTable) throws IOException {
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
  statsDesc.setIsTblLevel(fromTable);

  // Table keys and partition keys use different decoders.
  List<String> keyParts;
  if (fromTable) {
    keyParts = Arrays.asList(HBaseUtils.deserializeKey(key));
  } else {
    keyParts = HBaseUtils.deserializePartitionKey(key, this);
  }

  // The first two key components are the database and table names.
  String keyDbName = keyParts.get(0);
  String keyTableName = keyParts.get(1);
  statsDesc.setDbName(keyDbName);
  statsDesc.setTableName(keyTableName);

  if (!fromTable) {
    // The partition name is built from the table plus the remaining key components.
    Table owningTable = getTable(keyDbName, keyTableName);
    if (owningTable == null) {
      throw new RuntimeException("Unable to find table " + keyDbName + "." + keyTableName + " even though I have a partition for it!");
    }
    List<String> partValues = keyParts.subList(2, keyParts.size());
    statsDesc.setPartName(HBaseStore.buildExternalPartName(owningTable, partValues));
  }

  ColumnStatistics built = new ColumnStatistics();
  built.setStatsDesc(statsDesc);
  return built;
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in the Apache Hive project.
Class TestHBaseStoreBitVector, method doubleTableStatistics.
/**
 * Stores table-level statistics for a double column, reads them back from the store and
 * checks that the descriptor and the double statistics match what was written.
 */
@Test
public void doubleTableStatistics() throws Exception {
  createMockTable(DOUBLE_COL, DOUBLE_TYPE);

  // Assemble the statistics to persist: the default table-level descriptor plus one of
  // the pre-created statistics objects.
  ColumnStatisticsDesc writtenDesc = getMockTblColStatsDesc();
  ColumnStatisticsObj writtenObj = doubleColStatsObjs.get(0);
  DoubleColumnStatsData written = writtenObj.getStatsData().getDoubleStats();
  ColumnStatistics toStore = new ColumnStatistics();
  toStore.setStatsDesc(writtenDesc);
  toStore.addToStatsObj(writtenObj);
  store.updateTableColumnStatistics(toStore);

  // Read the statistics back.
  ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));

  // Descriptor checks.
  ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
  Assert.assertEquals(writtenDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetchedDesc.getDbName());
  Assert.assertEquals(TBL, fetchedDesc.getTableName());
  Assert.assertTrue(fetchedDesc.isIsTblLevel());

  // Exactly one statistics object, holding double statistics.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the double statistics.
  DoubleColumnStatsData read = fetchedData.getDoubleStats();
  Assert.assertEquals(written.getHighValue(), read.getHighValue(), 0.01);
  Assert.assertEquals(written.getLowValue(), read.getLowValue(), 0.01);
  Assert.assertEquals(written.getNumNulls(), read.getNumNulls());
  Assert.assertEquals(written.getNumDVs(), read.getNumDVs());
  Assert.assertEquals(written.getBitVectors(), read.getBitVectors());
}
Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in the Apache Hive project.
Class TestHBaseStoreBitVector, method longTableStatistics.
/**
 * Stores table-level statistics for a long column, reads them back from the store and
 * checks that the descriptor and the long statistics match what was written.
 */
@Test
public void longTableStatistics() throws Exception {
  createMockTable(LONG_COL, LONG_TYPE);

  // Assemble the statistics to persist: the default table-level descriptor plus one of
  // the pre-created statistics objects.
  ColumnStatisticsDesc writtenDesc = getMockTblColStatsDesc();
  ColumnStatisticsObj writtenObj = longColStatsObjs.get(0);
  LongColumnStatsData written = writtenObj.getStatsData().getLongStats();
  ColumnStatistics toStore = new ColumnStatistics();
  toStore.setStatsDesc(writtenDesc);
  toStore.addToStatsObj(writtenObj);
  store.updateTableColumnStatistics(toStore);

  // Read the statistics back.
  ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));

  // Descriptor checks.
  ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
  Assert.assertEquals(writtenDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
  Assert.assertEquals(DB, fetchedDesc.getDbName());
  Assert.assertEquals(TBL, fetchedDesc.getTableName());
  Assert.assertTrue(fetchedDesc.isIsTblLevel());

  // Exactly one statistics object, holding long statistics.
  Assert.assertEquals(1, fetched.getStatsObjSize());
  ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
  Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, fetchedData.getSetField());

  // Field-by-field comparison of the long statistics.
  LongColumnStatsData read = fetchedData.getLongStats();
  Assert.assertEquals(written.getHighValue(), read.getHighValue());
  Assert.assertEquals(written.getLowValue(), read.getLowValue());
  Assert.assertEquals(written.getNumNulls(), read.getNumNulls());
  Assert.assertEquals(written.getNumDVs(), read.getNumDVs());
  Assert.assertEquals(written.getBitVectors(), read.getBitVectors());
}
Aggregations