use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.
the class TestCachedStore method testAggrStatsRepeatedRead.
// @Test
public void testAggrStatsRepeatedRead() throws Exception {
Configuration conf = MetastoreConf.newMetastoreConf();
MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true);
MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb");
MetaStoreTestUtils.setConfForStandloneMode(conf);
CachedStore cachedStore = new CachedStore();
CachedStore.clearSharedCache();
cachedStore.setConfForTest(conf);
ObjectStore objectStore = (ObjectStore) cachedStore.getRawStore();
String dbName = "testTableColStatsOps";
String tblName = "tbl";
String colName = "f1";
Database db = new DatabaseBuilder().setName(dbName).setLocation("some_location").build(conf);
cachedStore.createDatabase(db);
List<FieldSchema> cols = new ArrayList<>();
cols.add(new FieldSchema(colName, "int", null));
List<FieldSchema> partCols = new ArrayList<>();
partCols.add(new FieldSchema("col", "int", null));
StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString());
tbl.setCatName(DEFAULT_CATALOG_NAME);
cachedStore.createTable(tbl);
List<String> partVals1 = new ArrayList<>();
partVals1.add("1");
List<String> partVals2 = new ArrayList<>();
partVals2.add("2");
Partition ptn1 = new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<>());
ptn1.setCatName(DEFAULT_CATALOG_NAME);
cachedStore.addPartition(ptn1);
Partition ptn2 = new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<>());
ptn2.setCatName(DEFAULT_CATALOG_NAME);
cachedStore.addPartition(ptn2);
ColumnStatistics stats = new ColumnStatistics();
ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
statsDesc.setPartName("col");
List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
ColumnStatisticsData data = new ColumnStatisticsData();
ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(0);
longStats.setHighValue(100);
longStats.setNumNulls(50);
longStats.setNumDVs(30);
data.setLongStats(longStats);
colStatObjs.add(colStats);
stats.setStatsDesc(statsDesc);
stats.setStatsObj(colStatObjs);
stats.setEngine(CacheUtils.HIVE_ENGINE);
cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, null, -1);
cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, null, -1);
List<String> colNames = new ArrayList<>();
colNames.add(colName);
List<String> aggrPartVals = new ArrayList<>();
aggrPartVals.add("1");
aggrPartVals.add("2");
AggrStats aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
objectStore.deletePartitionColumnStatistics(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), Warehouse.makePartName(tbl.getPartitionKeys(), partVals1), partVals1, colName, CacheUtils.HIVE_ENGINE);
objectStore.deletePartitionColumnStatistics(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), Warehouse.makePartName(tbl.getPartitionKeys(), partVals2), partVals2, colName, CacheUtils.HIVE_ENGINE);
objectStore.dropPartition(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), partVals1);
objectStore.dropPartition(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), partVals2);
objectStore.dropTable(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName());
objectStore.dropDatabase(DEFAULT_CATALOG_NAME, db.getName());
cachedStore.shutdown();
}
use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.
the class TestCachedStore method createLongStats.
private ColumnStatisticsData createLongStats(long numNulls, long numDVs, Long low, Long high) {
ColumnStatisticsData data = new ColumnStatisticsData();
LongColumnStatsDataInspector stats = new LongColumnStatsDataInspector();
if (low != null) {
stats.setLowValue(low.longValue());
}
if (high != null) {
stats.setHighValue(high.longValue());
}
stats.setNumNulls(numNulls);
stats.setNumDVs(numDVs);
data.setLongStats(stats);
return data;
}
use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.
the class ColumnStatisticsObjTranslator method unpackPrimitiveObject.
private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
if (o == null) {
return;
}
// First infer the type of object
if (fieldName.equals("columntype")) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
ColumnStatisticsData statsData = new ColumnStatisticsData();
if (s.equalsIgnoreCase("long")) {
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("double")) {
DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
statsData.setDoubleStats(doubleStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("string")) {
StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
statsData.setStringStats(stringStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("boolean")) {
BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
statsData.setBooleanStats(booleanStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("decimal")) {
DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase("date")) {
DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
}
} else {
// invoke the right unpack method depending on data type of the column
if (statsObj.getStatsData().isSetBooleanStats()) {
unpackBooleanStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetLongStats()) {
unpackLongStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetDoubleStats()) {
unpackDoubleStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetStringStats()) {
unpackStringStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
unpackBinaryStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetDecimalStats()) {
unpackDecimalStats(oi, o, fieldName, statsObj);
} else if (statsObj.getStatsData().isSetDateStats()) {
unpackDateStats(oi, o, fieldName, statsObj);
}
}
}
use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.
the class TestCachedStore method testTableColStatsOps.
// @Test
public void testTableColStatsOps() throws Exception {
// Add a db via ObjectStore
String dbName = "testTableColStatsOps";
String dbOwner = "user1";
Database db = createTestDb(dbName, dbOwner);
objectStore.createDatabase(db);
db = objectStore.getDatabase(dbName);
// Add a table via ObjectStore
final String tblName = "tbl";
final String tblOwner = "user1";
final FieldSchema col1 = new FieldSchema("col1", "int", "integer column");
// Stats values for col1
long col1LowVal = 5;
long col1HighVal = 500;
long col1Nulls = 10;
long col1DV = 20;
final FieldSchema col2 = new FieldSchema("col2", "string", "string column");
// Stats values for col2
long col2MaxColLen = 100;
double col2AvgColLen = 45.5;
long col2Nulls = 5;
long col2DV = 40;
final FieldSchema col3 = new FieldSchema("col3", "boolean", "boolean column");
// Stats values for col3
long col3NumTrues = 100;
long col3NumFalses = 30;
long col3Nulls = 10;
final List<FieldSchema> cols = new ArrayList<>();
cols.add(col1);
cols.add(col2);
cols.add(col3);
FieldSchema ptnCol1 = new FieldSchema("part1", "string", "string partition column");
List<FieldSchema> ptnCols = new ArrayList<FieldSchema>();
ptnCols.add(ptnCol1);
Table tbl = createTestTbl(dbName, tblName, tblOwner, cols, ptnCols);
objectStore.createTable(tbl);
tbl = objectStore.getTable(dbName, tblName);
// Add ColumnStatistics for tbl to metastore DB via ObjectStore
ColumnStatistics stats = new ColumnStatistics();
ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
// Col1
ColumnStatisticsData data1 = new ColumnStatisticsData();
ColumnStatisticsObj col1Stats = new ColumnStatisticsObj(col1.getName(), col1.getType(), data1);
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
longStats.setLowValue(col1LowVal);
longStats.setHighValue(col1HighVal);
longStats.setNumNulls(col1Nulls);
longStats.setNumDVs(col1DV);
data1.setLongStats(longStats);
colStatObjs.add(col1Stats);
// Col2
ColumnStatisticsData data2 = new ColumnStatisticsData();
ColumnStatisticsObj col2Stats = new ColumnStatisticsObj(col2.getName(), col2.getType(), data2);
StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
stringStats.setMaxColLen(col2MaxColLen);
stringStats.setAvgColLen(col2AvgColLen);
stringStats.setNumNulls(col2Nulls);
stringStats.setNumDVs(col2DV);
data2.setStringStats(stringStats);
colStatObjs.add(col2Stats);
// Col3
ColumnStatisticsData data3 = new ColumnStatisticsData();
ColumnStatisticsObj col3Stats = new ColumnStatisticsObj(col3.getName(), col3.getType(), data3);
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
boolStats.setNumTrues(col3NumTrues);
boolStats.setNumFalses(col3NumFalses);
boolStats.setNumNulls(col3Nulls);
data3.setBooleanStats(boolStats);
colStatObjs.add(col3Stats);
stats.setStatsDesc(statsDesc);
stats.setStatsObj(colStatObjs);
// Save to DB
objectStore.updateTableColumnStatistics(stats);
// Prewarm CachedStore
CachedStore.setCachePrewarmedState(false);
CachedStore.prewarm(objectStore);
// Read table stats via CachedStore
ColumnStatistics newStats = cachedStore.getTableColumnStatistics(dbName, tblName, Arrays.asList(col1.getName(), col2.getName(), col3.getName()));
Assert.assertEquals(stats, newStats);
// Clean up
objectStore.dropTable(dbName, tblName);
objectStore.dropDatabase(dbName);
sharedCache.getDatabaseCache().clear();
sharedCache.getTableCache().clear();
sharedCache.getSdCache().clear();
}
use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.
the class ColumnStatisticsObjTranslator method unpackPrimitiveObject.
private static void unpackPrimitiveObject(ObjectInspector oi, Object o, ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
if (o == null) {
return;
}
// First infer the type of object
if (csf == ColumnStatsField.COLUMN_STATS_TYPE) {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
ColumnStatisticsData statsData = new ColumnStatisticsData();
if (s.equalsIgnoreCase(ColumnStatsType.LONG.toString())) {
LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
statsData.setLongStats(longStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.DOUBLE.toString())) {
DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
statsData.setDoubleStats(doubleStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.STRING.toString())) {
StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
statsData.setStringStats(stringStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.BOOLEAN.toString())) {
BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
statsData.setBooleanStats(booleanStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.BINARY.toString())) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
statsData.setBinaryStats(binaryStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.DECIMAL.toString())) {
DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
statsData.setDecimalStats(decimalStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.DATE.toString())) {
DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
statsData.setDateStats(dateStats);
statsObj.setStatsData(statsData);
} else if (s.equalsIgnoreCase(ColumnStatsType.TIMESTAMP.toString())) {
TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
statsData.setTimestampStats(timestampStats);
statsObj.setStatsData(statsData);
}
} else {
// invoke the right unpack method depending on data type of the column
if (statsObj.getStatsData().isSetBooleanStats()) {
unpackBooleanStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetLongStats()) {
unpackLongStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetDoubleStats()) {
unpackDoubleStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetStringStats()) {
unpackStringStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetBinaryStats()) {
unpackBinaryStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetDecimalStats()) {
unpackDecimalStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetDateStats()) {
unpackDateStats(oi, o, csf, statsObj);
} else if (statsObj.getStatsData().isSetTimestampStats()) {
unpackTimestampStats(oi, o, csf, statsObj);
}
}
}
Aggregations