Use of org.apache.hadoop.hive.common.ndv.hll.HyperLogLog in project Hive by Apache.
From class TestOldSchema, method setUp:
@Before
public void setUp() throws Exception {
  Configuration conf = MetastoreConf.newMetastoreConf();
  // Disable bit-vector fetching so the test exercises the old metastore schema path.
  MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.STATS_FETCH_BITVECTOR, false);
  MetaStoreTestUtils.setConfForStandloneMode(conf);
  store = new ObjectStore();
  store.setConf(conf);
  dropAllStoreObjects(store);
  // Build two serialized HLL sketches to use as per-partition bit vectors.
  HyperLogLog hll = HyperLogLog.builder().build();
  hll.addLong(1);
  bitVectors[1] = hll.serialize();
  hll = HyperLogLog.builder().build();
  hll.addLong(2);
  hll.addLong(3);
  hll.addLong(3); // duplicate value; an HLL sketch counts it only once
  hll.addLong(4);
  bitVectors[0] = hll.serialize();
}
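For reference, a minimal standalone sketch of what these two bit vectors encode. It uses only the HyperLogLog calls visible above plus the count() estimator from Hive's NDV package; the driver class name is hypothetical, and this is an illustration rather than part of the Hive test:

import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

// Hypothetical driver class, not part of the Hive test suite.
public class HllSetupSketch {
  public static void main(String[] args) {
    HyperLogLog first = HyperLogLog.builder().build();
    first.addLong(1);
    // One distinct value was added, so the estimate is ~1.
    System.out.println("first NDV = " + first.count());

    HyperLogLog second = HyperLogLog.builder().build();
    second.addLong(2);
    second.addLong(3);
    second.addLong(3); // duplicate: updates the same register, no effect on NDV
    second.addLong(4);
    // Four insertions but only three distinct values, so the estimate is ~3.
    System.out.println("second NDV = " + second.count());

    // serialize() yields the byte[] form stored in bitVectors[] above.
    byte[] serialized = second.serialize();
    System.out.println(serialized.length + " bytes");
  }
}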
Use of org.apache.hadoop.hive.common.ndv.hll.HyperLogLog in project Hive by Apache.
From class TestCachedStore, method testPartitionAggrStatsBitVector:
@Test
public void testPartitionAggrStatsBitVector() throws Exception {
  String dbName = "testTableColStatsOps2";
  String tblName = "tbl2";
  String colName = "f1";
  Database db = new Database(dbName, null, "some_location", null);
  cachedStore.createDatabase(db);
  List<FieldSchema> cols = new ArrayList<>();
  cols.add(new FieldSchema(colName, "int", null));
  List<FieldSchema> partCols = new ArrayList<>();
  partCols.add(new FieldSchema("col", "int", null));
  StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0,
      new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
  Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(),
      null, null, TableType.MANAGED_TABLE.toString());
  cachedStore.createTable(tbl);
  // Two partitions, "col=1" and "col=2", sharing the same storage descriptor.
  List<String> partVals1 = new ArrayList<>();
  partVals1.add("1");
  List<String> partVals2 = new ArrayList<>();
  partVals2.add("2");
  Partition ptn1 = new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<>());
  cachedStore.addPartition(ptn1);
  Partition ptn2 = new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<>());
  cachedStore.addPartition(ptn2);
  // Stats for partition 1: 50 nulls, claimed 30 DVs, HLL sketch over {1, 2, 3}.
  ColumnStatistics stats = new ColumnStatistics();
  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
  statsDesc.setPartName("col");
  List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
  ColumnStatisticsData data = new ColumnStatisticsData();
  ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
  LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
  longStats.setLowValue(0);
  longStats.setHighValue(100);
  longStats.setNumNulls(50);
  longStats.setNumDVs(30);
  HyperLogLog hll = HyperLogLog.builder().build();
  hll.addLong(1);
  hll.addLong(2);
  hll.addLong(3);
  longStats.setBitVectors(hll.serialize());
  data.setLongStats(longStats);
  colStatObjs.add(colStats);
  stats.setStatsDesc(statsDesc);
  stats.setStatsObj(colStatObjs);
  cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
  // Stats for partition 2: same nulls, claimed 40 DVs, HLL sketch over {2, 3, 4, 5}.
  longStats.setNumDVs(40);
  hll = HyperLogLog.builder().build();
  hll.addLong(2);
  hll.addLong(3);
  hll.addLong(4);
  hll.addLong(5);
  longStats.setBitVectors(hll.serialize());
  cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
  List<String> colNames = new ArrayList<>();
  colNames.add(colName);
  List<String> aggrPartVals = new ArrayList<>();
  aggrPartVals.add("1");
  aggrPartVals.add("2");
  // Aggregated nulls are additive (50 + 50 = 100); aggregated NDV comes from
  // merging the two HLL sketches: |{1,2,3} union {2,3,4,5}| = 5.
  AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
  Assert.assertEquals(100, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());
  Assert.assertEquals(5, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs());
  // A second call should serve the cached aggregate and return the same values.
  aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
  Assert.assertEquals(100, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());
  Assert.assertEquals(5, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs());
}
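The NumDVs assertion of 5 falls out of sketch union: merging the partition-1 sketch over {1, 2, 3} with the partition-2 sketch over {2, 3, 4, 5} covers {1, 2, 3, 4, 5}. Below is a minimal sketch of that arithmetic; it assumes HyperLogLog's merge(HyperLogLog) and count() methods, and the driver class name is hypothetical:

import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

// Hypothetical driver class, not part of the Hive test suite.
public class HllAggrSketch {
  public static void main(String[] args) {
    // Partition 1 sketch: {1, 2, 3}
    HyperLogLog p1 = HyperLogLog.builder().build();
    p1.addLong(1);
    p1.addLong(2);
    p1.addLong(3);

    // Partition 2 sketch: {2, 3, 4, 5}
    HyperLogLog p2 = HyperLogLog.builder().build();
    p2.addLong(2);
    p2.addLong(3);
    p2.addLong(4);
    p2.addLong(5);

    // merge() folds p2's registers into p1 (assumed API), so p1 now
    // estimates the union {1, 2, 3, 4, 5} -- the 5 the test asserts.
    p1.merge(p2);
    System.out.println("aggregated NDV = " + p1.count()); // ~5
  }
}

Note how the test's own numbers show the bit vectors at work: the per-partition NDV fields claim 30 and 40, yet the aggregate is 5, the merged-sketch estimate, rather than anything derived from 30 and 40.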