Search in sources :

Example 1 with HyperLogLog

use of org.apache.hadoop.hive.common.ndv.hll.HyperLogLog in project hive by apache.

the class TestOldSchema method setUp.

@Before
public void setUp() throws Exception {
    Configuration conf = MetastoreConf.newMetastoreConf();
    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.STATS_FETCH_BITVECTOR, false);
    MetaStoreTestUtils.setConfForStandloneMode(conf);
    store = new ObjectStore();
    store.setConf(conf);
    dropAllStoreObjects(store);
    HyperLogLog hll = HyperLogLog.builder().build();
    hll.addLong(1);
    bitVectors[1] = hll.serialize();
    hll = HyperLogLog.builder().build();
    hll.addLong(2);
    hll.addLong(3);
    hll.addLong(3);
    hll.addLong(4);
    bitVectors[0] = hll.serialize();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HyperLogLog(org.apache.hadoop.hive.common.ndv.hll.HyperLogLog) Before(org.junit.Before)

Example 2 with HyperLogLog

use of org.apache.hadoop.hive.common.ndv.hll.HyperLogLog in project hive by apache.

the class TestCachedStore method testPartitionAggrStatsBitVector.

@Test
public void testPartitionAggrStatsBitVector() throws Exception {
    String dbName = "testTableColStatsOps2";
    String tblName = "tbl2";
    String colName = "f1";
    Database db = new Database(dbName, null, "some_location", null);
    cachedStore.createDatabase(db);
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema(colName, "int", null));
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("col", "int", null));
    StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
    Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString());
    cachedStore.createTable(tbl);
    List<String> partVals1 = new ArrayList<>();
    partVals1.add("1");
    List<String> partVals2 = new ArrayList<>();
    partVals2.add("2");
    Partition ptn1 = new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<>());
    cachedStore.addPartition(ptn1);
    Partition ptn2 = new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<>());
    cachedStore.addPartition(ptn2);
    ColumnStatistics stats = new ColumnStatistics();
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
    statsDesc.setPartName("col");
    List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
    ColumnStatisticsData data = new ColumnStatisticsData();
    ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setLowValue(0);
    longStats.setHighValue(100);
    longStats.setNumNulls(50);
    longStats.setNumDVs(30);
    HyperLogLog hll = HyperLogLog.builder().build();
    hll.addLong(1);
    hll.addLong(2);
    hll.addLong(3);
    longStats.setBitVectors(hll.serialize());
    data.setLongStats(longStats);
    colStatObjs.add(colStats);
    stats.setStatsDesc(statsDesc);
    stats.setStatsObj(colStatObjs);
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1);
    longStats.setNumDVs(40);
    hll = HyperLogLog.builder().build();
    hll.addLong(2);
    hll.addLong(3);
    hll.addLong(4);
    hll.addLong(5);
    longStats.setBitVectors(hll.serialize());
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2);
    List<String> colNames = new ArrayList<>();
    colNames.add(colName);
    List<String> aggrPartVals = new ArrayList<>();
    aggrPartVals.add("1");
    aggrPartVals.add("2");
    AggrStats aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
    aggrStats = cachedStore.get_aggr_stats_for(dbName, tblName, aggrPartVals, colNames);
    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls(), 100);
    Assert.assertEquals(aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs(), 5);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) ArrayList(java.util.ArrayList) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) HyperLogLog(org.apache.hadoop.hive.common.ndv.hll.HyperLogLog) Test(org.junit.Test) MetastoreCheckinTest(org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)

Aggregations

HyperLogLog (org.apache.hadoop.hive.common.ndv.hll.HyperLogLog)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Configuration (org.apache.hadoop.conf.Configuration)1 MetastoreCheckinTest (org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest)1 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)1 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)1 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)1 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)1 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)1 Database (org.apache.hadoop.hive.metastore.api.Database)1 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)1 Partition (org.apache.hadoop.hive.metastore.api.Partition)1 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)1 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)1 Table (org.apache.hadoop.hive.metastore.api.Table)1 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)1 Before (org.junit.Before)1 Test (org.junit.Test)1