Search in sources:

Example 16 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

the class TestCachedStore method testPartitionAggrStatsBitVector.

// @Test
/**
 * Checks aggregate column statistics across two partitions whose stats carry
 * HyperLogLog bit vectors.
 *
 * <p>Partition "1" is given a sketch fed with {1, 2, 3} and partition "2" a sketch
 * fed with {2, 3, 4, 5}. Both partitions report 50 nulls. The aggregate is expected
 * to report 100 nulls and 5 distinct values (the size of the union of the values fed
 * to the two sketches), not a figure derived from the per-partition numDVs of 30 and
 * 40. The aggregate is requested twice and must give the same answer both times.
 *
 * @throws Exception if any metastore operation fails
 */
public void testPartitionAggrStatsBitVector() throws Exception {
    Configuration conf = MetastoreConf.newMetastoreConf();
    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true);
    // NOTE(review): "-1Kb" presumably lifts the cache memory limit - confirm against MetastoreConf.
    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb");
    MetaStoreTestUtils.setConfForStandloneMode(conf);
    CachedStore cachedStore = new CachedStore();
    CachedStore.clearSharedCache();
    cachedStore.setConfForTest(conf);
    try {
        String dbName = "testTableColStatsOps2";
        String tblName = "tbl2";
        String colName = "f1";

        // Database, partitioned table and two partitions ("1" and "2").
        Database db = new Database(dbName, null, "some_location", null);
        db.setCatalogName(DEFAULT_CATALOG_NAME);
        cachedStore.createDatabase(db);
        List<FieldSchema> cols = new ArrayList<>();
        cols.add(new FieldSchema(colName, "int", null));
        List<FieldSchema> partCols = new ArrayList<>();
        partCols.add(new FieldSchema("col", "int", null));
        StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
        Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString());
        tbl.setCatName(DEFAULT_CATALOG_NAME);
        cachedStore.createTable(tbl);
        List<String> partVals1 = new ArrayList<>();
        partVals1.add("1");
        List<String> partVals2 = new ArrayList<>();
        partVals2.add("2");
        Partition ptn1 = new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<>());
        ptn1.setCatName(DEFAULT_CATALOG_NAME);
        cachedStore.addPartition(ptn1);
        Partition ptn2 = new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<>());
        ptn2.setCatName(DEFAULT_CATALOG_NAME);
        cachedStore.addPartition(ptn2);

        // Column statistics for partition "1": 50 nulls, sketch fed with {1, 2, 3}.
        ColumnStatistics stats = new ColumnStatistics();
        ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
        statsDesc.setPartName("col");
        List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
        ColumnStatisticsData data = new ColumnStatisticsData();
        ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setLowValue(0);
        longStats.setHighValue(100);
        longStats.setNumNulls(50);
        longStats.setNumDVs(30);
        HyperLogLog hll = HyperLogLog.builder().build();
        hll.addLong(1);
        hll.addLong(2);
        hll.addLong(3);
        longStats.setBitVectors(hll.serialize());
        data.setLongStats(longStats);
        colStatObjs.add(colStats);
        stats.setStatsDesc(statsDesc);
        stats.setStatsObj(colStatObjs);
        stats.setEngine(CacheUtils.HIVE_ENGINE);
        // A deep copy is stored so that the mutations below do not touch what was already written.
        cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, null, -1);

        // Same stats object reused for partition "2": numDVs 40, sketch fed with {2, 3, 4, 5}.
        longStats.setNumDVs(40);
        hll = HyperLogLog.builder().build();
        hll.addLong(2);
        hll.addLong(3);
        hll.addLong(4);
        hll.addLong(5);
        longStats.setBitVectors(hll.serialize());
        cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, null, -1);

        List<String> colNames = new ArrayList<>();
        colNames.add(colName);
        List<String> aggrPartVals = new ArrayList<>();
        aggrPartVals.add("1");
        aggrPartVals.add("2");

        // First aggregate request. JUnit expects (expected, actual) so failure messages read correctly.
        AggrStats aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
        Assert.assertEquals(100L, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());
        Assert.assertEquals(5L, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs());

        // Second, identical request must return the same aggregate.
        aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
        Assert.assertEquals(100L, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());
        Assert.assertEquals(5L, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumDVs());
    } finally {
        // Always release the store, even when a setup step or an assertion fails.
        cachedStore.shutdown();
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) HyperLogLog(org.apache.hadoop.hive.common.ndv.hll.HyperLogLog)

Example 17 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

the class TestPartitionStat method createStatsData.

/**
 * Builds a {@code ColumnStatisticsData} whose long-typed statistics carry the given values.
 *
 * @param numNulls number of nulls to record
 * @param numDVs number of distinct values to record
 * @param low low value to record
 * @param high high value to record
 * @return a new statistics holder with its long stats set
 */
private ColumnStatisticsData createStatsData(long numNulls, long numDVs, long low, long high) {
    final ColumnStatisticsData result = new ColumnStatisticsData();

    final LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setNumNulls(numNulls);
    longStats.setNumDVs(numDVs);
    longStats.setLowValue(low);
    longStats.setHighValue(high);

    result.setLongStats(longStats);
    return result;
}
Also used : LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 18 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

the class StatObjectConverter method fillColumnStatisticsData.

// JAVA
/**
 * Populates {@code data} with the statistics variant that matches {@code colType},
 * converting the raw column values through {@code MetastoreDirectSqlUtils}.
 *
 * <p>The column type is matched case-insensitively. High/low bounds are only set when
 * the corresponding argument is non-null. If {@code colType} matches none of the
 * handled types, {@code data} is left untouched.
 *
 * @param colType column type name, e.g. "int", "varchar(10)", "decimal(10,2)"
 * @param data statistics holder to fill in
 * @param llow low value for integer, date and timestamp columns; may be null
 * @param lhigh high value for integer, date and timestamp columns; may be null
 * @param dlow low value for double/float columns; may be null
 * @param dhigh high value for double/float columns; may be null
 * @param declow low value for decimal columns, cast to {@code String}; may be null
 * @param dechigh high value for decimal columns, cast to {@code String}; may be null
 * @param nulls number of nulls
 * @param dist number of distinct values
 * @param bitVector raw bit vector blob, passed through {@code getBitVector}
 * @param avglen average column length (string and binary columns)
 * @param maxlen maximum column length (string and binary columns)
 * @param trues number of true values (boolean columns)
 * @param falses number of false values (boolean columns)
 * @throws MetaException declared by this method; presumably raised by the extraction helpers
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
    // Locale.ROOT keeps the comparison stable in every JVM locale: with a Turkish default
    // locale, "INT".toLowerCase() yields a dotless i and would not match "int".
    final String type = colType.toLowerCase(java.util.Locale.ROOT);
    if (type.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetastoreDirectSqlUtils.extractSqlLong(falses));
        boolStats.setNumTrues(MetastoreDirectSqlUtils.extractSqlLong(trues));
        boolStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (type.equals("string") || type.startsWith("varchar") || type.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        stringStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setStringStats(stringStats);
    } else if (type.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (type.equals("bigint") || type.equals("int") || type.equals("smallint") || type.equals("tinyint")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (lhigh != null) {
            longStats.setHighValue(MetastoreDirectSqlUtils.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetastoreDirectSqlUtils.extractSqlLong(llow));
        }
        longStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        longStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setLongStats(longStats);
    } else if (type.equals("double") || type.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetastoreDirectSqlUtils.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetastoreDirectSqlUtils.extractSqlDouble(dlow));
        }
        doubleStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        doubleStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDoubleStats(doubleStats);
    } else if (type.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Decimal bounds arrive as strings and are converted to the Thrift decimal form.
        if (dechigh != null) {
            decimalStats.setHighValue(DecimalUtils.createThriftDecimal((String) dechigh));
        }
        if (declow != null) {
            decimalStats.setLowValue(DecimalUtils.createThriftDecimal((String) declow));
        }
        decimalStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        decimalStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDecimalStats(decimalStats);
    } else if (type.equals("date")) {
        // Date columns reuse the long low/high arguments.
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        dateStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDateStats(dateStats);
    } else if (type.equals("timestamp")) {
        // Timestamp columns reuse the long low/high arguments.
        TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
        timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (lhigh != null) {
            timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        timestampStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setTimestampStats(timestampStats);
    }
    // Any other column type: no statistics variant is set on data.
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Aggregations

LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)18 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)11 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)10 StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector)10 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)9 DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector)9 DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector)9 DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector)9 TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector)8 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)7 HashMap (java.util.HashMap)5 ArrayList (java.util.ArrayList)4 Date (org.apache.hadoop.hive.metastore.api.Date)4 Timestamp (org.apache.hadoop.hive.metastore.api.Timestamp)4 Configuration (org.apache.hadoop.conf.Configuration)3 BigDecimal (java.math.BigDecimal)2 Map (java.util.Map)2 NumDistinctValueEstimator (org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator)2 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)2 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)2