Search in sources :

Example 6 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

In class TestCachedStore, the method testAggrStatsRepeatedRead:

// @Test
/**
 * Checks that aggregate partition column statistics served by {@code CachedStore} stay the same
 * when they are requested twice in a row.
 *
 * <p>The test creates a database, a table with one partition column and two partitions, stores
 * the same long column statistics (50 nulls) for column {@code f1} on each partition, and then
 * asks twice for the aggregate over both partitions. Each read is expected to report 100 nulls.
 * Everything that was created is removed through the underlying {@code ObjectStore} afterwards.
 *
 * @throws Exception if any metastore call made by the test fails
 */
public void testAggrStatsRepeatedRead() throws Exception {
    // Build a stand-alone metastore configuration for this test only.
    Configuration conf = MetastoreConf.newMetastoreConf();
    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true);
    // NOTE(review): "-1Kb" presumably lifts the cache memory limit - confirm against MetastoreConf.
    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY, "-1Kb");
    MetaStoreTestUtils.setConfForStandloneMode(conf);
    CachedStore cachedStore = new CachedStore();
    CachedStore.clearSharedCache();
    cachedStore.setConfForTest(conf);
    ObjectStore objectStore = (ObjectStore) cachedStore.getRawStore();

    String dbName = "testTableColStatsOps";
    String tblName = "tbl";
    String colName = "f1";

    // Database and partitioned table.
    Database db = new DatabaseBuilder().setName(dbName).setLocation("some_location").build(conf);
    cachedStore.createDatabase(db);
    List<FieldSchema> cols = new ArrayList<>();
    cols.add(new FieldSchema(colName, "int", null));
    List<FieldSchema> partCols = new ArrayList<>();
    partCols.add(new FieldSchema("col", "int", null));
    StorageDescriptor sd = new StorageDescriptor(cols, null, "input", "output", false, 0, new SerDeInfo("serde", "seriallib", new HashMap<>()), null, null, null);
    Table tbl = new Table(tblName, dbName, null, 0, 0, 0, sd, partCols, new HashMap<>(), null, null, TableType.MANAGED_TABLE.toString());
    tbl.setCatName(DEFAULT_CATALOG_NAME);
    cachedStore.createTable(tbl);

    // Two partitions, with partition values "1" and "2".
    List<String> partVals1 = new ArrayList<>();
    partVals1.add("1");
    List<String> partVals2 = new ArrayList<>();
    partVals2.add("2");
    Partition ptn1 = new Partition(partVals1, dbName, tblName, 0, 0, sd, new HashMap<>());
    ptn1.setCatName(DEFAULT_CATALOG_NAME);
    cachedStore.addPartition(ptn1);
    Partition ptn2 = new Partition(partVals2, dbName, tblName, 0, 0, sd, new HashMap<>());
    ptn2.setCatName(DEFAULT_CATALOG_NAME);
    cachedStore.addPartition(ptn2);

    // Identical long column statistics for both partitions: 50 nulls each.
    ColumnStatistics stats = new ColumnStatistics();
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, dbName, tblName);
    statsDesc.setPartName("col");
    List<ColumnStatisticsObj> colStatObjs = new ArrayList<>();
    ColumnStatisticsData data = new ColumnStatisticsData();
    ColumnStatisticsObj colStats = new ColumnStatisticsObj(colName, "int", data);
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setLowValue(0);
    longStats.setHighValue(100);
    longStats.setNumNulls(50);
    longStats.setNumDVs(30);
    data.setLongStats(longStats);
    colStatObjs.add(colStats);
    stats.setStatsDesc(statsDesc);
    stats.setStatsObj(colStatObjs);
    stats.setEngine(CacheUtils.HIVE_ENGINE);
    // A separate deep copy is handed over for each partition.
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals1, null, -1);
    cachedStore.updatePartitionColumnStatistics(stats.deepCopy(), partVals2, null, -1);

    List<String> colNames = new ArrayList<>();
    colNames.add(colName);
    List<String> aggrPartVals = new ArrayList<>();
    aggrPartVals.add("1");
    aggrPartVals.add("2");

    // First and second read must both report 50 + 50 nulls.
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes a failure
    // message name the right value as "expected".
    AggrStats aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
    Assert.assertEquals(100, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());
    aggrStats = cachedStore.get_aggr_stats_for(DEFAULT_CATALOG_NAME, dbName, tblName, aggrPartVals, colNames, CacheUtils.HIVE_ENGINE);
    Assert.assertEquals(100, aggrStats.getColStats().get(0).getStatsData().getLongStats().getNumNulls());

    // Clean up directly through the raw store: statistics, partitions, table, database.
    objectStore.deletePartitionColumnStatistics(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), Warehouse.makePartName(tbl.getPartitionKeys(), partVals1), partVals1, colName, CacheUtils.HIVE_ENGINE);
    objectStore.deletePartitionColumnStatistics(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), Warehouse.makePartName(tbl.getPartitionKeys(), partVals2), partVals2, colName, CacheUtils.HIVE_ENGINE);
    objectStore.dropPartition(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), partVals1);
    objectStore.dropPartition(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName(), partVals2);
    objectStore.dropTable(DEFAULT_CATALOG_NAME, db.getName(), tbl.getTableName());
    objectStore.dropDatabase(DEFAULT_CATALOG_NAME, db.getName());
    cachedStore.shutdown();
}
Also used : ObjectStore(org.apache.hadoop.hive.metastore.ObjectStore) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)

Example 7 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

In class TestCachedStore, the method createLongStats:

/**
 * Builds a {@code ColumnStatisticsData} that carries long column statistics.
 *
 * @param numNulls value stored as the null count
 * @param numDVs value stored as the number of distinct values
 * @param low low value; left unset on the stats object when {@code null}
 * @param high high value; left unset on the stats object when {@code null}
 * @return a new {@code ColumnStatisticsData} whose long stats hold the given values
 */
private ColumnStatisticsData createLongStats(long numNulls, long numDVs, Long low, Long high) {
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setNumNulls(numNulls);
    longStats.setNumDVs(numDVs);
    // Both bounds are optional: a null argument means the setter is never called.
    if (low != null) {
        longStats.setLowValue(low);
    }
    if (high != null) {
        longStats.setHighValue(high);
    }
    ColumnStatisticsData wrapper = new ColumnStatisticsData();
    wrapper.setLongStats(longStats);
    return wrapper;
}
Also used : LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)

Example 8 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

In class ColumnStatisticsObjTranslator, the method unpackPrimitiveObject:

/**
 * Applies one primitive field of a statistics row to {@code statsObj}.
 *
 * <p>For the {@code "columntype"} field the value is read as a string and, when it names a known
 * type (long, double, string, boolean, binary, decimal, date; case-insensitive), a fresh
 * {@code ColumnStatisticsData} of that variant is installed on {@code statsObj}. An unknown type
 * name leaves {@code statsObj} unchanged. For every other field the call is forwarded to the
 * unpack helper that matches the variant already set on {@code statsObj}.
 *
 * @param oi inspector describing {@code o}
 * @param o the field value; the method does nothing when it is {@code null}
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object that receives the value
 * @throws UnsupportedDoubleException declared by this method; presumably raised by one of the
 *         type-specific unpack helpers (not visible here)
 */
private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
    if (o == null) {
        return;
    }
    if (!fieldName.equals("columntype")) {
        // Dispatch on whichever statistics variant is already set.
        // NOTE(review): relies on the stats data having been installed earlier; a missing one
        // fails here with a NullPointerException.
        ColumnStatisticsData current = statsObj.getStatsData();
        if (current.isSetBooleanStats()) {
            unpackBooleanStats(oi, o, fieldName, statsObj);
        } else if (current.isSetLongStats()) {
            unpackLongStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDoubleStats()) {
            unpackDoubleStats(oi, o, fieldName, statsObj);
        } else if (current.isSetStringStats()) {
            unpackStringStats(oi, o, fieldName, statsObj);
        } else if (current.isSetBinaryStats()) {
            unpackBinaryStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDecimalStats()) {
            unpackDecimalStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDateStats()) {
            unpackDateStats(oi, o, fieldName, statsObj);
        }
        return;
    }
    // The "columntype" field decides which statistics variant the object will carry.
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
    ColumnStatisticsData fresh = new ColumnStatisticsData();
    if (typeName.equalsIgnoreCase("long")) {
        fresh.setLongStats(new LongColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("double")) {
        fresh.setDoubleStats(new DoubleColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("string")) {
        fresh.setStringStats(new StringColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("boolean")) {
        fresh.setBooleanStats(new BooleanColumnStatsData());
    } else if (typeName.equalsIgnoreCase("binary")) {
        fresh.setBinaryStats(new BinaryColumnStatsData());
    } else if (typeName.equalsIgnoreCase("decimal")) {
        fresh.setDecimalStats(new DecimalColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("date")) {
        fresh.setDateStats(new DateColumnStatsDataInspector());
    } else {
        // Unrecognised type name: nothing is installed on statsObj.
        return;
    }
    statsObj.setStatsData(fresh);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 9 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

In class TestCachedStore, the method testTableColStatsOps:

// @Test
/**
 * Writes table-level column statistics through {@code objectStore}, prewarms the
 * {@code CachedStore} from it, and expects the statistics read back through
 * {@code cachedStore} to equal what was written.
 *
 * <p>The table has an int, a string and a boolean column, each with its own statistics
 * variant (long, string and boolean stats respectively).
 *
 * @throws Exception if any store operation used by the test fails
 */
public void testTableColStatsOps() throws Exception {
    final String dbName = "testTableColStatsOps";
    final String tblName = "tbl";
    // The database and the table share the same owner.
    final String owner = "user1";

    // Add a db via ObjectStore and read it back (the returned object is not needed further).
    Database database = createTestDb(dbName, owner);
    objectStore.createDatabase(database);
    objectStore.getDatabase(dbName);

    // Table schema: three data columns plus one string partition column.
    final FieldSchema intCol = new FieldSchema("col1", "int", "integer column");
    final FieldSchema strCol = new FieldSchema("col2", "string", "string column");
    final FieldSchema boolCol = new FieldSchema("col3", "boolean", "boolean column");
    final List<FieldSchema> dataCols = new ArrayList<>();
    dataCols.add(intCol);
    dataCols.add(strCol);
    dataCols.add(boolCol);
    List<FieldSchema> partitionCols = new ArrayList<FieldSchema>();
    partitionCols.add(new FieldSchema("part1", "string", "string partition column"));

    // Add the table via ObjectStore and read it back (the returned object is not needed further).
    Table table = createTestTbl(dbName, tblName, owner, dataCols, partitionCols);
    objectStore.createTable(table);
    objectStore.getTable(dbName, tblName);

    // Long statistics for col1.
    LongColumnStatsDataInspector intColStats = new LongColumnStatsDataInspector();
    intColStats.setLowValue(5);
    intColStats.setHighValue(500);
    intColStats.setNumNulls(10);
    intColStats.setNumDVs(20);
    ColumnStatisticsData intColData = new ColumnStatisticsData();
    intColData.setLongStats(intColStats);

    // String statistics for col2.
    StringColumnStatsDataInspector strColStats = new StringColumnStatsDataInspector();
    strColStats.setMaxColLen(100);
    strColStats.setAvgColLen(45.5);
    strColStats.setNumNulls(5);
    strColStats.setNumDVs(40);
    ColumnStatisticsData strColData = new ColumnStatisticsData();
    strColData.setStringStats(strColStats);

    // Boolean statistics for col3.
    BooleanColumnStatsData boolColStats = new BooleanColumnStatsData();
    boolColStats.setNumTrues(100);
    boolColStats.setNumFalses(30);
    boolColStats.setNumNulls(10);
    ColumnStatisticsData boolColData = new ColumnStatisticsData();
    boolColData.setBooleanStats(boolColStats);

    // Assemble the ColumnStatistics for the table, one entry per column in schema order.
    List<ColumnStatisticsObj> perColumn = new ArrayList<>();
    perColumn.add(new ColumnStatisticsObj(intCol.getName(), intCol.getType(), intColData));
    perColumn.add(new ColumnStatisticsObj(strCol.getName(), strCol.getType(), strColData));
    perColumn.add(new ColumnStatisticsObj(boolCol.getName(), boolCol.getType(), boolColData));
    ColumnStatistics written = new ColumnStatistics();
    written.setStatsDesc(new ColumnStatisticsDesc(true, dbName, tblName));
    written.setStatsObj(perColumn);

    // Save to the metastore DB via ObjectStore.
    objectStore.updateTableColumnStatistics(written);

    // Force a fresh prewarm so the cache is loaded from what was just stored.
    CachedStore.setCachePrewarmedState(false);
    CachedStore.prewarm(objectStore);

    // Read the table stats via CachedStore and compare with what was written.
    List<String> requestedCols = Arrays.asList(intCol.getName(), strCol.getName(), boolCol.getName());
    ColumnStatistics readBack = cachedStore.getTableColumnStatistics(dbName, tblName, requestedCols);
    Assert.assertEquals(written, readBack);

    // Clean up the store and the shared cache.
    objectStore.dropTable(dbName, tblName);
    objectStore.dropDatabase(dbName);
    sharedCache.getDatabaseCache().clear();
    sharedCache.getTableCache().clear();
    sharedCache.getSdCache().clear();
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 10 with LongColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector in project hive by apache.

In class ColumnStatisticsObjTranslator, the method unpackPrimitiveObject:

/**
 * Applies one primitive field of a statistics row to {@code statsObj}.
 *
 * <p>When {@code csf} is {@code ColumnStatsField.COLUMN_STATS_TYPE} the value is read as a string
 * and compared, ignoring case, with the names of the {@code ColumnStatsType} constants LONG,
 * DOUBLE, STRING, BOOLEAN, BINARY, DECIMAL, DATE and TIMESTAMP; on a match a fresh
 * {@code ColumnStatisticsData} of that variant is installed on {@code statsObj}. A value that
 * matches none of them leaves {@code statsObj} unchanged. For every other field the call is
 * forwarded to the unpack helper that matches the variant already set on {@code statsObj}.
 *
 * @param oi inspector describing {@code o}
 * @param o the field value; the method does nothing when it is {@code null}
 * @param csf identifies which statistics field {@code o} belongs to
 * @param statsObj statistics object that receives the value
 * @throws UnsupportedDoubleException declared by this method; presumably raised by one of the
 *         type-specific unpack helpers (not visible here)
 */
private static void unpackPrimitiveObject(ObjectInspector oi, Object o, ColumnStatsField csf, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
    if (o == null) {
        return;
    }
    if (csf != ColumnStatsField.COLUMN_STATS_TYPE) {
        // Dispatch on whichever statistics variant is already set.
        // NOTE(review): relies on the stats data having been installed earlier; a missing one
        // fails here with a NullPointerException.
        ColumnStatisticsData current = statsObj.getStatsData();
        if (current.isSetBooleanStats()) {
            unpackBooleanStats(oi, o, csf, statsObj);
        } else if (current.isSetLongStats()) {
            unpackLongStats(oi, o, csf, statsObj);
        } else if (current.isSetDoubleStats()) {
            unpackDoubleStats(oi, o, csf, statsObj);
        } else if (current.isSetStringStats()) {
            unpackStringStats(oi, o, csf, statsObj);
        } else if (current.isSetBinaryStats()) {
            unpackBinaryStats(oi, o, csf, statsObj);
        } else if (current.isSetDecimalStats()) {
            unpackDecimalStats(oi, o, csf, statsObj);
        } else if (current.isSetDateStats()) {
            unpackDateStats(oi, o, csf, statsObj);
        } else if (current.isSetTimestampStats()) {
            unpackTimestampStats(oi, o, csf, statsObj);
        }
        return;
    }
    // The type field decides which statistics variant the object will carry.
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
    ColumnStatisticsData fresh = new ColumnStatisticsData();
    if (typeName.equalsIgnoreCase(ColumnStatsType.LONG.toString())) {
        fresh.setLongStats(new LongColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.DOUBLE.toString())) {
        fresh.setDoubleStats(new DoubleColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.STRING.toString())) {
        fresh.setStringStats(new StringColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.BOOLEAN.toString())) {
        fresh.setBooleanStats(new BooleanColumnStatsData());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.BINARY.toString())) {
        fresh.setBinaryStats(new BinaryColumnStatsData());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.DECIMAL.toString())) {
        fresh.setDecimalStats(new DecimalColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.DATE.toString())) {
        fresh.setDateStats(new DateColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase(ColumnStatsType.TIMESTAMP.toString())) {
        fresh.setTimestampStats(new TimestampColumnStatsDataInspector());
    } else {
        // Unrecognised type name: nothing is installed on statsObj.
        return;
    }
    statsObj.setStatsData(fresh);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector): 18, ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 11, BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 10, StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector): 10, BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 9, DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector): 9, DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector): 9, DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector): 9, TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector): 8, ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 7, HashMap (java.util.HashMap): 5, ArrayList (java.util.ArrayList): 4, Date (org.apache.hadoop.hive.metastore.api.Date): 4, Timestamp (org.apache.hadoop.hive.metastore.api.Timestamp): 4, Configuration (org.apache.hadoop.conf.Configuration): 3, BigDecimal (java.math.BigDecimal): 2, Map (java.util.Map): 2, NumDistinctValueEstimator (org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator): 2, ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 2, ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 2