Search in sources :

Example 36 with BooleanColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData in the Apache Hive project.

From the class StatObjectConverter, method convertToMTableColumnStatistics.

// JDO
/**
 * Builds the JDO model object ({@link MTableColumnStatistics}) for one table column from the
 * Thrift-level statistics description and statistics object.
 *
 * <p>The database name, table name and last-analyzed value are copied from {@code statsDesc};
 * the column name and type are copied from {@code statsObj}. Exactly one type-specific group of
 * statistics (boolean, long, double, decimal, string, binary or date) is copied, chosen by the
 * first {@code isSet...Stats()} check that succeeds. Within that group every individual value
 * that is not set is passed on as {@code null}. If none of the recognized groups is set, only
 * the descriptive fields are populated.
 *
 * @param table the model table the statistics are attached to
 * @param statsDesc description of the statistics (database, table, last-analyzed)
 * @param statsObj the per-column statistics to convert
 * @return a newly created, populated {@link MTableColumnStatistics}
 * @throws InvalidObjectException if {@code statsObj} or {@code statsDesc} is {@code null}, or
 *         if {@code statsObj} carries no statistics data
 */
public static MTableColumnStatistics convertToMTableColumnStatistics(MTable table, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj) throws NoSuchObjectException, MetaException, InvalidObjectException {
    if (statsObj == null || statsDesc == null) {
        throw new InvalidObjectException("Invalid column stats object");
    }
    // Every branch below dereferences the stats data; reject a missing payload with the declared
    // checked exception rather than letting a NullPointerException escape.
    if (statsObj.getStatsData() == null) {
        throw new InvalidObjectException("Invalid column stats object: stats data is not set");
    }
    MTableColumnStatistics mColStats = new MTableColumnStatistics();
    mColStats.setTable(table);
    mColStats.setDbName(statsDesc.getDbName());
    mColStats.setTableName(statsDesc.getTableName());
    mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
    mColStats.setColName(statsObj.getColName());
    mColStats.setColType(statsObj.getColType());
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats();
        mColStats.setBooleanStats(
            boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null,
            boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null,
            boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
        mColStats.setLongStats(
            longStats.isSetNumNulls() ? longStats.getNumNulls() : null,
            longStats.isSetNumDVs() ? longStats.getNumDVs() : null,
            longStats.isSetBitVectors() ? longStats.getBitVectors() : null,
            longStats.isSetLowValue() ? longStats.getLowValue() : null,
            longStats.isSetHighValue() ? longStats.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
        mColStats.setDoubleStats(
            doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null,
            doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
            doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null,
            doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
            doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();
        // Decimal bounds are handed to the model as strings produced by createJdoDecimalString.
        String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null;
        String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null;
        mColStats.setDecimalStats(
            decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null,
            decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null,
            decimalStats.isSetBitVectors() ? decimalStats.getBitVectors() : null,
            low,
            high);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
        mColStats.setStringStats(
            stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null,
            stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
            stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null,
            stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
            stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats();
        mColStats.setBinaryStats(
            binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null,
            binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
            binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData dateStats = statsObj.getStatsData().getDateStats();
        // Date bounds are passed on as their days-since-epoch value.
        mColStats.setDateStats(
            dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null,
            dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null,
            dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null,
            dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null,
            dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
    }
    return mColStats;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 37 with BooleanColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData in the Apache Hive project.

From the class StatObjectConverter, method fillColumnStatisticsData.

// JAVA
/**
 * Populates {@code data} with the type-specific statistics that match {@code colType}, using
 * raw values (passed as {@code Object}) that are converted through the
 * {@code MetaStoreDirectSql.extractSql*} helpers.
 *
 * <p>The column type is compared case-insensitively. Supported types and the statistics group
 * they map to:
 * <ul>
 *   <li>{@code boolean}: boolean stats</li>
 *   <li>{@code string}, {@code varchar...}, {@code char...}: string stats</li>
 *   <li>{@code binary}: binary stats</li>
 *   <li>{@code bigint}, {@code int}, {@code smallint}, {@code tinyint}, {@code timestamp}:
 *       long stats</li>
 *   <li>{@code double}, {@code float}: double stats</li>
 *   <li>{@code decimal...}: decimal stats</li>
 *   <li>{@code date}: date stats</li>
 * </ul>
 * Any other type leaves {@code data} untouched. Low/high bounds are only set when the
 * corresponding argument is non-null.
 *
 * @param colType column type name; must not be {@code null}
 * @param data the statistics container to fill
 * @param llow low bound for long and date columns
 * @param lhigh high bound for long and date columns
 * @param dlow low bound for double columns
 * @param dhigh high bound for double columns
 * @param declow low bound for decimal columns; cast to {@code String} before conversion
 * @param dechigh high bound for decimal columns; cast to {@code String} before conversion
 * @param nulls number of nulls
 * @param dist number of distinct values
 * @param bitVector bit vectors blob
 * @param avglen average column length (string and binary columns)
 * @param maxlen maximum column length (string and binary columns)
 * @param trues number of true values (boolean columns)
 * @param falses number of false values (boolean columns)
 * @throws MetaException declared by the signature; NOTE(review): presumably raised by the
 *         conversion helpers, confirm against their definitions
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
    // Use a fixed locale: with the JVM default locale (e.g. Turkish) "INT" lower-cases to a
    // dotless-i string that matches none of the type names below.
    final String type = colType.toLowerCase(java.util.Locale.ROOT);
    if (type.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));
        boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues));
        boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (type.equals("string") || type.startsWith("varchar") || type.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        stringStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setStringStats(stringStats);
    } else if (type.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (type.equals("bigint") || type.equals("int") || type.equals("smallint") || type.equals("tinyint") || type.equals("timestamp")) {
        // Timestamp columns share the long statistics representation with the integer types.
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
        }
        longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setLongStats(longStats);
    } else if (type.equals("double") || type.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
        }
        doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDoubleStats(doubleStats);
    } else if (type.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (dechigh != null) {
            decimalStats.setHighValue(createThriftDecimal((String) dechigh));
        }
        if (declow != null) {
            decimalStats.setLowValue(createThriftDecimal((String) declow));
        }
        decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDecimalStats(decimalStats);
    } else if (type.equals("date")) {
        // Date bounds arrive through the long low/high arguments.
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
        }
        dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        dateStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDateStats(dateStats);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Example 38 with BooleanColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData in the Apache Hive project.

From the class TestCachedStore, method testTableColStatsOps.

// @Test
/**
 * Stores table-level column statistics for an int, a string and a boolean column through
 * ObjectStore, prewarms CachedStore from it, and checks that the statistics read back through
 * CachedStore equal the ones that were written.
 */
public void testTableColStatsOps() throws Exception {
    // --- Database, created through ObjectStore ---
    String dbName = "testTableColStatsOps";
    String dbOwner = "user1";
    Database db = createTestDb(dbName, dbOwner);
    objectStore.createDatabase(db);
    db = objectStore.getDatabase(dbName);

    // --- Table with three data columns and one partition column, created through ObjectStore ---
    final String tblName = "tbl";
    final String tblOwner = "user1";
    final FieldSchema intCol = new FieldSchema("col1", "int", "integer column");
    final FieldSchema strCol = new FieldSchema("col2", "string", "string column");
    final FieldSchema boolCol = new FieldSchema("col3", "boolean", "boolean column");
    final List<FieldSchema> columns = new ArrayList<>();
    columns.add(intCol);
    columns.add(strCol);
    columns.add(boolCol);
    FieldSchema partitionCol = new FieldSchema("part1", "string", "string partition column");
    List<FieldSchema> partitionCols = new ArrayList<>();
    partitionCols.add(partitionCol);
    Table tbl = createTestTbl(dbName, tblName, tblOwner, columns, partitionCols);
    objectStore.createTable(tbl);
    tbl = objectStore.getTable(dbName, tblName);

    // --- Column statistics to be written for the table ---
    ColumnStatistics expectedStats = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
    List<ColumnStatisticsObj> statsObjs = new ArrayList<>();

    // col1: long statistics
    long intLow = 5;
    long intHigh = 500;
    long intNulls = 10;
    long intDistinct = 20;
    ColumnStatisticsData intData = new ColumnStatisticsData();
    ColumnStatisticsObj intStatsObj = new ColumnStatisticsObj(intCol.getName(), intCol.getType(), intData);
    LongColumnStatsDataInspector intStats = new LongColumnStatsDataInspector();
    intStats.setLowValue(intLow);
    intStats.setHighValue(intHigh);
    intStats.setNumNulls(intNulls);
    intStats.setNumDVs(intDistinct);
    intData.setLongStats(intStats);
    statsObjs.add(intStatsObj);

    // col2: string statistics
    long strMaxLen = 100;
    double strAvgLen = 45.5;
    long strNulls = 5;
    long strDistinct = 40;
    ColumnStatisticsData strData = new ColumnStatisticsData();
    ColumnStatisticsObj strStatsObj = new ColumnStatisticsObj(strCol.getName(), strCol.getType(), strData);
    StringColumnStatsDataInspector strStats = new StringColumnStatsDataInspector();
    strStats.setMaxColLen(strMaxLen);
    strStats.setAvgColLen(strAvgLen);
    strStats.setNumNulls(strNulls);
    strStats.setNumDVs(strDistinct);
    strData.setStringStats(strStats);
    statsObjs.add(strStatsObj);

    // col3: boolean statistics
    long boolTrues = 100;
    long boolFalses = 30;
    long boolNulls = 10;
    ColumnStatisticsData boolData = new ColumnStatisticsData();
    ColumnStatisticsObj boolStatsObj = new ColumnStatisticsObj(boolCol.getName(), boolCol.getType(), boolData);
    BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
    boolStats.setNumTrues(boolTrues);
    boolStats.setNumFalses(boolFalses);
    boolStats.setNumNulls(boolNulls);
    boolData.setBooleanStats(boolStats);
    statsObjs.add(boolStatsObj);

    expectedStats.setStatsDesc(desc);
    expectedStats.setStatsObj(statsObjs);

    // --- Persist through ObjectStore, then rebuild the cache from it ---
    objectStore.updateTableColumnStatistics(expectedStats);
    CachedStore.setCachePrewarmedState(false);
    CachedStore.prewarm(objectStore);

    // --- Read back through CachedStore and compare ---
    List<String> colNames = Arrays.asList(intCol.getName(), strCol.getName(), boolCol.getName());
    ColumnStatistics actualStats = cachedStore.getTableColumnStatistics(dbName, tblName, colNames);
    Assert.assertEquals(expectedStats, actualStats);

    // --- Clean up persisted objects and cached entries ---
    objectStore.dropTable(dbName, tblName);
    objectStore.dropDatabase(dbName);
    sharedCache.getDatabaseCache().clear();
    sharedCache.getTableCache().clear();
    sharedCache.getSdCache().clear();
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) Table(org.apache.hadoop.hive.metastore.api.Table) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Aggregations

BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)38 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)30 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)26 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)23 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)17 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)15 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)15 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)15 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)13 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)12 ArrayList (java.util.ArrayList)11 DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData)10 Test (org.junit.Test)10 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)9 Table (org.apache.hadoop.hive.metastore.api.Table)9 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)9 StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector)9 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)8 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)8 DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector)8