Search in sources :

Example 46 with ColumnStatisticsData._Fields

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields in project hive by apache.

the class StatObjectConverter method getPartitionColumnStatisticsObj.

public static ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj, boolean enableBitVector) {
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColType(mStatsObj.getColType());
    statsObj.setColName(mStatsObj.getColName());
    String colType = mStatsObj.getColType().toLowerCase();
    ColumnStatisticsData colStatsData = new ColumnStatisticsData();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(mStatsObj.getNumFalses());
        boolStats.setNumTrues(mStatsObj.getNumTrues());
        boolStats.setNumNulls(mStatsObj.getNumNulls());
        colStatsData.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(mStatsObj.getNumNulls());
        stringStats.setAvgColLen(mStatsObj.getAvgColLen());
        stringStats.setMaxColLen(mStatsObj.getMaxColLen());
        stringStats.setNumDVs(mStatsObj.getNumDVs());
        stringStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(mStatsObj.getNumNulls());
        binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
        binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
        colStatsData.setBinaryStats(binaryStats);
    } else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint") || colType.equals("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getLongHighValue() != null) {
            longStats.setHighValue(mStatsObj.getLongHighValue());
        }
        if (mStatsObj.getLongLowValue() != null) {
            longStats.setLowValue(mStatsObj.getLongLowValue());
        }
        longStats.setNumDVs(mStatsObj.getNumDVs());
        longStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDoubleHighValue() != null) {
            doubleStats.setHighValue(mStatsObj.getDoubleHighValue());
        }
        if (mStatsObj.getDoubleLowValue() != null) {
            doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
        }
        doubleStats.setNumDVs(mStatsObj.getNumDVs());
        doubleStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDecimalHighValue() != null) {
            decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
        }
        if (mStatsObj.getDecimalLowValue() != null) {
            decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
        }
        decimalStats.setNumDVs(mStatsObj.getNumDVs());
        decimalStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(mStatsObj.getNumNulls());
        dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
        dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
        dateStats.setNumDVs(mStatsObj.getNumDVs());
        dateStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDateStats(dateStats);
    }
    statsObj.setStatsData(colStatsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Example 47 with ColumnStatisticsData._Fields

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields in project hive by apache.

the class StatObjectConverter method setFieldsIntoOldStats.

/**
 * Set field values in oldStatObj from newStatObj
 * @param oldStatObj
 * @param newStatObj
 */
public static void setFieldsIntoOldStats(ColumnStatisticsObj oldStatObj, ColumnStatisticsObj newStatObj) {
    _Fields typeNew = newStatObj.getStatsData().getSetField();
    _Fields typeOld = oldStatObj.getStatsData().getSetField();
    typeNew = typeNew == typeOld ? typeNew : null;
    switch(typeNew) {
        case BOOLEAN_STATS:
            BooleanColumnStatsData oldBooleanStatsData = oldStatObj.getStatsData().getBooleanStats();
            BooleanColumnStatsData newBooleanStatsData = newStatObj.getStatsData().getBooleanStats();
            if (newBooleanStatsData.isSetNumTrues()) {
                oldBooleanStatsData.setNumTrues(newBooleanStatsData.getNumTrues());
            }
            if (newBooleanStatsData.isSetNumFalses()) {
                oldBooleanStatsData.setNumFalses(newBooleanStatsData.getNumFalses());
            }
            if (newBooleanStatsData.isSetNumNulls()) {
                oldBooleanStatsData.setNumNulls(newBooleanStatsData.getNumNulls());
            }
            if (newBooleanStatsData.isSetBitVectors()) {
                oldBooleanStatsData.setBitVectors(newBooleanStatsData.getBitVectors());
            }
            break;
        case LONG_STATS:
            {
                LongColumnStatsData oldLongStatsData = oldStatObj.getStatsData().getLongStats();
                LongColumnStatsData newLongStatsData = newStatObj.getStatsData().getLongStats();
                if (newLongStatsData.isSetHighValue()) {
                    oldLongStatsData.setHighValue(newLongStatsData.getHighValue());
                }
                if (newLongStatsData.isSetLowValue()) {
                    oldLongStatsData.setLowValue(newLongStatsData.getLowValue());
                }
                if (newLongStatsData.isSetNumNulls()) {
                    oldLongStatsData.setNumNulls(newLongStatsData.getNumNulls());
                }
                if (newLongStatsData.isSetNumDVs()) {
                    oldLongStatsData.setNumDVs(newLongStatsData.getNumDVs());
                }
                if (newLongStatsData.isSetBitVectors()) {
                    oldLongStatsData.setBitVectors(newLongStatsData.getBitVectors());
                }
                break;
            }
        case DOUBLE_STATS:
            {
                DoubleColumnStatsData oldDoubleStatsData = oldStatObj.getStatsData().getDoubleStats();
                DoubleColumnStatsData newDoubleStatsData = newStatObj.getStatsData().getDoubleStats();
                if (newDoubleStatsData.isSetHighValue()) {
                    oldDoubleStatsData.setHighValue(newDoubleStatsData.getHighValue());
                }
                if (newDoubleStatsData.isSetLowValue()) {
                    oldDoubleStatsData.setLowValue(newDoubleStatsData.getLowValue());
                }
                if (newDoubleStatsData.isSetNumNulls()) {
                    oldDoubleStatsData.setNumNulls(newDoubleStatsData.getNumNulls());
                }
                if (newDoubleStatsData.isSetNumDVs()) {
                    oldDoubleStatsData.setNumDVs(newDoubleStatsData.getNumDVs());
                }
                if (newDoubleStatsData.isSetBitVectors()) {
                    oldDoubleStatsData.setBitVectors(newDoubleStatsData.getBitVectors());
                }
                break;
            }
        case STRING_STATS:
            {
                StringColumnStatsData oldStringStatsData = oldStatObj.getStatsData().getStringStats();
                StringColumnStatsData newStringStatsData = newStatObj.getStatsData().getStringStats();
                if (newStringStatsData.isSetMaxColLen()) {
                    oldStringStatsData.setMaxColLen(newStringStatsData.getMaxColLen());
                }
                if (newStringStatsData.isSetAvgColLen()) {
                    oldStringStatsData.setAvgColLen(newStringStatsData.getAvgColLen());
                }
                if (newStringStatsData.isSetNumNulls()) {
                    oldStringStatsData.setNumNulls(newStringStatsData.getNumNulls());
                }
                if (newStringStatsData.isSetNumDVs()) {
                    oldStringStatsData.setNumDVs(newStringStatsData.getNumDVs());
                }
                if (newStringStatsData.isSetBitVectors()) {
                    oldStringStatsData.setBitVectors(newStringStatsData.getBitVectors());
                }
                break;
            }
        case BINARY_STATS:
            BinaryColumnStatsData oldBinaryStatsData = oldStatObj.getStatsData().getBinaryStats();
            BinaryColumnStatsData newBinaryStatsData = newStatObj.getStatsData().getBinaryStats();
            if (newBinaryStatsData.isSetMaxColLen()) {
                oldBinaryStatsData.setMaxColLen(newBinaryStatsData.getMaxColLen());
            }
            if (newBinaryStatsData.isSetAvgColLen()) {
                oldBinaryStatsData.setAvgColLen(newBinaryStatsData.getAvgColLen());
            }
            if (newBinaryStatsData.isSetNumNulls()) {
                oldBinaryStatsData.setNumNulls(newBinaryStatsData.getNumNulls());
            }
            if (newBinaryStatsData.isSetBitVectors()) {
                oldBinaryStatsData.setBitVectors(newBinaryStatsData.getBitVectors());
            }
            break;
        case DECIMAL_STATS:
            {
                DecimalColumnStatsData oldDecimalStatsData = oldStatObj.getStatsData().getDecimalStats();
                DecimalColumnStatsData newDecimalStatsData = newStatObj.getStatsData().getDecimalStats();
                if (newDecimalStatsData.isSetHighValue()) {
                    oldDecimalStatsData.setHighValue(newDecimalStatsData.getHighValue());
                }
                if (newDecimalStatsData.isSetLowValue()) {
                    oldDecimalStatsData.setLowValue(newDecimalStatsData.getLowValue());
                }
                if (newDecimalStatsData.isSetNumNulls()) {
                    oldDecimalStatsData.setNumNulls(newDecimalStatsData.getNumNulls());
                }
                if (newDecimalStatsData.isSetNumDVs()) {
                    oldDecimalStatsData.setNumDVs(newDecimalStatsData.getNumDVs());
                }
                if (newDecimalStatsData.isSetBitVectors()) {
                    oldDecimalStatsData.setBitVectors(newDecimalStatsData.getBitVectors());
                }
                break;
            }
        case DATE_STATS:
            {
                DateColumnStatsData oldDateStatsData = oldStatObj.getStatsData().getDateStats();
                DateColumnStatsData newDateStatsData = newStatObj.getStatsData().getDateStats();
                if (newDateStatsData.isSetHighValue()) {
                    oldDateStatsData.setHighValue(newDateStatsData.getHighValue());
                }
                if (newDateStatsData.isSetLowValue()) {
                    oldDateStatsData.setLowValue(newDateStatsData.getLowValue());
                }
                if (newDateStatsData.isSetNumNulls()) {
                    oldDateStatsData.setNumNulls(newDateStatsData.getNumNulls());
                }
                if (newDateStatsData.isSetNumDVs()) {
                    oldDateStatsData.setNumDVs(newDateStatsData.getNumDVs());
                }
                if (newDateStatsData.isSetBitVectors()) {
                    oldDateStatsData.setBitVectors(newDateStatsData.getBitVectors());
                }
                break;
            }
        default:
            throw new IllegalArgumentException("Unknown stats type: " + typeNew.toString());
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsData._Fields(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 48 with ColumnStatisticsData._Fields

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields in project hive by apache.

the class StatObjectConverter method getTableColumnStatisticsObj.

public static ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj, boolean enableBitVector) {
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColType(mStatsObj.getColType());
    statsObj.setColName(mStatsObj.getColName());
    String colType = mStatsObj.getColType().toLowerCase();
    ColumnStatisticsData colStatsData = new ColumnStatisticsData();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(mStatsObj.getNumFalses());
        boolStats.setNumTrues(mStatsObj.getNumTrues());
        boolStats.setNumNulls(mStatsObj.getNumNulls());
        colStatsData.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(mStatsObj.getNumNulls());
        stringStats.setAvgColLen(mStatsObj.getAvgColLen());
        stringStats.setMaxColLen(mStatsObj.getMaxColLen());
        stringStats.setNumDVs(mStatsObj.getNumDVs());
        stringStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(mStatsObj.getNumNulls());
        binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
        binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
        colStatsData.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(mStatsObj.getNumNulls());
        Long longHighValue = mStatsObj.getLongHighValue();
        if (longHighValue != null) {
            longStats.setHighValue(longHighValue);
        }
        Long longLowValue = mStatsObj.getLongLowValue();
        if (longLowValue != null) {
            longStats.setLowValue(longLowValue);
        }
        longStats.setNumDVs(mStatsObj.getNumDVs());
        longStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(mStatsObj.getNumNulls());
        Double doubleHighValue = mStatsObj.getDoubleHighValue();
        if (doubleHighValue != null) {
            doubleStats.setHighValue(doubleHighValue);
        }
        Double doubleLowValue = mStatsObj.getDoubleLowValue();
        if (doubleLowValue != null) {
            doubleStats.setLowValue(doubleLowValue);
        }
        doubleStats.setNumDVs(mStatsObj.getNumDVs());
        doubleStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(mStatsObj.getNumNulls());
        String decimalHighValue = mStatsObj.getDecimalHighValue();
        if (decimalHighValue != null) {
            decimalStats.setHighValue(createThriftDecimal(decimalHighValue));
        }
        String decimalLowValue = mStatsObj.getDecimalLowValue();
        if (decimalLowValue != null) {
            decimalStats.setLowValue(createThriftDecimal(decimalLowValue));
        }
        decimalStats.setNumDVs(mStatsObj.getNumDVs());
        decimalStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(mStatsObj.getNumNulls());
        Long highValue = mStatsObj.getLongHighValue();
        if (highValue != null) {
            dateStats.setHighValue(new Date(highValue));
        }
        Long lowValue = mStatsObj.getLongLowValue();
        if (lowValue != null) {
            dateStats.setLowValue(new Date(lowValue));
        }
        dateStats.setNumDVs(mStatsObj.getNumDVs());
        dateStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDateStats(dateStats);
    }
    statsObj.setStatsData(colStatsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 49 with ColumnStatisticsData._Fields

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields in project hive by apache.

the class TestHiveMetaStore method testColumnStatistics.

@Test
public void testColumnStatistics() throws Throwable {
    String dbName = "columnstatstestdb";
    String tblName = "tbl";
    String typeName = "Person";
    String tblOwner = "testowner";
    int lastAccessed = 6796;
    try {
        cleanUp(dbName, tblName, typeName);
        Database db = new Database();
        db.setName(dbName);
        client.createDatabase(db);
        createTableForTestFilter(dbName, tblName, tblOwner, lastAccessed, true);
        // Create a ColumnStatistics Obj
        String[] colName = new String[] { "income", "name" };
        double lowValue = 50000.21;
        double highValue = 1200000.4525;
        long numNulls = 3;
        long numDVs = 22;
        double avgColLen = 50.30;
        long maxColLen = 102;
        String[] colType = new String[] { "double", "string" };
        boolean isTblLevel = true;
        String partName = null;
        List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
        ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
        statsDesc.setDbName(dbName);
        statsDesc.setTableName(tblName);
        statsDesc.setIsTblLevel(isTblLevel);
        statsDesc.setPartName(partName);
        ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
        statsObj.setColName(colName[0]);
        statsObj.setColType(colType[0]);
        ColumnStatisticsData statsData = new ColumnStatisticsData();
        DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
        statsData.setDoubleStats(numericStats);
        statsData.getDoubleStats().setHighValue(highValue);
        statsData.getDoubleStats().setLowValue(lowValue);
        statsData.getDoubleStats().setNumDVs(numDVs);
        statsData.getDoubleStats().setNumNulls(numNulls);
        statsObj.setStatsData(statsData);
        statsObjs.add(statsObj);
        statsObj = new ColumnStatisticsObj();
        statsObj.setColName(colName[1]);
        statsObj.setColType(colType[1]);
        statsData = new ColumnStatisticsData();
        StringColumnStatsData stringStats = new StringColumnStatsData();
        statsData.setStringStats(stringStats);
        statsData.getStringStats().setAvgColLen(avgColLen);
        statsData.getStringStats().setMaxColLen(maxColLen);
        statsData.getStringStats().setNumDVs(numDVs);
        statsData.getStringStats().setNumNulls(numNulls);
        statsObj.setStatsData(statsData);
        statsObjs.add(statsObj);
        ColumnStatistics colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        // write stats objs persistently
        client.updateTableColumnStatistics(colStats);
        // retrieve the stats obj that was just written
        ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0])).get(0);
        // compare stats obj to ensure what we get is what we wrote
        assertNotNull(colStats2);
        assertEquals(colStats2.getColName(), colName[0]);
        assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(), lowValue, 0.01);
        assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(), highValue, 0.01);
        assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(), numNulls);
        assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(), numDVs);
        // test delete column stats; if no col name is passed all column stats associated with the
        // table is deleted
        boolean status = client.deleteTableColumnStatistics(dbName, tblName, null);
        assertTrue(status);
        // try to query stats for a column for which stats doesn't exist
        assertTrue(client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[1])).isEmpty());
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        // update table level column stats
        client.updateTableColumnStatistics(colStats);
        // query column stats for column whose stats were updated in the previous call
        colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0])).get(0);
        // partition level column statistics test
        // create a table with multiple partitions
        cleanUp(dbName, tblName, typeName);
        List<List<String>> values = new ArrayList<>();
        values.add(makeVals("2008-07-01 14:13:12", "14"));
        values.add(makeVals("2008-07-01 14:13:12", "15"));
        values.add(makeVals("2008-07-02 14:13:12", "15"));
        values.add(makeVals("2008-07-03 14:13:12", "151"));
        createMultiPartitionTableSchema(dbName, tblName, typeName, values);
        List<String> partitions = client.listPartitionNames(dbName, tblName, (short) -1);
        partName = partitions.get(0);
        isTblLevel = false;
        // create a new columnstatistics desc to represent partition level column stats
        statsDesc = new ColumnStatisticsDesc();
        statsDesc.setDbName(dbName);
        statsDesc.setTableName(tblName);
        statsDesc.setPartName(partName);
        statsDesc.setIsTblLevel(isTblLevel);
        colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        client.updatePartitionColumnStatistics(colStats);
        colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1])).get(partName).get(0);
        // compare stats obj to ensure what we get is what we wrote
        assertNotNull(colStats2);
        assertEquals(colStats.getStatsDesc().getPartName(), partName);
        assertEquals(colStats2.getColName(), colName[1]);
        assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(), maxColLen);
        assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(), avgColLen, 0.01);
        assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(), numNulls);
        assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(), numDVs);
        // test stats deletion at partition level
        client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1]);
        colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[0])).get(partName).get(0);
        // test get stats on a column for which stats doesn't exist
        assertTrue(client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1])).isEmpty());
    } catch (Exception e) {
        System.err.println(StringUtils.stringifyException(e));
        System.err.println("testColumnStatistics() failed.");
        throw e;
    } finally {
        cleanUp(dbName, tblName, typeName);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) SQLException(java.sql.SQLException) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) TException(org.apache.thrift.TException) IOException(java.io.IOException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) List(java.util.List) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 50 with ColumnStatisticsData._Fields

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields in project hive by apache.

the class BooleanColumnStatsAggregator method aggregate.

@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, List<String> partNames, boolean areAllPartsFound) throws MetaException {
    ColumnStatisticsObj statsObj = null;
    String colType = null;
    String colName = null;
    BooleanColumnStatsData aggregateData = null;
    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
        ColumnStatisticsObj cso = csp.getColStatsObj();
        if (statsObj == null) {
            colName = cso.getColName();
            colType = cso.getColType();
            statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField());
        }
        BooleanColumnStatsData newData = cso.getStatsData().getBooleanStats();
        if (aggregateData == null) {
            aggregateData = newData.deepCopy();
        } else {
            aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues());
            aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses());
            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
        }
    }
    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
    columnStatisticsData.setBooleanStats(aggregateData);
    statsObj.setStatsData(columnStatisticsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColStatsObjWithSourceInfo(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Aggregations

ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)108 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)95 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)62 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)56 Test (org.junit.Test)53 ArrayList (java.util.ArrayList)47 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)36 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)34 Table (org.apache.hadoop.hive.metastore.api.Table)33 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)31 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 Partition (org.apache.hadoop.hive.metastore.api.Partition)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)28 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)26 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)24 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)23 HashMap (java.util.HashMap)22 List (java.util.List)19