Search in sources :

Example 6 with StringColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.

In class ColumnStatsMergerFactory, method newColumnStaticsObj.

/**
 * Creates a {@code ColumnStatisticsObj} for a column, carrying a freshly constructed (empty)
 * statistics structure of the requested kind.
 *
 * @param colName column name to record on the returned object
 * @param colType column type string to record on the returned object
 * @param type    selects which statistics structure is created and attached
 * @return a new object holding the name, the type and the new statistics data
 * @throws RuntimeException if {@code type} is not one of the six kinds handled here
 */
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
    // Build the statistics payload first; an unsupported kind aborts before anything else is made.
    ColumnStatisticsData payload = new ColumnStatisticsData();
    switch(type) {
        case BOOLEAN_STATS:
            payload.setBooleanStats(new BooleanColumnStatsData());
            break;
        case LONG_STATS:
            payload.setLongStats(new LongColumnStatsData());
            break;
        case DOUBLE_STATS:
            payload.setDoubleStats(new DoubleColumnStatsData());
            break;
        case STRING_STATS:
            payload.setStringStats(new StringColumnStatsData());
            break;
        case BINARY_STATS:
            payload.setBinaryStats(new BinaryColumnStatsData());
            break;
        case DECIMAL_STATS:
            payload.setDecimalStats(new DecimalColumnStatsData());
            break;
        default:
            throw new RuntimeException("Woh, bad.  Unknown stats type!");
    }
    // Wrap the payload together with the column identity.
    ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(colName);
    result.setColType(colType);
    result.setStatsData(payload);
    return result;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 7 with StringColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.

In class ColumnStatsTask, method unpackPrimitiveObject.

/**
 * Processes one named field of a statistics row.
 *
 * <p>The {@code "columntype"} field decides which kind of statistics structure is attached to
 * {@code statsObj}; every other field is forwarded to the unpack method matching the kind that
 * is already attached.
 *
 * @param oi        inspector for {@code o}; cast to {@code StringObjectInspector} when the field
 *                  is {@code "columntype"}
 * @param o         the field value; {@code null} values are ignored
 * @param fieldName name of the field being processed
 * @param statsObj  statistics object that is updated in place
 */
private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) {
    // Nothing to unpack for a missing value.
    if (o == null) {
        return;
    }
    if (!fieldName.equals("columntype")) {
        // A regular stats field: dispatch on whichever stats kind was attached earlier.
        ColumnStatisticsData attached = statsObj.getStatsData();
        if (attached.isSetBooleanStats()) {
            unpackBooleanStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetLongStats()) {
            unpackLongStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetDoubleStats()) {
            unpackDoubleStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetStringStats()) {
            unpackStringStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetBinaryStats()) {
            unpackBinaryStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetDecimalStats()) {
            unpackDecimalStats(oi, o, fieldName, statsObj);
        } else if (attached.isSetDateStats()) {
            unpackDateStats(oi, o, fieldName, statsObj);
        }
        return;
    }
    // The "columntype" field: read the type name and attach an empty structure of that kind.
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
    ColumnStatisticsData fresh = new ColumnStatisticsData();
    boolean recognized = true;
    if (typeName.equalsIgnoreCase("long")) {
        fresh.setLongStats(new LongColumnStatsData());
    } else if (typeName.equalsIgnoreCase("double")) {
        fresh.setDoubleStats(new DoubleColumnStatsData());
    } else if (typeName.equalsIgnoreCase("string")) {
        fresh.setStringStats(new StringColumnStatsData());
    } else if (typeName.equalsIgnoreCase("boolean")) {
        fresh.setBooleanStats(new BooleanColumnStatsData());
    } else if (typeName.equalsIgnoreCase("binary")) {
        fresh.setBinaryStats(new BinaryColumnStatsData());
    } else if (typeName.equalsIgnoreCase("decimal")) {
        fresh.setDecimalStats(new DecimalColumnStatsData());
    } else if (typeName.equalsIgnoreCase("date")) {
        fresh.setDateStats(new DateColumnStatsData());
    } else {
        // Unknown type name: statsObj is deliberately left untouched.
        recognized = false;
    }
    if (recognized) {
        statsObj.setStatsData(fresh);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 8 with StringColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.

In class TestHBaseStoreBitVector, method stringPartitionStatistics.

/**
 * Writes partition-level statistics for a string column to the store, reads them back, and
 * checks that the descriptor and every string statistic match what was written.
 */
@Test
public void stringPartitionStatistics() throws Exception {
    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
    // NOTE(review): an earlier comment here said the table must NOT be created before statistics
    // are set because of how the mock works, yet the table is created above -- confirm intent.
    ColumnStatistics toWrite = new ColumnStatistics();
    // Partition-level descriptor for partition {PART_KEY, STRING_VAL}.
    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
    toWrite.setStatsDesc(desc);
    // Use the first of the pre-created string stats objects as the expected data.
    ColumnStatisticsObj expectedObj = stringColStatsObjs.get(0);
    StringColumnStatsData expected = expectedObj.getStatsData().getStringStats();
    toWrite.addToStatsObj(expectedObj);

    // Write to the store.
    List<String> partitionValues = new ArrayList<String>();
    partitionValues.add(STRING_VAL);
    store.updatePartitionColumnStatistics(toWrite, partitionValues);

    // Read back from the store.
    List<String> partitionNames = new ArrayList<String>();
    partitionNames.add(desc.getPartName());
    List<String> columnNames = new ArrayList<String>();
    columnNames.add(expectedObj.getColName());
    List<ColumnStatistics> fetched = store.getPartitionColumnStatistics(DB, TBL, partitionNames, columnNames);

    // Descriptor checks.
    Assert.assertEquals(1, fetched.size());
    ColumnStatistics fetchedStats = fetched.get(0);
    ColumnStatisticsDesc fetchedDesc = fetchedStats.getStatsDesc();
    Assert.assertEquals(desc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, fetchedDesc.getDbName());
    Assert.assertEquals(TBL, fetchedDesc.getTableName());
    Assert.assertFalse(fetchedDesc.isIsTblLevel());

    // Exactly one stats object, holding string statistics.
    Assert.assertEquals(1, fetchedStats.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetchedStats.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, fetchedData.getSetField());

    // Field-by-field comparison of the string statistics.
    StringColumnStatsData actual = fetchedData.getStringStats();
    Assert.assertEquals(expected.getMaxColLen(), actual.getMaxColLen());
    Assert.assertEquals(expected.getAvgColLen(), actual.getAvgColLen(), 0.01);
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 9 with StringColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.

In class ColumnStatsUpdateTask, method constructColumnStatsFromInput.

/**
 * Builds the {@code ColumnStatistics} described by the current work item.
 *
 * <p>The column type chooses the statistics structure; each entry of {@code work.getMapProp()}
 * is then parsed into the field of that structure named by the entry's key. Accepted keys:
 * <ul>
 *   <li>long / tinyint / smallint / int / bigint: numNulls, numDVs, lowValue, highValue</li>
 *   <li>double / float: numNulls, numDVs, lowValue, highValue</li>
 *   <li>string / char(x) / varchar(x): numNulls, numDVs, avgColLen, maxColLen</li>
 *   <li>boolean: numNulls, numTrues, numFalses</li>
 *   <li>binary: numNulls, avgColLen, maxColLen</li>
 *   <li>decimal(a,b): numNulls, numDVs, lowValue, highValue</li>
 *   <li>date / timestamp: numNulls, numDVs, lowValue, highValue (stored as date statistics)</li>
 * </ul>
 *
 * <p>Type-name matching is case-insensitive and independent of the JVM default locale.
 *
 * @return statistics for the single column named in the work item, at table level when no
 *         partition name is set and at partition level otherwise
 * @throws SemanticException if the column type is not supported or a key is not valid for it
 * @throws MetaException declared by the signature; not thrown directly by the code shown here
 */
private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
    String dbName = SessionState.get().getCurrentDatabase();
    ColumnStatsDesc desc = work.getColStats();
    String tableName = desc.getTableName();
    String partName = work.getPartName();
    List<String> colName = desc.getColName();
    List<String> colType = desc.getColType();
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    // grammar prohibits more than 1 column so we are guaranteed to have only 1
    // element in these lists.
    statsObj.setColName(colName.get(0));
    statsObj.setColType(colType.get(0));
    ColumnStatisticsData statsData = new ColumnStatisticsData();
    String columnType = colType.get(0);
    if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint")) {
        // Integral types share the long statistics structure.
        LongColumnStatsData longStats = new LongColumnStatsData();
        longStats.setNumNullsIsSet(false);
        longStats.setNumDVsIsSet(false);
        longStats.setLowValueIsSet(false);
        longStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                longStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                longStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                longStats.setLowValue(Long.parseLong(value));
            } else if (fName.equals("highValue")) {
                longStats.setHighValue(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setLongStats(longStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
        // Floating-point types share the double statistics structure.
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        doubleStats.setNumNullsIsSet(false);
        doubleStats.setNumDVsIsSet(false);
        doubleStats.setLowValueIsSet(false);
        doubleStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                doubleStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                doubleStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                doubleStats.setLowValue(Double.parseDouble(value));
            } else if (fName.equals("highValue")) {
                doubleStats.setHighValue(Double.parseDouble(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDoubleStats(doubleStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("string") || hasTypePrefix(columnType, "char") || hasTypePrefix(columnType, "varchar")) {
        // char(x), varchar(x) types carry a length suffix, hence the prefix match.
        StringColumnStatsData stringStats = new StringColumnStatsData();
        stringStats.setMaxColLenIsSet(false);
        stringStats.setAvgColLenIsSet(false);
        stringStats.setNumNullsIsSet(false);
        stringStats.setNumDVsIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                stringStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                stringStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                stringStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                stringStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setStringStats(stringStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("boolean")) {
        BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
        booleanStats.setNumNullsIsSet(false);
        booleanStats.setNumTruesIsSet(false);
        booleanStats.setNumFalsesIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                booleanStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numTrues")) {
                booleanStats.setNumTrues(Long.parseLong(value));
            } else if (fName.equals("numFalses")) {
                booleanStats.setNumFalses(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBooleanStats(booleanStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNullsIsSet(false);
        binaryStats.setAvgColLenIsSet(false);
        binaryStats.setMaxColLenIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                binaryStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("avgColLen")) {
                binaryStats.setAvgColLen(Double.parseDouble(value));
            } else if (fName.equals("maxColLen")) {
                binaryStats.setMaxColLen(Long.parseLong(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setBinaryStats(binaryStats);
        statsObj.setStatsData(statsData);
    } else if (hasTypePrefix(columnType, "decimal")) {
        // decimal(a,b) type carries precision/scale, hence the prefix match.
        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
        decimalStats.setNumNullsIsSet(false);
        decimalStats.setNumDVsIsSet(false);
        decimalStats.setLowValueIsSet(false);
        decimalStats.setHighValueIsSet(false);
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                decimalStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                decimalStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                // Stored as the unscaled value's bytes plus the scale.
                BigDecimal d = new BigDecimal(value);
                decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else if (fName.equals("highValue")) {
                BigDecimal d = new BigDecimal(value);
                decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDecimalStats(decimalStats);
        statsObj.setStatsData(statsData);
    } else if (columnType.equalsIgnoreCase("date") || columnType.equalsIgnoreCase("timestamp")) {
        // Timestamp columns are recorded with the date statistics structure as well.
        DateColumnStatsData dateStats = new DateColumnStatsData();
        Map<String, String> mapProp = work.getMapProp();
        for (Entry<String, String> entry : mapProp.entrySet()) {
            String fName = entry.getKey();
            String value = entry.getValue();
            if (fName.equals("numNulls")) {
                dateStats.setNumNulls(Long.parseLong(value));
            } else if (fName.equals("numDVs")) {
                dateStats.setNumDVs(Long.parseLong(value));
            } else if (fName.equals("lowValue")) {
                // Date high/low value is stored as long in stats DB, but allow users to set high/low
                // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
                dateStats.setLowValue(readDateValue(value));
            } else if (fName.equals("highValue")) {
                dateStats.setHighValue(readDateValue(value));
            } else {
                throw new SemanticException("Unknown stat");
            }
        }
        statsData.setDateStats(dateStats);
        statsObj.setStatsData(statsData);
    } else {
        throw new SemanticException("Unsupported type");
    }
    String[] names = Utilities.getDbTableName(dbName, tableName);
    // A missing partition name means the statistics are table-level.
    ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, partName == null);
    ColumnStatistics colStat = new ColumnStatistics();
    colStat.setStatsDesc(statsDesc);
    colStat.addToStatsObj(statsObj);
    return colStat;
}

/**
 * Tells whether {@code typeName} begins with {@code prefix}, ignoring case.
 *
 * <p>Uses {@code regionMatches} rather than {@code toLowerCase().startsWith(...)} so the result
 * does not depend on the default locale (e.g. "DECIMAL" lower-cases to "dec\u0131mal" under a
 * Turkish locale and would otherwise be rejected).
 */
private static boolean hasTypePrefix(String typeName, String prefix) {
    return typeName.regionMatches(true, 0, prefix, 0, prefix.length());
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatsDesc(org.apache.hadoop.hive.ql.plan.ColumnStatsDesc) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) BigDecimal(java.math.BigDecimal) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Entry(java.util.Map.Entry) BigDecimal(java.math.BigDecimal) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException)

Example 10 with StringColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.StringColumnStatsData in the Apache Hive project.

In class TestHiveMetaStore, method testColumnStatistics.

/**
 * Exercises column statistics through the metastore client: writes and reads back table-level
 * statistics, deletes them, then repeats the write/read/delete cycle at partition level on a
 * freshly created multi-partition table. Any exception is logged to stderr and rethrown; the
 * test objects are cleaned up in all cases.
 */
public void testColumnStatistics() throws Throwable {
    String dbName = "columnstatstestdb";
    String tblName = "tbl";
    String typeName = "Person";
    String tblOwner = "testowner";
    int lastAccessed = 6796;
    try {
        cleanUp(dbName, tblName, typeName);
        Database database = new Database();
        database.setName(dbName);
        client.createDatabase(database);
        createTableForTestFilter(dbName, tblName, tblOwner, lastAccessed, true);

        // Fixture values: column 0 gets double statistics, column 1 gets string statistics.
        String[] columnNames = new String[] { "income", "name" };
        String[] columnTypes = new String[] { "double", "string" };
        double lowValue = 50000.21;
        double highValue = 1200000.4525;
        long numNulls = 3;
        long numDVs = 22;
        double avgColLen = 50.30;
        long maxColLen = 102;

        // Table-level descriptor (no partition name).
        boolean tableLevel = true;
        String partName = null;
        ColumnStatisticsDesc tableDesc = new ColumnStatisticsDesc();
        tableDesc.setDbName(dbName);
        tableDesc.setTableName(tblName);
        tableDesc.setIsTblLevel(tableLevel);
        tableDesc.setPartName(partName);

        List<ColumnStatisticsObj> statsObjs = new ArrayList<ColumnStatisticsObj>();

        // Double statistics for the first column.
        ColumnStatisticsObj incomeObj = new ColumnStatisticsObj();
        incomeObj.setColName(columnNames[0]);
        incomeObj.setColType(columnTypes[0]);
        ColumnStatisticsData incomeData = new ColumnStatisticsData();
        incomeData.setDoubleStats(new DoubleColumnStatsData());
        DoubleColumnStatsData incomeStats = incomeData.getDoubleStats();
        incomeStats.setHighValue(highValue);
        incomeStats.setLowValue(lowValue);
        incomeStats.setNumDVs(numDVs);
        incomeStats.setNumNulls(numNulls);
        incomeObj.setStatsData(incomeData);
        statsObjs.add(incomeObj);

        // String statistics for the second column.
        ColumnStatisticsObj nameObj = new ColumnStatisticsObj();
        nameObj.setColName(columnNames[1]);
        nameObj.setColType(columnTypes[1]);
        ColumnStatisticsData nameData = new ColumnStatisticsData();
        nameData.setStringStats(new StringColumnStatsData());
        StringColumnStatsData nameStats = nameData.getStringStats();
        nameStats.setAvgColLen(avgColLen);
        nameStats.setMaxColLen(maxColLen);
        nameStats.setNumDVs(numDVs);
        nameStats.setNumNulls(numNulls);
        nameObj.setStatsData(nameData);
        statsObjs.add(nameObj);

        ColumnStatistics tableStats = new ColumnStatistics();
        tableStats.setStatsDesc(tableDesc);
        tableStats.setStatsObj(statsObjs);
        // Persist, then fetch the first column's statistics back.
        client.updateTableColumnStatistics(tableStats);
        ColumnStatisticsObj fetched = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(columnNames[0])).get(0);
        // What comes back must equal what was written.
        assertNotNull(fetched);
        assertEquals(fetched.getColName(), columnNames[0]);
        assertEquals(fetched.getStatsData().getDoubleStats().getLowValue(), lowValue);
        assertEquals(fetched.getStatsData().getDoubleStats().getHighValue(), highValue);
        assertEquals(fetched.getStatsData().getDoubleStats().getNumNulls(), numNulls);
        assertEquals(fetched.getStatsData().getDoubleStats().getNumDVs(), numDVs);

        // Deleting with a null column name removes all column statistics of the table.
        boolean deleted = client.deleteTableColumnStatistics(dbName, tblName, null);
        assertTrue(deleted);
        // Statistics for a column that no longer has any must come back empty.
        assertTrue(client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(columnNames[1])).isEmpty());

        // Write the table-level statistics again and fetch them once more.
        tableStats.setStatsDesc(tableDesc);
        tableStats.setStatsObj(statsObjs);
        client.updateTableColumnStatistics(tableStats);
        fetched = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(columnNames[0])).get(0);

        // Partition-level part: recreate the table with several partitions.
        cleanUp(dbName, tblName, typeName);
        List<List<String>> partitionValues = new ArrayList<List<String>>();
        partitionValues.add(makeVals("2008-07-01 14:13:12", "14"));
        partitionValues.add(makeVals("2008-07-01 14:13:12", "15"));
        partitionValues.add(makeVals("2008-07-02 14:13:12", "15"));
        partitionValues.add(makeVals("2008-07-03 14:13:12", "151"));
        createMultiPartitionTableSchema(dbName, tblName, typeName, partitionValues);
        List<String> partitionNames = client.listPartitionNames(dbName, tblName, (short) -1);
        partName = partitionNames.get(0);
        tableLevel = false;

        // Descriptor for partition-level statistics on the first partition.
        ColumnStatisticsDesc partDesc = new ColumnStatisticsDesc();
        partDesc.setDbName(dbName);
        partDesc.setTableName(tblName);
        partDesc.setPartName(partName);
        partDesc.setIsTblLevel(tableLevel);
        ColumnStatistics partStats = new ColumnStatistics();
        partStats.setStatsDesc(partDesc);
        partStats.setStatsObj(statsObjs);
        client.updatePartitionColumnStatistics(partStats);
        fetched = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(columnNames[1])).get(partName).get(0);
        // What comes back must equal what was written.
        assertNotNull(fetched);
        assertEquals(partStats.getStatsDesc().getPartName(), partName);
        assertEquals(fetched.getColName(), columnNames[1]);
        assertEquals(fetched.getStatsData().getStringStats().getMaxColLen(), maxColLen);
        assertEquals(fetched.getStatsData().getStringStats().getAvgColLen(), avgColLen);
        assertEquals(fetched.getStatsData().getStringStats().getNumNulls(), numNulls);
        assertEquals(fetched.getStatsData().getStringStats().getNumDVs(), numDVs);

        // Delete the second column's statistics for this partition; the first column's remain.
        client.deletePartitionColumnStatistics(dbName, tblName, partName, columnNames[1]);
        fetched = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(columnNames[0])).get(partName).get(0);
        // The deleted column must now yield an empty result.
        assertTrue(client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(columnNames[1])).isEmpty());
    } catch (Exception e) {
        System.err.println(StringUtils.stringifyException(e));
        System.err.println("testColumnStatistics() failed.");
        throw e;
    } finally {
        cleanUp(dbName, tblName, typeName);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) SQLException(java.sql.SQLException) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) TException(org.apache.thrift.TException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) List(java.util.List) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Aggregations

StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)30 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)24 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)23 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)17 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)17 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)16 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)15 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)15 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)13 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)12 ArrayList (java.util.ArrayList)10 DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData)10 Test (org.junit.Test)10 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)5 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)5 Table (org.apache.hadoop.hive.metastore.api.Table)5 List (java.util.List)4 Date (org.apache.hadoop.hive.metastore.api.Date)4 Decimal (org.apache.hadoop.hive.metastore.api.Decimal)4 Partition (org.apache.hadoop.hive.metastore.api.Partition)4