Search in sources :

Example 91 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class TestHBaseStore method doubleTableStatistics.

/**
 * Round-trips table-level double column statistics through the store and checks that the
 * descriptor and the double-specific values read back match what was written.
 */
@Test
public void doubleTableStatistics() throws Exception {
    // Register the mock table that the statistics will be attached to.
    createMockTable(DOUBLE_TYPE);

    // Assemble the statistics to persist: default table-level descriptor plus the first
    // pre-created double stats object.
    ColumnStatistics toStore = new ColumnStatistics();
    ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
    toStore.setStatsDesc(expectedDesc);
    ColumnStatisticsObj expectedObj = doubleColStatsObjs.get(0);
    DoubleColumnStatsData expected = expectedObj.getStatsData().getDoubleStats();
    toStore.addToStatsObj(expectedObj);

    // Write, then read back only the double column.
    store.updateTableColumnStatistics(toStore);
    ColumnStatistics loaded = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));

    // The descriptor must identify the same table and still be marked table-level.
    ColumnStatisticsDesc loadedDesc = loaded.getStatsDesc();
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), loadedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, loadedDesc.getDbName());
    Assert.assertEquals(TBL, loadedDesc.getTableName());
    Assert.assertTrue(loadedDesc.isIsTblLevel());

    // Exactly one stats object is expected, and its union must carry double stats.
    Assert.assertEquals(1, loaded.getStatsObjSize());
    ColumnStatisticsData loadedData = loaded.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, loadedData.getSetField());

    // Field-by-field comparison; high/low values are compared with a 0.01 tolerance.
    DoubleColumnStatsData actual = loadedData.getDoubleStats();
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue(), 0.01);
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue(), 0.01);
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 92 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class MetaDataFormatUtils method extractColumnValues.

/**
 * Builds one display row for a column: name, type, optional statistics cells, and comment.
 *
 * <p>When {@code isColStatsAvailable} is true the row always contains exactly nine statistics
 * cells between the type and the comment, in this order: low value, high value, number of
 * nulls, number of distinct values, average length, maximum length, number of trues, number of
 * falses, bit vectors. Cells that do not apply to the column's statistics kind are empty
 * strings. If no statistics object is supplied, its statistics data is missing, or the data is
 * of a kind this method does not render, all nine cells are empty so that the row width stays
 * consistent.
 *
 * @param col the column whose name, type and comment are rendered
 * @param isColStatsAvailable whether the statistics cells should be part of the row
 * @param columnStatisticsObj statistics for the column; may be {@code null}
 * @return the row cells as an array
 */
public static String[] extractColumnValues(FieldSchema col, boolean isColStatsAvailable, ColumnStatisticsObj columnStatisticsObj) {
    List<String> ret = new ArrayList<>();
    ret.add(col.getName());
    ret.add(col.getType());
    if (isColStatsAvailable) {
        ColumnStatisticsData csd = columnStatisticsObj == null ? null : columnStatisticsObj.getStatsData();
        // Stays null unless one of the recognized statistics kinds below fills it in.
        List<String> statsCells = null;
        if (csd != null) {
            // @formatter:off
            if (csd.isSetBinaryStats()) {
                BinaryColumnStatsData bcsd = csd.getBinaryStats();
                statsCells = Lists.newArrayList("", "", "" + bcsd.getNumNulls(), "", "" + bcsd.getAvgColLen(), "" + bcsd.getMaxColLen(), "", "", convertToString(bcsd.getBitVectors()));
            } else if (csd.isSetStringStats()) {
                StringColumnStatsData scsd = csd.getStringStats();
                statsCells = Lists.newArrayList("", "", "" + scsd.getNumNulls(), "" + scsd.getNumDVs(), "" + scsd.getAvgColLen(), "" + scsd.getMaxColLen(), "", "", convertToString(scsd.getBitVectors()));
            } else if (csd.isSetBooleanStats()) {
                BooleanColumnStatsData bcsd = csd.getBooleanStats();
                statsCells = Lists.newArrayList("", "", "" + bcsd.getNumNulls(), "", "", "", "" + bcsd.getNumTrues(), "" + bcsd.getNumFalses(), convertToString(bcsd.getBitVectors()));
            } else if (csd.isSetDecimalStats()) {
                DecimalColumnStatsData dcsd = csd.getDecimalStats();
                statsCells = Lists.newArrayList(convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            } else if (csd.isSetDoubleStats()) {
                DoubleColumnStatsData dcsd = csd.getDoubleStats();
                statsCells = Lists.newArrayList("" + dcsd.getLowValue(), "" + dcsd.getHighValue(), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            } else if (csd.isSetLongStats()) {
                LongColumnStatsData lcsd = csd.getLongStats();
                statsCells = Lists.newArrayList("" + lcsd.getLowValue(), "" + lcsd.getHighValue(), "" + lcsd.getNumNulls(), "" + lcsd.getNumDVs(), "", "", "", "", convertToString(lcsd.getBitVectors()));
            } else if (csd.isSetDateStats()) {
                DateColumnStatsData dcsd = csd.getDateStats();
                statsCells = Lists.newArrayList(convertToString(dcsd.getLowValue()), convertToString(dcsd.getHighValue()), "" + dcsd.getNumNulls(), "" + dcsd.getNumDVs(), "", "", "", "", convertToString(dcsd.getBitVectors()));
            }
            // @formatter:on
        }
        if (statsCells == null) {
            // No statistics, or a kind not rendered above: pad with nine blanks so the
            // comment cell does not slide into a statistics column.
            statsCells = Lists.newArrayList("", "", "", "", "", "", "", "", "");
        }
        ret.addAll(statsCells);
    }
    ret.add(getComment(col));
    return ret.toArray(new String[0]);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 93 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class ColumnStatisticsObjTranslator method unpackPrimitiveObject.

/**
 * Applies one primitive field of a statistics row to {@code statsObj}.
 *
 * <p>A {@code null} value is ignored. The field named {@code "columntype"} selects which kind of
 * statistics container is attached to {@code statsObj}; a type name that matches none of the
 * known kinds leaves {@code statsObj} untouched. Every other field is forwarded to the unpack
 * routine matching the statistics kind already set on {@code statsObj}.
 *
 * @param oi inspector describing {@code o}
 * @param o the field value; may be {@code null}
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object being populated
 * @throws UnsupportedDoubleException declared for the type-specific unpack routines this
 *         method delegates to
 */
private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException {
    if (o == null) {
        return;
    }

    if (!fieldName.equals("columntype")) {
        // Dispatch on whichever statistics kind an earlier "columntype" field selected.
        ColumnStatisticsData current = statsObj.getStatsData();
        if (current.isSetBooleanStats()) {
            unpackBooleanStats(oi, o, fieldName, statsObj);
        } else if (current.isSetLongStats()) {
            unpackLongStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDoubleStats()) {
            unpackDoubleStats(oi, o, fieldName, statsObj);
        } else if (current.isSetStringStats()) {
            unpackStringStats(oi, o, fieldName, statsObj);
        } else if (current.isSetBinaryStats()) {
            unpackBinaryStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDecimalStats()) {
            unpackDecimalStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDateStats()) {
            unpackDateStats(oi, o, fieldName, statsObj);
        }
        return;
    }

    // The "columntype" field carries the type name as a string.
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);

    ColumnStatisticsData freshData = new ColumnStatisticsData();
    boolean recognized = true;
    if (typeName.equalsIgnoreCase("long")) {
        freshData.setLongStats(new LongColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("double")) {
        freshData.setDoubleStats(new DoubleColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("string")) {
        freshData.setStringStats(new StringColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("boolean")) {
        freshData.setBooleanStats(new BooleanColumnStatsData());
    } else if (typeName.equalsIgnoreCase("binary")) {
        freshData.setBinaryStats(new BinaryColumnStatsData());
    } else if (typeName.equalsIgnoreCase("decimal")) {
        freshData.setDecimalStats(new DecimalColumnStatsDataInspector());
    } else if (typeName.equalsIgnoreCase("date")) {
        freshData.setDateStats(new DateColumnStatsDataInspector());
    } else {
        recognized = false;
    }

    // Only attach the container when the type name was one of the known kinds.
    if (recognized) {
        statsObj.setStatsData(freshData);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 94 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class BinaryColumnStatsAggregator method aggregate.

/**
 * Merges binary column statistics from several sources into a single statistics object.
 *
 * <p>The column name, column type and statistics kind of the result are taken from the first
 * entry. The values are combined as follows: maximum column length is the largest seen,
 * average column length is likewise the largest seen (not a weighted mean), and null counts
 * are summed. {@code partNames} and {@code areAllPartsFound} are not consulted by this
 * implementation.
 *
 * @param colStatsWithSourceInfo per-source statistics to merge; must contain at least one entry
 * @param partNames not used here
 * @param areAllPartsFound not used here
 * @return a new statistics object holding the merged binary statistics
 * @throws IllegalArgumentException if {@code colStatsWithSourceInfo} is {@code null} or empty
 * @throws MetaException declared by the overridden method; not thrown directly by this code
 */
@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, List<String> partNames, boolean areAllPartsFound) throws MetaException {
    // Without at least one entry there is no column name/type to build a result from; fail
    // with a descriptive error instead of a NullPointerException further down.
    if (colStatsWithSourceInfo == null || colStatsWithSourceInfo.isEmpty()) {
        throw new IllegalArgumentException("Column statistics list must not be empty when aggregating");
    }
    ColumnStatisticsObj statsObj = null;
    BinaryColumnStatsData aggregateData = null;
    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
        ColumnStatisticsObj cso = csp.getColStatsObj();
        if (statsObj == null) {
            // The first entry determines the identity of the result.
            statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(cso.getColName(), cso.getColType(), cso.getStatsData().getSetField());
        }
        BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
        if (aggregateData == null) {
            // Copy so that merging never mutates the caller's first statistics object.
            aggregateData = newData.deepCopy();
        } else {
            aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
            aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
        }
    }
    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
    columnStatisticsData.setBinaryStats(aggregateData);
    statsObj.setStatsData(columnStatisticsData);
    return statsObj;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColStatsObjWithSourceInfo(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 95 with ColumnStatisticsData

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsData in project hive by apache.

the class ColumnStatsAggregatorFactory method newColumnStaticsObj.

/**
 * Creates a statistics object for the given column with an empty statistics container of the
 * requested kind already attached.
 *
 * @param colName column name to set on the result
 * @param colType column type to set on the result
 * @param type which statistics kind the attached container should hold
 * @return the newly created statistics object
 * @throws RuntimeException if {@code type} is not one of the kinds handled here
 */
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
    // Build the container first; an unsupported kind aborts before anything else is created.
    ColumnStatisticsData container = new ColumnStatisticsData();
    switch(type) {
        case BINARY_STATS:
            container.setBinaryStats(new BinaryColumnStatsData());
            break;
        case BOOLEAN_STATS:
            container.setBooleanStats(new BooleanColumnStatsData());
            break;
        case DATE_STATS:
            container.setDateStats(new DateColumnStatsDataInspector());
            break;
        case DECIMAL_STATS:
            container.setDecimalStats(new DecimalColumnStatsDataInspector());
            break;
        case DOUBLE_STATS:
            container.setDoubleStats(new DoubleColumnStatsDataInspector());
            break;
        case LONG_STATS:
            container.setLongStats(new LongColumnStatsDataInspector());
            break;
        case STRING_STATS:
            container.setStringStats(new StringColumnStatsDataInspector());
            break;
        default:
            throw new RuntimeException("Woh, bad.  Unknown stats type!");
    }

    ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(colName);
    result.setColType(colType);
    result.setStatsData(container);
    return result;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)108 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)95 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)62 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)56 Test (org.junit.Test)53 ArrayList (java.util.ArrayList)47 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)34 Table (org.apache.hadoop.hive.metastore.api.Table)33 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)32 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)31 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 Partition (org.apache.hadoop.hive.metastore.api.Partition)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22 HashMap (java.util.HashMap)20 List (java.util.List)18