Search in sources:

Example 6 with DecimalColumnStatsData

use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.

the class DecimalColumnStatsMerger method merge.

/**
 * Folds the decimal column statistics of {@code newColStats} into {@code aggregateColStats}.
 *
 * <p>After the call the aggregate holds: the smaller of the two low values, the larger of the
 * two high values, the sum of the null counts, and a merged number of distinct values (NDV).
 * A bound that is {@code null} on one side is taken from the other side.
 *
 * @param aggregateColStats statistics object that is updated in place with the merged result
 * @param newColStats statistics object whose decimal stats are merged into the aggregate
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
    DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
    DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();

    // Lower bound: keep the SMALLER of the two values. The previous code reused the
    // "greater than" selection of the upper bound here and therefore kept the larger one.
    // NOTE(review): ordering is whatever Decimal.compareTo defines - confirm it is numeric.
    Decimal aggregateLow = aggregateData.getLowValue();
    Decimal newLow = newData.getLowValue();
    Decimal lowValue;
    if (aggregateLow == null) {
        lowValue = newLow;
    } else if (newLow == null) {
        // Avoid calling compareTo with a null argument; the aggregate bound stands.
        lowValue = aggregateLow;
    } else {
        lowValue = aggregateLow.compareTo(newLow) <= 0 ? aggregateLow : newLow;
    }
    aggregateData.setLowValue(lowValue);

    // Upper bound: keep the LARGER of the two values, tolerating a null on either side.
    Decimal aggregateHigh = aggregateData.getHighValue();
    Decimal newHigh = newData.getHighValue();
    Decimal highValue;
    if (aggregateHigh == null) {
        highValue = newHigh;
    } else if (newHigh == null) {
        highValue = aggregateHigh;
    } else {
        highValue = aggregateHigh.compareTo(newHigh) > 0 ? aggregateHigh : newHigh;
    }
    aggregateData.setHighValue(highValue);

    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());

    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
        // No estimator or no bit vectors on the new stats: fall back to the larger NDV.
        aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
    } else {
        // Merge both bit vectors into the estimator and store its estimate and serialized form.
        ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), ndvEstimator.getnumBitVectors()));
        ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors()));
        long ndv = ndvEstimator.estimateNumDistinctValues();
        LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
        aggregateData.setNumDVs(ndv);
        aggregateData.setBitVectors(ndvEstimator.serialize().toString());
    }
}
Also used : DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) NumDistinctValueEstimator(org.apache.hadoop.hive.metastore.NumDistinctValueEstimator)

Example 7 with DecimalColumnStatsData

use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.

the class ColumnStatsTask method unpackPrimitiveObject.

/**
 * Applies a single primitive field value to {@code statsObj}.
 *
 * <p>The {@code "columntype"} field creates an empty, type-specific stats container on
 * {@code statsObj}; every other field is forwarded to the unpack method that matches the
 * container already set. A {@code null} value is ignored, and a type name that matches none
 * of the known names leaves {@code statsObj} untouched.
 *
 * @param oi object inspector used to read {@code o}
 * @param o field value; nothing happens when it is {@code null}
 * @param fieldName name of the field being unpacked
 * @param statsObj statistics object that receives the result
 */
private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) {
    if (o == null) {
        return;
    }

    if (!fieldName.equals("columntype")) {
        // Regular stats field: dispatch on whichever stats variant is set on the object.
        ColumnStatisticsData current = statsObj.getStatsData();
        if (current.isSetBooleanStats()) {
            unpackBooleanStats(oi, o, fieldName, statsObj);
        } else if (current.isSetLongStats()) {
            unpackLongStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDoubleStats()) {
            unpackDoubleStats(oi, o, fieldName, statsObj);
        } else if (current.isSetStringStats()) {
            unpackStringStats(oi, o, fieldName, statsObj);
        } else if (current.isSetBinaryStats()) {
            unpackBinaryStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDecimalStats()) {
            unpackDecimalStats(oi, o, fieldName, statsObj);
        } else if (current.isSetDateStats()) {
            unpackDateStats(oi, o, fieldName, statsObj);
        }
        return;
    }

    // "columntype" field: read the type name and install the matching empty container.
    PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) oi;
    String typeName = ((StringObjectInspector) primitiveInspector).getPrimitiveJavaObject(o);
    ColumnStatisticsData container = new ColumnStatisticsData();
    if (typeName.equalsIgnoreCase("long")) {
        container.setLongStats(new LongColumnStatsData());
    } else if (typeName.equalsIgnoreCase("double")) {
        container.setDoubleStats(new DoubleColumnStatsData());
    } else if (typeName.equalsIgnoreCase("string")) {
        container.setStringStats(new StringColumnStatsData());
    } else if (typeName.equalsIgnoreCase("boolean")) {
        container.setBooleanStats(new BooleanColumnStatsData());
    } else if (typeName.equalsIgnoreCase("binary")) {
        container.setBinaryStats(new BinaryColumnStatsData());
    } else if (typeName.equalsIgnoreCase("decimal")) {
        container.setDecimalStats(new DecimalColumnStatsData());
    } else if (typeName.equalsIgnoreCase("date")) {
        container.setDateStats(new DateColumnStatsData());
    } else {
        // Unrecognized type name: leave statsObj without stats data.
        return;
    }
    statsObj.setStatsData(container);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 8 with DecimalColumnStatsData

use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.

the class TestHBaseStoreBitVector method decimalTableStatistics.

/**
 * Stores table-level decimal column statistics and checks that reading them back
 * returns the same descriptor fields and the same decimal stats values.
 */
@Test
public void decimalTableStatistics() throws Exception {
    createMockTable(DECIMAL_COL, DECIMAL_TYPE);

    // Assemble the statistics to store: table-level descriptor plus one pre-built stats object.
    ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
    ColumnStatistics toStore = new ColumnStatistics();
    toStore.setStatsDesc(expectedDesc);
    ColumnStatisticsObj expectedObj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData expected = expectedObj.getStatsData().getDecimalStats();
    toStore.addToStatsObj(expectedObj);

    // Write, then read back for the same column.
    store.updateTableColumnStatistics(toStore);
    ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));

    // Descriptor must match and be flagged as table level.
    ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, fetchedDesc.getDbName());
    Assert.assertEquals(TBL, fetchedDesc.getTableName());
    Assert.assertTrue(fetchedDesc.isIsTblLevel());

    // Exactly one stats object, carrying decimal stats.
    Assert.assertEquals(1, fetched.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, fetchedData.getSetField());

    // Field-by-field comparison of the decimal stats.
    DecimalColumnStatsData actual = fetchedData.getDecimalStats();
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 9 with DecimalColumnStatsData

use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.

the class TestHBaseStoreBitVector method mockDecimalStats.

/**
 * Builds a decimal {@code ColumnStatisticsObj} whose values vary with {@code i}.
 *
 * @param i offset mixed into the high/low values, the null count and the NDV
 * @return a stats object for {@code DECIMAL_COL} of type {@code DECIMAL_TYPE}
 */
private static ColumnStatisticsObj mockDecimalStats(int i) {
    // Both bounds use scale 3; the unscaled bytes are the bytes of the number's decimal string.
    Decimal upperBound = new Decimal();
    upperBound.setScale((short) 3);
    upperBound.setUnscaled(String.valueOf(3876 + 100 * i).getBytes());

    Decimal lowerBound = new Decimal();
    lowerBound.setScale((short) 3);
    lowerBound.setUnscaled(String.valueOf(38 + i).getBytes());

    long nullCount = 13 + i;
    long distinctCount = 923947293L + 100 * i;

    DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
    decimalStats.setHighValue(upperBound);
    decimalStats.setLowValue(lowerBound);
    decimalStats.setNumNulls(nullCount);
    decimalStats.setNumDVs(distinctCount);
    decimalStats.setBitVectors("{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}");

    ColumnStatisticsData wrapper = new ColumnStatisticsData();
    wrapper.setDecimalStats(decimalStats);

    ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(DECIMAL_COL);
    result.setColType(DECIMAL_TYPE);
    result.setStatsData(wrapper);
    return result;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 10 with DecimalColumnStatsData

use of org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData in project hive by apache.

the class TestHBaseStoreBitVector method decimalPartitionStatistics.

/**
 * Stores partition-level decimal column statistics and checks that reading them back
 * returns the same descriptor fields and the same decimal stats values.
 */
@Test
public void decimalPartitionStatistics() throws Exception {
    createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);

    // Assemble stats for DECIMAL_COL on partition {PART_KEY, DECIMAL_VAL}:
    // partition-level descriptor plus one pre-built stats object.
    ColumnStatisticsDesc expectedDesc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
    ColumnStatistics toStore = new ColumnStatistics();
    toStore.setStatsDesc(expectedDesc);
    ColumnStatisticsObj expectedObj = decimalColStatsObjs.get(0);
    DecimalColumnStatsData expected = expectedObj.getStatsData().getDecimalStats();
    toStore.addToStatsObj(expectedObj);

    // Write the stats for the partition value.
    List<String> partitionValues = new ArrayList<String>();
    partitionValues.add(DECIMAL_VAL);
    store.updatePartitionColumnStatistics(toStore, partitionValues);

    // Read them back by partition name and column name.
    List<String> partitionNames = new ArrayList<String>();
    partitionNames.add(expectedDesc.getPartName());
    List<String> columnNames = new ArrayList<String>();
    columnNames.add(expectedObj.getColName());
    List<ColumnStatistics> fetchedList = store.getPartitionColumnStatistics(DB, TBL, partitionNames, columnNames);

    // One result whose descriptor matches and is NOT flagged as table level.
    Assert.assertEquals(1, fetchedList.size());
    ColumnStatistics fetched = fetchedList.get(0);
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetched.getStatsDesc().getLastAnalyzed());
    Assert.assertEquals(DB, fetched.getStatsDesc().getDbName());
    Assert.assertEquals(TBL, fetched.getStatsDesc().getTableName());
    Assert.assertFalse(fetched.getStatsDesc().isIsTblLevel());

    // Exactly one stats object, carrying decimal stats.
    Assert.assertEquals(1, fetched.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, fetchedData.getSetField());

    // Field-by-field comparison of the decimal stats.
    DecimalColumnStatsData actual = fetchedData.getDecimalStats();
    Assert.assertEquals(expected.getHighValue(), actual.getHighValue());
    Assert.assertEquals(expected.getLowValue(), actual.getLowValue());
    Assert.assertEquals(expected.getNumNulls(), actual.getNumNulls());
    Assert.assertEquals(expected.getNumDVs(), actual.getNumDVs());
    Assert.assertEquals(expected.getBitVectors(), actual.getBitVectors());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Aggregations

DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)28 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)23 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)18 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)15 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)15 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)15 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)15 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)15 DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData)10 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)8 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)7 Test (org.junit.Test)7 ArrayList (java.util.ArrayList)6 Decimal (org.apache.hadoop.hive.metastore.api.Decimal)6 HashMap (java.util.HashMap)4 Date (org.apache.hadoop.hive.metastore.api.Date)3 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)3 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)3 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)3 Table (org.apache.hadoop.hive.metastore.api.Table)3