Search in sources :

Example 1 with Decimal

use of org.apache.hadoop.hive.metastore.api.Decimal in project hive by apache.

the class DecimalColumnStatsMerger method merge.

/**
 * Folds the decimal column statistics of {@code newColStats} into {@code aggregateColStats}.
 *
 * <p>After the call the aggregate holds:
 * <ul>
 *   <li>the smaller of the two low values and the larger of the two high values, as ordered by
 *       {@code Decimal.compareTo}; a bound that is {@code null} on one side is ignored in favour
 *       of the other side's bound;</li>
 *   <li>the sum of both null counts;</li>
 *   <li>an NDV that is either the maximum of the two NDVs (no estimator, or no bit vectors on the
 *       new stats) or the estimate produced by {@code ndvEstimator} after merging both sides'
 *       bit vectors, in which case the aggregate's bit vectors are replaced as well.</li>
 * </ul>
 *
 * @param aggregateColStats running aggregate; its decimal stats are updated in place
 * @param newColStats statistics to merge in; only getters are called on its decimal stats
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
    DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
    DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();

    // Low bound: keep the SMALLER value. The previous code kept the aggregate's low value when it
    // compared greater than the new one, i.e. it tracked the maximum of the lows.
    Decimal aggregateLow = aggregateData.getLowValue();
    Decimal newLow = newData.getLowValue();
    Decimal lowValue;
    if (aggregateLow == null) {
        lowValue = newLow;
    } else if (newLow == null) {
        // Do not hand null to compareTo; the aggregate's bound stays as it is.
        lowValue = aggregateLow;
    } else {
        lowValue = aggregateLow.compareTo(newLow) < 0 ? aggregateLow : newLow;
    }
    aggregateData.setLowValue(lowValue);

    // High bound: keep the LARGER value, with the same null handling as the low bound.
    Decimal aggregateHigh = aggregateData.getHighValue();
    Decimal newHigh = newData.getHighValue();
    Decimal highValue;
    if (aggregateHigh == null) {
        highValue = newHigh;
    } else if (newHigh == null) {
        highValue = aggregateHigh;
    } else {
        highValue = aggregateHigh.compareTo(newHigh) > 0 ? aggregateHigh : newHigh;
    }
    aggregateData.setHighValue(highValue);

    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());

    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
        // Without usable bit vectors the best available figure is the larger NDV.
        aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
    } else {
        // NOTE(review): only newData's bit vectors are checked above; this presumably relies on
        // the aggregate also carrying bit vectors whenever an estimator is configured - confirm.
        ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), ndvEstimator.getnumBitVectors()));
        ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), ndvEstimator.getnumBitVectors()));
        long ndv = ndvEstimator.estimateNumDistinctValues();
        LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
        aggregateData.setNumDVs(ndv);
        aggregateData.setBitVectors(ndvEstimator.serialize().toString());
    }
}
Also used : DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) NumDistinctValueEstimator(org.apache.hadoop.hive.metastore.NumDistinctValueEstimator)

Example 2 with Decimal

use of org.apache.hadoop.hive.metastore.api.Decimal in project hive by apache.

the class TestHBaseStoreBitVector method mockDecimalStats.

/**
 * Builds a mock decimal column statistics object whose values vary with {@code i}.
 *
 * <p>Both bounds use scale 3; their unscaled bytes are the bytes of the decimal strings
 * {@code 38 + i} (low) and {@code 3876 + 100 * i} (high). The stats also carry a fixed
 * bit-vector string.
 *
 * @param i offset used to derive the bounds, the null count and the NDV
 * @return a {@code ColumnStatisticsObj} named {@code DECIMAL_COL} of type {@code DECIMAL_TYPE}
 */
private static ColumnStatisticsObj mockDecimalStats(int i) {
    // Lower bound.
    Decimal lowerBound = new Decimal();
    lowerBound.setScale((short) 3);
    lowerBound.setUnscaled(String.valueOf(38 + i).getBytes());

    // Upper bound.
    Decimal upperBound = new Decimal();
    upperBound.setScale((short) 3);
    upperBound.setUnscaled(String.valueOf(3876 + 100 * i).getBytes());

    // Decimal-specific statistics payload.
    DecimalColumnStatsData stats = new DecimalColumnStatsData();
    stats.setLowValue(lowerBound);
    stats.setHighValue(upperBound);
    stats.setNumNulls(13 + i);
    stats.setNumDVs(923947293L + 100 * i);
    stats.setBitVectors("{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}");

    ColumnStatisticsData wrapper = new ColumnStatisticsData();
    wrapper.setDecimalStats(stats);

    // Column-level envelope.
    ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(DECIMAL_COL);
    result.setColType(DECIMAL_TYPE);
    result.setStatsData(wrapper);
    return result;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 3 with Decimal

use of org.apache.hadoop.hive.metastore.api.Decimal in project hive by apache.

the class TestHBaseStore method mockDecimalStats.

/**
 * Builds a mock decimal column statistics object whose values vary with {@code i}.
 *
 * <p>Both bounds use scale 3; their unscaled bytes are the bytes of the decimal strings
 * {@code 38 + i} (low) and {@code 3876 + 100 * i} (high). No bit vectors are set.
 *
 * @param i offset used to derive the bounds, the null count and the NDV
 * @return a {@code ColumnStatisticsObj} named {@code DECIMAL_COL} of type {@code DECIMAL_TYPE}
 */
private static ColumnStatisticsObj mockDecimalStats(int i) {
    // Lower bound.
    Decimal lowerBound = new Decimal();
    lowerBound.setScale((short) 3);
    lowerBound.setUnscaled(String.valueOf(38 + i).getBytes());

    // Upper bound.
    Decimal upperBound = new Decimal();
    upperBound.setScale((short) 3);
    upperBound.setUnscaled(String.valueOf(3876 + 100 * i).getBytes());

    // Decimal-specific statistics payload.
    DecimalColumnStatsData stats = new DecimalColumnStatsData();
    stats.setLowValue(lowerBound);
    stats.setHighValue(upperBound);
    stats.setNumNulls(13 + i);
    stats.setNumDVs(923947293L + 100 * i);

    ColumnStatisticsData wrapper = new ColumnStatisticsData();
    wrapper.setDecimalStats(stats);

    // Column-level envelope.
    ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(DECIMAL_COL);
    result.setColType(DECIMAL_TYPE);
    result.setStatsData(wrapper);
    return result;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 4 with Decimal

use of org.apache.hadoop.hive.metastore.api.Decimal in project flink by apache.

the class HiveStatsUtil method toThriftDecimal.

/**
 * Converts a {@code HiveDecimal} into the metastore Thrift {@code Decimal}.
 *
 * <p>The result carries the byte array of the input's unscaled value, wrapped in a
 * {@code ByteBuffer}, and the input's scale narrowed to {@code short}.
 *
 * @param hiveDecimal value to convert
 * @return a newly created Thrift {@code Decimal} with unscaled value and scale set
 */
private static Decimal toThriftDecimal(HiveDecimal hiveDecimal) {
    // The constructor signature changed in 3.x, so build via the default constructor and
    // populate each field through its setter.
    final Decimal thriftDecimal = new Decimal();

    final byte[] unscaledBytes = hiveDecimal.unscaledValue().toByteArray();
    thriftDecimal.setUnscaled(ByteBuffer.wrap(unscaledBytes));

    final short scale = (short) hiveDecimal.scale();
    thriftDecimal.setScale(scale);

    return thriftDecimal;
}
Also used : BigDecimal(java.math.BigDecimal) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal)

Example 5 with Decimal

use of org.apache.hadoop.hive.metastore.api.Decimal in project alluxio by Alluxio.

the class HiveUtilsTest method protoColStatsWithDecimalData.

/**
 * Runs {@code verifyColumnStats} on decimal column statistics in three states: freshly
 * constructed (empty), fully populated with random values, and populated but with the high and
 * low values reset to {@code null}.
 */
@Test
public void protoColStatsWithDecimalData() {
    ColumnStatisticsObj columnStats = new ColumnStatisticsObj();
    columnStats.setColName("colName");
    columnStats.setColType("colType");
    ColumnStatisticsData statsData = new ColumnStatisticsData();

    // Phase 1: empty decimal stats.
    DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
    statsData.setDecimalStats(decimalStats);
    columnStats.setStatsData(statsData);
    verifyColumnStats(columnStats);

    // Phase 2: every field populated. Values are drawn in the same order as the fields are set.
    String bitVectors = CommonUtils.randomAlphaNumString(5);
    decimalStats.setBitVectors(bitVectors);
    long nullCount = mRandom.nextLong();
    decimalStats.setNumNulls(nullCount);
    Decimal upperBound = new Decimal(ByteBuffer.wrap(CommonUtils.randomBytes(5)), (short) mRandom.nextInt());
    decimalStats.setHighValue(upperBound);
    Decimal lowerBound = new Decimal(ByteBuffer.wrap(CommonUtils.randomBytes(5)), (short) mRandom.nextInt());
    decimalStats.setLowValue(lowerBound);
    long distinctCount = mRandom.nextLong();
    decimalStats.setNumDVs(distinctCount);
    statsData.setDecimalStats(decimalStats);
    columnStats.setStatsData(statsData);
    verifyColumnStats(columnStats);

    // Phase 3: bounds cleared, remaining fields left as populated above.
    decimalStats.setHighValue(null);
    decimalStats.setLowValue(null);
    statsData.setDecimalStats(decimalStats);
    columnStats.setStatsData(statsData);
    verifyColumnStats(columnStats);
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Aggregations

Decimal (org.apache.hadoop.hive.metastore.api.Decimal): 13; DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 7; BigDecimal (java.math.BigDecimal): 6; ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 6; ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 5; BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 4; BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 4; DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 3; LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 3; StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 3; HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 2; Date (org.apache.hadoop.hive.metastore.api.Date): 2; Test (org.junit.Test): 2; BigInteger (java.math.BigInteger): 1; ByteBuffer (java.nio.ByteBuffer): 1; ArrayList (java.util.ArrayList): 1; NumDistinctValueEstimator (org.apache.hadoop.hive.metastore.NumDistinctValueEstimator): 1; ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 1; ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 1; DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData): 1