Search in sources :

Example 1 with DecimalColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector in project hive by apache.

the class StatObjectConverter method fillColumnStatisticsData.

/**
 * Fills {@code data} with the column-statistics variant selected by {@code colType}.
 *
 * <p>All raw values arrive as {@code Object} and are converted with
 * {@code MetaStoreDirectSql.extractSqlLong} / {@code extractSqlDouble}. (The original marker
 * comment on this method was just "DB"; presumably the inputs are aggregate values returned by
 * a metastore database query - confirm against callers.)
 *
 * <p>For long, date, double and decimal columns the number of distinct values (NDV) is estimated
 * between {@code dist} (lower bound) and {@code sumDist} (upper bound):
 * <ul>
 *   <li>with {@code useDensityFunctionForNDVEstimation} and usable low/high/average inputs, the
 *       estimate is {@code (high - low) / avg}, clamped into the two bounds;</li>
 *   <li>otherwise it is {@code lowerBound + (higherBound - lowerBound) * ndvTuner}.</li>
 * </ul>
 * Long and date columns are additionally capped at {@code high - low + 1}.
 *
 * <p>If {@code colType} matches none of the handled types, {@code data} is left untouched.
 *
 * @param colType column type name; compared case-insensitively
 * @param data    union object that receives the populated statistics
 * @param llow    low value for long/date columns, may be null
 * @param lhigh   high value for long/date columns, may be null
 * @param dlow    low value for double columns, may be null
 * @param dhigh   high value for double columns, may be null
 * @param declow  low value for decimal columns as {@code BigDecimal} or {@code String}, may be null
 * @param dechigh high value for decimal columns as {@code BigDecimal} or {@code String}, may be null
 * @param nulls   number of nulls
 * @param dist    NDV lower bound
 * @param avglen  average column length (string/binary)
 * @param maxlen  maximum column length (string/binary)
 * @param trues   number of true values (boolean)
 * @param falses  number of false values (boolean)
 * @param avgLong    density average used for long/date NDV estimation, may be null
 * @param avgDouble  density average used for double NDV estimation, may be null
 * @param avgDecimal density average used for decimal NDV estimation, may be null
 * @param sumDist NDV upper bound
 * @param useDensityFunctionForNDVEstimation whether to prefer the density-based estimate
 * @param ndvTuner interpolation factor between the lower and upper NDV bounds
 * @throws MetaException declared by the signature; presumably raised by the extract helpers
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses, Object avgLong, Object avgDouble, Object avgDecimal, Object sumDist, boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
    colType = colType.toLowerCase();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));
        boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues));
        boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        data.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
        }
        long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
        long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
        // An integral column cannot hold more distinct values than fit in [low, high].
        long rangeBound = Long.MAX_VALUE;
        if (lhigh != null && llow != null) {
            rangeBound = MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow) + 1;
        }
        long estimation;
        if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) {
            // We have estimation, lowerbound and higherbound. We use estimation if
            // it is between lowerbound and higherbound.
            estimation = MetaStoreDirectSql.extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong));
            if (estimation < lowerBound) {
                estimation = lowerBound;
            } else if (estimation > higherBound) {
                estimation = higherBound;
            }
        } else {
            estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
        }
        estimation = Math.min(estimation, rangeBound);
        longStats.setNumDVs(estimation);
        data.setLongStats(longStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
        }
        long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
        long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
        // Same range cap as for long columns: at most (high - low + 1) distinct values.
        long rangeBound = Long.MAX_VALUE;
        if (lhigh != null && llow != null) {
            rangeBound = MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow) + 1;
        }
        long estimation;
        if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null && MetaStoreDirectSql.extractSqlDouble(avgLong) != 0.0) {
            // We have estimation, lowerbound and higherbound. We use estimation if
            // it is between lowerbound and higherbound.
            estimation = MetaStoreDirectSql.extractSqlLong((MetaStoreDirectSql.extractSqlLong(lhigh) - MetaStoreDirectSql.extractSqlLong(llow)) / MetaStoreDirectSql.extractSqlDouble(avgLong));
            if (estimation < lowerBound) {
                estimation = lowerBound;
            } else if (estimation > higherBound) {
                estimation = higherBound;
            }
        } else {
            estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner);
        }
        estimation = Math.min(estimation, rangeBound);
        dateStats.setNumDVs(estimation);
        data.setDateStats(dateStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
        }
        long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
        long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
        if (useDensityFunctionForNDVEstimation && dhigh != null && dlow != null && avgDouble != null && MetaStoreDirectSql.extractSqlDouble(avgDouble) != 0.0) {
            // NOTE(review): the range is computed with extractSqlLong on dhigh/dlow, which
            // presumably drops the fractional part - confirm whether extractSqlDouble was intended.
            long estimation = MetaStoreDirectSql.extractSqlLong((MetaStoreDirectSql.extractSqlLong(dhigh) - MetaStoreDirectSql.extractSqlLong(dlow)) / MetaStoreDirectSql.extractSqlDouble(avgDouble));
            if (estimation < lowerBound) {
                doubleStats.setNumDVs(lowerBound);
            } else if (estimation > higherBound) {
                doubleStats.setNumDVs(higherBound);
            } else {
                doubleStats.setNumDVs(estimation);
            }
        } else {
            doubleStats.setNumDVs((long) (lowerBound + (higherBound - lowerBound) * ndvTuner));
        }
        data.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        Decimal low = null;
        Decimal high = null;
        BigDecimal blow = null;
        BigDecimal bhigh = null;
        // The bounds may arrive either as BigDecimal or as String; any other runtime type
        // (including null) leaves the corresponding value unset.
        if (dechigh instanceof BigDecimal) {
            bhigh = (BigDecimal) dechigh;
            high = new Decimal(ByteBuffer.wrap(bhigh.unscaledValue().toByteArray()), (short) bhigh.scale());
        } else if (dechigh instanceof String) {
            bhigh = new BigDecimal((String) dechigh);
            high = createThriftDecimal((String) dechigh);
        }
        decimalStats.setHighValue(high);
        if (declow instanceof BigDecimal) {
            blow = (BigDecimal) declow;
            low = new Decimal(ByteBuffer.wrap(blow.unscaledValue().toByteArray()), (short) blow.scale());
        } else if (declow instanceof String) {
            // Must test declow here: testing dechigh would skip a String low value whenever the
            // high value is not a String, and would parse a null low value when it is.
            blow = new BigDecimal((String) declow);
            low = createThriftDecimal((String) declow);
        }
        decimalStats.setLowValue(low);
        long lowerBound = MetaStoreDirectSql.extractSqlLong(dist);
        long higherBound = MetaStoreDirectSql.extractSqlLong(sumDist);
        // Guard on the parsed values rather than the raw objects: a raw bound of an unsupported
        // type is non-null but leaves bhigh/blow null, which would fail in subtract() below.
        if (useDensityFunctionForNDVEstimation && bhigh != null && blow != null && avgDecimal != null && MetaStoreDirectSql.extractSqlDouble(avgDecimal) != 0.0) {
            long estimation = MetaStoreDirectSql.extractSqlLong(bhigh.subtract(blow).floatValue() / MetaStoreDirectSql.extractSqlDouble(avgDecimal));
            if (estimation < lowerBound) {
                decimalStats.setNumDVs(lowerBound);
            } else if (estimation > higherBound) {
                decimalStats.setNumDVs(higherBound);
            } else {
                decimalStats.setNumDVs(estimation);
            }
        } else {
            decimalStats.setNumDVs((long) (lowerBound + (higherBound - lowerBound) * ndvTuner));
        }
        data.setDecimalStats(decimalStats);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) BigDecimal(java.math.BigDecimal) Decimal(org.apache.hadoop.hive.metastore.api.Decimal) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date) BigDecimal(java.math.BigDecimal)

Example 2 with DecimalColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector in project hive by apache.

the class StatObjectConverter method getPartitionColumnStatisticsObj.

/**
 * Converts a persisted partition-level column statistics record into its Thrift
 * {@code ColumnStatisticsObj} form.
 *
 * <p>The statistics variant is chosen from the lower-cased column type. Optional low/high
 * values are copied only when present, and bit vectors are copied only when
 * {@code enableBitVector} is true and the record has one. If the column type matches none of
 * the handled types, the returned object carries an empty {@code ColumnStatisticsData}.
 *
 * @param mStatsObj       persisted statistics record to convert
 * @param enableBitVector whether bit vectors should be included in the result
 * @return a new {@code ColumnStatisticsObj} with column name, type and statistics populated
 */
public static ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj, boolean enableBitVector) {
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColType(mStatsObj.getColType());
    statsObj.setColName(mStatsObj.getColName());
    String colType = mStatsObj.getColType().toLowerCase();
    ColumnStatisticsData colStatsData = new ColumnStatisticsData();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(mStatsObj.getNumFalses());
        boolStats.setNumTrues(mStatsObj.getNumTrues());
        boolStats.setNumNulls(mStatsObj.getNumNulls());
        colStatsData.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(mStatsObj.getNumNulls());
        stringStats.setAvgColLen(mStatsObj.getAvgColLen());
        stringStats.setMaxColLen(mStatsObj.getMaxColLen());
        stringStats.setNumDVs(mStatsObj.getNumDVs());
        stringStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(mStatsObj.getNumNulls());
        binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
        binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
        colStatsData.setBinaryStats(binaryStats);
    } else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint") || colType.equals("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getLongHighValue() != null) {
            longStats.setHighValue(mStatsObj.getLongHighValue());
        }
        if (mStatsObj.getLongLowValue() != null) {
            longStats.setLowValue(mStatsObj.getLongLowValue());
        }
        longStats.setNumDVs(mStatsObj.getNumDVs());
        longStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDoubleHighValue() != null) {
            doubleStats.setHighValue(mStatsObj.getDoubleHighValue());
        }
        if (mStatsObj.getDoubleLowValue() != null) {
            doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
        }
        doubleStats.setNumDVs(mStatsObj.getNumDVs());
        doubleStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDecimalHighValue() != null) {
            decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
        }
        if (mStatsObj.getDecimalLowValue() != null) {
            decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
        }
        decimalStats.setNumDVs(mStatsObj.getNumDVs());
        decimalStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(mStatsObj.getNumNulls());
        // The long low/high values are nullable (the long branch above checks them), so guard
        // here as well: passing a null to new Date(...) would fail while unboxing.
        if (mStatsObj.getLongHighValue() != null) {
            dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
        }
        if (mStatsObj.getLongLowValue() != null) {
            dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
        }
        dateStats.setNumDVs(mStatsObj.getNumDVs());
        dateStats.setBitVectors((mStatsObj.getBitVector() == null || !enableBitVector) ? null : mStatsObj.getBitVector());
        colStatsData.setDateStats(dateStats);
    }
    statsObj.setStatsData(colStatsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Example 3 with DecimalColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector in project hive by apache.

the class StatObjectConverter method getTableColumnStatisticsObj.

/**
 * Builds the Thrift {@code ColumnStatisticsObj} for a persisted table-level column
 * statistics record.
 *
 * <p>The lower-cased column type decides which statistics variant is filled in. Low/high
 * values are transferred only when the record holds them; bit vectors are transferred only
 * when the record holds one and {@code enableBitVector} is set. A column type that matches no
 * handled case yields an object whose {@code ColumnStatisticsData} is left empty.
 *
 * @param mStatsObj       persisted statistics record to convert
 * @param enableBitVector whether bit vectors should be carried over
 * @return the populated {@code ColumnStatisticsObj}
 */
public static ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj, boolean enableBitVector) {
    final ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColType(mStatsObj.getColType());
    result.setColName(mStatsObj.getColName());
    final String type = mStatsObj.getColType().toLowerCase();
    final ColumnStatisticsData payload = new ColumnStatisticsData();
    if ("boolean".equals(type)) {
        final BooleanColumnStatsData bools = new BooleanColumnStatsData();
        bools.setNumFalses(mStatsObj.getNumFalses());
        bools.setNumTrues(mStatsObj.getNumTrues());
        bools.setNumNulls(mStatsObj.getNumNulls());
        payload.setBooleanStats(bools);
    } else if ("string".equals(type) || type.startsWith("varchar") || type.startsWith("char")) {
        final StringColumnStatsDataInspector strings = new StringColumnStatsDataInspector();
        strings.setNumNulls(mStatsObj.getNumNulls());
        strings.setAvgColLen(mStatsObj.getAvgColLen());
        strings.setMaxColLen(mStatsObj.getMaxColLen());
        strings.setNumDVs(mStatsObj.getNumDVs());
        strings.setBitVectors((mStatsObj.getBitVector() != null && enableBitVector) ? mStatsObj.getBitVector() : null);
        payload.setStringStats(strings);
    } else if ("binary".equals(type)) {
        final BinaryColumnStatsData binaries = new BinaryColumnStatsData();
        binaries.setNumNulls(mStatsObj.getNumNulls());
        binaries.setAvgColLen(mStatsObj.getAvgColLen());
        binaries.setMaxColLen(mStatsObj.getMaxColLen());
        payload.setBinaryStats(binaries);
    } else if ("bigint".equals(type) || "int".equals(type) || "smallint".equals(type) || "tinyint".equals(type) || "timestamp".equals(type)) {
        final LongColumnStatsDataInspector longs = new LongColumnStatsDataInspector();
        longs.setNumNulls(mStatsObj.getNumNulls());
        final Long upper = mStatsObj.getLongHighValue();
        if (upper != null) {
            longs.setHighValue(upper);
        }
        final Long lower = mStatsObj.getLongLowValue();
        if (lower != null) {
            longs.setLowValue(lower);
        }
        // Keep NDV before bit vectors: the setter order is preserved from the original.
        longs.setNumDVs(mStatsObj.getNumDVs());
        longs.setBitVectors((mStatsObj.getBitVector() != null && enableBitVector) ? mStatsObj.getBitVector() : null);
        payload.setLongStats(longs);
    } else if ("double".equals(type) || "float".equals(type)) {
        final DoubleColumnStatsDataInspector doubles = new DoubleColumnStatsDataInspector();
        doubles.setNumNulls(mStatsObj.getNumNulls());
        final Double upper = mStatsObj.getDoubleHighValue();
        if (upper != null) {
            doubles.setHighValue(upper);
        }
        final Double lower = mStatsObj.getDoubleLowValue();
        if (lower != null) {
            doubles.setLowValue(lower);
        }
        doubles.setNumDVs(mStatsObj.getNumDVs());
        doubles.setBitVectors((mStatsObj.getBitVector() != null && enableBitVector) ? mStatsObj.getBitVector() : null);
        payload.setDoubleStats(doubles);
    } else if (type.startsWith("decimal")) {
        final DecimalColumnStatsDataInspector decimals = new DecimalColumnStatsDataInspector();
        decimals.setNumNulls(mStatsObj.getNumNulls());
        // Decimal bounds are stored as strings and converted to the Thrift Decimal form.
        final String upper = mStatsObj.getDecimalHighValue();
        if (upper != null) {
            decimals.setHighValue(createThriftDecimal(upper));
        }
        final String lower = mStatsObj.getDecimalLowValue();
        if (lower != null) {
            decimals.setLowValue(createThriftDecimal(lower));
        }
        decimals.setNumDVs(mStatsObj.getNumDVs());
        decimals.setBitVectors((mStatsObj.getBitVector() != null && enableBitVector) ? mStatsObj.getBitVector() : null);
        payload.setDecimalStats(decimals);
    } else if ("date".equals(type)) {
        final DateColumnStatsDataInspector dates = new DateColumnStatsDataInspector();
        dates.setNumNulls(mStatsObj.getNumNulls());
        // Date bounds share the long low/high columns of the record.
        final Long upper = mStatsObj.getLongHighValue();
        if (upper != null) {
            dates.setHighValue(new Date(upper));
        }
        final Long lower = mStatsObj.getLongLowValue();
        if (lower != null) {
            dates.setLowValue(new Date(lower));
        }
        dates.setNumDVs(mStatsObj.getNumDVs());
        dates.setBitVectors((mStatsObj.getBitVector() != null && enableBitVector) ? mStatsObj.getBitVector() : null);
        payload.setDateStats(dates);
    }
    result.setStatsData(payload);
    return result;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 4 with DecimalColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector in project hive by apache.

the class DecimalColumnStatsAggregator method extrapolate.

/**
 * Extrapolates decimal column statistics for a set of partitions from the subset that has
 * statistics, and stores the result in {@code extrapolateData}.
 *
 * <p>Low value, high value and (when the density function is not used) NDV are each obtained
 * by sorting the per-partition stats on that attribute and linearly extending the line through
 * the smallest and largest entries to the left border (index 0) or right border
 * ({@code numParts}), depending on which side the extreme lies. The null count is the sum over
 * the available partitions scaled by {@code numParts / numPartsWithStats}.
 *
 * @param extrapolateData   receives the extrapolated decimal statistics
 * @param numParts          total number of partitions; also used as the right border index
 * @param numPartsWithStats number of partitions that contributed statistics
 * @param adjustedIndexMap  index assigned to each key of {@code adjustedStatsMap}
 * @param adjustedStatsMap  statistics per key; the decimal variant of each entry is used
 * @param densityAvg        density average used when density-based NDV estimation is enabled
 */
@Override
public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map<String, Double> adjustedIndexMap, Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
    final int rightBorder = numParts;

    // Project every entry down to its decimal statistics.
    final Map<String, DecimalColumnStatsData> decimalByKey = new HashMap<>();
    for (Map.Entry<String, ColumnStatisticsData> source : adjustedStatsMap.entrySet()) {
        decimalByKey.put(source.getKey(), source.getValue().getDecimalStats());
    }
    final List<Map.Entry<String, DecimalColumnStatsData>> ordered = new LinkedList<>(decimalByKey.entrySet());

    // ---- low value: order by low value, then extend towards the border holding the minimum.
    Collections.sort(ordered, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, DecimalColumnStatsData> a, Map.Entry<String, DecimalColumnStatsData> b) {
            return a.getValue().getLowValue().compareTo(b.getValue().getLowValue());
        }
    });
    Map.Entry<String, DecimalColumnStatsData> first = ordered.get(0);
    Map.Entry<String, DecimalColumnStatsData> last = ordered.get(ordered.size() - 1);
    double firstIdx = adjustedIndexMap.get(first.getKey());
    double lastIdx = adjustedIndexMap.get(last.getKey());
    double smallest = MetaStoreUtils.decimalToDouble(first.getValue().getLowValue());
    double largest = MetaStoreUtils.decimalToDouble(last.getValue().getLowValue());
    final double lowValue;
    if (firstIdx == lastIdx) {
        lowValue = smallest;
    } else if (firstIdx < lastIdx) {
        // left border is the min
        lowValue = (largest - (largest - smallest) * lastIdx / (lastIdx - firstIdx));
    } else {
        // right border is the min
        lowValue = (largest - (largest - smallest) * (rightBorder - lastIdx) / (firstIdx - lastIdx));
    }

    // ---- high value: order by high value, then extend towards the border holding the maximum.
    Collections.sort(ordered, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, DecimalColumnStatsData> a, Map.Entry<String, DecimalColumnStatsData> b) {
            return a.getValue().getHighValue().compareTo(b.getValue().getHighValue());
        }
    });
    first = ordered.get(0);
    last = ordered.get(ordered.size() - 1);
    firstIdx = adjustedIndexMap.get(first.getKey());
    lastIdx = adjustedIndexMap.get(last.getKey());
    smallest = MetaStoreUtils.decimalToDouble(first.getValue().getHighValue());
    largest = MetaStoreUtils.decimalToDouble(last.getValue().getHighValue());
    final double highValue;
    if (firstIdx == lastIdx) {
        highValue = smallest;
    } else if (firstIdx < lastIdx) {
        // right border is the max
        highValue = (smallest + (largest - smallest) * (rightBorder - firstIdx) / (lastIdx - firstIdx));
    } else {
        // left border is the max
        highValue = (smallest + (largest - smallest) * firstIdx / (firstIdx - lastIdx));
    }

    // ---- nulls: sum over the available entries, scaled up to the full partition count.
    long nullTotal = 0;
    for (DecimalColumnStatsData stats : decimalByKey.values()) {
        nullTotal += stats.getNumNulls();
    }
    final long numNulls = nullTotal * numParts / numPartsWithStats;

    // ---- ndv: order by NDV; the largest single NDV is the lower bound, the sum the upper bound.
    Collections.sort(ordered, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, DecimalColumnStatsData> a, Map.Entry<String, DecimalColumnStatsData> b) {
            return Long.compare(a.getValue().getNumDVs(), b.getValue().getNumDVs());
        }
    });
    first = ordered.get(0);
    last = ordered.get(ordered.size() - 1);
    final long lowerBound = last.getValue().getNumDVs();
    long higherBound = 0;
    for (Map.Entry<String, DecimalColumnStatsData> item : ordered) {
        higherBound += item.getValue().getNumDVs();
    }
    long ndv;
    if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
        // Density-based estimate, clamped into [lowerBound, higherBound].
        ndv = (long) ((highValue - lowValue) / densityAvg);
        if (ndv < lowerBound) {
            ndv = lowerBound;
        } else if (ndv > higherBound) {
            ndv = higherBound;
        }
    } else {
        firstIdx = adjustedIndexMap.get(first.getKey());
        lastIdx = adjustedIndexMap.get(last.getKey());
        final long ndvMin = first.getValue().getNumDVs();
        final long ndvMax = last.getValue().getNumDVs();
        if (firstIdx == lastIdx) {
            ndv = ndvMin;
        } else if (firstIdx < lastIdx) {
            // right border is the max
            ndv = (long) (ndvMin + (ndvMax - ndvMin) * (rightBorder - firstIdx) / (lastIdx - firstIdx));
        } else {
            // left border is the max
            ndv = (long) (ndvMin + (ndvMax - ndvMin) * firstIdx / (firstIdx - lastIdx));
        }
    }

    // Publish the extrapolated values as the decimal variant of the output union.
    final DecimalColumnStatsDataInspector extrapolated = new DecimalColumnStatsDataInspector();
    extrapolated.setLowValue(StatObjectConverter.createThriftDecimal(String.valueOf(lowValue)));
    extrapolated.setHighValue(StatObjectConverter.createThriftDecimal(String.valueOf(highValue)));
    extrapolated.setNumNulls(numNulls);
    extrapolated.setNumDVs(ndv);
    extrapolateData.setDecimalStats(extrapolated);
}
Also used : HashMap(java.util.HashMap) LinkedList(java.util.LinkedList) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) HashMap(java.util.HashMap) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 5 with DecimalColumnStatsDataInspector

use of org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector in project hive by apache.

the class ColumnStatsMergerFactory method newColumnStaticsObj.

/**
 * Creates a {@code ColumnStatisticsObj} for the given column whose statistics union is
 * initialised with an empty instance of the variant named by {@code type}.
 *
 * @param colName column name to set on the result
 * @param colType column type to set on the result
 * @param type    which statistics variant to initialise
 * @return the new statistics object
 * @throws IllegalArgumentException if {@code type} is not one of the handled variants
 */
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
    // Build the union payload first; an unsupported variant aborts before anything is returned.
    final ColumnStatisticsData payload = new ColumnStatisticsData();
    switch(type) {
        case BOOLEAN_STATS:
            payload.setBooleanStats(new BooleanColumnStatsData());
            break;
        case LONG_STATS:
            payload.setLongStats(new LongColumnStatsDataInspector());
            break;
        case DOUBLE_STATS:
            payload.setDoubleStats(new DoubleColumnStatsDataInspector());
            break;
        case STRING_STATS:
            payload.setStringStats(new StringColumnStatsDataInspector());
            break;
        case BINARY_STATS:
            payload.setBinaryStats(new BinaryColumnStatsData());
            break;
        case DECIMAL_STATS:
            payload.setDecimalStats(new DecimalColumnStatsDataInspector());
            break;
        case DATE_STATS:
            payload.setDateStats(new DateColumnStatsDataInspector());
            break;
        default:
            throw new IllegalArgumentException("Unknown stats type");
    }
    final ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(colName);
    result.setColType(colType);
    result.setStatsData(payload);
    return result;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector)11 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)8 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)8 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)8 DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector)8 DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector)8 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)8 StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector)8 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)6 Date (org.apache.hadoop.hive.metastore.api.Date)4 Decimal (org.apache.hadoop.hive.metastore.api.Decimal)3 BigDecimal (java.math.BigDecimal)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 NumDistinctValueEstimator (org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator)2 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)2 LinkedList (java.util.LinkedList)1 Entry (java.util.Map.Entry)1 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)1 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)1