Search in sources:

Example 1 with TimestampColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData in the Apache Hive project.

In class StatObjectConverter, method convertToMTableColumnStatistics.

// JDO
/**
 * Builds an {@code MTableColumnStatistics} model object from a statistics
 * descriptor and a single column's statistics.
 *
 * <p>The identifying fields (catalog, database, table, column name/type and
 * last-analyzed time) are always copied. Then the statistics of whichever kind
 * is set on {@code statsObj} are copied; every individual value that is not
 * set on the source is passed on as {@code null}. If no kind is set, no
 * statistics values are copied.
 *
 * @param table     model table the statistics are attached to
 * @param statsDesc descriptor supplying catalog/database/table names and the
 *                  last-analyzed time; the catalog falls back to
 *                  {@code DEFAULT_CATALOG_NAME} when not set
 * @param statsObj  the column's name, type and statistics data
 * @param engine    engine name stored on the result
 * @return the populated model object
 * @throws InvalidObjectException if {@code statsObj} or {@code statsDesc} is null
 */
public static MTableColumnStatistics convertToMTableColumnStatistics(MTable table, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj, String engine) throws NoSuchObjectException, MetaException, InvalidObjectException {
    if (statsObj == null || statsDesc == null) {
        throw new InvalidObjectException("Invalid column stats object");
    }

    MTableColumnStatistics target = new MTableColumnStatistics();

    // Identity of the column these statistics describe.
    target.setTable(table);
    target.setDbName(statsDesc.getDbName());
    if (statsDesc.isSetCatName()) {
        target.setCatName(statsDesc.getCatName());
    } else {
        target.setCatName(DEFAULT_CATALOG_NAME);
    }
    target.setTableName(statsDesc.getTableName());
    target.setLastAnalyzed(statsDesc.getLastAnalyzed());
    target.setColName(statsObj.getColName());
    target.setColType(statsObj.getColType());

    // Copy the one statistics kind that is set; unset values become null.
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData src = statsObj.getStatsData().getBooleanStats();
        target.setBooleanStats(
            src.isSetNumTrues() ? src.getNumTrues() : null,
            src.isSetNumFalses() ? src.getNumFalses() : null,
            src.isSetNumNulls() ? src.getNumNulls() : null);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData src = statsObj.getStatsData().getLongStats();
        target.setLongStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue() : null,
            src.isSetHighValue() ? src.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData src = statsObj.getStatsData().getDoubleStats();
        target.setDoubleStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue() : null,
            src.isSetHighValue() ? src.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData src = statsObj.getStatsData().getDecimalStats();
        // Decimal bounds are handed to the model as strings.
        String lowText = src.isSetLowValue() ? DecimalUtils.createJdoDecimalString(src.getLowValue()) : null;
        String highText = src.isSetHighValue() ? DecimalUtils.createJdoDecimalString(src.getHighValue()) : null;
        target.setDecimalStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            lowText,
            highText);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData src = statsObj.getStatsData().getStringStats();
        target.setStringStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetMaxColLen() ? src.getMaxColLen() : null,
            src.isSetAvgColLen() ? src.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData src = statsObj.getStatsData().getBinaryStats();
        target.setBinaryStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetMaxColLen() ? src.getMaxColLen() : null,
            src.isSetAvgColLen() ? src.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData src = statsObj.getStatsData().getDateStats();
        // Date bounds are stored as days since the epoch.
        target.setDateStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue().getDaysSinceEpoch() : null,
            src.isSetHighValue() ? src.getHighValue().getDaysSinceEpoch() : null);
    } else if (statsObj.getStatsData().isSetTimestampStats()) {
        TimestampColumnStatsData src = statsObj.getStatsData().getTimestampStats();
        // Timestamp bounds are stored as seconds since the epoch.
        target.setTimestampStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue().getSecondsSinceEpoch() : null,
            src.isSetHighValue() ? src.getHighValue().getSecondsSinceEpoch() : null);
    }

    target.setEngine(engine);
    return target;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 2 with TimestampColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData in the Apache Hive project.

In class StatObjectConverter, method setFieldsIntoOldStats.

/**
 * Copies every field that is set on {@code newStatObj}'s statistics into
 * {@code oldStatObj}'s statistics, leaving fields that are not set on the new
 * object untouched on the old one.
 *
 * <p>Both objects must carry the same kind of statistics (boolean, long,
 * double, string, binary, decimal, date or timestamp). The old object is
 * modified in place.
 *
 * @param oldStatObj statistics object to update
 * @param newStatObj statistics object supplying the new values
 * @throws IllegalArgumentException if the two objects carry different kinds of
 *         statistics, if no kind is set, or if the kind is not one handled here
 */
public static void setFieldsIntoOldStats(ColumnStatisticsObj oldStatObj, ColumnStatisticsObj newStatObj) {
    _Fields typeNew = newStatObj.getStatsData().getSetField();
    _Fields typeOld = oldStatObj.getStatsData().getSetField();
    // Reject mismatched or unset kinds explicitly: switching on a null enum
    // would otherwise fail with a NullPointerException that carries no message.
    if (typeNew == null || typeNew != typeOld) {
        throw new IllegalArgumentException("Cannot merge column stats of different or unset types: old=" + typeOld + ", new=" + typeNew);
    }
    switch(typeNew) {
        case BOOLEAN_STATS:
            {
                BooleanColumnStatsData oldBooleanStatsData = oldStatObj.getStatsData().getBooleanStats();
                BooleanColumnStatsData newBooleanStatsData = newStatObj.getStatsData().getBooleanStats();
                if (newBooleanStatsData.isSetNumTrues()) {
                    oldBooleanStatsData.setNumTrues(newBooleanStatsData.getNumTrues());
                }
                if (newBooleanStatsData.isSetNumFalses()) {
                    oldBooleanStatsData.setNumFalses(newBooleanStatsData.getNumFalses());
                }
                if (newBooleanStatsData.isSetNumNulls()) {
                    oldBooleanStatsData.setNumNulls(newBooleanStatsData.getNumNulls());
                }
                if (newBooleanStatsData.isSetBitVectors()) {
                    oldBooleanStatsData.setBitVectors(newBooleanStatsData.getBitVectors());
                }
                break;
            }
        case LONG_STATS:
            {
                LongColumnStatsData oldLongStatsData = oldStatObj.getStatsData().getLongStats();
                LongColumnStatsData newLongStatsData = newStatObj.getStatsData().getLongStats();
                if (newLongStatsData.isSetHighValue()) {
                    oldLongStatsData.setHighValue(newLongStatsData.getHighValue());
                }
                if (newLongStatsData.isSetLowValue()) {
                    oldLongStatsData.setLowValue(newLongStatsData.getLowValue());
                }
                if (newLongStatsData.isSetNumNulls()) {
                    oldLongStatsData.setNumNulls(newLongStatsData.getNumNulls());
                }
                if (newLongStatsData.isSetNumDVs()) {
                    oldLongStatsData.setNumDVs(newLongStatsData.getNumDVs());
                }
                if (newLongStatsData.isSetBitVectors()) {
                    oldLongStatsData.setBitVectors(newLongStatsData.getBitVectors());
                }
                break;
            }
        case DOUBLE_STATS:
            {
                DoubleColumnStatsData oldDoubleStatsData = oldStatObj.getStatsData().getDoubleStats();
                DoubleColumnStatsData newDoubleStatsData = newStatObj.getStatsData().getDoubleStats();
                if (newDoubleStatsData.isSetHighValue()) {
                    oldDoubleStatsData.setHighValue(newDoubleStatsData.getHighValue());
                }
                if (newDoubleStatsData.isSetLowValue()) {
                    oldDoubleStatsData.setLowValue(newDoubleStatsData.getLowValue());
                }
                if (newDoubleStatsData.isSetNumNulls()) {
                    oldDoubleStatsData.setNumNulls(newDoubleStatsData.getNumNulls());
                }
                if (newDoubleStatsData.isSetNumDVs()) {
                    oldDoubleStatsData.setNumDVs(newDoubleStatsData.getNumDVs());
                }
                if (newDoubleStatsData.isSetBitVectors()) {
                    oldDoubleStatsData.setBitVectors(newDoubleStatsData.getBitVectors());
                }
                break;
            }
        case STRING_STATS:
            {
                StringColumnStatsData oldStringStatsData = oldStatObj.getStatsData().getStringStats();
                StringColumnStatsData newStringStatsData = newStatObj.getStatsData().getStringStats();
                if (newStringStatsData.isSetMaxColLen()) {
                    oldStringStatsData.setMaxColLen(newStringStatsData.getMaxColLen());
                }
                if (newStringStatsData.isSetAvgColLen()) {
                    oldStringStatsData.setAvgColLen(newStringStatsData.getAvgColLen());
                }
                if (newStringStatsData.isSetNumNulls()) {
                    oldStringStatsData.setNumNulls(newStringStatsData.getNumNulls());
                }
                if (newStringStatsData.isSetNumDVs()) {
                    oldStringStatsData.setNumDVs(newStringStatsData.getNumDVs());
                }
                if (newStringStatsData.isSetBitVectors()) {
                    oldStringStatsData.setBitVectors(newStringStatsData.getBitVectors());
                }
                break;
            }
        case BINARY_STATS:
            {
                BinaryColumnStatsData oldBinaryStatsData = oldStatObj.getStatsData().getBinaryStats();
                BinaryColumnStatsData newBinaryStatsData = newStatObj.getStatsData().getBinaryStats();
                if (newBinaryStatsData.isSetMaxColLen()) {
                    oldBinaryStatsData.setMaxColLen(newBinaryStatsData.getMaxColLen());
                }
                if (newBinaryStatsData.isSetAvgColLen()) {
                    oldBinaryStatsData.setAvgColLen(newBinaryStatsData.getAvgColLen());
                }
                if (newBinaryStatsData.isSetNumNulls()) {
                    oldBinaryStatsData.setNumNulls(newBinaryStatsData.getNumNulls());
                }
                if (newBinaryStatsData.isSetBitVectors()) {
                    oldBinaryStatsData.setBitVectors(newBinaryStatsData.getBitVectors());
                }
                break;
            }
        case DECIMAL_STATS:
            {
                DecimalColumnStatsData oldDecimalStatsData = oldStatObj.getStatsData().getDecimalStats();
                DecimalColumnStatsData newDecimalStatsData = newStatObj.getStatsData().getDecimalStats();
                if (newDecimalStatsData.isSetHighValue()) {
                    oldDecimalStatsData.setHighValue(newDecimalStatsData.getHighValue());
                }
                if (newDecimalStatsData.isSetLowValue()) {
                    oldDecimalStatsData.setLowValue(newDecimalStatsData.getLowValue());
                }
                if (newDecimalStatsData.isSetNumNulls()) {
                    oldDecimalStatsData.setNumNulls(newDecimalStatsData.getNumNulls());
                }
                if (newDecimalStatsData.isSetNumDVs()) {
                    oldDecimalStatsData.setNumDVs(newDecimalStatsData.getNumDVs());
                }
                if (newDecimalStatsData.isSetBitVectors()) {
                    oldDecimalStatsData.setBitVectors(newDecimalStatsData.getBitVectors());
                }
                break;
            }
        case DATE_STATS:
            {
                DateColumnStatsData oldDateStatsData = oldStatObj.getStatsData().getDateStats();
                DateColumnStatsData newDateStatsData = newStatObj.getStatsData().getDateStats();
                if (newDateStatsData.isSetHighValue()) {
                    oldDateStatsData.setHighValue(newDateStatsData.getHighValue());
                }
                if (newDateStatsData.isSetLowValue()) {
                    oldDateStatsData.setLowValue(newDateStatsData.getLowValue());
                }
                if (newDateStatsData.isSetNumNulls()) {
                    oldDateStatsData.setNumNulls(newDateStatsData.getNumNulls());
                }
                if (newDateStatsData.isSetNumDVs()) {
                    oldDateStatsData.setNumDVs(newDateStatsData.getNumDVs());
                }
                if (newDateStatsData.isSetBitVectors()) {
                    oldDateStatsData.setBitVectors(newDateStatsData.getBitVectors());
                }
                break;
            }
        case TIMESTAMP_STATS:
            {
                TimestampColumnStatsData oldTimestampStatsData = oldStatObj.getStatsData().getTimestampStats();
                TimestampColumnStatsData newTimestampStatsData = newStatObj.getStatsData().getTimestampStats();
                if (newTimestampStatsData.isSetHighValue()) {
                    oldTimestampStatsData.setHighValue(newTimestampStatsData.getHighValue());
                }
                if (newTimestampStatsData.isSetLowValue()) {
                    oldTimestampStatsData.setLowValue(newTimestampStatsData.getLowValue());
                }
                if (newTimestampStatsData.isSetNumNulls()) {
                    oldTimestampStatsData.setNumNulls(newTimestampStatsData.getNumNulls());
                }
                if (newTimestampStatsData.isSetNumDVs()) {
                    oldTimestampStatsData.setNumDVs(newTimestampStatsData.getNumDVs());
                }
                if (newTimestampStatsData.isSetBitVectors()) {
                    oldTimestampStatsData.setBitVectors(newTimestampStatsData.getBitVectors());
                }
                break;
            }
        default:
            throw new IllegalArgumentException("Unknown stats type: " + typeNew.toString());
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsData._Fields(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 3 with TimestampColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData in the Apache Hive project.

In class StatObjectConverter, method convertToMPartitionColumnStatistics.

/**
 * Builds an {@code MPartitionColumnStatistics} model object from a statistics
 * descriptor and a single column's statistics.
 *
 * <p>The identifying fields (catalog, database, table, partition name, column
 * name/type and last-analyzed time) are always copied. Then the statistics of
 * whichever kind is set on {@code statsObj} are copied; every individual value
 * that is not set on the source is passed on as {@code null}. If no kind is
 * set, no statistics values are copied.
 *
 * @param partition model partition the statistics are attached to
 * @param statsDesc descriptor supplying catalog/database/table/partition names
 *                  and the last-analyzed time; the catalog falls back to
 *                  {@code DEFAULT_CATALOG_NAME} when not set
 * @param statsObj  the column's name, type and statistics data
 * @param engine    engine name stored on the result
 * @return the populated model object, or {@code null} if {@code statsDesc} or
 *         {@code statsObj} is null
 */
public static MPartitionColumnStatistics convertToMPartitionColumnStatistics(MPartition partition, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj, String engine) throws MetaException, NoSuchObjectException {
    if (statsDesc == null || statsObj == null) {
        return null;
    }

    MPartitionColumnStatistics target = new MPartitionColumnStatistics();

    // Identity of the partition column these statistics describe.
    target.setPartition(partition);
    if (statsDesc.isSetCatName()) {
        target.setCatName(statsDesc.getCatName());
    } else {
        target.setCatName(DEFAULT_CATALOG_NAME);
    }
    target.setDbName(statsDesc.getDbName());
    target.setTableName(statsDesc.getTableName());
    target.setPartitionName(statsDesc.getPartName());
    target.setLastAnalyzed(statsDesc.getLastAnalyzed());
    target.setColName(statsObj.getColName());
    target.setColType(statsObj.getColType());

    // Copy the one statistics kind that is set; unset values become null.
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData src = statsObj.getStatsData().getBooleanStats();
        target.setBooleanStats(
            src.isSetNumTrues() ? src.getNumTrues() : null,
            src.isSetNumFalses() ? src.getNumFalses() : null,
            src.isSetNumNulls() ? src.getNumNulls() : null);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData src = statsObj.getStatsData().getLongStats();
        target.setLongStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue() : null,
            src.isSetHighValue() ? src.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData src = statsObj.getStatsData().getDoubleStats();
        target.setDoubleStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue() : null,
            src.isSetHighValue() ? src.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData src = statsObj.getStatsData().getDecimalStats();
        // Decimal bounds are handed to the model as strings.
        String lowText = src.isSetLowValue() ? DecimalUtils.createJdoDecimalString(src.getLowValue()) : null;
        String highText = src.isSetHighValue() ? DecimalUtils.createJdoDecimalString(src.getHighValue()) : null;
        target.setDecimalStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            lowText,
            highText);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData src = statsObj.getStatsData().getStringStats();
        target.setStringStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetMaxColLen() ? src.getMaxColLen() : null,
            src.isSetAvgColLen() ? src.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData src = statsObj.getStatsData().getBinaryStats();
        target.setBinaryStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetMaxColLen() ? src.getMaxColLen() : null,
            src.isSetAvgColLen() ? src.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData src = statsObj.getStatsData().getDateStats();
        // Date bounds are stored as days since the epoch.
        target.setDateStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue().getDaysSinceEpoch() : null,
            src.isSetHighValue() ? src.getHighValue().getDaysSinceEpoch() : null);
    } else if (statsObj.getStatsData().isSetTimestampStats()) {
        TimestampColumnStatsData src = statsObj.getStatsData().getTimestampStats();
        // Timestamp bounds are stored as seconds since the epoch.
        target.setTimestampStats(
            src.isSetNumNulls() ? src.getNumNulls() : null,
            src.isSetNumDVs() ? src.getNumDVs() : null,
            src.isSetBitVectors() ? src.getBitVectors() : null,
            src.isSetLowValue() ? src.getLowValue().getSecondsSinceEpoch() : null,
            src.isSetHighValue() ? src.getHighValue().getSecondsSinceEpoch() : null);
    }

    target.setEngine(engine);
    return target;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 4 with TimestampColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData in the Apache Hive project.

In class TimestampColumnStatsAggregator, method extrapolate.

/**
 * Estimates timestamp column statistics for {@code numParts} entries from the
 * statistics available in {@code adjustedStatsMap}, and stores the estimate in
 * {@code extrapolateData}.
 *
 * <p>Low value, high value and (unless the density-based estimate is used) the
 * distinct-value count are each obtained by linear extrapolation: the entries
 * are sorted by the quantity in question, and the smallest and largest values
 * together with their indexes from {@code adjustedIndexMap} define a line that
 * is extended to index 0 or to {@code numParts}. The null count is the sum of
 * the available null counts scaled by {@code numParts / numPartsWithStats}.
 *
 * @param extrapolateData   receives the estimated timestamp statistics
 * @param numParts          total number of entries; also used as the right border index
 * @param numPartsWithStats number of entries that have statistics; used to scale the null count
 * @param adjustedIndexMap  index of each key in {@code adjustedStatsMap}
 * @param adjustedStatsMap  available statistics per key; each value must hold timestamp stats
 * @param densityAvg        average density; when non-zero and density-based
 *                          estimation is enabled, the distinct-value count is
 *                          derived from the extrapolated value range instead
 */
@Override
public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map<String, Double> adjustedIndexMap, Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
    int rightBorderInd = numParts;
    TimestampColumnStatsDataInspector estimate = new TimestampColumnStatsDataInspector();

    // Narrow the generic statistics down to their timestamp part.
    Map<String, TimestampColumnStatsData> timestampStatsByKey = new HashMap<>();
    for (Map.Entry<String, ColumnStatisticsData> source : adjustedStatsMap.entrySet()) {
        timestampStatsByKey.put(source.getKey(), source.getValue().getTimestampStats());
    }
    List<Map.Entry<String, TimestampColumnStatsData>> ordered = new LinkedList<>(timestampStatsByKey.entrySet());

    Comparator<Map.Entry<String, TimestampColumnStatsData>> byLowValue = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return left.getValue().getLowValue().compareTo(right.getValue().getLowValue());
        }
    };
    Comparator<Map.Entry<String, TimestampColumnStatsData>> byHighValue = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return left.getValue().getHighValue().compareTo(right.getValue().getHighValue());
        }
    };
    Comparator<Map.Entry<String, TimestampColumnStatsData>> byNumDVs = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return Long.compare(left.getValue().getNumDVs(), right.getValue().getNumDVs());
        }
    };

    // ---- low value: extrapolate the smallest lower bound ----
    Collections.sort(ordered, byLowValue);
    double lowMinInd = adjustedIndexMap.get(ordered.get(0).getKey());
    double lowMaxInd = adjustedIndexMap.get(ordered.get(ordered.size() - 1).getKey());
    long smallestLow = ordered.get(0).getValue().getLowValue().getSecondsSinceEpoch();
    long largestLow = ordered.get(ordered.size() - 1).getValue().getLowValue().getSecondsSinceEpoch();
    long lowValue;
    if (lowMinInd == lowMaxInd) {
        lowValue = smallestLow;
    } else if (lowMinInd < lowMaxInd) {
        // the minimum sits towards the left border
        lowValue = (long) (largestLow - (largestLow - smallestLow) * lowMaxInd / (lowMaxInd - lowMinInd));
    } else {
        // the minimum sits towards the right border
        lowValue = (long) (largestLow - (largestLow - smallestLow) * (rightBorderInd - lowMaxInd) / (lowMinInd - lowMaxInd));
    }

    // ---- high value: extrapolate the largest upper bound ----
    Collections.sort(ordered, byHighValue);
    double highMinInd = adjustedIndexMap.get(ordered.get(0).getKey());
    double highMaxInd = adjustedIndexMap.get(ordered.get(ordered.size() - 1).getKey());
    long smallestHigh = ordered.get(0).getValue().getHighValue().getSecondsSinceEpoch();
    long largestHigh = ordered.get(ordered.size() - 1).getValue().getHighValue().getSecondsSinceEpoch();
    long highValue;
    if (highMinInd == highMaxInd) {
        highValue = smallestHigh;
    } else if (highMinInd < highMaxInd) {
        // the maximum sits towards the right border
        highValue = (long) (smallestHigh + (largestHigh - smallestHigh) * (rightBorderInd - highMinInd) / (highMaxInd - highMinInd));
    } else {
        // the maximum sits towards the left border
        highValue = (long) (smallestHigh + (largestHigh - smallestHigh) * highMinInd / (highMinInd - highMaxInd));
    }

    // ---- null count: sum what is known, then scale to all entries ----
    long numNulls = 0;
    for (Map.Entry<String, TimestampColumnStatsData> known : timestampStatsByKey.entrySet()) {
        numNulls += known.getValue().getNumNulls();
    }
    numNulls = numNulls * numParts / numPartsWithStats;

    // ---- distinct values ----
    Collections.sort(ordered, byNumDVs);
    // The largest single count and the sum of all counts bound the density-based estimate.
    long lowerBound = ordered.get(ordered.size() - 1).getValue().getNumDVs();
    long higherBound = 0;
    for (Map.Entry<String, TimestampColumnStatsData> known : ordered) {
        higherBound += known.getValue().getNumDVs();
    }
    long ndv;
    if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
        ndv = (long) ((highValue - lowValue) / densityAvg);
        if (ndv < lowerBound) {
            ndv = lowerBound;
        } else if (ndv > higherBound) {
            ndv = higherBound;
        }
    } else {
        double ndvMinInd = adjustedIndexMap.get(ordered.get(0).getKey());
        double ndvMaxInd = adjustedIndexMap.get(ordered.get(ordered.size() - 1).getKey());
        long smallestNdv = ordered.get(0).getValue().getNumDVs();
        long largestNdv = ordered.get(ordered.size() - 1).getValue().getNumDVs();
        if (ndvMinInd == ndvMaxInd) {
            ndv = smallestNdv;
        } else if (ndvMinInd < ndvMaxInd) {
            // the maximum sits towards the right border
            ndv = (long) (smallestNdv + (largestNdv - smallestNdv) * (rightBorderInd - ndvMinInd) / (ndvMaxInd - ndvMinInd));
        } else {
            // the maximum sits towards the left border
            ndv = (long) (smallestNdv + (largestNdv - smallestNdv) * ndvMinInd / (ndvMinInd - ndvMaxInd));
        }
    }

    estimate.setLowValue(new Timestamp(lowValue));
    estimate.setHighValue(new Timestamp(highValue));
    estimate.setNumNulls(numNulls);
    estimate.setNumDVs(ndv);
    extrapolateData.setTimestampStats(estimate);
}
Also used : HashMap(java.util.HashMap) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp) LinkedList(java.util.LinkedList) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) HashMap(java.util.HashMap) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 5 with TimestampColumnStatsData

Use of org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData in the Apache Hive project.

In class ShowUtils, method extractColumnValues.

/**
 * Produces the display values for one column: its name, its type, optionally
 * nine statistics values, and finally its comment.
 *
 * <p>When {@code isColumnStatsAvailable} is true, nine statistics slots are
 * inserted between type and comment, in this order: low value, high value,
 * null count, distinct-value count, average length, maximum length, true
 * count, false count, bit vectors. Slots that do not apply to the column's
 * statistics kind are empty strings; all nine are empty when
 * {@code columnStatisticsObj} is null. If the statistics object has none of
 * the handled kinds set, no slots are inserted at all.
 *
 * @param column                 the column to describe
 * @param isColumnStatsAvailable whether statistics slots should be emitted
 * @param columnStatisticsObj    the column's statistics, may be null
 * @return the values as an array; a null comment is rendered as an empty string
 */
public static String[] extractColumnValues(FieldSchema column, boolean isColumnStatsAvailable, ColumnStatisticsObj columnStatisticsObj) {
    List<String> row = new ArrayList<>();
    row.add(column.getName());
    row.add(column.getType());

    if (isColumnStatsAvailable && columnStatisticsObj == null) {
        // Statistics were requested but there are none: keep the slots, leave them blank.
        row.addAll(Lists.newArrayList("", "", "", "", "", "", "", "", ""));
    } else if (isColumnStatsAvailable) {
        ColumnStatisticsData data = columnStatisticsObj.getStatsData();
        if (data.isSetBinaryStats()) {
            BinaryColumnStatsData binary = data.getBinaryStats();
            row.addAll(Lists.newArrayList(
                "",
                "",
                String.valueOf(binary.getNumNulls()),
                "",
                String.valueOf(binary.getAvgColLen()),
                String.valueOf(binary.getMaxColLen()),
                "",
                "",
                convertToString(binary.getBitVectors())));
        } else if (data.isSetStringStats()) {
            StringColumnStatsData string = data.getStringStats();
            row.addAll(Lists.newArrayList(
                "",
                "",
                String.valueOf(string.getNumNulls()),
                String.valueOf(string.getNumDVs()),
                String.valueOf(string.getAvgColLen()),
                String.valueOf(string.getMaxColLen()),
                "",
                "",
                convertToString(string.getBitVectors())));
        } else if (data.isSetBooleanStats()) {
            BooleanColumnStatsData bool = data.getBooleanStats();
            row.addAll(Lists.newArrayList(
                "",
                "",
                String.valueOf(bool.getNumNulls()),
                "",
                "",
                "",
                String.valueOf(bool.getNumTrues()),
                String.valueOf(bool.getNumFalses()),
                convertToString(bool.getBitVectors())));
        } else if (data.isSetDecimalStats()) {
            DecimalColumnStatsData decimal = data.getDecimalStats();
            row.addAll(Lists.newArrayList(
                convertToString(decimal.getLowValue()),
                convertToString(decimal.getHighValue()),
                String.valueOf(decimal.getNumNulls()),
                String.valueOf(decimal.getNumDVs()),
                "",
                "",
                "",
                "",
                convertToString(decimal.getBitVectors())));
        } else if (data.isSetDoubleStats()) {
            DoubleColumnStatsData dbl = data.getDoubleStats();
            row.addAll(Lists.newArrayList(
                String.valueOf(dbl.getLowValue()),
                String.valueOf(dbl.getHighValue()),
                String.valueOf(dbl.getNumNulls()),
                String.valueOf(dbl.getNumDVs()),
                "",
                "",
                "",
                "",
                convertToString(dbl.getBitVectors())));
        } else if (data.isSetLongStats()) {
            LongColumnStatsData lng = data.getLongStats();
            row.addAll(Lists.newArrayList(
                String.valueOf(lng.getLowValue()),
                String.valueOf(lng.getHighValue()),
                String.valueOf(lng.getNumNulls()),
                String.valueOf(lng.getNumDVs()),
                "",
                "",
                "",
                "",
                convertToString(lng.getBitVectors())));
        } else if (data.isSetDateStats()) {
            DateColumnStatsData date = data.getDateStats();
            row.addAll(Lists.newArrayList(
                convertToString(date.getLowValue()),
                convertToString(date.getHighValue()),
                String.valueOf(date.getNumNulls()),
                String.valueOf(date.getNumDVs()),
                "",
                "",
                "",
                "",
                convertToString(date.getBitVectors())));
        } else if (data.isSetTimestampStats()) {
            TimestampColumnStatsData timestamp = data.getTimestampStats();
            row.addAll(Lists.newArrayList(
                convertToString(timestamp.getLowValue()),
                convertToString(timestamp.getHighValue()),
                String.valueOf(timestamp.getNumNulls()),
                String.valueOf(timestamp.getNumDVs()),
                "",
                "",
                "",
                "",
                convertToString(timestamp.getBitVectors())));
        }
    }

    // The comment always comes last; a missing comment is shown as blank.
    String comment = column.getComment();
    if (comment != null) {
        row.add(comment);
    } else {
        row.add("");
    }
    return row.toArray(new String[0]);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Aggregations

TimestampColumnStatsData (org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData)6 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)4 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)4 DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData)4 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)4 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)4 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)4 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)4 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)3 HashMap (java.util.HashMap)2 TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector)2 ArrayList (java.util.ArrayList)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 NumDistinctValueEstimator (org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator)1 ColumnStatisticsData._Fields (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields)1 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)1 InvalidObjectException (org.apache.hadoop.hive.metastore.api.InvalidObjectException)1 Timestamp (org.apache.hadoop.hive.metastore.api.Timestamp)1 TimestampColumnStatsMerger (org.apache.hadoop.hive.metastore.columnstats.merge.TimestampColumnStatsMerger)1