Search in sources :

Example 6 with Timestamp

use of org.apache.hadoop.hive.metastore.api.Timestamp in project hive by apache.

the class TimestampColumnStatsAggregator method extrapolate.

/**
 * Fills {@code extrapolateData} with timestamp column statistics extrapolated from the
 * per-entry statistics in {@code adjustedStatsMap}.
 *
 * <p>Low value, high value and (when the density function is not used) the number of distinct
 * values are each derived by sorting the entries on that statistic and extrapolating linearly
 * from the smallest and largest entry, using their positions in {@code adjustedIndexMap} and
 * {@code numParts} as the right border. The null count is the sum over all entries scaled by
 * {@code numParts / numPartsWithStats}.
 *
 * @param extrapolateData   receives the computed timestamp statistics
 * @param numParts          total number of partitions; also used as the right border index
 * @param numPartsWithStats divisor used when scaling the null count
 * @param adjustedIndexMap  index of every key of {@code adjustedStatsMap}; must contain an entry
 *                          for each key that ends up first or last after sorting
 * @param adjustedStatsMap  statistics per key; each value must carry timestamp stats
 * @param densityAvg        average density; when non-zero and density estimation is enabled the
 *                          NDV is computed as {@code (highValue - lowValue) / densityAvg},
 *                          clamped to the [largest single NDV, sum of NDVs] range
 */
@Override
public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, int numPartsWithStats, Map<String, Double> adjustedIndexMap, Map<String, ColumnStatisticsData> adjustedStatsMap, double densityAvg) {
    // Unwrap the timestamp-specific statistics of every entry.
    final Map<String, TimestampColumnStatsData> timestampStatsByKey = new HashMap<>();
    for (Map.Entry<String, ColumnStatisticsData> source : adjustedStatsMap.entrySet()) {
        timestampStatsByKey.put(source.getKey(), source.getValue().getTimestampStats());
    }
    final List<Map.Entry<String, TimestampColumnStatsData>> sortedEntries = new LinkedList<>(timestampStatsByKey.entrySet());
    final int lastPos = sortedEntries.size() - 1;

    final Comparator<Map.Entry<String, TimestampColumnStatsData>> byLowValue = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return left.getValue().getLowValue().compareTo(right.getValue().getLowValue());
        }
    };
    final Comparator<Map.Entry<String, TimestampColumnStatsData>> byHighValue = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return left.getValue().getHighValue().compareTo(right.getValue().getHighValue());
        }
    };
    final Comparator<Map.Entry<String, TimestampColumnStatsData>> byNumDVs = new Comparator<Map.Entry<String, TimestampColumnStatsData>>() {

        @Override
        public int compare(Map.Entry<String, TimestampColumnStatsData> left, Map.Entry<String, TimestampColumnStatsData> right) {
            return Long.compare(left.getValue().getNumDVs(), right.getValue().getNumDVs());
        }
    };

    // --- low value: extrapolate downwards from the entries with the smallest/largest low value
    Collections.sort(sortedEntries, byLowValue);
    Map.Entry<String, TimestampColumnStatsData> first = sortedEntries.get(0);
    Map.Entry<String, TimestampColumnStatsData> last = sortedEntries.get(lastPos);
    double firstIdx = adjustedIndexMap.get(first.getKey());
    double lastIdx = adjustedIndexMap.get(last.getKey());
    long smallest = first.getValue().getLowValue().getSecondsSinceEpoch();
    long largest = last.getValue().getLowValue().getSecondsSinceEpoch();
    final long lowValue = extrapolateLowerBound(smallest, largest, firstIdx, lastIdx, numParts);

    // --- high value: extrapolate upwards from the entries with the smallest/largest high value
    Collections.sort(sortedEntries, byHighValue);
    first = sortedEntries.get(0);
    last = sortedEntries.get(lastPos);
    firstIdx = adjustedIndexMap.get(first.getKey());
    lastIdx = adjustedIndexMap.get(last.getKey());
    smallest = first.getValue().getHighValue().getSecondsSinceEpoch();
    largest = last.getValue().getHighValue().getSecondsSinceEpoch();
    final long highValue = extrapolateUpperBound(smallest, largest, firstIdx, lastIdx, numParts);

    // --- #nulls: sum over all entries, then scale up by the number of partitions
    long nullTotal = 0;
    for (TimestampColumnStatsData stats : timestampStatsByKey.values()) {
        nullTotal += stats.getNumNulls();
    }
    final long numNulls = nullTotal * numParts / numPartsWithStats;

    // --- ndv
    Collections.sort(sortedEntries, byNumDVs);
    // The largest single NDV is the lower bound, the sum of all NDVs the upper bound.
    final long lowerBound = sortedEntries.get(lastPos).getValue().getNumDVs();
    long higherBound = 0;
    for (Map.Entry<String, TimestampColumnStatsData> entry : sortedEntries) {
        higherBound += entry.getValue().getNumDVs();
    }
    long ndv;
    if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) {
        ndv = (long) ((highValue - lowValue) / densityAvg);
        if (ndv < lowerBound) {
            ndv = lowerBound;
        } else if (ndv > higherBound) {
            ndv = higherBound;
        }
    } else {
        first = sortedEntries.get(0);
        last = sortedEntries.get(lastPos);
        firstIdx = adjustedIndexMap.get(first.getKey());
        lastIdx = adjustedIndexMap.get(last.getKey());
        smallest = first.getValue().getNumDVs();
        largest = last.getValue().getNumDVs();
        ndv = extrapolateUpperBound(smallest, largest, firstIdx, lastIdx, numParts);
    }

    final TimestampColumnStatsDataInspector result = new TimestampColumnStatsDataInspector();
    result.setLowValue(new Timestamp(lowValue));
    result.setHighValue(new Timestamp(highValue));
    result.setNumNulls(numNulls);
    result.setNumDVs(ndv);
    extrapolateData.setTimestampStats(result);
}

/**
 * Linearly extrapolates a minimum from the smallest and largest observed values.
 * The result of the floating-point computation is truncated to {@code long}.
 */
private static long extrapolateLowerBound(long min, long max, double minInd, double maxInd, int rightBorderInd) {
    if (minInd == maxInd) {
        return min;
    }
    if (minInd < maxInd) {
        // left border is the min
        return (long) (max - (max - min) * maxInd / (maxInd - minInd));
    }
    // right border is the min
    return (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd));
}

/**
 * Linearly extrapolates a maximum from the smallest and largest observed values.
 * The result of the floating-point computation is truncated to {@code long}.
 */
private static long extrapolateUpperBound(long min, long max, double minInd, double maxInd, int rightBorderInd) {
    if (minInd == maxInd) {
        return min;
    }
    if (minInd < maxInd) {
        // right border is the max
        return (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd));
    }
    // left border is the max
    return (long) (min + (max - min) * minInd / (minInd - maxInd));
}
Also used : HashMap(java.util.HashMap) TimestampColumnStatsData(org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData) Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp) LinkedList(java.util.LinkedList) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) Map(java.util.Map) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 7 with Timestamp

use of org.apache.hadoop.hive.metastore.api.Timestamp in project hive by apache.

the class TimestampColumnStatsMerger method setLowValue.

/**
 * Merges the low value of {@code newData} into {@code aggregateData}.
 *
 * <ul>
 *   <li>Neither side has a low value set: {@code aggregateData} is left untouched.</li>
 *   <li>Both sides have one: the smaller of the two is stored.</li>
 *   <li>Exactly one side has one: the first non-null of (aggregate, new) is stored.</li>
 * </ul>
 *
 * @param aggregateData statistics being accumulated; updated in place
 * @param newData       statistics whose low value is merged in
 */
public void setLowValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
    final Timestamp currentLow = aggregateData.getLowValue();
    final Timestamp incomingLow = newData.getLowValue();
    final boolean aggregateHasLow = aggregateData.isSetLowValue();
    final boolean incomingHasLow = newData.isSetLowValue();

    if (aggregateHasLow && incomingHasLow) {
        // Both present: keep the smaller one.
        aggregateData.setLowValue(ObjectUtils.min(incomingLow, currentLow));
        return;
    }
    if (!aggregateHasLow && !incomingHasLow) {
        // Nothing to merge.
        return;
    }
    // Exactly one side is set: take whichever value is non-null.
    aggregateData.setLowValue(MoreObjects.firstNonNull(currentLow, incomingLow));
}
Also used : Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp)

Example 8 with Timestamp

use of org.apache.hadoop.hive.metastore.api.Timestamp in project hive by apache.

the class StatObjectConverter method fillColumnStatisticsData.

// JAVA
/**
 * Populates {@code data} with the statistics object matching {@code colType}, converting the
 * raw column values handed in as {@code Object}s.
 *
 * <p>The type name is lower-cased before matching. Recognized types are {@code boolean},
 * {@code string}/{@code varchar*}/{@code char*}, {@code binary},
 * {@code bigint}/{@code int}/{@code smallint}/{@code tinyint}, {@code double}/{@code float},
 * {@code decimal*}, {@code date} and {@code timestamp}. For any other type name {@code data}
 * is left unchanged.
 *
 * @param colType   column type name; must not be null
 * @param data      target that receives the type-specific statistics
 * @param llow      low value for integer, date and timestamp columns; skipped when null
 * @param lhigh     high value for integer, date and timestamp columns; skipped when null
 * @param dlow      low value for double/float columns; skipped when null
 * @param dhigh     high value for double/float columns; skipped when null
 * @param declow    low value for decimal columns, cast to {@code String}; skipped when null
 * @param dechigh   high value for decimal columns, cast to {@code String}; skipped when null
 * @param nulls     number of nulls
 * @param dist      number of distinct values
 * @param bitVector raw bit vector blob passed through {@code getBitVector}
 * @param avglen    average column length (string and binary columns)
 * @param maxlen    maximum column length (string and binary columns)
 * @param trues     number of true values (boolean columns)
 * @param falses    number of false values (boolean columns)
 * @throws MetaException declared by the signature; presumably raised by the value-extraction
 *                       helpers - confirm against MetastoreDirectSqlUtils
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
    // Type names are ASCII identifiers: lower-case them independently of the JVM default locale.
    // With a Turkish/Azeri default locale a plain toLowerCase() maps 'I' to a dotless 'ı', so
    // e.g. "BIGINT" or "TIMESTAMP" would match no branch below and the stats would be dropped.
    colType = colType.toLowerCase(java.util.Locale.ROOT);
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetastoreDirectSqlUtils.extractSqlLong(falses));
        boolStats.setNumTrues(MetastoreDirectSqlUtils.extractSqlLong(trues));
        boolStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        stringStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // High/low bounds are optional; leave them unset when no value was supplied.
        if (lhigh != null) {
            longStats.setHighValue(MetastoreDirectSqlUtils.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetastoreDirectSqlUtils.extractSqlLong(llow));
        }
        longStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        longStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetastoreDirectSqlUtils.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetastoreDirectSqlUtils.extractSqlDouble(dlow));
        }
        doubleStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        doubleStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Decimal bounds are expected as strings here.
        if (dechigh != null) {
            decimalStats.setHighValue(DecimalUtils.createThriftDecimal((String) dechigh));
        }
        if (declow != null) {
            decimalStats.setLowValue(DecimalUtils.createThriftDecimal((String) declow));
        }
        decimalStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        decimalStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Date bounds share the long-valued low/high inputs.
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        dateStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDateStats(dateStats);
    } else if (colType.equals("timestamp")) {
        TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
        timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Timestamp bounds share the long-valued low/high inputs.
        if (lhigh != null) {
            timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        timestampStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setTimestampStats(timestampStats);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Aggregations

Timestamp (org.apache.hadoop.hive.metastore.api.Timestamp)8 TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector)5 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)4 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)4 Date (org.apache.hadoop.hive.metastore.api.Date)4 DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector)4 DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector)4 DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector)4 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)4 StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector)4 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)3 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)2 BigDecimal (java.math.BigDecimal)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 Decimal (org.apache.hadoop.hive.metastore.api.Decimal)1 TimestampColumnStatsData (org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData)1 TimestampWritableV2 (org.apache.hadoop.hive.serde2.io.TimestampWritableV2)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1