Search in sources:

Example 36 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

Method newColumnStaticsObj of the class ColumnStatsAggregatorFactory.

/**
 * Builds a {@link ColumnStatisticsObj} for the given column whose statistics
 * payload is a newly constructed stats object of the requested kind.
 *
 * @param colName column name stored on the returned object
 * @param colType column type stored on the returned object
 * @param type    which member of the statistics union to populate
 * @return a new statistics object carrying the name, the type and the new stats payload
 * @throws RuntimeException if {@code type} is not one of the kinds handled below
 */
public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
    // Build the payload first; an unsupported kind aborts before anything else is assembled.
    final ColumnStatisticsData payload = new ColumnStatisticsData();
    switch(type) {
        case BOOLEAN_STATS:
            payload.setBooleanStats(new BooleanColumnStatsData());
            break;
        case LONG_STATS:
            payload.setLongStats(new LongColumnStatsDataInspector());
            break;
        case DATE_STATS:
            payload.setDateStats(new DateColumnStatsDataInspector());
            break;
        case TIMESTAMP_STATS:
            payload.setTimestampStats(new TimestampColumnStatsDataInspector());
            break;
        case DOUBLE_STATS:
            payload.setDoubleStats(new DoubleColumnStatsDataInspector());
            break;
        case STRING_STATS:
            payload.setStringStats(new StringColumnStatsDataInspector());
            break;
        case BINARY_STATS:
            payload.setBinaryStats(new BinaryColumnStatsData());
            break;
        case DECIMAL_STATS:
            payload.setDecimalStats(new DecimalColumnStatsDataInspector());
            break;
        default:
            throw new RuntimeException("Woh, bad.  Unknown stats type!");
    }

    // Wrap the payload together with the column identity.
    final ColumnStatisticsObj result = new ColumnStatisticsObj();
    result.setColName(colName);
    result.setColType(colType);
    result.setStatsData(payload);
    return result;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 37 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

Method aggregate of the class BinaryColumnStatsAggregator.

/**
 * Aggregates binary column statistics from several sources into one
 * {@link ColumnStatisticsObj}.
 *
 * <p>The column name and type are taken from the first input. The aggregated
 * values are combined as follows:
 * <ul>
 *   <li>{@code maxColLen}: maximum over all inputs</li>
 *   <li>{@code avgColLen}: maximum of the per-input averages (not a weighted mean)</li>
 *   <li>{@code numNulls}: sum over all inputs</li>
 * </ul>
 *
 * @param colStatsWithSourceInfo statistics to aggregate; must contain at least one element
 * @param partNames              not used by this implementation
 * @param areAllPartsFound       not used by this implementation
 * @return the aggregated statistics object
 * @throws IllegalArgumentException if {@code colStatsWithSourceInfo} is null or empty
 * @throws MetaException declared by the overridden method; not thrown directly in this body
 */
@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, List<String> partNames, boolean areAllPartsFound) throws MetaException {
    // With no input there is no column name/type to build a result from. Fail with a
    // descriptive error instead of the NullPointerException the final setStatsData
    // call would otherwise raise on a null statsObj.
    if (colStatsWithSourceInfo == null || colStatsWithSourceInfo.isEmpty()) {
        throw new IllegalArgumentException("Column statistics list must not be empty when aggregating");
    }
    ColumnStatisticsObj statsObj = null;
    String colType = null;
    String colName = null;
    BinaryColumnStatsData aggregateData = null;
    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
        ColumnStatisticsObj cso = csp.getColStatsObj();
        if (statsObj == null) {
            // The first input decides the column identity and the stats kind of the result.
            colName = cso.getColName();
            colType = cso.getColType();
            statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField());
        }
        BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
        if (aggregateData == null) {
            // Copy so that the caller's first input is never mutated by later merges.
            aggregateData = newData.deepCopy();
        } else {
            aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
            aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
        }
    }
    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
    columnStatisticsData.setBinaryStats(aggregateData);
    statsObj.setStatsData(columnStatisticsData);
    return statsObj;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColStatsObjWithSourceInfo(org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 38 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

Method merge of the class BinaryColumnStatsMerger.

/**
 * Folds the binary statistics of {@code newColStats} into {@code aggregateColStats},
 * updating the latter in place.
 *
 * <p>The maximum column length and the average column length each become the larger
 * of the two values; the null counts are added together.
 *
 * @param aggregateColStats statistics object that receives the merged values
 * @param newColStats       statistics object whose values are merged in; only read here
 */
@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
    LOG.debug("Merging statistics: [aggregateColStats:{}, newColStats: {}]", aggregateColStats, newColStats);

    final BinaryColumnStatsData target = aggregateColStats.getStatsData().getBinaryStats();
    final BinaryColumnStatsData incoming = newColStats.getStatsData().getBinaryStats();

    // Compute every merged value first, then write them back to the target.
    final long mergedMaxLen = Math.max(target.getMaxColLen(), incoming.getMaxColLen());
    final double mergedAvgLen = Math.max(target.getAvgColLen(), incoming.getAvgColLen());
    final long mergedNulls = target.getNumNulls() + incoming.getNumNulls();

    target.setMaxColLen(mergedMaxLen);
    target.setAvgColLen(mergedAvgLen);
    target.setNumNulls(mergedNulls);
}
Also used : BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 39 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

Method fillColumnStatisticsData of the class StatObjectConverter.

// JAVA
/**
 * Populates {@code data} with a type-specific statistics object chosen from the
 * column type name, filling it from the raw values passed in.
 *
 * <p>The type name is matched case-insensitively. The lower-casing uses
 * {@link java.util.Locale#ROOT} so that matching does not depend on the JVM default
 * locale (for example, a Turkish locale would otherwise turn "BIGINT" into a string
 * that matches no branch). If the type matches none of the branches below,
 * {@code data} is left untouched.
 *
 * @param colType   column type name, e.g. {@code "bigint"}, {@code "varchar(10)"}, {@code "decimal(10,2)"}
 * @param data      statistics union that receives the populated stats object
 * @param llow      low value for integer, date and timestamp columns; skipped when null
 * @param lhigh     high value for integer, date and timestamp columns; skipped when null
 * @param dlow      low value for double/float columns; skipped when null
 * @param dhigh     high value for double/float columns; skipped when null
 * @param declow    low value for decimal columns, cast to {@code String}; skipped when null
 * @param dechigh   high value for decimal columns, cast to {@code String}; skipped when null
 * @param nulls     number of nulls
 * @param dist      number of distinct values (not used for boolean and binary columns)
 * @param bitVector raw bit-vector blob (not used for boolean and binary columns)
 * @param avglen    average column length (string and binary columns only)
 * @param maxlen    maximum column length (string and binary columns only)
 * @param trues     number of true values (boolean columns only)
 * @param falses    number of false values (boolean columns only)
 * @throws MetaException declared on the signature; presumably raised by the extraction helpers - verify
 */
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
    // Locale-independent lower-casing; fully qualified so no new import is required.
    final String normalizedType = colType.toLowerCase(java.util.Locale.ROOT);
    if (normalizedType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetastoreDirectSqlUtils.extractSqlLong(falses));
        boolStats.setNumTrues(MetastoreDirectSqlUtils.extractSqlLong(trues));
        boolStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (normalizedType.equals("string") || normalizedType.startsWith("varchar") || normalizedType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        stringStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setStringStats(stringStats);
    } else if (normalizedType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetastoreDirectSqlUtils.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (normalizedType.equals("bigint") || normalizedType.equals("int") || normalizedType.equals("smallint") || normalizedType.equals("tinyint")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // High/low bounds are optional: leave them unset when no value was supplied.
        if (lhigh != null) {
            longStats.setHighValue(MetastoreDirectSqlUtils.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetastoreDirectSqlUtils.extractSqlLong(llow));
        }
        longStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        longStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setLongStats(longStats);
    } else if (normalizedType.equals("double") || normalizedType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetastoreDirectSqlUtils.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetastoreDirectSqlUtils.extractSqlDouble(dlow));
        }
        doubleStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        doubleStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDoubleStats(doubleStats);
    } else if (normalizedType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        if (dechigh != null) {
            decimalStats.setHighValue(DecimalUtils.createThriftDecimal((String) dechigh));
        }
        if (declow != null) {
            decimalStats.setLowValue(DecimalUtils.createThriftDecimal((String) declow));
        }
        decimalStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        decimalStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDecimalStats(decimalStats);
    } else if (normalizedType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Date bounds arrive through the long low/high slots.
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        dateStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setDateStats(dateStats);
    } else if (normalizedType.equals("timestamp")) {
        TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector();
        timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls));
        // Timestamp bounds also arrive through the long low/high slots.
        if (lhigh != null) {
            timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow)));
        }
        timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist));
        timestampStats.setBitVectors(getBitVector(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)));
        data.setTimestampStats(timestampStats);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) TimestampColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) Timestamp(org.apache.hadoop.hive.metastore.api.Timestamp) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Example 40 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project presto by prestodb.

Method fromMetastoreApiColumnStatistics of the class ThriftMetastoreUtil.

/**
 * Converts a metastore {@link ColumnStatisticsObj} into {@link HiveColumnStatistics}.
 *
 * <p>The set member of the statistics union selects the conversion: long, double,
 * decimal, date, boolean, string or binary. Each field is read only when its
 * {@code isSet...} flag is true; otherwise the corresponding optional is empty.
 *
 * @param columnStatistics metastore statistics for one column
 * @param rowCount         row count forwarded to the distinct-value and total-size helpers
 * @return the converted statistics
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} if none of the handled kinds is set
 */
public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatisticsObj columnStatistics, OptionalLong rowCount) {
    // Integer-like columns.
    if (columnStatistics.getStatsData().isSetLongStats()) {
        LongColumnStatsData stats = columnStatistics.getStatsData().getLongStats();
        OptionalLong low = stats.isSetLowValue() ? OptionalLong.of(stats.getLowValue()) : OptionalLong.empty();
        OptionalLong high = stats.isSetHighValue() ? OptionalLong.of(stats.getHighValue()) : OptionalLong.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong distinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createIntegerColumnStatistics(
                low,
                high,
                nulls,
                fromMetastoreDistinctValuesCount(distinct, nulls, rowCount));
    }

    // Floating-point columns.
    if (columnStatistics.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData stats = columnStatistics.getStatsData().getDoubleStats();
        OptionalDouble low = stats.isSetLowValue() ? OptionalDouble.of(stats.getLowValue()) : OptionalDouble.empty();
        OptionalDouble high = stats.isSetHighValue() ? OptionalDouble.of(stats.getHighValue()) : OptionalDouble.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong distinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDoubleColumnStatistics(
                low,
                high,
                nulls,
                fromMetastoreDistinctValuesCount(distinct, nulls, rowCount));
    }

    // Decimal columns.
    if (columnStatistics.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData stats = columnStatistics.getStatsData().getDecimalStats();
        Optional<BigDecimal> low = stats.isSetLowValue() ? fromMetastoreDecimal(stats.getLowValue()) : Optional.empty();
        Optional<BigDecimal> high = stats.isSetHighValue() ? fromMetastoreDecimal(stats.getHighValue()) : Optional.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong distinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDecimalColumnStatistics(
                low,
                high,
                nulls,
                fromMetastoreDistinctValuesCount(distinct, nulls, rowCount));
    }

    // Date columns.
    if (columnStatistics.getStatsData().isSetDateStats()) {
        DateColumnStatsData stats = columnStatistics.getStatsData().getDateStats();
        Optional<LocalDate> low = stats.isSetLowValue() ? fromMetastoreDate(stats.getLowValue()) : Optional.empty();
        Optional<LocalDate> high = stats.isSetHighValue() ? fromMetastoreDate(stats.getHighValue()) : Optional.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong distinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createDateColumnStatistics(
                low,
                high,
                nulls,
                fromMetastoreDistinctValuesCount(distinct, nulls, rowCount));
    }

    // Boolean columns: only true/false/null counts are carried over.
    if (columnStatistics.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData stats = columnStatistics.getStatsData().getBooleanStats();
        OptionalLong trues = stats.isSetNumTrues() ? OptionalLong.of(stats.getNumTrues()) : OptionalLong.empty();
        OptionalLong falses = stats.isSetNumFalses() ? OptionalLong.of(stats.getNumFalses()) : OptionalLong.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        return createBooleanColumnStatistics(trues, falses, nulls);
    }

    // String columns.
    if (columnStatistics.getStatsData().isSetStringStats()) {
        StringColumnStatsData stats = columnStatistics.getStatsData().getStringStats();
        OptionalLong maxLength = stats.isSetMaxColLen() ? OptionalLong.of(stats.getMaxColLen()) : OptionalLong.empty();
        OptionalDouble avgLength = stats.isSetAvgColLen() ? OptionalDouble.of(stats.getAvgColLen()) : OptionalDouble.empty();
        OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
        OptionalLong distinct = stats.isSetNumDVs() ? OptionalLong.of(stats.getNumDVs()) : OptionalLong.empty();
        return createStringColumnStatistics(
                maxLength,
                getTotalSizeInBytes(avgLength, rowCount, nulls),
                nulls,
                fromMetastoreDistinctValuesCount(distinct, nulls, rowCount));
    }

    // Anything that is not binary at this point is a kind this method does not handle.
    if (!columnStatistics.getStatsData().isSetBinaryStats()) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics);
    }

    // Binary columns: no distinct-value count is passed on.
    BinaryColumnStatsData stats = columnStatistics.getStatsData().getBinaryStats();
    OptionalLong maxLength = stats.isSetMaxColLen() ? OptionalLong.of(stats.getMaxColLen()) : OptionalLong.empty();
    OptionalDouble avgLength = stats.isSetAvgColLen() ? OptionalDouble.of(stats.getAvgColLen()) : OptionalDouble.empty();
    OptionalLong nulls = stats.isSetNumNulls() ? fromMetastoreNullsCount(stats.getNumNulls()) : OptionalLong.empty();
    return createBinaryColumnStatistics(
            maxLength,
            getTotalSizeInBytes(avgLength, rowCount, nulls),
            nulls);
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) PrestoException(com.facebook.presto.spi.PrestoException) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) LocalDate(java.time.LocalDate) OptionalDouble(java.util.OptionalDouble) BigDecimal(java.math.BigDecimal) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) OptionalLong(java.util.OptionalLong)

Aggregations

BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 43; BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 31; ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 26; ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 22; DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 22; DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 22; LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 22; StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 22; DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData): 15; DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector): 9; DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector): 9; DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector): 9; LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector): 9; StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector): 9; TimestampColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector): 8; Date (org.apache.hadoop.hive.metastore.api.Date): 7; BigDecimal (java.math.BigDecimal): 5; ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 5; ArrayList (java.util.ArrayList): 4; ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 4