Search in sources :

Example 26 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

the class BinaryColumnStatsAggregator method aggregate.

@Override
public ColumnStatisticsObj aggregate(List<ColStatsObjWithSourceInfo> colStatsWithSourceInfo, List<String> partNames, boolean areAllPartsFound) throws MetaException {
    ColumnStatisticsObj statsObj = null;
    String colType = null;
    String colName = null;
    BinaryColumnStatsData aggregateData = null;
    for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) {
        ColumnStatisticsObj cso = csp.getColStatsObj();
        if (statsObj == null) {
            colName = cso.getColName();
            colType = cso.getColType();
            statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso.getStatsData().getSetField());
        }
        BinaryColumnStatsData newData = cso.getStatsData().getBinaryStats();
        if (aggregateData == null) {
            aggregateData = newData.deepCopy();
        } else {
            aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
            aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
            aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
        }
    }
    ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
    columnStatisticsData.setBinaryStats(aggregateData);
    statsObj.setStatsData(columnStatisticsData);
    return statsObj;
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColStatsObjWithSourceInfo(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 27 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

the class ColumnStatsAggregatorFactory method newColumnStaticsObj.

public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
    ColumnStatisticsObj cso = new ColumnStatisticsObj();
    ColumnStatisticsData csd = new ColumnStatisticsData();
    cso.setColName(colName);
    cso.setColType(colType);
    switch(type) {
        case BOOLEAN_STATS:
            csd.setBooleanStats(new BooleanColumnStatsData());
            break;
        case LONG_STATS:
            csd.setLongStats(new LongColumnStatsDataInspector());
            break;
        case DATE_STATS:
            csd.setDateStats(new DateColumnStatsDataInspector());
            break;
        case DOUBLE_STATS:
            csd.setDoubleStats(new DoubleColumnStatsDataInspector());
            break;
        case STRING_STATS:
            csd.setStringStats(new StringColumnStatsDataInspector());
            break;
        case BINARY_STATS:
            csd.setBinaryStats(new BinaryColumnStatsData());
            break;
        case DECIMAL_STATS:
            csd.setDecimalStats(new DecimalColumnStatsDataInspector());
            break;
        default:
            throw new RuntimeException("Woh, bad.  Unknown stats type!");
    }
    cso.setStatsData(csd);
    return cso;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 28 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

the class BinaryColumnStatsMerger method merge.

@Override
public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
    BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats();
    BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats();
    aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen()));
    aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen()));
    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
}
Also used : BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 29 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

the class StatObjectConverter method convertToMTableColumnStatistics.

// JDO
public static MTableColumnStatistics convertToMTableColumnStatistics(MTable table, ColumnStatisticsDesc statsDesc, ColumnStatisticsObj statsObj) throws NoSuchObjectException, MetaException, InvalidObjectException {
    if (statsObj == null || statsDesc == null) {
        throw new InvalidObjectException("Invalid column stats object");
    }
    MTableColumnStatistics mColStats = new MTableColumnStatistics();
    mColStats.setTable(table);
    mColStats.setDbName(statsDesc.getDbName());
    mColStats.setTableName(statsDesc.getTableName());
    mColStats.setLastAnalyzed(statsDesc.getLastAnalyzed());
    mColStats.setColName(statsObj.getColName());
    mColStats.setColType(statsObj.getColType());
    if (statsObj.getStatsData().isSetBooleanStats()) {
        BooleanColumnStatsData boolStats = statsObj.getStatsData().getBooleanStats();
        mColStats.setBooleanStats(boolStats.isSetNumTrues() ? boolStats.getNumTrues() : null, boolStats.isSetNumFalses() ? boolStats.getNumFalses() : null, boolStats.isSetNumNulls() ? boolStats.getNumNulls() : null);
    } else if (statsObj.getStatsData().isSetLongStats()) {
        LongColumnStatsData longStats = statsObj.getStatsData().getLongStats();
        mColStats.setLongStats(longStats.isSetNumNulls() ? longStats.getNumNulls() : null, longStats.isSetNumDVs() ? longStats.getNumDVs() : null, longStats.isSetBitVectors() ? longStats.getBitVectors() : null, longStats.isSetLowValue() ? longStats.getLowValue() : null, longStats.isSetHighValue() ? longStats.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = statsObj.getStatsData().getDoubleStats();
        mColStats.setDoubleStats(doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetBitVectors() ? doubleStats.getBitVectors() : null, doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
    } else if (statsObj.getStatsData().isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = statsObj.getStatsData().getDecimalStats();
        String low = decimalStats.isSetLowValue() ? createJdoDecimalString(decimalStats.getLowValue()) : null;
        String high = decimalStats.isSetHighValue() ? createJdoDecimalString(decimalStats.getHighValue()) : null;
        mColStats.setDecimalStats(decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null, decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null, decimalStats.isSetBitVectors() ? decimalStats.getBitVectors() : null, low, high);
    } else if (statsObj.getStatsData().isSetStringStats()) {
        StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
        mColStats.setStringStats(stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, stringStats.isSetBitVectors() ? stringStats.getBitVectors() : null, stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = statsObj.getStatsData().getBinaryStats();
        mColStats.setBinaryStats(binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null, binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null);
    } else if (statsObj.getStatsData().isSetDateStats()) {
        DateColumnStatsData dateStats = statsObj.getStatsData().getDateStats();
        mColStats.setDateStats(dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null, dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null, dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
    }
    return mColStats;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)

Example 30 with BinaryColumnStatsData

use of org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData in project hive by apache.

the class StatObjectConverter method fillColumnStatisticsData.

// JAVA
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data, Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh, Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
    colType = colType.toLowerCase();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(MetaStoreDirectSql.extractSqlLong(falses));
        boolStats.setNumTrues(MetaStoreDirectSql.extractSqlLong(trues));
        boolStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        data.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
        stringStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        stringStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        binaryStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
        binaryStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
        data.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) {
        LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
        longStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            longStats.setHighValue(MetaStoreDirectSql.extractSqlLong(lhigh));
        }
        if (llow != null) {
            longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
        }
        longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
        doubleStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (dhigh != null) {
            doubleStats.setHighValue(MetaStoreDirectSql.extractSqlDouble(dhigh));
        }
        if (dlow != null) {
            doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
        }
        doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
        decimalStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (dechigh != null) {
            decimalStats.setHighValue(createThriftDecimal((String) dechigh));
        }
        if (declow != null) {
            decimalStats.setLowValue(createThriftDecimal((String) declow));
        }
        decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
        dateStats.setNumNulls(MetaStoreDirectSql.extractSqlLong(nulls));
        if (lhigh != null) {
            dateStats.setHighValue(new Date(MetaStoreDirectSql.extractSqlLong(lhigh)));
        }
        if (llow != null) {
            dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
        }
        dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
        dateStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
        data.setDateStats(dateStats);
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DecimalColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector) DoubleColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector) LongColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector) DateColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector) StringColumnStatsDataInspector(org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Aggregations

BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)30 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)23 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)22 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)18 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)15 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)15 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)15 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)15 DateColumnStatsData (org.apache.hadoop.hive.metastore.api.DateColumnStatsData)10 DateColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector)8 DecimalColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector)8 DoubleColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector)8 LongColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector)8 StringColumnStatsDataInspector (org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector)8 Date (org.apache.hadoop.hive.metastore.api.Date)7 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)5 Decimal (org.apache.hadoop.hive.metastore.api.Decimal)5 BigDecimal (java.math.BigDecimal)4 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)4 ArrayList (java.util.ArrayList)3