Search in sources :

Example 1 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class StatObjectConverter method getTableColumnStatisticsObj.

public static ColumnStatisticsObj getTableColumnStatisticsObj(MTableColumnStatistics mStatsObj) {
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColType(mStatsObj.getColType());
    statsObj.setColName(mStatsObj.getColName());
    String colType = mStatsObj.getColType().toLowerCase();
    ColumnStatisticsData colStatsData = new ColumnStatisticsData();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(mStatsObj.getNumFalses());
        boolStats.setNumTrues(mStatsObj.getNumTrues());
        boolStats.setNumNulls(mStatsObj.getNumNulls());
        colStatsData.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsData stringStats = new StringColumnStatsData();
        stringStats.setNumNulls(mStatsObj.getNumNulls());
        stringStats.setAvgColLen(mStatsObj.getAvgColLen());
        stringStats.setMaxColLen(mStatsObj.getMaxColLen());
        stringStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(mStatsObj.getNumNulls());
        binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
        binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
        colStatsData.setBinaryStats(binaryStats);
    } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") || colType.equals("tinyint") || colType.equals("timestamp")) {
        LongColumnStatsData longStats = new LongColumnStatsData();
        longStats.setNumNulls(mStatsObj.getNumNulls());
        Long longHighValue = mStatsObj.getLongHighValue();
        if (longHighValue != null) {
            longStats.setHighValue(longHighValue);
        }
        Long longLowValue = mStatsObj.getLongLowValue();
        if (longLowValue != null) {
            longStats.setLowValue(longLowValue);
        }
        longStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        doubleStats.setNumNulls(mStatsObj.getNumNulls());
        Double doubleHighValue = mStatsObj.getDoubleHighValue();
        if (doubleHighValue != null) {
            doubleStats.setHighValue(doubleHighValue);
        }
        Double doubleLowValue = mStatsObj.getDoubleLowValue();
        if (doubleLowValue != null) {
            doubleStats.setLowValue(doubleLowValue);
        }
        doubleStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
        decimalStats.setNumNulls(mStatsObj.getNumNulls());
        String decimalHighValue = mStatsObj.getDecimalHighValue();
        if (decimalHighValue != null) {
            decimalStats.setHighValue(createThriftDecimal(decimalHighValue));
        }
        String decimalLowValue = mStatsObj.getDecimalLowValue();
        if (decimalLowValue != null) {
            decimalStats.setLowValue(createThriftDecimal(decimalLowValue));
        }
        decimalStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsData dateStats = new DateColumnStatsData();
        dateStats.setNumNulls(mStatsObj.getNumNulls());
        Long highValue = mStatsObj.getLongHighValue();
        if (highValue != null) {
            dateStats.setHighValue(new Date(highValue));
        }
        Long lowValue = mStatsObj.getLongLowValue();
        if (lowValue != null) {
            dateStats.setLowValue(new Date(lowValue));
        }
        dateStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDateStats(dateStats);
    }
    statsObj.setStatsData(colStatsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 2 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class StatObjectConverter method getPartitionColumnStatisticsObj.

public static ColumnStatisticsObj getPartitionColumnStatisticsObj(MPartitionColumnStatistics mStatsObj) {
    ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
    statsObj.setColType(mStatsObj.getColType());
    statsObj.setColName(mStatsObj.getColName());
    String colType = mStatsObj.getColType().toLowerCase();
    ColumnStatisticsData colStatsData = new ColumnStatisticsData();
    if (colType.equals("boolean")) {
        BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
        boolStats.setNumFalses(mStatsObj.getNumFalses());
        boolStats.setNumTrues(mStatsObj.getNumTrues());
        boolStats.setNumNulls(mStatsObj.getNumNulls());
        colStatsData.setBooleanStats(boolStats);
    } else if (colType.equals("string") || colType.startsWith("varchar") || colType.startsWith("char")) {
        StringColumnStatsData stringStats = new StringColumnStatsData();
        stringStats.setNumNulls(mStatsObj.getNumNulls());
        stringStats.setAvgColLen(mStatsObj.getAvgColLen());
        stringStats.setMaxColLen(mStatsObj.getMaxColLen());
        stringStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setStringStats(stringStats);
    } else if (colType.equals("binary")) {
        BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
        binaryStats.setNumNulls(mStatsObj.getNumNulls());
        binaryStats.setAvgColLen(mStatsObj.getAvgColLen());
        binaryStats.setMaxColLen(mStatsObj.getMaxColLen());
        colStatsData.setBinaryStats(binaryStats);
    } else if (colType.equals("tinyint") || colType.equals("smallint") || colType.equals("int") || colType.equals("bigint") || colType.equals("timestamp")) {
        LongColumnStatsData longStats = new LongColumnStatsData();
        longStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getLongHighValue() != null) {
            longStats.setHighValue(mStatsObj.getLongHighValue());
        }
        if (mStatsObj.getLongLowValue() != null) {
            longStats.setLowValue(mStatsObj.getLongLowValue());
        }
        longStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setLongStats(longStats);
    } else if (colType.equals("double") || colType.equals("float")) {
        DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
        doubleStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDoubleHighValue() != null) {
            doubleStats.setHighValue(mStatsObj.getDoubleHighValue());
        }
        if (mStatsObj.getDoubleLowValue() != null) {
            doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
        }
        doubleStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDoubleStats(doubleStats);
    } else if (colType.startsWith("decimal")) {
        DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
        decimalStats.setNumNulls(mStatsObj.getNumNulls());
        if (mStatsObj.getDecimalHighValue() != null) {
            decimalStats.setHighValue(createThriftDecimal(mStatsObj.getDecimalHighValue()));
        }
        if (mStatsObj.getDecimalLowValue() != null) {
            decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
        }
        decimalStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDecimalStats(decimalStats);
    } else if (colType.equals("date")) {
        DateColumnStatsData dateStats = new DateColumnStatsData();
        dateStats.setNumNulls(mStatsObj.getNumNulls());
        dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
        dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
        dateStats.setNumDVs(mStatsObj.getNumDVs());
        colStatsData.setDateStats(dateStats);
    }
    statsObj.setStatsData(colStatsData);
    return statsObj;
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) DateColumnStatsData(org.apache.hadoop.hive.metastore.api.DateColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) Date(org.apache.hadoop.hive.metastore.api.Date)

Example 3 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class HBaseReadWrite method printOneTable.

private String printOneTable(Result result) throws IOException, TException {
    byte[] key = result.getRow();
    HBaseUtils.StorageDescriptorParts sdParts = HBaseUtils.deserializeTable(key, result.getValue(CATALOG_CF, CATALOG_COL));
    StringBuilder builder = new StringBuilder();
    builder.append(dumpThriftObject(sdParts.containingTable)).append(" sdHash: ").append(Base64.encodeBase64URLSafeString(sdParts.sdHash)).append(" stats:");
    NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF);
    for (Map.Entry<byte[], byte[]> statsCol : statsCols.entrySet()) {
        builder.append(" column ").append(new String(statsCol.getKey(), HBaseUtils.ENCODING)).append(": ");
        ColumnStatistics pcs = buildColStats(key, true);
        ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(pcs, statsCol.getValue());
        builder.append(dumpThriftObject(cso));
    }
    // Add the primary key
    List<SQLPrimaryKey> pk = getPrimaryKey(sdParts.containingTable.getDbName(), sdParts.containingTable.getTableName());
    if (pk != null && pk.size() > 0) {
        builder.append(" primary key: ");
        for (SQLPrimaryKey pkcol : pk) builder.append(dumpThriftObject(pkcol));
    }
    // Add any foreign keys
    List<SQLForeignKey> fks = getForeignKeys(sdParts.containingTable.getDbName(), sdParts.containingTable.getTableName());
    if (fks != null && fks.size() > 0) {
        builder.append(" foreign keys: ");
        for (SQLForeignKey fkcol : fks) builder.append(dumpThriftObject(fkcol));
    }
    return builder.toString();
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) SQLPrimaryKey(org.apache.hadoop.hive.metastore.api.SQLPrimaryKey) SQLForeignKey(org.apache.hadoop.hive.metastore.api.SQLForeignKey) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Map(java.util.Map) NavigableMap(java.util.NavigableMap) HashMap(java.util.HashMap)

Example 4 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class HiveAlterHandler method updatePartColumnStatsForAlterColumns.

private void updatePartColumnStatsForAlterColumns(RawStore msdb, Partition oldPartition, String oldPartName, List<String> partVals, List<FieldSchema> oldCols, Partition newPart) throws MetaException, InvalidObjectException {
    String dbName = oldPartition.getDbName();
    String tableName = oldPartition.getTableName();
    try {
        List<String> oldPartNames = Lists.newArrayList(oldPartName);
        List<String> oldColNames = new ArrayList<String>(oldCols.size());
        for (FieldSchema oldCol : oldCols) {
            oldColNames.add(oldCol.getName());
        }
        List<FieldSchema> newCols = newPart.getSd().getCols();
        List<ColumnStatistics> partsColStats = msdb.getPartitionColumnStatistics(dbName, tableName, oldPartNames, oldColNames);
        assert (partsColStats.size() <= 1);
        for (ColumnStatistics partColStats : partsColStats) {
            //actually only at most one loop
            List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
            List<String> deletedCols = new ArrayList<String>();
            for (ColumnStatisticsObj statsObj : statsObjs) {
                boolean found = false;
                for (FieldSchema newCol : newCols) {
                    if (statsObj.getColName().equalsIgnoreCase(newCol.getName()) && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    msdb.deletePartitionColumnStatistics(dbName, tableName, oldPartName, partVals, statsObj.getColName());
                    deletedCols.add(statsObj.getColName());
                }
            }
            StatsSetupConst.removeColumnStatsState(newPart.getParameters(), deletedCols);
        }
    } catch (NoSuchObjectException nsoe) {
        LOG.debug("Could not find db entry." + nsoe);
    //ignore
    } catch (InvalidInputException iie) {
        throw new InvalidObjectException("Invalid input to update partition column stats in alter table change columns" + iie);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException)

Example 5 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class MetaStoreDirectSql method aggrColStatsForPartitions.

public AggrStats aggrColStatsForPartitions(String dbName, String tableName, List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval");
        // Nothing to aggregate
        return new AggrStats(new ArrayList<ColumnStatisticsObj>(), 0);
    }
    long partsFound = 0;
    List<ColumnStatisticsObj> colStatsList;
    // Try to read from the cache first
    if (isAggregateStatsCacheEnabled && (partNames.size() < aggrStatsCache.getMaxPartsPerCacheNode())) {
        AggrColStats colStatsAggrCached;
        List<ColumnStatisticsObj> colStatsAggrFromDB;
        int maxPartsPerCacheNode = aggrStatsCache.getMaxPartsPerCacheNode();
        float fpp = aggrStatsCache.getFalsePositiveProbability();
        colStatsList = new ArrayList<ColumnStatisticsObj>();
        // Bloom filter for the new node that we will eventually add to the cache
        BloomFilter bloomFilter = createPartsBloomFilter(maxPartsPerCacheNode, fpp, partNames);
        boolean computePartsFound = true;
        for (String colName : colNames) {
            // Check the cache first
            colStatsAggrCached = aggrStatsCache.get(dbName, tableName, colName, partNames);
            if (colStatsAggrCached != null) {
                colStatsList.add(colStatsAggrCached.getColStats());
                partsFound = colStatsAggrCached.getNumPartsCached();
            } else {
                if (computePartsFound) {
                    partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
                    computePartsFound = false;
                }
                List<String> colNamesForDB = new ArrayList<String>();
                colNamesForDB.add(colName);
                // Read aggregated stats for one column
                colStatsAggrFromDB = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNamesForDB, partsFound, useDensityFunctionForNDVEstimation);
                if (!colStatsAggrFromDB.isEmpty()) {
                    ColumnStatisticsObj colStatsAggr = colStatsAggrFromDB.get(0);
                    colStatsList.add(colStatsAggr);
                    // Update the cache to add this new aggregate node
                    aggrStatsCache.add(dbName, tableName, colName, partsFound, colStatsAggr, bloomFilter);
                }
            }
        }
    } else {
        partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
        colStatsList = columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound, useDensityFunctionForNDVEstimation);
    }
    LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = " + Arrays.toString(colStatsList.toArray()));
    return new AggrStats(colStatsList, partsFound);
}
Also used : AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) ArrayList(java.util.ArrayList) AggrColStats(org.apache.hadoop.hive.metastore.AggregateStatsCache.AggrColStats) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) BloomFilter(org.apache.hive.common.util.BloomFilter) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)

Aggregations

ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)219 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)104 ArrayList (java.util.ArrayList)98 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)82 Test (org.junit.Test)79 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)68 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)43 Table (org.apache.hadoop.hive.metastore.api.Table)43 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)35 List (java.util.List)34 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)29 HashMap (java.util.HashMap)28 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22