Usage example of org.apache.hadoop.hive.metastore.api.ColumnStatistics in the Apache Hive project:
the setPartitionColumnStatistics method of the SessionHiveMetaStoreClient class.
/**
 * {@inheritDoc}
 *
 * <p>Intercepts the single-statistics case: when the request carries exactly one
 * {@link ColumnStatistics} entry and it targets a session-local temporary table,
 * the stats are stored locally via {@code updateTempTableColumnStats}. Every other
 * request is delegated to the remote metastore.
 */
@Override
public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException {
  if (request.getColStatsSize() != 1) {
    // Multi-entry requests never target temp tables here; hand straight to the server.
    return super.setPartitionColumnStatistics(request);
  }
  ColumnStatistics stats = request.getColStatsIterator().next();
  ColumnStatisticsDesc statsDesc = stats.getStatsDesc();
  // Temp-table registry keys are lower-cased, so normalize before the lookup.
  String db = statsDesc.getDbName().toLowerCase();
  String tbl = statsDesc.getTableName().toLowerCase();
  if (getTempTable(db, tbl) == null) {
    return super.setPartitionColumnStatistics(request);
  }
  return updateTempTableColumnStats(db, tbl, stats);
}
Usage example of org.apache.hadoop.hive.metastore.api.ColumnStatistics in the Apache Hive project:
the getPartitionStatistics method of the HBaseReadWrite class.
/**
 * Get statistics for a set of partitions.
 *
 * @param dbName name of the database the table is in
 * @param tblName table the partitions belong to
 * @param partNames names of the partitions; must be parallel to {@code partVals}
 * @param partVals partition values for each partition, needed because this class doesn't know
 *          how to translate from partName to partVals
 * @param colNames column names to fetch stats for; these columns are fetched for every
 *          requested partition
 * @return list of ColumnStatistics, one for each partition for which we found at least one
 *         column's stats
 * @throws IOException if the underlying HBase read fails
 */
List<ColumnStatistics> getPartitionStatistics(String dbName, String tblName, List<String> partNames, List<List<String>> partVals, List<String> colNames) throws IOException {
  assert partNames.size() == partVals.size();
  List<ColumnStatistics> statsList = new ArrayList<>(partNames.size());
  // Encode the column names once; the same qualifiers are reused for every partition Get.
  byte[][] colNameBytes = new byte[colNames.size()][];
  for (int i = 0; i < colNames.size(); i++) {
    colNameBytes[i] = HBaseUtils.buildKey(colNames.get(i));
  }
  // Hoisted out of the loop: the table's partition-key types are identical for every
  // partition, so resolve the table once instead of once per partition.
  List<String> partKeyTypes = HBaseUtils.getPartitionKeyTypes(getTable(dbName, tblName).getPartitionKeys());
  // One Get per partition (each Get carries all requested column qualifiers).
  List<Get> gets = new ArrayList<>(partNames.size());
  for (List<String> vals : partVals) {
    Get get = new Get(HBaseUtils.buildPartitionKey(dbName, tblName, partKeyTypes, vals));
    for (byte[] colName : colNameBytes) {
      get.addColumn(STATS_CF, colName);
    }
    gets.add(get);
  }
  HTableInterface htab = conn.getHBaseTable(PART_TABLE);
  // Batch get: results come back in the same order as the Gets, i.e. partition order.
  Result[] results = htab.get(gets);
  for (int i = 0; i < results.length; i++) {
    ColumnStatistics colStats = null;
    for (int j = 0; j < colNameBytes.length; j++) {
      byte[] serializedColStats = results[i].getValue(STATS_CF, colNameBytes[j]);
      if (serializedColStats == null) {
        // No stats stored for this column on this partition; skip it.
        continue;
      }
      if (colStats == null) {
        // Initialized lazily so partitions with no stats at all don't contribute
        // an empty ColumnStatistics entry to the result list.
        colStats = buildColStats(results[i].getRow(), false);
        statsList.add(colStats);
      }
      ColumnStatisticsObj cso = HBaseUtils.deserializeStatsForOneColumn(colStats, serializedColStats);
      cso.setColName(colNames.get(j));
      colStats.addToStatsObj(cso);
    }
  }
  return statsList;
}
Usage example of org.apache.hadoop.hive.metastore.api.ColumnStatistics in the Apache Hive project:
the getTableStatistics method of the HBaseReadWrite class.
/**
 * Fetch table-level column statistics out of HBase.
 *
 * @param dbName name of the database the table lives in
 * @param tblName name of the table
 * @param colNames columns to fetch statistics for
 * @return a ColumnStatistics describing the table, containing one stats object per
 *         column that actually had stats stored (columns without stats are skipped)
 * @throws IOException if the underlying HBase read fails
 */
ColumnStatistics getTableStatistics(String dbName, String tblName, List<String> colNames) throws IOException {
  // Build the (table-level) descriptor up front; stats objects are attached below.
  ColumnStatistics stats = new ColumnStatistics();
  ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
  desc.setIsTblLevel(true);
  desc.setDbName(dbName);
  desc.setTableName(tblName);
  stats.setStatsDesc(desc);
  // Encode one HBase column qualifier per requested column name.
  int numCols = colNames.size();
  byte[][] colKeys = new byte[numCols][];
  for (int i = 0; i < numCols; i++) {
    colKeys[i] = HBaseUtils.buildKey(colNames.get(i));
  }
  byte[] tabKey = HBaseUtils.buildKey(dbName, tblName);
  // Single row read covering all requested stats columns.
  Result result = read(TABLE_TABLE, tabKey, STATS_CF, colKeys);
  for (int i = 0; i < numCols; i++) {
    byte[] serialized = result.getValue(STATS_CF, colKeys[i]);
    if (serialized != null) {
      ColumnStatisticsObj obj = HBaseUtils.deserializeStatsForOneColumn(stats, serialized);
      obj.setColName(colNames.get(i));
      stats.addToStatsObj(obj);
    }
  }
  return stats;
}
Usage example of org.apache.hadoop.hive.metastore.api.ColumnStatistics in the Apache Hive project:
the printOnePartition method of the HBaseReadWrite class.
/**
 * Render a single partition row (catalog entry plus any stored column stats) as a
 * human-readable string for debugging/dump tooling.
 *
 * @param result the HBase row for the partition
 * @return the partition's thrift dump, its sdHash, and one dump per stats column
 */
private String printOnePartition(Result result) throws IOException, TException {
  byte[] rowKey = result.getRow();
  HBaseUtils.StorageDescriptorParts parts = HBaseUtils.deserializePartition(rowKey, result.getValue(CATALOG_CF, CATALOG_COL), this);
  StringBuilder out = new StringBuilder(dumpThriftObject(parts.containingPartition));
  out.append(" sdHash: ");
  out.append(Base64.encodeBase64URLSafeString(parts.sdHash));
  out.append(" stats:");
  // Walk every stored stats column on this row and dump its deserialized form.
  NavigableMap<byte[], byte[]> statsCols = result.getFamilyMap(STATS_CF);
  for (Map.Entry<byte[], byte[]> entry : statsCols.entrySet()) {
    out.append(" column ");
    out.append(new String(entry.getKey(), HBaseUtils.ENCODING));
    out.append(": ");
    ColumnStatistics partStats = buildColStats(rowKey, false);
    ColumnStatisticsObj statsObj = HBaseUtils.deserializeStatsForOneColumn(partStats, entry.getValue());
    out.append(dumpThriftObject(statsObj));
  }
  return out.toString();
}
Aggregations