Search in sources :

Example 31 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestHBaseStore method longTableStatistics.

/**
 * Writes table-level statistics for the long column through the store, reads them
 * back, and checks that the descriptor and the long-typed values are unchanged.
 */
@Test
public void longTableStatistics() throws Exception {
    // NOTE(review): the previous comment here said the table must NOT be created before
    // statistics are set on it, yet the mock table is created first -- confirm which is intended.
    createMockTable(LONG_TYPE);

    // Default table-level descriptor plus the first pre-created long-column stats object
    ColumnStatisticsDesc expectedDesc = getMockTblColStatsDesc();
    ColumnStatisticsObj expectedObj = longColStatsObjs.get(0);
    LongColumnStatsData expectedLong = expectedObj.getStatsData().getLongStats();

    // Persist the statistics
    ColumnStatistics toStore = new ColumnStatistics();
    toStore.setStatsDesc(expectedDesc);
    toStore.addToStatsObj(expectedObj);
    store.updateTableColumnStatistics(toStore);

    // Read them back for the single long column
    ColumnStatistics fetched = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));

    // Descriptor must round-trip unchanged
    ColumnStatisticsDesc fetchedDesc = fetched.getStatsDesc();
    Assert.assertEquals(expectedDesc.getLastAnalyzed(), fetchedDesc.getLastAnalyzed());
    Assert.assertEquals(DB, fetchedDesc.getDbName());
    Assert.assertEquals(TBL, fetchedDesc.getTableName());
    Assert.assertTrue(fetchedDesc.isIsTblLevel());

    // Exactly one stats object, carrying long stats
    Assert.assertEquals(1, fetched.getStatsObjSize());
    ColumnStatisticsData fetchedData = fetched.getStatsObj().get(0).getStatsData();
    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, fetchedData.getSetField());

    // Individual long-stat values must match what was stored
    LongColumnStatsData fetchedLong = fetchedData.getLongStats();
    Assert.assertEquals(expectedLong.getHighValue(), fetchedLong.getHighValue());
    Assert.assertEquals(expectedLong.getLowValue(), fetchedLong.getLowValue());
    Assert.assertEquals(expectedLong.getNumNulls(), fetchedLong.getNumNulls());
    Assert.assertEquals(expectedLong.getNumDVs(), fetchedLong.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Example 32 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class HiveAlterHandler method alterTableUpdateTableColumnStats.

/**
 * Keeps table-level column statistics in step with an ALTER TABLE.
 *
 * <p>When the table is renamed (database or table name differs), statistics whose column
 * name and type still match a column of the new table are deleted under the old name and
 * re-written under the new name after the table has been altered. Statistics whose column
 * no longer matches are deleted. When nothing relevant changed, only the table is altered.
 *
 * @param msdb     store used to read, delete and write statistics and to alter the table
 * @param oldTable table definition before the change
 * @param newTable table definition after the change
 * @throws MetaException          declared by the signature; not thrown directly in this method
 * @throws InvalidObjectException if the store reports invalid input while handling statistics
 */
@VisibleForTesting
void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable) throws MetaException, InvalidObjectException {
    String dbName = oldTable.getDbName().toLowerCase();
    String tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(oldTable.getTableName());
    String newDbName = newTable.getDbName().toLowerCase();
    String newTableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(newTable.getTableName());
    try {
        List<FieldSchema> oldCols = oldTable.getSd().getCols();
        List<FieldSchema> newCols = newTable.getSd().getCols();
        List<ColumnStatisticsObj> statsToCarryOver = new ArrayList<>();
        ColumnStatistics colStats = null;

        // Stats need attention when the table moved, or when the column sets are not compatible.
        // The column comparison is only evaluated when the table was not renamed.
        boolean renamed = !(newDbName.equals(dbName) && newTableName.equals(tableName));
        boolean statsNeedRewrite = renamed || !MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols);

        if (statsNeedRewrite) {
            List<String> oldColNames = new ArrayList<>(oldCols.size());
            for (FieldSchema col : oldCols) {
                oldColNames.add(col.getName());
            }
            // Fetch existing stats; decide per column whether to drop or carry over
            colStats = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
            List<ColumnStatisticsObj> existingStats = (colStats == null) ? null : colStats.getStatsObj();
            if (existingStats != null) {
                List<String> droppedCols = new ArrayList<>();
                for (ColumnStatisticsObj statsObj : existingStats) {
                    // A stat survives only if a new column has the same name and type (case-insensitive)
                    boolean stillPresent = false;
                    for (FieldSchema candidate : newCols) {
                        stillPresent = statsObj.getColName().equalsIgnoreCase(candidate.getName())
                            && statsObj.getColType().equalsIgnoreCase(candidate.getType());
                        if (stillPresent) {
                            break;
                        }
                    }
                    if (stillPresent && !renamed) {
                        // Same table, column unchanged: existing stats stay where they are
                        continue;
                    }
                    msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
                    if (stillPresent) {
                        // Table was renamed: re-attach these stats under the new name below
                        statsToCarryOver.add(statsObj);
                    }
                    droppedCols.add(statsObj.getColName());
                }
                StatsSetupConst.removeColumnStatsState(newTable.getParameters(), droppedCols);
            }
        }

        // Switch to the new table definition, then append the carried-over stats to it
        msdb.alterTable(dbName, tableName, newTable);
        if (colStats != null && !statsToCarryOver.isEmpty()) {
            ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
            statsDesc.setDbName(newDbName);
            statsDesc.setTableName(newTableName);
            colStats.setStatsObj(statsToCarryOver);
            msdb.updateTableColumnStatistics(colStats);
        }
    } catch (NoSuchObjectException nsoe) {
        LOG.debug("Could not find db entry." + nsoe);
    } catch (InvalidInputException e) {
        // should not happen since the input were verified before passed in
        throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 33 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project presto by prestodb.

the class ThriftHiveMetastoreClient method setTableColumnStatistics.

/**
 * Sends the given column statistics for a table to the metastore.
 *
 * @param databaseName database containing the table
 * @param tableName    table the statistics belong to
 * @param statistics   per-column statistics objects to store
 * @throws TException if the Thrift call fails
 */
@Override
public void setTableColumnStatistics(String databaseName, String tableName, List<ColumnStatisticsObj> statistics) throws TException {
    // First constructor argument is presumably the "is table level" flag -- true here
    ColumnStatisticsDesc tableLevelDesc = new ColumnStatisticsDesc(true, databaseName, tableName);
    client.update_table_column_statistics(new ColumnStatistics(tableLevelDesc, statistics));
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 34 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project presto by prestodb.

the class ThriftHiveMetastoreClient method setPartitionColumnStatistics.

/**
 * Sends the given column statistics for a single partition to the metastore.
 *
 * @param databaseName  database containing the table
 * @param tableName     table the partition belongs to
 * @param partitionName name of the partition the statistics describe
 * @param statistics    per-column statistics objects to store
 * @throws TException if the Thrift call fails
 */
@Override
public void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List<ColumnStatisticsObj> statistics) throws TException {
    // First constructor argument is presumably the "is table level" flag -- false here,
    // and the partition name is set on the descriptor
    ColumnStatisticsDesc partitionLevelDesc = new ColumnStatisticsDesc(false, databaseName, tableName);
    partitionLevelDesc.setPartName(partitionName);
    client.update_partition_column_statistics(new ColumnStatistics(partitionLevelDesc, statistics));
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 35 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project flink by apache.

the class HiveStatsUtil method createPartitionColumnStats.

/**
 * Builds Hive {@code ColumnStatistics} for one partition from the supplied column stats.
 *
 * <p>A descriptor is created from the partition's database and table names, tagged with
 * {@code partName}, and handed to {@code createHiveColumnStatistics} together with the
 * partition's storage descriptor.
 *
 * @param hivePartition partition supplying the database name, table name and storage descriptor
 * @param partName      partition name recorded on the statistics descriptor
 * @param colStats      column statistics data keyed by string (presumably column name)
 * @param hiveVersion   Hive version string passed through to the conversion helper
 * @return the statistics object produced by {@code createHiveColumnStatistics}
 */
public static ColumnStatistics createPartitionColumnStats(Partition hivePartition, String partName, Map<String, CatalogColumnStatisticsDataBase> colStats, String hiveVersion) {
    String databaseName = hivePartition.getDbName();
    String tableName = hivePartition.getTableName();
    ColumnStatisticsDesc partitionDesc = new ColumnStatisticsDesc(false, databaseName, tableName);
    partitionDesc.setPartName(partName);
    return createHiveColumnStatistics(colStats, hivePartition.getSd(), partitionDesc, hiveVersion);
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) 95, ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics) 77, ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) 68, ArrayList (java.util.ArrayList) 60, ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) 54, Test (org.junit.Test) 50, Table (org.apache.hadoop.hive.metastore.api.Table) 38, FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 35, Partition (org.apache.hadoop.hive.metastore.api.Partition) 30, StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor) 30, SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo) 28, AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats) 25, List (java.util.List) 24, LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData) 19, BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) 12, DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) 11, StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData) 11, MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 9, DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) 7, NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 7