Search in sources :

Example 96 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class TestAggregateStatsCache method testTimeToLive.

/**
 * Adds one aggregate column-stats entry to the cache, sleeps for three seconds and then
 * expects a lookup for the same partitions to return {@code null}.
 *
 * <p>Per the original author's note the cache TTL is 2s, so the entry should have expired
 * by the time it is looked up.
 */
@Test
public void testTimeToLive() throws Exception {
    // Partition names for the first table: [tab1part1...tab1part9]
    final List<String> partitionNames = preparePartNames(tables.get(0), 1, 9);
    // Bloom filter built over those partition names
    final BloomFilter partitionFilter = prepareBloomFilter(partitionNames);

    // The dummy entry is keyed by the first table and its first column
    final String table = tables.get(0);
    final String column = tabCols.get(0);

    // Values for the dummy long-column stats that stand in for the aggregate of part1...part9
    final int high = 100;
    final int low = 10;
    final int distinctValues = 50;
    final int nullCount = 5;
    final ColumnStatisticsObj dummyAggregate = getDummyLongColStat(column, high, low, distinctValues, nullCount);

    // Put the entry into the cache
    cache.add(DEFAULT_CATALOG_NAME, DB_NAME, table, column, 10, dummyAggregate, partitionFilter);

    // Wait 3 seconds, i.e. longer than the 2s TTL mentioned above
    Thread.sleep(3000);

    // The lookup is expected to miss now
    Assert.assertNull(cache.get(DEFAULT_CATALOG_NAME, DB_NAME, table, column, partitionNames));
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) AggrColStats(org.apache.hadoop.hive.metastore.AggregateStatsCache.AggrColStats) BloomFilter(org.apache.hive.common.util.BloomFilter) Test(org.junit.Test) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)

Example 97 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class DecimalColumnStatsMergerTest method testMergeNonNullAndNullLowerValuesNewIsNull.

/**
 * Merges stats created with {@code (null, null)} into stats created with
 * {@code (DECIMAL_3, null)} and expects the merged low value to still be {@code DECIMAL_3}.
 */
@Test
public void testMergeNonNullAndNullLowerValuesNewIsNull() {
    final ColumnStatisticsObj existing = new ColumnStatisticsObj();
    final ColumnStatisticsObj incoming = new ColumnStatisticsObj();

    // Only the existing stats carry a value (second argument); the incoming stats carry none
    createData(existing, DECIMAL_3, null);
    createData(incoming, null, null);

    // The merge result is read back from the first argument
    merger.merge(existing, incoming);

    Assert.assertEquals(DECIMAL_3, existing.getStatsData().getDecimalStats().getLowValue());
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Test(org.junit.Test) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)

Example 98 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class DecimalColumnStatsMergerTest method testMergeLowValuesSecondWins.

/**
 * Merges stats created with {@code (DECIMAL_3, null)} into stats created with
 * {@code (DECIMAL_5, null)} and expects the merged low value to be {@code DECIMAL_3},
 * i.e. the value supplied by the second (new) stats object.
 */
@Test
public void testMergeLowValuesSecondWins() {
    final ColumnStatisticsObj existing = new ColumnStatisticsObj();
    final ColumnStatisticsObj incoming = new ColumnStatisticsObj();

    // Existing stats start at DECIMAL_5, incoming stats bring DECIMAL_3
    createData(existing, DECIMAL_5, null);
    createData(incoming, DECIMAL_3, null);

    // The merge result is read back from the first argument
    merger.merge(existing, incoming);

    Assert.assertEquals(DECIMAL_3, existing.getStatsData().getDecimalStats().getLowValue());
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Test(org.junit.Test) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)

Example 99 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class DecimalColumnStatsMergerTest method testMergeNonNullAndNullHigherValuesOldIsNull.

/**
 * Merges stats created with {@code (null, DECIMAL_3)} into stats created with
 * {@code (null, null)} and expects the merged high value to be {@code DECIMAL_3}.
 */
@Test
public void testMergeNonNullAndNullHigherValuesOldIsNull() {
    final ColumnStatisticsObj existing = new ColumnStatisticsObj();
    final ColumnStatisticsObj incoming = new ColumnStatisticsObj();

    // The existing stats carry no values; only the incoming stats set the third argument
    createData(existing, null, null);
    createData(incoming, null, DECIMAL_3);

    // The merge result is read back from the first argument
    merger.merge(existing, incoming);

    Assert.assertEquals(DECIMAL_3, existing.getStatsData().getDecimalStats().getHighValue());
}
Also used : ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Test(org.junit.Test) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest)

Example 100 with ColumnStatisticsObj

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj in project hive by apache.

the class ObjectStore method updatePartitionColumnStatistics.

/**
 * Writes the column statistics contained in {@code colStats} for the partition identified by
 * {@code partVals} and updates the partition's parameters with the new column-stats state.
 *
 * <p>The work runs inside a transaction opened by this method; if the transaction was not
 * committed (including when an exception is thrown) it is rolled back before returning.
 *
 * <p>For transactional tables the parameters are additionally adjusted:
 * <ul>
 *   <li>when transactional stats are not supported, the basic-stats state is set to
 *       {@code StatsSetupConst.FALSE};</li>
 *   <li>otherwise the change is validated through {@code verifyStatsChangeCtx}, the
 *       basic-stats state is set to {@code StatsSetupConst.FALSE} if the current stats are
 *       not valid for the query, and {@code writeId} is recorded on the partition.</li>
 * </ul>
 *
 * @param colStats      statistics descriptor, per-column statistics objects and engine
 * @param partVals      partition values used to look the partition up
 * @param validWriteIds passed to the transactional-stats validation helpers
 * @param writeId       write id stored on the partition for transactional tables
 * @return the partition's new parameter map if the transaction committed, otherwise {@code null}
 * @throws NoSuchObjectException if the partition cannot be found
 * @throws MetaException         if {@code verifyStatsChangeCtx} reports an error
 */
@Override
public Map<String, String> updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals, String validWriteIds, long writeId) throws MetaException, NoSuchObjectException, InvalidObjectException, InvalidInputException {
    boolean committed = false;
    try {
        openTransaction();
        List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
        String catName = statsDesc.isSetCatName() ? statsDesc.getCatName() : getDefaultCatalog(conf);
        MTable mTable = ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName());
        Table table = convertToTable(mTable);
        // Look the partition up once and reuse it for both the API-level view and the
        // model object, instead of issuing the same lookup twice.
        MPartition mPartition = getMPartition(catName, statsDesc.getDbName(), statsDesc.getTableName(), partVals, mTable);
        Partition partition = convertToPart(mPartition, false);
        // Fail fast: nothing below makes sense without the partition, and mPartition is
        // dereferenced further down.
        if (partition == null || mPartition == null) {
            throw new NoSuchObjectException("Partition for which stats is gathered doesn't exist.");
        }
        List<String> colNames = new ArrayList<>(statsObjs.size());
        for (ColumnStatisticsObj statsObj : statsObjs) {
            colNames.add(statsObj.getColName());
        }
        // Existing stats keyed by column name, handed to the writer alongside the new stats
        Map<String, MPartitionColumnStatistics> oldStats = getPartitionColStats(table, statsDesc.getPartName(), colNames, colStats.getEngine());
        for (ColumnStatisticsObj statsObj : statsObjs) {
            MPartitionColumnStatistics mStatsObj = StatObjectConverter.convertToMPartitionColumnStatistics(mPartition, statsDesc, statsObj, colStats.getEngine());
            writeMPartitionColumnStatistics(table, partition, mStatsObj, oldStats.get(statsObj.getColName()));
        }
        // TODO: (HIVE-20109) the col stats stats should be in colstats, not in the partition!
        // Work on a copy so the partition's current parameters stay untouched until setParameters
        Map<String, String> newParams = new HashMap<>(mPartition.getParameters());
        StatsSetupConst.setColumnStatsState(newParams, colNames);
        boolean isTxn = TxnUtils.isTransactionalTable(table);
        if (isTxn) {
            if (!areTxnStatsSupported) {
                StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
            } else {
                String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(statsDesc.getDbName(), statsDesc.getTableName()), mPartition.getParameters(), newParams, writeId, validWriteIds, true);
                if (errorMsg != null) {
                    throw new MetaException(errorMsg);
                }
                if (!isCurrentStatsValidForTheQuery(mPartition, validWriteIds, true)) {
                    // Make sure we set the flag to invalid regardless of the current value.
                    StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
                    LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the partition " + statsDesc.getDbName() + "." + statsDesc.getTableName() + "." + statsDesc.getPartName());
                }
                mPartition.setWriteId(writeId);
            }
        }
        mPartition.setParameters(newParams);
        committed = commitTransaction();
        // TODO: what is the "return committed;" about? would it ever return false without throwing?
        return committed ? newParams : null;
    } finally {
        // Covers both an exception on the way and a commit that reported failure
        if (!committed) {
            rollbackTransaction();
        }
    }
}
Also used : MPartition(org.apache.hadoop.hive.metastore.model.MPartition) Partition(org.apache.hadoop.hive.metastore.api.Partition) MVersionTable(org.apache.hadoop.hive.metastore.model.MVersionTable) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.metastore.api.Table) MTable(org.apache.hadoop.hive.metastore.model.MTable) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) MTable(org.apache.hadoop.hive.metastore.model.MTable) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MPartition(org.apache.hadoop.hive.metastore.model.MPartition) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Aggregations

ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)219 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)104 ArrayList (java.util.ArrayList)98 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)82 Test (org.junit.Test)79 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)68 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)43 Table (org.apache.hadoop.hive.metastore.api.Table)43 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)35 List (java.util.List)34 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)30 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)29 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)29 HashMap (java.util.HashMap)28 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)27 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)25 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)23 BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData)22