Search in sources :

Example 41 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class MetaStoreDirectSql method getTableStats.

/**
 * Looks up table-level column statistics for the requested columns by running
 * a direct SQL query against TAB_COL_STATS, batched over the column names.
 * {@code null} is returned when {@code colNames} is null or empty, and also
 * when the batched query produces no rows.
 * @param catName         the catalog name of the table
 * @param dbName          the database name of the table
 * @param tableName       the table name
 * @param colNames        the column names whose statistics are requested
 * @param engine          engine making the request
 * @param enableBitVector forwarded to getStatsList when the select list is built
 * @return                the column statistics for the requested columns, or null
 * @throws MetaException
 */
public ColumnStatistics getTableStats(final String catName, final String dbName, final String tableName, List<String> colNames, String engine, boolean enableBitVector) throws MetaException {
    if (colNames == null || colNames.isEmpty()) {
        return null;
    }
    final boolean traceEnabled = LOG.isDebugEnabled();
    // Everything up to the opening parenthesis of the IN list; each batch appends its own placeholders.
    final String selectPrefix = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? " + " and \"ENGINE\" = ? and \"COLUMN_NAME\" in (";
    Batchable<String, Object[]> columnBatch = new Batchable<String, Object[]>() {

        @Override
        public List<Object[]> run(List<String> input) throws MetaException {
            final int columnCount = input.size();
            final String sql = selectPrefix + makeParams(columnCount) + ")";
            // Four fixed bind values come first, followed by one value per column name of this batch.
            final Object[] bindValues = new Object[columnCount + 4];
            bindValues[0] = catName;
            bindValues[1] = dbName;
            bindValues[2] = tableName;
            bindValues[3] = engine;
            int slot = 4;
            for (String columnName : input) {
                bindValues[slot++] = columnName;
            }
            final long startNanos = traceEnabled ? System.nanoTime() : 0;
            final Query query = pm.newQuery("javax.jdo.query.SQL", sql);
            try {
                final Object queryResult = executeWithArray(query, bindValues, sql);
                // The trace uses the prefix plus "...)" rather than the full text with all placeholders.
                MetastoreDirectSqlUtils.timingTrace(traceEnabled, selectPrefix + "...)", startNanos, (traceEnabled ? System.nanoTime() : 0));
                if (queryResult != null) {
                    return MetastoreDirectSqlUtils.ensureList(queryResult);
                }
                return null;
            } finally {
                // Registered here so that closeAllQueries() on the batch can release it afterwards.
                addQueryAfterUse(query);
            }
        }
    };
    List<Object[]> rows;
    try {
        final List<Object[]> batched = Batchable.runBatched(batchSize, colNames, columnBatch);
        // Copy the rows into a fresh list before the queries are closed in the finally block.
        rows = (batched == null) ? null : new ArrayList<Object[]>(batched);
    } finally {
        columnBatch.closeAllQueries();
    }
    if (rows == null || rows.isEmpty()) {
        return null;
    }
    final ColumnStatisticsDesc tableDesc = new ColumnStatisticsDesc(true, dbName, tableName);
    tableDesc.setCatName(catName);
    return makeColumnStats(rows, tableDesc, 0, engine);
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList)

Example 42 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class StatObjectConverter method getPartitionColumnStatisticsDesc.

/**
 * Builds a partition-level {@code ColumnStatisticsDesc} from the given model
 * object by copying its catalog, database, table and partition names and its
 * last-analyzed value.
 * @param mStatsObj the partition column statistics model object to read from
 * @return          a new descriptor with its table-level flag set to false
 */
public static ColumnStatisticsDesc getPartitionColumnStatisticsDesc(MPartitionColumnStatistics mStatsObj) {
    final ColumnStatisticsDesc partitionDesc = new ColumnStatisticsDesc();
    partitionDesc.setCatName(mStatsObj.getCatName());
    partitionDesc.setDbName(mStatsObj.getDbName());
    partitionDesc.setTableName(mStatsObj.getTableName());
    partitionDesc.setPartName(mStatsObj.getPartitionName());
    partitionDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
    // This descriptor describes a partition, so it is explicitly not table level.
    partitionDesc.setIsTblLevel(false);
    return partitionDesc;
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 43 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class SharedCache method getTableColStatsFromCache.

/**
 * Returns the cached column statistics of a table for the given columns.
 * The lookup is performed while holding the read lock of {@code cacheLock}.
 * @param catName              catalog name used to build the cache key
 * @param dbName               database name of the table
 * @param tblName              table name
 * @param colNames             columns whose statistics are requested
 * @param validWriteIds        passed through to the table wrapper
 * @param areTxnStatsSupported passed through to the table wrapper
 * @return the statistics reported by the cached table wrapper, or {@code null}
 *         when the table is not present in the cache
 * @throws MetaException declared by this method; presumably raised by the
 *         table wrapper lookup (not visible here)
 */
public ColumnStatistics getTableColStatsFromCache(String catName, String dbName, String tblName, List<String> colNames, String validWriteIds, boolean areTxnStatsSupported) throws MetaException {
    // Acquire the lock BEFORE entering the try block: if lock() were to fail inside
    // the try, the finally clause would call unlock() on a lock this thread never
    // acquired and mask the original failure.
    cacheLock.readLock().lock();
    try {
        TableWrapper tblWrapper = tableCache.getIfPresent(CacheUtils.buildTableKey(catName, dbName, tblName));
        if (tblWrapper == null) {
            LOG.info("Table " + tblName + " is missing from cache.");
            return null;
        }
        // NOTE(review): the descriptor is built without a catalog name even though
        // catName is available; confirm whether csd.setCatName(catName) is expected here.
        ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tblName);
        return tblWrapper.getCachedTableColStats(csd, colNames, validWriteIds, areTxnStatsSupported);
    } finally {
        cacheLock.readLock().unlock();
    }
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)

Example 44 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestObjectStore method createPartitionedTable.

/**
 * Creates the DB1 database and the TABLE1 table, then adds 3 partitions to the table.
 * @param withPrivileges whether partition and partition-column privileges are granted as well
 * @param withStatistics whether partition column statistics are stored as well
 */
private void createPartitionedTable(boolean withPrivileges, boolean withStatistics) throws Exception {
    final Database database = new DatabaseBuilder()
            .setName(DB1)
            .setDescription("description")
            .setLocation("locationurl")
            .build(conf);
    try (AutoCloseable deadlineScope = deadline()) {
        objectStore.createDatabase(database);
    }

    final Table table = new TableBuilder()
            .setDbName(DB1)
            .setTableName(TABLE1)
            .addCol("test_col1", "int")
            .addCol("test_col2", "int")
            .addPartCol("test_part_col", "int")
            .addCol("test_bucket_col", "int", "test bucket col comment")
            .addCol("test_skewed_col", "int", "test skewed col comment")
            .addCol("test_sort_col", "int", "test sort col comment")
            .build(conf);
    try (AutoCloseable deadlineScope = deadline()) {
        objectStore.createTable(table);
    }

    // Privileges are collected across all partitions and granted once after the loop.
    final PrivilegeBag grants = new PrivilegeBag();

    // One partition per index, with partition value "a<index>".
    for (int partIndex = 0; partIndex < 3; partIndex++) {
        final Partition partition = new PartitionBuilder()
                .inTable(table)
                .addValue("a" + partIndex)
                .addSerdeParam("serdeParam", "serdeParamValue")
                .addStorageDescriptorParam("sdParam", "sdParamValue")
                .addBucketCol("test_bucket_col")
                .addSkewedColName("test_skewed_col")
                .addSortCol("test_sort_col", 1)
                .build(conf);
        try (AutoCloseable deadlineScope = deadline()) {
            objectStore.addPartition(partition);
        }

        if (withPrivileges) {
            final HiveObjectRef partitionRef = new HiveObjectRefBuilder().buildPartitionReference(partition);
            final HiveObjectRef partitionColumnRef = new HiveObjectRefBuilder()
                    .buildPartitionColumnReference(table, "test_part_col", partition.getValues());
            final PrivilegeGrantInfo grantInfo = new PrivilegeGrantInfoBuilder().setPrivilege("a").build();

            // Same principal and grant info for both the partition and the partition column.
            final HiveObjectPrivilege partitionGrant = new HiveObjectPrivilegeBuilder()
                    .setHiveObjectRef(partitionRef)
                    .setPrincipleName("a")
                    .setPrincipalType(PrincipalType.USER)
                    .setGrantInfo(grantInfo)
                    .build();
            grants.addToPrivileges(partitionGrant);

            final HiveObjectPrivilege partitionColumnGrant = new HiveObjectPrivilegeBuilder()
                    .setHiveObjectRef(partitionColumnRef)
                    .setPrincipleName("a")
                    .setPrincipalType(PrincipalType.USER)
                    .setGrantInfo(grantInfo)
                    .build();
            grants.addToPrivileges(partitionColumnGrant);
        }

        if (withStatistics) {
            // Long statistics payload for the single statistics object.
            final LongColumnStatsData longStats = new LongColumnStatsData();
            longStats.setNumNulls(1);
            longStats.setNumDVs(2);
            longStats.setLowValue(3);
            longStats.setHighValue(4);
            final ColumnStatisticsData statsData = new ColumnStatisticsData();
            statsData.setLongStats(longStats);

            final List<ColumnStatisticsObj> statsObjs = new ArrayList<>(1);
            statsObjs.add(new ColumnStatisticsObj("test_part_col", "int", statsData));

            // Descriptor identifying the partition the statistics belong to.
            final ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
            statsDesc.setCatName(table.getCatName());
            statsDesc.setDbName(table.getDbName());
            statsDesc.setTableName(table.getTableName());
            statsDesc.setPartName("test_part_col=a" + partIndex);

            final ColumnStatistics partitionStats = new ColumnStatistics();
            partitionStats.setStatsDesc(statsDesc);
            partitionStats.setStatsObj(statsObjs);
            partitionStats.setEngine(ENGINE);
            try (AutoCloseable deadlineScope = deadline()) {
                objectStore.updatePartitionColumnStatistics(partitionStats, partition.getValues(), null, -1);
            }
        }
    }

    if (withPrivileges) {
        try (AutoCloseable deadlineScope = deadline()) {
            objectStore.grantPrivileges(grants);
        }
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) PrivilegeBag(org.apache.hadoop.hive.metastore.api.PrivilegeBag) Partition(org.apache.hadoop.hive.metastore.api.Partition) Table(org.apache.hadoop.hive.metastore.api.Table) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) TestHiveMetaStore.createSourceTable(org.apache.hadoop.hive.metastore.TestHiveMetaStore.createSourceTable) PrivilegeGrantInfo(org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo) HiveObjectRef(org.apache.hadoop.hive.metastore.api.HiveObjectRef) HiveObjectPrivilegeBuilder(org.apache.hadoop.hive.metastore.client.builder.HiveObjectPrivilegeBuilder) ArrayList(java.util.ArrayList) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder) HiveObjectPrivilege(org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder) HiveObjectRefBuilder(org.apache.hadoop.hive.metastore.client.builder.HiveObjectRefBuilder) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) Database(org.apache.hadoop.hive.metastore.api.Database) PrivilegeGrantInfoBuilder(org.apache.hadoop.hive.metastore.client.builder.PrivilegeGrantInfoBuilder) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 45 with ColumnStatisticsDesc

use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

the class TestHiveMetaStore method testColumnStatistics.

/**
 * Exercises writing, reading back and deleting column statistics through the
 * metastore client, first at table level and then at partition level.
 * Any exception is printed to stderr and rethrown; the database/table/type
 * used by the test are cleaned up in all cases.
 */
@Test
public void testColumnStatistics() throws Throwable {
    String dbName = "columnstatstestdb";
    String tblName = "tbl";
    String typeName = "Person";
    String tblOwner = "testowner";
    int lastAccessed = 6796;
    try {
        // start from a clean slate, then create the database and the table under test
        cleanUp(dbName, tblName, typeName);
        new DatabaseBuilder().setName(dbName).create(client, conf);
        createTableForTestFilter(dbName, tblName, tblOwner, lastAccessed, true);
        // Create a ColumnStatistics Obj
        String[] colName = new String[] { "income", "name" };
        double lowValue = 50000.21;
        double highValue = 1200000.4525;
        long numNulls = 3;
        long numDVs = 22;
        double avgColLen = 50.30;
        long maxColLen = 102;
        String[] colType = new String[] { "double", "string" };
        boolean isTblLevel = true;
        String partName = null;
        List<ColumnStatisticsObj> statsObjs = new ArrayList<>();
        // descriptor for table-level stats: isTblLevel is true and the partition name is null
        ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
        statsDesc.setDbName(dbName);
        statsDesc.setTableName(tblName);
        statsDesc.setIsTblLevel(isTblLevel);
        statsDesc.setPartName(partName);
        // first stats object: double stats for colName[0] ("income")
        ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
        statsObj.setColName(colName[0]);
        statsObj.setColType(colType[0]);
        ColumnStatisticsData statsData = new ColumnStatisticsData();
        DoubleColumnStatsData numericStats = new DoubleColumnStatsData();
        statsData.setDoubleStats(numericStats);
        statsData.getDoubleStats().setHighValue(highValue);
        statsData.getDoubleStats().setLowValue(lowValue);
        statsData.getDoubleStats().setNumDVs(numDVs);
        statsData.getDoubleStats().setNumNulls(numNulls);
        statsObj.setStatsData(statsData);
        statsObjs.add(statsObj);
        // second stats object: string stats for colName[1] ("name")
        statsObj = new ColumnStatisticsObj();
        statsObj.setColName(colName[1]);
        statsObj.setColType(colType[1]);
        statsData = new ColumnStatisticsData();
        StringColumnStatsData stringStats = new StringColumnStatsData();
        statsData.setStringStats(stringStats);
        statsData.getStringStats().setAvgColLen(avgColLen);
        statsData.getStringStats().setMaxColLen(maxColLen);
        statsData.getStringStats().setNumDVs(numDVs);
        statsData.getStringStats().setNumNulls(numNulls);
        statsObj.setStatsData(statsData);
        statsObjs.add(statsObj);
        // bundle descriptor, both stats objects and the engine into one ColumnStatistics
        ColumnStatistics colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        colStats.setEngine(ENGINE);
        // write stats objs persistently
        client.updateTableColumnStatistics(colStats);
        // retrieve the stats obj that was just written
        ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0]), ENGINE).get(0);
        // compare stats obj to ensure what we get is what we wrote
        // NOTE(review): the assertEquals calls in this test pass the value under test first and the
        // expected value second; if these are JUnit assertions the convention is (expected, actual).
        // This only affects failure messages - confirm before changing.
        assertNotNull(colStats2);
        assertEquals(colStats2.getColName(), colName[0]);
        assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(), lowValue, 0.01);
        assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(), highValue, 0.01);
        assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(), numNulls);
        assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(), numDVs);
        // test delete column stats; if no col name is passed all column stats associated with the
        // table is deleted
        boolean status = client.deleteTableColumnStatistics(dbName, tblName, null, ENGINE);
        assertTrue(status);
        // try to query stats for a column for which stats doesn't exist
        assertTrue(client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[1]), ENGINE).isEmpty());
        // re-attach the same descriptor and stats objects before writing them a second time
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        // update table level column stats
        client.updateTableColumnStatistics(colStats);
        // query column stats for column whose stats were updated in the previous call
        // NOTE(review): the fetched value is not asserted on; only the call and get(0) are exercised
        colStats2 = client.getTableColumnStatistics(dbName, tblName, Lists.newArrayList(colName[0]), ENGINE).get(0);
        // partition level column statistics test
        // create a table with multiple partitions
        cleanUp(dbName, tblName, typeName);
        List<List<String>> values = new ArrayList<>();
        values.add(makeVals("2008-07-01 14:13:12", "14"));
        values.add(makeVals("2008-07-01 14:13:12", "15"));
        values.add(makeVals("2008-07-02 14:13:12", "15"));
        values.add(makeVals("2008-07-03 14:13:12", "151"));
        createMultiPartitionTableSchema(dbName, tblName, typeName, values);
        // use the first partition name reported by the client for the partition-level checks
        List<String> partitions = client.listPartitionNames(dbName, tblName, (short) -1);
        partName = partitions.get(0);
        isTblLevel = false;
        // create a new columnstatistics desc to represent partition level column stats
        statsDesc = new ColumnStatisticsDesc();
        statsDesc.setDbName(dbName);
        statsDesc.setTableName(tblName);
        statsDesc.setPartName(partName);
        statsDesc.setIsTblLevel(isTblLevel);
        // the stats objects built for the table-level test are reused for the partition
        colStats = new ColumnStatistics();
        colStats.setStatsDesc(statsDesc);
        colStats.setStatsObj(statsObjs);
        colStats.setEngine(ENGINE);
        client.updatePartitionColumnStatistics(colStats);
        // read back the stats of colName[1] for that partition
        colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1]), ENGINE).get(partName).get(0);
        // compare stats obj to ensure what we get is what we wrote
        assertNotNull(colStats2);
        // NOTE(review): this assertion inspects the locally built colStats, not a value read back from the client
        assertEquals(colStats.getStatsDesc().getPartName(), partName);
        assertEquals(colStats2.getColName(), colName[1]);
        assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(), maxColLen);
        assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(), avgColLen, 0.01);
        assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(), numNulls);
        assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(), numDVs);
        // test stats deletion at partition level
        client.deletePartitionColumnStatistics(dbName, tblName, partName, colName[1], ENGINE);
        // fetch the stats of the other column (colName[0]) for the same partition
        // NOTE(review): the fetched value is not asserted on; only the call chain itself is exercised
        colStats2 = client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[0]), ENGINE).get(partName).get(0);
        // test get stats on a column for which stats doesn't exist
        assertTrue(client.getPartitionColumnStatistics(dbName, tblName, Lists.newArrayList(partName), Lists.newArrayList(colName[1]), ENGINE).isEmpty());
    } catch (Exception e) {
        // print the failure to stderr, then rethrow so the test still fails
        System.err.println(StringUtils.stringifyException(e));
        System.err.println("testColumnStatistics() failed.");
        throw e;
    } finally {
        // always remove the objects created by this test
        cleanUp(dbName, tblName, typeName);
    }
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) ConfigValSecurityException(org.apache.hadoop.hive.metastore.api.ConfigValSecurityException) SQLException(java.sql.SQLException) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) TException(org.apache.thrift.TException) IOException(java.io.IOException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) ArrayList(java.util.ArrayList) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData) Test(org.junit.Test)

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)95 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)77 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)68 ArrayList (java.util.ArrayList)60 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)54 Test (org.junit.Test)50 Table (org.apache.hadoop.hive.metastore.api.Table)38 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)35 Partition (org.apache.hadoop.hive.metastore.api.Partition)30 StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor)30 SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo)28 AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats)25 List (java.util.List)24 LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData)19 BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData)12 DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData)11 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)11 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)9 DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData)7 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)7