Search in sources :

Example 1 with SetPartitionsStatsRequest

Use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in the Apache Hive project.

From class TestHiveMetaStoreStatsMerge, method testStatsMerge.

/**
 * Stores string column statistics for column "a", then sends a second set of statistics with
 * needMerge enabled and checks the merged values read back from the metastore.
 *
 * The database and table creation at the start are also checked against the listener
 * notifications recorded in DummyListener.notifyList.
 *
 * @throws Exception if any metastore call fails
 */
public void testStatsMerge() throws Exception {
    int listSize = 0;
    List<ListenerEvent> notifyList = DummyListener.notifyList;
    assertEquals(listSize, notifyList.size());
    msc.createDatabase(db);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateDatabaseEvent dbEvent = (CreateDatabaseEvent) (notifyList.get(listSize - 1));
    assert dbEvent.getStatus();
    msc.createTable(table);
    listSize++;
    assertEquals(listSize, notifyList.size());
    CreateTableEvent tblEvent = (CreateTableEvent) (notifyList.get(listSize - 1));
    assert tblEvent.getStatus();
    table = msc.getTable(dbName, tblName);

    // First request: initial statistics for column "a".
    ColumnStatistics cs = new ColumnStatistics();
    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
    cs.setStatsDesc(desc);
    ColumnStatisticsObj obj = new ColumnStatisticsObj();
    obj.setColName("a");
    obj.setColType("string");
    ColumnStatisticsData data = new ColumnStatisticsData();
    StringColumnStatsData scsd = new StringColumnStatsData();
    scsd.setAvgColLen(10);
    scsd.setMaxColLen(20);
    scsd.setNumNulls(30);
    scsd.setNumDVs(123);
    scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
    data.setStringStats(scsd);
    obj.setStatsData(data);
    cs.addToStatsObj(obj);
    List<ColumnStatistics> colStats = new ArrayList<>();
    colStats.add(cs);
    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
    msc.setPartitionColumnStatistics(request);
    List<String> colNames = new ArrayList<>();
    colNames.add("a");
    StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
    assertEquals(123, getScsd.getNumDVs());

    // Second request: new statistics for the same column, sent with needMerge enabled.
    // The statistics object is placed in its own list so the request explicitly carries it,
    // rather than relying on the first request's list still referencing the shared
    // obj/data instances.
    ColumnStatistics mergeCs = new ColumnStatistics();
    mergeCs.setStatsDesc(desc);
    scsd = new StringColumnStatsData();
    scsd.setAvgColLen(20);
    scsd.setMaxColLen(5);
    scsd.setNumNulls(70);
    scsd.setNumDVs(456);
    scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
    data.setStringStats(scsd);
    obj.setStatsData(data);
    mergeCs.addToStatsObj(obj);
    List<ColumnStatistics> mergeColStats = new ArrayList<>();
    mergeColStats.add(mergeCs);
    request = new SetPartitionsStatsRequest(mergeColStats);
    request.setNeedMerge(true);
    msc.setPartitionColumnStatistics(request);
    getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0).getStatsData().getStringStats();
    assertEquals(20.0, getScsd.getAvgColLen());
    assertEquals(20, getScsd.getMaxColLen());
    assertEquals(100, getScsd.getNumNulls());
    // since metastore is ObjectStore, we use the max function to merge.
    assertEquals(456, getScsd.getNumDVs());
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) CreateDatabaseEvent(org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent) ArrayList(java.util.ArrayList) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest) ListenerEvent(org.apache.hadoop.hive.metastore.events.ListenerEvent) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) CreateTableEvent(org.apache.hadoop.hive.metastore.events.CreateTableEvent) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 2 with SetPartitionsStatsRequest

Use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in the Apache Hive project.

From class ColumnStatsTask, method persistColumnStats.

/**
 * Builds column statistics from the packed result rows and writes them to the metastore.
 * The same path is used for table-level and partition-level column statistics.
 *
 * @param db handle used to send the statistics request
 * @return always 0
 */
private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
    List<ColumnStatistics> packedStats = constructColumnStatsFromPackedRows(db);
    SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest(packedStats);

    // Merging is requested only when the work carries column stats with a positive bit-vector count.
    boolean hasBitVectors = work.getColStats() != null && work.getColStats().getNumBitVector() > 0;
    if (hasBitVectors) {
        statsRequest.setNeedMerge(true);
    }

    db.setPartitionColumnStatistics(statsRequest);
    return 0;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest)

Example 3 with SetPartitionsStatsRequest

Use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in the Apache Hive project.

From class ColStatsProcessor, method persistColumnStats.

/**
 * Builds column statistics in batches from the packed result rows and sends each non-empty
 * batch to the metastore. The same path is used for table-level and partition-level stats.
 *
 * For transactional tables the allocated write id and, when available, the valid write id
 * list are attached to every request.
 *
 * @param db  handle used to send the statistics requests
 * @param tbl table whose statistics are being persisted
 * @return always 0
 */
public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaException, IOException {
    // A transaction manager is only looked up for transactional tables.
    final HiveTxnManager txnMgr;
    if (AcidUtils.isTransactionalTable(tbl)) {
        txnMgr = SessionState.get().getTxnMgr();
    } else {
        txnMgr = null;
    }

    long writeId = -1;
    ValidWriteIdList validWriteIdList = null;
    if (txnMgr != null) {
        writeId = txnMgr.getAllocatedTableWriteId(tbl.getDbName(), tbl.getTableName());
        validWriteIdList = AcidUtils.getTableValidWriteIdList(conf, AcidUtils.getFullTableName(tbl.getDbName(), tbl.getTableName()));
    }

    final long maxNumStats = conf.getLongVar(HiveConf.ConfVars.HIVE_STATS_MAX_NUM_STATS);
    boolean done;
    do {
        List<ColumnStatistics> batch = new ArrayList<>();
        long buildStart = System.currentTimeMillis();
        done = constructColumnStatsFromPackedRows(tbl, batch, maxNumStats);
        long buildEnd = System.currentTimeMillis();
        LOG.info("Time taken to build " + batch.size() + " stats desc : " + ((buildEnd - buildStart) / 1000F) + " seconds.");

        // Nothing to send for an empty batch; move on to the next round.
        if (!batch.isEmpty()) {
            SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest(batch, Constants.HIVE_ENGINE);
            statsRequest.setNeedMerge(colStatDesc.isNeedMerge());
            if (txnMgr != null) {
                statsRequest.setWriteId(writeId);
                if (validWriteIdList != null) {
                    statsRequest.setValidWriteIdList(validWriteIdList.toString());
                }
            }
            long updateStart = System.currentTimeMillis();
            db.setPartitionColumnStatistics(statsRequest);
            long updateEnd = System.currentTimeMillis();
            LOG.info("Time taken to update " + batch.size() + " stats : " + ((updateEnd - updateStart) / 1000F) + " seconds.");
        }
    } while (!done);
    return 0;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) HiveTxnManager(org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager) ArrayList(java.util.ArrayList) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest)

Example 4 with SetPartitionsStatsRequest

Use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in the Apache Hive project.

From class TestStats, method createMetadata.

/**
 * Creates the catalog (when it is neither the default one nor NO_CAT), the database (when it
 * is not the default one), the table and its partitions, then sets column statistics for the
 * table or for each partition in one request.
 *
 * @param catName   catalog name, or NO_CAT
 * @param dbName    database name
 * @param tableName table name
 * @param partKey   partition column name, or null for an unpartitioned table
 * @param partVals  partition values; must be non-empty when partKey is not null
 * @param colMap    columns to create and to build statistics for
 * @return the names ("key=value") of the partitions that received statistics; empty when
 *         partKey is null
 */
private List<String> createMetadata(String catName, String dbName, String tableName, String partKey, List<String> partVals, Map<String, Column> colMap) throws TException {
    final boolean explicitCatalog = !NO_CAT.equals(catName);
    final boolean partitioned = partKey != null;

    if (!DEFAULT_CATALOG_NAME.equals(catName) && explicitCatalog) {
        Catalog cat = new CatalogBuilder().setName(catName).setLocation(MetaStoreTestUtils.getTestWarehouseDir(catName)).build();
        client.createCatalog(cat);
    }

    Database db;
    if (DEFAULT_DATABASE_NAME.equals(dbName)) {
        db = client.getDatabase(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME);
    } else {
        DatabaseBuilder dbBuilder = new DatabaseBuilder().setName(dbName);
        if (explicitCatalog) {
            dbBuilder.setCatalogName(catName);
        }
        db = dbBuilder.create(client, conf);
    }

    TableBuilder tableBuilder = new TableBuilder().inDb(db).setTableName(tableName);
    for (Column column : colMap.values()) {
        tableBuilder.addCol(column.colName, column.colType);
    }
    if (partitioned) {
        assert partVals != null && !partVals.isEmpty() : "Must provide partition values for partitioned table";
        tableBuilder.addPartCol(partKey, ColumnType.STRING_TYPE_NAME);
    }
    Table table = tableBuilder.create(client, conf);

    if (partitioned) {
        for (String value : partVals) {
            new PartitionBuilder().inTable(table).addValue(value).addToTable(client, conf);
        }
    }

    // One request carries either the table-level stats or one entry per partition.
    SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest();
    List<String> partNames = new ArrayList<>();
    if (partitioned) {
        for (String value : partVals) {
            String name = partKey + "=" + value;
            statsRequest.addToColStats(buildStatsForOneTableOrPartition(catName, dbName, tableName, name, colMap.values()));
            partNames.add(name);
        }
    } else {
        statsRequest.addToColStats(buildStatsForOneTableOrPartition(catName, dbName, tableName, null, colMap.values()));
    }
    statsRequest.setEngine(ENGINE);
    client.setPartitionColumnStatistics(statsRequest);
    return partNames;
}
Also used : DatabaseBuilder(org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder) Table(org.apache.hadoop.hive.metastore.api.Table) PartitionBuilder(org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder) CatalogBuilder(org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder) Database(org.apache.hadoop.hive.metastore.api.Database) ArrayList(java.util.ArrayList) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) Catalog(org.apache.hadoop.hive.metastore.api.Catalog)

Example 5 with SetPartitionsStatsRequest

Use of org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest in the Apache Hive project.

From class TestPartitionStat, method updatePartColStat.

/**
 * Sends the given per-partition column statistics to the metastore in a single request.
 *
 * @param partitionStats statistics data keyed by the partition's values
 * @return the partition names built from PART_COL_NAME and each key, in the map's
 *         iteration order
 * @throws Exception if building the statistics or the metastore call fails
 */
private List<String> updatePartColStat(Map<List<String>, ColumnStatisticsData> partitionStats) throws Exception {
    SetPartitionsStatsRequest rqst = new SetPartitionsStatsRequest();
    rqst.setEngine(HIVE_ENGINE);
    List<String> pNameList = new ArrayList<>();
    // A parameterized entry type removes the need for unchecked casts on key and value.
    for (Map.Entry<List<String>, ColumnStatisticsData> entry : partitionStats.entrySet()) {
        List<String> partVals = entry.getKey();
        ColumnStatistics colStats = createPartColStats(partVals, entry.getValue());
        String pName = FileUtils.makePartName(Collections.singletonList(PART_COL_NAME), partVals);
        rqst.addToColStats(colStats);
        pNameList.add(pName);
    }
    client.setPartitionColumnStatistics(rqst);
    return pNameList;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ArrayList(java.util.ArrayList) SetPartitionsStatsRequest(org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SetPartitionsStatsRequest (org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest)6 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)5 ArrayList (java.util.ArrayList)4 ValidWriteIdList (org.apache.hadoop.hive.common.ValidWriteIdList)2 BitSet (java.util.BitSet)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ValidReaderWriteIdList (org.apache.hadoop.hive.common.ValidReaderWriteIdList)1 Catalog (org.apache.hadoop.hive.metastore.api.Catalog)1 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)1 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)1 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)1 Database (org.apache.hadoop.hive.metastore.api.Database)1 StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData)1 Table (org.apache.hadoop.hive.metastore.api.Table)1 CatalogBuilder (org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder)1 DatabaseBuilder (org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder)1 PartitionBuilder (org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder)1 TableBuilder (org.apache.hadoop.hive.metastore.client.builder.TableBuilder)1 CreateDatabaseEvent (org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent)1