
Example 91 with ColumnStatisticsDesc

Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

Class DirectSqlUpdateStat, method populateInsertUpdateMap.

private void populateInsertUpdateMap(Map<PartitionInfo, ColumnStatistics> statsPartInfoMap, Map<PartColNameInfo, MPartitionColumnStatistics> updateMap, Map<PartColNameInfo, MPartitionColumnStatistics> insertMap, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
    StringBuilder prefix = new StringBuilder();
    StringBuilder suffix = new StringBuilder();
    Statement statement = null;
    ResultSet rs = null;
    List<String> queries = new ArrayList<>();
    Set<PartColNameInfo> selectedParts = new HashSet<>();
    List<Long> partIdList = statsPartInfoMap.keySet().stream().map(e -> e.partitionId).collect(Collectors.toList());
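    // Look up which (PART_ID, COLUMN_NAME) pairs already exist in PART_COL_STATS,
    // batching the PART_ID values into IN clauses of bounded size.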
    prefix.append("select \"PART_ID\", \"COLUMN_NAME\" from \"PART_COL_STATS\" WHERE ");
    TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, partIdList, "\"PART_ID\"", true, false);
    for (String query : queries) {
        try {
            statement = dbConn.createStatement();
            LOG.debug("Going to execute query " + query);
            rs = statement.executeQuery(query);
            while (rs.next()) {
                selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2)));
            }
        } finally {
            close(rs, statement, null);
        }
    }
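    // Route each incoming stats object: pairs already present in the table go to updateMap, new pairs to insertMap.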
    for (Map.Entry<PartitionInfo, ColumnStatistics> entry : statsPartInfoMap.entrySet()) {
        PartitionInfo partitionInfo = entry.getKey();
        ColumnStatistics colStats = entry.getValue();
        long partId = partitionInfo.partitionId;
        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
        for (ColumnStatisticsObj statisticsObj : colStats.getStatsObj()) {
            PartColNameInfo temp = new PartColNameInfo(partId, statisticsObj.getColName());
            if (selectedParts.contains(temp)) {
                updateMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
            } else {
                insertMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
            }
        }
    }
}
Also used : UpdatePartitionColumnStatEventBatch(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEventBatch) Connection(java.sql.Connection) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LoggerFactory(org.slf4j.LoggerFactory) PersistenceManager(javax.jdo.PersistenceManager) HashMap(java.util.HashMap) ConfVars(org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) SQLException(java.sql.SQLException) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) JDOConnection(javax.jdo.datastore.JDOConnection) COLUMN_STATS_ACCURATE(org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE) ResultSet(java.sql.ResultSet) EventMessage(org.apache.hadoop.hive.metastore.messaging.EventMessage) SQLGenerator(org.apache.hadoop.hive.metastore.tools.SQLGenerator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) UpdatePartitionColumnStatEvent(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEvent) Logger(org.slf4j.Logger) HMSHandler.getPartValsFromName(org.apache.hadoop.hive.metastore.HMSHandler.getPartValsFromName) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Set(java.util.Set) PreparedStatement(java.sql.PreparedStatement) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) Statement(java.sql.Statement) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
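The method above boils down to splitting incoming rows into an UPDATE set and an INSERT set, keyed by what already exists in PART_COL_STATS. A minimal, self-contained sketch of that pattern follows; the class name UpsertSplitSketch, the generic split helper, and the sample keys are illustrative and not part of the Hive code.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Hypothetical, simplified illustration of the upsert-split pattern used above:
// rows whose key already exists in the store go to the UPDATE map, the rest to the INSERT map.
public class UpsertSplitSketch {

    static <K, V> void split(Map<K, V> incoming, Set<K> existingKeys,
                             Map<K, V> updateMap, Map<K, V> insertMap) {
        for (Map.Entry<K, V> entry : incoming.entrySet()) {
            if (existingKeys.contains(entry.getKey())) {
                updateMap.put(entry.getKey(), entry.getValue());   // row exists -> UPDATE
            } else {
                insertMap.put(entry.getKey(), entry.getValue());   // row missing -> INSERT
            }
        }
    }

    public static void main(String[] args) {
        Map<String, String> incoming = Map.of("p1/col_a", "stats1", "p2/col_a", "stats2");
        Set<String> existing = new HashSet<>(Set.of("p1/col_a"));
        Map<String, String> updates = new HashMap<>();
        Map<String, String> inserts = new HashMap<>();
        split(incoming, existing, updates, inserts);
        System.out.println("update: " + updates + ", insert: " + inserts);
    }
}

The Hive method applies the same idea with PartColNameInfo keys and MPartitionColumnStatistics values, after first populating the existing-key set from the database.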

Example 92 with ColumnStatisticsDesc

Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

Class TestPartitionStat, method createPartColStats.

private ColumnStatistics createPartColStats(List<String> partValue, ColumnStatisticsData partitionStats) {
    String pName = FileUtils.makePartName(Collections.singletonList(PART_COL_NAME), partValue);
    ColumnStatistics colStats = new ColumnStatistics();
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
    statsDesc.setPartName(pName);
    colStats.setStatsDesc(statsDesc);
    colStats.setEngine(HIVE_ENGINE);
    ColumnStatisticsObj statObj = new ColumnStatisticsObj(PART_COL_NAME, "int", partitionStats);
    colStats.addToStatsObj(statObj);
    return colStats;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)
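For context, the partitionStats argument passed into createPartColStats is a ColumnStatisticsData union whose set field must match the declared column type ("int" here). A hedged sketch of building such a value with the metastore Thrift classes is shown below; the helper name and the numeric values are made up.

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

// Illustrative only: build the ColumnStatisticsData that createPartColStats expects for an integer column.
public class StatsDataSketch {
    public static ColumnStatisticsData longStats(long numNulls, long numDVs, long low, long high) {
        LongColumnStatsData longData = new LongColumnStatsData(numNulls, numDVs);
        longData.setLowValue(low);
        longData.setHighValue(high);
        ColumnStatisticsData data = new ColumnStatisticsData();
        data.setLongStats(longData);   // the union field matching the "int" column type
        return data;
    }
}

A caller would pass the result as the partitionStats argument of createPartColStats.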

Example 93 with ColumnStatisticsDesc

Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

Class MetaStoreDirectSql, method getPartitionStats.

public List<ColumnStatistics> getPartitionStats(final String catName, final String dbName, final String tableName, final List<String> partNames, List<String> colNames, String engine, boolean enableBitVector) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        return Collections.emptyList();
    }
    final boolean doTrace = LOG.isDebugEnabled();
    final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from " + " " + PART_COL_STATS + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and " + "\"COLUMN_NAME\"" + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) " + " and \"ENGINE\" = ? " + " order by \"PARTITION_NAME\"";
    Batchable<String, Object[]> b = new Batchable<String, Object[]>() {

        @Override
        public List<Object[]> run(final List<String> inputColNames) throws MetaException {
            Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {

                @Override
                public List<Object[]> run(List<String> inputPartNames) throws MetaException {
                    String queryText = String.format(queryText0, makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    try {
                        Object qResult = executeWithArray(query, prepareParams(catName, dbName, tableName, inputPartNames, inputColNames, engine), queryText);
                        MetastoreDirectSqlUtils.timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
                        if (qResult == null) {
                            return Collections.emptyList();
                        }
                        return MetastoreDirectSqlUtils.ensureList(qResult);
                    } finally {
                        addQueryAfterUse(query);
                    }
                }
            };
            try {
                return Batchable.runBatched(batchSize, partNames, b2);
            } finally {
                addQueryAfterUse(b2);
            }
        }
    };
    List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(partNames.size());
    String lastPartName = null;
    int from = 0;
    try {
        List<Object[]> list = Batchable.runBatched(batchSize, colNames, b);
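        // Rows come back ordered by PARTITION_NAME; group consecutive rows for the same partition
        // into one ColumnStatistics (the extra iteration at i == list.size() flushes the last group).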
        for (int i = 0; i <= list.size(); ++i) {
            boolean isLast = i == list.size();
            String partName = isLast ? null : (String) list.get(i)[0];
            if (!isLast && partName.equals(lastPartName)) {
                continue;
            } else if (from != i) {
                ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
                csd.setCatName(catName);
                csd.setPartName(lastPartName);
                result.add(makeColumnStats(list.subList(from, i), csd, 1, engine));
            }
            lastPartName = partName;
            from = i;
            Deadline.checkTimeout();
        }
    } finally {
        b.closeAllQueries();
    }
    return result;
}
Also used : ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) Query(javax.jdo.Query) ArrayList(java.util.ArrayList) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) List(java.util.List) LinkedList(java.util.LinkedList)
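Both levels of Batchable above exist to keep the generated IN (...) lists bounded: the outer batch walks colNames, the inner batch walks partNames, and each combination produces one SQL statement. A simplified, hypothetical sketch of that chunking idea (ignoring the query bookkeeping done by the real Batchable) is shown below; the class and method names are made up.

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

// Simplified sketch of the batching idea: split a long parameter list into fixed-size chunks
// so each generated SQL statement stays within the database's IN-clause limits,
// then concatenate the per-chunk results.
public class InClauseBatchingSketch {
    static <I, R> List<R> runBatched(int batchSize, List<I> input, Function<List<I>, List<R>> runChunk) {
        List<R> results = new ArrayList<>();
        for (int from = 0; from < input.size(); from += batchSize) {
            int to = Math.min(from + batchSize, input.size());
            results.addAll(runChunk.apply(input.subList(from, to)));  // one query per chunk
        }
        return results;
    }
}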

Example 94 with ColumnStatisticsDesc

Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

Class ObjectStore, method updateTableColumnStatistics.

@Override
public Map<String, String> updateTableColumnStatistics(ColumnStatistics colStats, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
    boolean committed = false;
    openTransaction();
    try {
        List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
        // DataNucleus objects get detached all over the place for no (real) reason.
        // So let's not use them anywhere unless absolutely necessary.
        String catName = statsDesc.isSetCatName() ? statsDesc.getCatName() : getDefaultCatalog(conf);
        MTable mTable = ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName());
        Table table = convertToTable(mTable);
        List<String> colNames = new ArrayList<>();
        for (ColumnStatisticsObj statsObj : statsObjs) {
            colNames.add(statsObj.getColName());
        }
        Map<String, MTableColumnStatistics> oldStats = getPartitionColStats(table, colNames, colStats.getEngine());
        for (ColumnStatisticsObj statsObj : statsObjs) {
            MTableColumnStatistics mStatsObj = StatObjectConverter.convertToMTableColumnStatistics(mTable, statsDesc, statsObj, colStats.getEngine());
            writeMTableColumnStatistics(table, mStatsObj, oldStats.get(statsObj.getColName()));
        // There is no need to add colname again, otherwise we will get duplicate colNames.
        }
        // TODO: (HIVE-20109) ideally the col stats state should be in colstats, not in the table!
        // Set the table properties
        // No need to check again if it exists.
        String dbname = table.getDbName();
        String name = table.getTableName();
        MTable oldt = mTable;
        Map<String, String> newParams = new HashMap<>(table.getParameters());
        StatsSetupConst.setColumnStatsState(newParams, colNames);
        boolean isTxn = TxnUtils.isTransactionalTable(oldt.getParameters());
        if (isTxn) {
            if (!areTxnStatsSupported) {
                StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
            } else {
                String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(dbname, name), oldt.getParameters(), newParams, writeId, validWriteIds, true);
                if (errorMsg != null) {
                    throw new MetaException(errorMsg);
                }
                if (!isCurrentStatsValidForTheQuery(oldt, validWriteIds, true)) {
                    // Make sure we set the flag to invalid regardless of the current value.
                    StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
                    LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + dbname + "." + name);
                }
                oldt.setWriteId(writeId);
            }
        }
        oldt.setParameters(newParams);
        committed = commitTransaction();
        // TODO: similar to update...Part, this used to do "return committed;"; makes little sense.
        return committed ? newParams : null;
    } finally {
        if (!committed) {
            rollbackTransaction();
        }
    }
}
Also used : MVersionTable(org.apache.hadoop.hive.metastore.model.MVersionTable) SourceTable(org.apache.hadoop.hive.metastore.api.SourceTable) Table(org.apache.hadoop.hive.metastore.api.Table) MTable(org.apache.hadoop.hive.metastore.model.MTable) MTableColumnStatistics(org.apache.hadoop.hive.metastore.model.MTableColumnStatistics) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)
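The ColumnStatistics argument consumed above carries a table-level descriptor (isTblLevel = true), one ColumnStatisticsObj per column, and the engine name. A hedged sketch of assembling such an argument from the metastore Thrift classes follows; the database, table, and column names and the statistic values are invented for illustration.

import java.util.Collections;

import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

// Illustrative only: assemble a table-level ColumnStatistics of the shape that
// updateTableColumnStatistics expects. All names and values below are made up.
public class TableColStatsSketch {
    public static ColumnStatistics build() {
        ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, "default", "sample_table"); // isTblLevel = true
        LongColumnStatsData longData = new LongColumnStatsData(0L, 42L); // numNulls, numDVs
        ColumnStatisticsData data = new ColumnStatisticsData();
        data.setLongStats(longData);
        ColumnStatisticsObj obj = new ColumnStatisticsObj("id", "bigint", data);
        ColumnStatistics colStats = new ColumnStatistics(desc, Collections.singletonList(obj));
        colStats.setEngine("hive"); // the update path above reads colStats.getEngine()
        return colStats;
    }
}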

Example 95 with ColumnStatisticsDesc

Use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.

Class StatObjectConverter, method getTableColumnStatisticsDesc.

public static ColumnStatisticsDesc getTableColumnStatisticsDesc(MTableColumnStatistics mStatsObj) {
    ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc();
    statsDesc.setIsTblLevel(true);
    statsDesc.setCatName(mStatsObj.getCatName());
    statsDesc.setDbName(mStatsObj.getDbName());
    statsDesc.setTableName(mStatsObj.getTableName());
    statsDesc.setLastAnalyzed(mStatsObj.getLastAnalyzed());
    return statsDesc;
}
Also used : ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)
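For contrast with this table-level descriptor, the partition-level descriptors built in Examples 92 and 93 set isTblLevel to false and carry a partition name. A small illustrative sketch; the class and argument names are made up.

import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;

// Hypothetical counterpart: a partition-level descriptor, as built in Examples 92 and 93 above.
public class PartitionStatsDescSketch {
    public static ColumnStatisticsDesc forPartition(String catName, String dbName, String tableName, String partName) {
        ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); // isTblLevel = false
        desc.setCatName(catName);
        desc.setPartName(partName);
        return desc;
    }
}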

Aggregations

ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc): 95
ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics): 77
ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj): 68
ArrayList (java.util.ArrayList): 60
ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData): 54
Test (org.junit.Test): 50
Table (org.apache.hadoop.hive.metastore.api.Table): 38
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 35
Partition (org.apache.hadoop.hive.metastore.api.Partition): 30
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 30
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 28
AggrStats (org.apache.hadoop.hive.metastore.api.AggrStats): 25
List (java.util.List): 24
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 19
BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 12
DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 11
StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 11
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 9
DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 7
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 7