Search in sources :

Example 1 with COLUMN_STATS_ACCURATE

Use of org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE in the Apache Hive project.

From the class DirectSqlUpdateStat, method updatePartitionParamTable.

/**
 * Rewrites the COLUMN_STATS_ACCURATE entry in the PARTITION_PARAMS table for every
 * partition in {@code partitionInfoMap}, using batched JDBC statements.
 *
 * <p>For each partition: if no existing parameter row is found, a new one marking the
 * given columns' stats as accurate is inserted; otherwise the existing value is merged
 * with the new column list and either updated in place or — for ACID tables whose old
 * stats are no longer valid for this write — deleted outright.
 *
 * @param dbConn           open JDBC connection to the metastore backing database
 * @param partitionInfoMap partition descriptors mapped to the column statistics being written
 * @param validWriteIds    valid write-id list used to validate existing ACID stats
 * @param writeId          write id of the current statistics update
 * @param isAcidTable      whether the target table is transactional (ACID)
 * @return partition name mapped to the new parameter map that was persisted for it
 * @throws SQLException  on any JDBC failure
 * @throws MetaException if the stats-change context is rejected for an ACID table
 */
private Map<String, Map<String, String>> updatePartitionParamTable(Connection dbConn, Map<PartitionInfo, ColumnStatistics> partitionInfoMap, String validWriteIds, long writeId, boolean isAcidTable) throws SQLException, MetaException {
    Map<String, Map<String, String>> result = new HashMap<>();
    boolean areTxnStatsSupported = MetastoreConf.getBoolVar(conf, ConfVars.HIVE_TXN_STATS_ENABLED);
    PreparedStatement statementInsert = null;
    PreparedStatement statementDelete = null;
    PreparedStatement statementUpdate = null;
    // Parameterized SQL: only PART_ID / PARAM_VALUE vary, so each statement can be batched.
    String insert = "INSERT INTO \"PARTITION_PARAMS\" (\"PART_ID\", \"PARAM_KEY\", \"PARAM_VALUE\") " + "VALUES( ? , 'COLUMN_STATS_ACCURATE'  , ? )";
    String delete = "DELETE from \"PARTITION_PARAMS\" " + " where \"PART_ID\" = ? " + " and \"PARAM_KEY\" = 'COLUMN_STATS_ACCURATE'";
    String update = "UPDATE \"PARTITION_PARAMS\" set \"PARAM_VALUE\" = ? " + " where \"PART_ID\" = ? " + " and \"PARAM_KEY\" = 'COLUMN_STATS_ACCURATE'";
    int numInsert = 0;
    int numDelete = 0;
    int numUpdate = 0;
    List<Long> partIdList = partitionInfoMap.keySet().stream().map(e -> e.partitionId).collect(Collectors.toList());
    // get the old parameters from PARTITION_PARAMS table.
    Map<Long, String> partIdToParaMap = getParamValues(dbConn, partIdList);
    try {
        statementInsert = dbConn.prepareStatement(insert);
        statementDelete = dbConn.prepareStatement(delete);
        statementUpdate = dbConn.prepareStatement(update);
        // Typed entry iteration: the raw Map.Entry + explicit casts of the original are unnecessary.
        for (Map.Entry<PartitionInfo, ColumnStatistics> entry : partitionInfoMap.entrySet()) {
            PartitionInfo partitionInfo = entry.getKey();
            ColumnStatistics colStats = entry.getValue();
            List<String> colNames = colStats.getStatsObj().stream().map(ColumnStatisticsObj::getColName).collect(Collectors.toList());
            long partWriteId = partitionInfo.writeId;
            long partId = partitionInfo.partitionId;
            Map<String, String> newParameter;
            if (!partIdToParaMap.containsKey(partId)) {
                // No existing row: insert a fresh COLUMN_STATS_ACCURATE value for these columns.
                newParameter = new HashMap<>();
                newParameter.put(COLUMN_STATS_ACCURATE, "TRUE");
                StatsSetupConst.setColumnStatsState(newParameter, colNames);
                statementInsert.setLong(1, partId);
                statementInsert.setString(2, newParameter.get(COLUMN_STATS_ACCURATE));
                numInsert++;
                statementInsert.addBatch();
                if (numInsert == maxBatchSize) {
                    LOG.debug(" Executing insert {}", insert);
                    statementInsert.executeBatch();
                    numInsert = 0;
                }
            } else {
                // Existing row: merge the new column states into the stored value.
                String oldStats = partIdToParaMap.get(partId);
                Map<String, String> oldParameter = new HashMap<>();
                oldParameter.put(COLUMN_STATS_ACCURATE, oldStats);
                newParameter = new HashMap<>();
                newParameter.put(COLUMN_STATS_ACCURATE, oldStats);
                StatsSetupConst.setColumnStatsState(newParameter, colNames);
                if (isAcidTable) {
                    // Reject the change up front if the txn-stats context is inconsistent.
                    String errorMsg = ObjectStore.verifyStatsChangeCtx(colStats.getStatsDesc().getDbName() + "." + colStats.getStatsDesc().getTableName(), oldParameter, newParameter, writeId, validWriteIds, true);
                    if (errorMsg != null) {
                        throw new MetaException(errorMsg);
                    }
                }
                if (isAcidTable && (!areTxnStatsSupported || !ObjectStore.isCurrentStatsValidForTheQuery(oldParameter, partWriteId, validWriteIds, true))) {
                    // Stale/unsupported ACID stats: drop the accuracy marker entirely.
                    statementDelete.setLong(1, partId);
                    statementDelete.addBatch();
                    numDelete++;
                    if (numDelete == maxBatchSize) {
                        statementDelete.executeBatch();
                        numDelete = 0;
                        // NOTE(review): this message fires only on a batch flush and names only the
                        // current partition, not every partition removed in the batch — confirm intent.
                        LOG.debug("Removed COLUMN_STATS_ACCURATE from the parameters of the partition {}.{}.{}", colStats.getStatsDesc().getDbName(), colStats.getStatsDesc().getTableName(), colStats.getStatsDesc().getPartName());
                    }
                } else {
                    statementUpdate.setString(1, newParameter.get(COLUMN_STATS_ACCURATE));
                    statementUpdate.setLong(2, partId);
                    statementUpdate.addBatch();
                    numUpdate++;
                    if (numUpdate == maxBatchSize) {
                        LOG.debug(" Executing update {}", statementUpdate);
                        statementUpdate.executeBatch();
                        numUpdate = 0;
                    }
                }
            }
            result.put(partitionInfo.partitionName, newParameter);
        }
        // Flush any partial batches left after the loop.
        if (numInsert != 0) {
            statementInsert.executeBatch();
        }
        if (numUpdate != 0) {
            statementUpdate.executeBatch();
        }
        if (numDelete != 0) {
            statementDelete.executeBatch();
        }
        if (isAcidTable) {
            // Record the write id so the new stats are tied to this transaction.
            updateWriteIdForPartitions(dbConn, writeId, partIdList);
        }
        return result;
    } finally {
        // closeStmt is null-safe, so statements that failed to prepare are handled too.
        closeStmt(statementInsert);
        closeStmt(statementUpdate);
        closeStmt(statementDelete);
    }
}
Also used : UpdatePartitionColumnStatEventBatch(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEventBatch) Connection(java.sql.Connection) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LoggerFactory(org.slf4j.LoggerFactory) PersistenceManager(javax.jdo.PersistenceManager) HashMap(java.util.HashMap) ConfVars(org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) SQLException(java.sql.SQLException) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) JDOConnection(javax.jdo.datastore.JDOConnection) COLUMN_STATS_ACCURATE(org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE) ResultSet(java.sql.ResultSet) EventMessage(org.apache.hadoop.hive.metastore.messaging.EventMessage) SQLGenerator(org.apache.hadoop.hive.metastore.tools.SQLGenerator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) UpdatePartitionColumnStatEvent(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEvent) Logger(org.slf4j.Logger) HMSHandler.getPartValsFromName(org.apache.hadoop.hive.metastore.HMSHandler.getPartValsFromName) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Set(java.util.Set) PreparedStatement(java.sql.PreparedStatement) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) Statement(java.sql.Statement) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 
MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) HashMap(java.util.HashMap) PreparedStatement(java.sql.PreparedStatement) HashMap(java.util.HashMap) Map(java.util.Map) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Aggregations

Connection (java.sql.Connection)1 PreparedStatement (java.sql.PreparedStatement)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 Statement (java.sql.Statement)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ReentrantLock (java.util.concurrent.locks.ReentrantLock)1 Collectors (java.util.stream.Collectors)1 PersistenceManager (javax.jdo.PersistenceManager)1 JDOConnection (javax.jdo.datastore.JDOConnection)1 Configuration (org.apache.hadoop.conf.Configuration)1 StatsSetupConst (org.apache.hadoop.hive.common.StatsSetupConst)1 COLUMN_STATS_ACCURATE (org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE)1 HMSHandler.getPartValsFromName (org.apache.hadoop.hive.metastore.HMSHandler.getPartValsFromName)1 ColumnStatistics (org.apache.hadoop.hive.metastore.api.ColumnStatistics)1