Search in sources :

Example 6 with MPartitionColumnStatistics

use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.

Example from the class ObjectStore, method deletePartitionColumnStatistics.

/**
 * Deletes partition-level column statistics for one partition of a table.
 * <p>
 * When {@code colName} is non-null, only the stats row for that column is
 * deleted (and it must exist); when null, all stats rows for the partition
 * are deleted. When {@code engine} is non-null, deletion is restricted to
 * stats written by that engine.
 *
 * @param catName  catalog name; normalized before use
 * @param dbName   database name; defaults to the warehouse default database when null
 * @param tableName table name; must be non-null
 * @param partName partition name used in the JDO filter and error messages
 * @param partVals partition values used to locate the MPartition
 * @param colName  column whose stats to delete, or null for all columns
 * @param engine   engine whose stats to delete, or null for all engines
 * @return true if the transaction committed successfully
 * @throws NoSuchObjectException if the table, partition, or (for a named column) its stats row doesn't exist
 * @throws InvalidInputException if {@code tableName} is null
 */
@Override
public boolean deletePartitionColumnStatistics(String catName, String dbName, String tableName, String partName, List<String> partVals, String colName, String engine) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
    boolean ret = false;
    Query query = null;
    // Fall back to the default database when the caller passes a null dbName.
    dbName = org.apache.commons.lang3.StringUtils.defaultString(dbName, Warehouse.DEFAULT_DATABASE_NAME);
    catName = normalizeIdentifier(catName);
    if (tableName == null) {
        throw new InvalidInputException("Table name is null.");
    }
    try {
        openTransaction();
        MTable mTable = getMTable(catName, dbName, tableName);
        MPartitionColumnStatistics mStatsObj;
        List<MPartitionColumnStatistics> mStatsObjColl;
        if (mTable == null) {
            throw new NoSuchObjectException("Table " + tableName + "  for which stats deletion is requested doesn't exist");
        }
        // Note: this does not verify ACID state; called internally when removing cols/etc.
        // Also called via an unused metastore API that checks for ACID tables.
        MPartition mPartition = getMPartition(catName, dbName, tableName, partVals, mTable);
        if (mPartition == null) {
            throw new NoSuchObjectException("Partition " + partName + " for which stats deletion is requested doesn't exist");
        }
        query = pm.newQuery(MPartitionColumnStatistics.class);
        String filter;
        String parameters;
        // Build the JDO filter; the optional engine clause appends one more
        // positional parameter to whichever base filter is chosen.
        if (colName != null) {
            filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && " + "colName == t4 && catName == t5" + (engine != null ? " && engine == t6" : "");
            parameters = "java.lang.String t1, java.lang.String t2, " + "java.lang.String t3, java.lang.String t4, java.lang.String t5" + (engine != null ? ", java.lang.String t6" : "");
        } else {
            filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && catName == t4" + (engine != null ? " && engine == t5" : "");
            parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4" + (engine != null ? ", java.lang.String t5" : "");
        }
        query.setFilter(filter);
        query.declareParameters(parameters);
        if (colName != null) {
            // At most one row matches a (partition, column) pair.
            query.setUnique(true);
            if (engine != null) {
                mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(), normalizeIdentifier(dbName), normalizeIdentifier(tableName), normalizeIdentifier(colName), normalizeIdentifier(catName), engine);
            } else {
                mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(), normalizeIdentifier(dbName), normalizeIdentifier(tableName), normalizeIdentifier(colName), normalizeIdentifier(catName));
            }
            pm.retrieve(mStatsObj);
            if (mStatsObj != null) {
                pm.deletePersistent(mStatsObj);
            } else {
                throw new NoSuchObjectException("Column stats doesn't exist for table=" + TableName.getQualified(catName, dbName, tableName) + " partition=" + partName + " col=" + colName);
            }
        } else {
            if (engine != null) {
                mStatsObjColl = (List<MPartitionColumnStatistics>) query.executeWithArray(partName.trim(), normalizeIdentifier(dbName), normalizeIdentifier(tableName), normalizeIdentifier(catName), engine);
            } else {
                mStatsObjColl = (List<MPartitionColumnStatistics>) query.executeWithArray(partName.trim(), normalizeIdentifier(dbName), normalizeIdentifier(tableName), normalizeIdentifier(catName));
            }
            pm.retrieveAll(mStatsObjColl);
            if (mStatsObjColl != null) {
                pm.deletePersistentAll(mStatsObjColl);
            } else {
                // Fixed message: was "partition" + partName with no separator,
                // inconsistent with the single-column message above.
                throw new NoSuchObjectException("Column stats don't exist for table=" + TableName.getQualified(catName, dbName, tableName) + " partition=" + partName);
            }
        }
        ret = commitTransaction();
    } finally {
        // Rolls back if commit never ran, and closes the JDO query.
        rollbackAndCleanup(ret, query);
    }
    return ret;
}
Also used : InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) MTable(org.apache.hadoop.hive.metastore.model.MTable) ScheduledQuery(org.apache.hadoop.hive.metastore.api.ScheduledQuery) Query(javax.jdo.Query) MScheduledQuery(org.apache.hadoop.hive.metastore.model.MScheduledQuery) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MPartition(org.apache.hadoop.hive.metastore.model.MPartition)

Example 7 with MPartitionColumnStatistics

use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.

Example from the class DirectSqlUpdateStat, method populateInsertUpdateMap.

/**
 * Splits the incoming per-partition column statistics into two maps: stats
 * that already have a row in PART_COL_STATS (to be UPDATEd) and stats that
 * don't (to be INSERTed).
 * <p>
 * First queries PART_COL_STATS for the (PART_ID, COLUMN_NAME) pairs that
 * already exist for the given partitions, then routes each incoming stat
 * object into {@code updateMap} or {@code insertMap} accordingly.
 *
 * @param statsPartInfoMap incoming stats keyed by partition info
 * @param updateMap out-param: stats whose rows already exist
 * @param insertMap out-param: stats whose rows must be created
 * @param dbConn JDBC connection to the metastore database; not closed here
 */
private void populateInsertUpdateMap(Map<PartitionInfo, ColumnStatistics> statsPartInfoMap, Map<PartColNameInfo, MPartitionColumnStatistics> updateMap, Map<PartColNameInfo, MPartitionColumnStatistics> insertMap, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
    StringBuilder prefix = new StringBuilder();
    StringBuilder suffix = new StringBuilder();
    Statement statement = null;
    ResultSet rs = null;
    List<String> queries = new ArrayList<>();
    Set<PartColNameInfo> selectedParts = new HashSet<>();
    List<Long> partIdList = statsPartInfoMap.keySet().stream().map(e -> e.partitionId).collect(Collectors.toList());
    prefix.append("select \"PART_ID\", \"COLUMN_NAME\" from \"PART_COL_STATS\" WHERE ");
    // May split into multiple queries when the IN clause would exceed DB limits.
    TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, partIdList, "\"PART_ID\"", true, false);
    for (String query : queries) {
        try {
            statement = dbConn.createStatement();
            LOG.debug("Going to execute query " + query);
            rs = statement.executeQuery(query);
            while (rs.next()) {
                selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2)));
            }
        } finally {
            close(rs, statement, null);
        }
    }
    // Typed entries instead of raw Map.Entry + unchecked casts.
    for (Map.Entry<PartitionInfo, ColumnStatistics> entry : statsPartInfoMap.entrySet()) {
        PartitionInfo partitionInfo = entry.getKey();
        ColumnStatistics colStats = entry.getValue();
        long partId = partitionInfo.partitionId;
        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
        for (ColumnStatisticsObj statisticsObj : colStats.getStatsObj()) {
            PartColNameInfo temp = new PartColNameInfo(partId, statisticsObj.getColName());
            if (selectedParts.contains(temp)) {
                updateMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
            } else {
                insertMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
            }
        }
    }
}
Also used : UpdatePartitionColumnStatEventBatch(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEventBatch) Connection(java.sql.Connection) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) LoggerFactory(org.slf4j.LoggerFactory) PersistenceManager(javax.jdo.PersistenceManager) HashMap(java.util.HashMap) ConfVars(org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) SQLException(java.sql.SQLException) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) JDOConnection(javax.jdo.datastore.JDOConnection) COLUMN_STATS_ACCURATE(org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE) ResultSet(java.sql.ResultSet) EventMessage(org.apache.hadoop.hive.metastore.messaging.EventMessage) SQLGenerator(org.apache.hadoop.hive.metastore.tools.SQLGenerator) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) UpdatePartitionColumnStatEvent(org.apache.hadoop.hive.metastore.events.UpdatePartitionColumnStatEvent) Logger(org.slf4j.Logger) HMSHandler.getPartValsFromName(org.apache.hadoop.hive.metastore.HMSHandler.getPartValsFromName) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) Set(java.util.Set) PreparedStatement(java.sql.PreparedStatement) Collectors(java.util.stream.Collectors) Table(org.apache.hadoop.hive.metastore.api.Table) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) TxnUtils(org.apache.hadoop.hive.metastore.txn.TxnUtils) List(java.util.List) MetastoreConf(org.apache.hadoop.hive.metastore.conf.MetastoreConf) Statement(java.sql.Statement) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 
MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) ColumnStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatistics) PreparedStatement(java.sql.PreparedStatement) Statement(java.sql.Statement) ArrayList(java.util.ArrayList) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ColumnStatisticsDesc(org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc) ResultSet(java.sql.ResultSet) HashMap(java.util.HashMap) Map(java.util.Map) HashSet(java.util.HashSet)

Example 8 with MPartitionColumnStatistics

use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.

Example from the class DirectSqlUpdateStat, method insertIntoPartColStatTable.

/**
 * Batch-inserts new column-statistics rows into PART_COL_STATS via a
 * parameterized INSERT, assigning each row a fresh CS_ID starting at
 * {@code maxCsId} and flushing a batch every {@code maxBatchSize} rows.
 *
 * @param insertMap stats to insert, keyed by (partition id, column name)
 * @param maxCsId first CS_ID value to assign; incremented per row
 * @param dbConn JDBC connection to the metastore database; not closed here
 */
private void insertIntoPartColStatTable(Map<PartColNameInfo, MPartitionColumnStatistics> insertMap, long maxCsId, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
    PreparedStatement preparedStatement = null;
    int numRows = 0;
    String insert = "INSERT INTO \"PART_COL_STATS\" (\"CS_ID\", \"CAT_NAME\", \"DB_NAME\"," + "\"TABLE_NAME\", \"PARTITION_NAME\", \"COLUMN_NAME\", \"COLUMN_TYPE\", \"PART_ID\"," + " \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", \"DOUBLE_HIGH_VALUE\", \"DOUBLE_LOW_VALUE\"," + " \"BIG_DECIMAL_LOW_VALUE\", \"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"BIT_VECTOR\" ," + " \"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\", \"ENGINE\") values " + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
    try {
        preparedStatement = dbConn.prepareStatement(insert);
        // Typed entries instead of raw Map.Entry + unchecked casts.
        for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> entry : insertMap.entrySet()) {
            PartColNameInfo partColNameInfo = entry.getKey();
            Long partId = partColNameInfo.partitionId;
            MPartitionColumnStatistics mPartitionColumnStatistics = entry.getValue();
            preparedStatement.setLong(1, maxCsId);
            preparedStatement.setString(2, mPartitionColumnStatistics.getCatName());
            preparedStatement.setString(3, mPartitionColumnStatistics.getDbName());
            preparedStatement.setString(4, mPartitionColumnStatistics.getTableName());
            preparedStatement.setString(5, mPartitionColumnStatistics.getPartitionName());
            preparedStatement.setString(6, mPartitionColumnStatistics.getColName());
            preparedStatement.setString(7, mPartitionColumnStatistics.getColType());
            preparedStatement.setLong(8, partId);
            // setObject is used for nullable numeric columns so SQL NULL is
            // written when the stat is absent.
            preparedStatement.setObject(9, mPartitionColumnStatistics.getLongLowValue());
            preparedStatement.setObject(10, mPartitionColumnStatistics.getLongHighValue());
            preparedStatement.setObject(11, mPartitionColumnStatistics.getDoubleHighValue());
            preparedStatement.setObject(12, mPartitionColumnStatistics.getDoubleLowValue());
            preparedStatement.setString(13, mPartitionColumnStatistics.getDecimalLowValue());
            preparedStatement.setString(14, mPartitionColumnStatistics.getDecimalHighValue());
            preparedStatement.setObject(15, mPartitionColumnStatistics.getNumNulls());
            preparedStatement.setObject(16, mPartitionColumnStatistics.getNumDVs());
            preparedStatement.setObject(17, mPartitionColumnStatistics.getBitVector());
            preparedStatement.setObject(18, mPartitionColumnStatistics.getAvgColLen());
            preparedStatement.setObject(19, mPartitionColumnStatistics.getMaxColLen());
            preparedStatement.setObject(20, mPartitionColumnStatistics.getNumTrues());
            preparedStatement.setObject(21, mPartitionColumnStatistics.getNumFalses());
            preparedStatement.setLong(22, mPartitionColumnStatistics.getLastAnalyzed());
            preparedStatement.setString(23, mPartitionColumnStatistics.getEngine());
            maxCsId++;
            numRows++;
            preparedStatement.addBatch();
            if (numRows == maxBatchSize) {
                preparedStatement.executeBatch();
                numRows = 0;
            }
        }
        // Flush any rows remaining in the final partial batch.
        if (numRows != 0) {
            preparedStatement.executeBatch();
        }
    } finally {
        closeStmt(preparedStatement);
    }
}
Also used : PreparedStatement(java.sql.PreparedStatement) HashMap(java.util.HashMap) Map(java.util.Map) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics)

Example 9 with MPartitionColumnStatistics

use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.

Example from the class DirectSqlUpdateStat, method updatePartColStatTable.

/**
 * Updates existing PART_COL_STATS rows, one UPDATE per (partition, column)
 * entry. The SET clause and its bind values come from
 * {@link StatObjectConverter}; the WHERE clause pins the row by PART_ID and
 * COLUMN_NAME. Exactly one row must be affected per statement.
 *
 * @param updateMap stats to update, keyed by (partition id, column name)
 * @param dbConn JDBC connection to the metastore database; not closed here
 * @throws MetaException if an UPDATE affects a row count other than 1
 */
private void updatePartColStatTable(Map<PartColNameInfo, MPartitionColumnStatistics> updateMap, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
    PreparedStatement pst = null;
    // Typed entries instead of raw Map.Entry + unchecked casts.
    for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> entry : updateMap.entrySet()) {
        PartColNameInfo partColNameInfo = entry.getKey();
        Long partId = partColNameInfo.partitionId;
        MPartitionColumnStatistics mPartitionColumnStatistics = entry.getValue();
        String update = "UPDATE \"PART_COL_STATS\" SET ";
        update += StatObjectConverter.getUpdatedColumnSql(mPartitionColumnStatistics);
        // partId is numeric; colName is quoted/escaped by quoteString.
        update += " WHERE \"PART_ID\" = " + partId + " AND " + " \"COLUMN_NAME\" = " + quoteString(mPartitionColumnStatistics.getColName());
        try {
            pst = dbConn.prepareStatement(update);
            // Binds the ? placeholders produced by getUpdatedColumnSql.
            StatObjectConverter.initUpdatedColumnStatement(mPartitionColumnStatistics, pst);
            LOG.debug("Going to execute update " + update);
            int numUpdate = pst.executeUpdate();
            if (numUpdate != 1) {
                throw new MetaException("Invalid state of  PART_COL_STATS for PART_ID " + partId);
            }
        } finally {
            closeStmt(pst);
        }
    }
}
Also used : PreparedStatement(java.sql.PreparedStatement) HashMap(java.util.HashMap) Map(java.util.Map) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Example 10 with MPartitionColumnStatistics

use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.

Example from the class ObjectStore, method getMPartitionColumnStatistics.

/**
 * Fetches partition column statistics via JDO for the cross product of the
 * given partition names and column names, restricted to one engine.
 * <p>
 * Builds a single JDO filter of the form
 * {@code tableName == t1 && ... && (partitionName == pI || ...) && (colName == cJ || ...)}
 * with one positional parameter per partition/column name.
 *
 * @param table     table whose partition stats are requested
 * @param partNames partition names to match (OR'ed in the filter)
 * @param colNames  column names to match (OR'ed in the filter)
 * @param engine    engine whose stats to fetch
 * @return matching stats ordered by partition name; empty list on rollback
 * @throws MetaException on JDO errors during the query
 */
private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(Table table, List<String> partNames, List<String> colNames, String engine) throws MetaException {
    boolean committed = false;
    try {
        openTransaction();
        // table. TODO: we need verify the partition column instead
        try {
            validateTableCols(table, colNames);
        } catch (MetaException me) {
            LOG.warn("The table does not have the same column definition as its partition.");
        }
        List<MPartitionColumnStatistics> result = Collections.emptyList();
        try (Query query = pm.newQuery(MPartitionColumnStatistics.class)) {
            // StringBuilder instead of repeated String += in the loops below
            // (avoids O(n^2) concatenation for many partitions/columns).
            StringBuilder paramStr = new StringBuilder("java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4");
            StringBuilder filter = new StringBuilder("tableName == t1 && dbName == t2 && catName == t3 && engine == t4 && (");
            Object[] params = new Object[colNames.size() + partNames.size() + 4];
            int i = 0;
            params[i++] = table.getTableName();
            params[i++] = table.getDbName();
            params[i++] = table.isSetCatName() ? table.getCatName() : getDefaultCatalog(conf);
            params[i++] = engine;
            int firstI = i;
            for (String s : partNames) {
                filter.append(i == firstI ? "" : " || ").append("partitionName == p").append(i);
                paramStr.append(", java.lang.String p").append(i);
                params[i++] = s;
            }
            filter.append(") && (");
            firstI = i;
            for (String s : colNames) {
                filter.append(i == firstI ? "" : " || ").append("colName == c").append(i);
                paramStr.append(", java.lang.String c").append(i);
                params[i++] = s;
            }
            filter.append(")");
            query.setFilter(filter.toString());
            query.declareParameters(paramStr.toString());
            query.setOrdering("partitionName ascending");
            result = (List<MPartitionColumnStatistics>) query.executeWithArray(params);
            pm.retrieveAll(result);
            // Copy out of the JDO-managed list before the query is closed.
            result = new ArrayList<>(result);
        } catch (Exception ex) {
            LOG.error("Error retrieving statistics via jdo", ex);
            throw new MetaException(ex.getMessage());
        }
        committed = commitTransaction();
        return result;
    } finally {
        if (!committed) {
            rollbackTransaction();
            // NOTE(review): returning from finally suppresses any exception
            // thrown above (including the MetaException rethrow) and yields an
            // empty list instead — preserved here as callers may rely on it.
            return Collections.emptyList();
        }
    }
}
Also used : ScheduledQuery(org.apache.hadoop.hive.metastore.api.ScheduledQuery) Query(javax.jdo.Query) MScheduledQuery(org.apache.hadoop.hive.metastore.model.MScheduledQuery) MPartitionColumnStatistics(org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics) MConstraint(org.apache.hadoop.hive.metastore.model.MConstraint) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidInputException(org.apache.hadoop.hive.metastore.api.InvalidInputException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) SQLIntegrityConstraintViolationException(java.sql.SQLIntegrityConstraintViolationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) InvalidPartitionException(org.apache.hadoop.hive.metastore.api.InvalidPartitionException) UnknownPartitionException(org.apache.hadoop.hive.metastore.api.UnknownPartitionException) InvalidObjectException(org.apache.hadoop.hive.metastore.api.InvalidObjectException) JDOException(javax.jdo.JDOException) MissingTableException(org.datanucleus.store.rdbms.exceptions.MissingTableException) SQLException(java.sql.SQLException) UnknownDBException(org.apache.hadoop.hive.metastore.api.UnknownDBException) TException(org.apache.thrift.TException) JDODataStoreException(javax.jdo.JDODataStoreException) JDOObjectNotFoundException(javax.jdo.JDOObjectNotFoundException) UnknownTableException(org.apache.hadoop.hive.metastore.api.UnknownTableException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Aggregations

MPartitionColumnStatistics (org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics)11 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)7 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)6 HashMap (java.util.HashMap)5 SQLException (java.sql.SQLException)4 ArrayList (java.util.ArrayList)4 Map (java.util.Map)4 PreparedStatement (java.sql.PreparedStatement)3 Query (javax.jdo.Query)3 ColumnStatisticsDesc (org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc)3 ColumnStatisticsObj (org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj)3 InvalidInputException (org.apache.hadoop.hive.metastore.api.InvalidInputException)3 IOException (java.io.IOException)2 Connection (java.sql.Connection)2 SQLIntegrityConstraintViolationException (java.sql.SQLIntegrityConstraintViolationException)2 List (java.util.List)2 JDODataStoreException (javax.jdo.JDODataStoreException)2 JDOException (javax.jdo.JDOException)2 JDOObjectNotFoundException (javax.jdo.JDOObjectNotFoundException)2 JDOConnection (javax.jdo.datastore.JDOConnection)2