Use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.
The class ObjectStore, method deletePartitionColumnStatistics:
@Override
public boolean deletePartitionColumnStatistics(String catName, String dbName, String tableName,
    String partName, List<String> partVals, String colName, String engine)
    throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
  boolean ret = false;
  Query query = null;
  dbName = org.apache.commons.lang3.StringUtils.defaultString(dbName, Warehouse.DEFAULT_DATABASE_NAME);
  catName = normalizeIdentifier(catName);
  if (tableName == null) {
    throw new InvalidInputException("Table name is null.");
  }
  try {
    openTransaction();
    MTable mTable = getMTable(catName, dbName, tableName);
    MPartitionColumnStatistics mStatsObj;
    List<MPartitionColumnStatistics> mStatsObjColl;
    if (mTable == null) {
      throw new NoSuchObjectException("Table " + tableName
          + " for which stats deletion is requested doesn't exist");
    }
    // Note: this does not verify ACID state; called internally when removing cols/etc.
    // Also called via an unused metastore API that checks for ACID tables.
    MPartition mPartition = getMPartition(catName, dbName, tableName, partVals, mTable);
    if (mPartition == null) {
      throw new NoSuchObjectException("Partition " + partName
          + " for which stats deletion is requested doesn't exist");
    }
    query = pm.newQuery(MPartitionColumnStatistics.class);
    // Two filter shapes: with a column name the query matches a single stats row;
    // without one it matches the stats rows of every column in the partition.
    String filter;
    String parameters;
    if (colName != null) {
      filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && "
          + "colName == t4 && catName == t5" + (engine != null ? " && engine == t6" : "");
      parameters = "java.lang.String t1, java.lang.String t2, "
          + "java.lang.String t3, java.lang.String t4, java.lang.String t5"
          + (engine != null ? ", java.lang.String t6" : "");
    } else {
      filter = "partition.partitionName == t1 && dbName == t2 && tableName == t3 && catName == t4"
          + (engine != null ? " && engine == t5" : "");
      parameters = "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4"
          + (engine != null ? ", java.lang.String t5" : "");
    }
    query.setFilter(filter);
    query.declareParameters(parameters);
    if (colName != null) {
      query.setUnique(true);
      if (engine != null) {
        mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(),
            normalizeIdentifier(dbName), normalizeIdentifier(tableName),
            normalizeIdentifier(colName), normalizeIdentifier(catName), engine);
      } else {
        mStatsObj = (MPartitionColumnStatistics) query.executeWithArray(partName.trim(),
            normalizeIdentifier(dbName), normalizeIdentifier(tableName),
            normalizeIdentifier(colName), normalizeIdentifier(catName));
      }
      pm.retrieve(mStatsObj);
      if (mStatsObj != null) {
        pm.deletePersistent(mStatsObj);
      } else {
        throw new NoSuchObjectException("Column stats doesn't exist for table="
            + TableName.getQualified(catName, dbName, tableName)
            + " partition=" + partName + " col=" + colName);
      }
    } else {
      if (engine != null) {
        mStatsObjColl = (List<MPartitionColumnStatistics>) query.executeWithArray(partName.trim(),
            normalizeIdentifier(dbName), normalizeIdentifier(tableName),
            normalizeIdentifier(catName), engine);
      } else {
        mStatsObjColl = (List<MPartitionColumnStatistics>) query.executeWithArray(partName.trim(),
            normalizeIdentifier(dbName), normalizeIdentifier(tableName),
            normalizeIdentifier(catName));
      }
      pm.retrieveAll(mStatsObjColl);
      if (mStatsObjColl != null) {
        pm.deletePersistentAll(mStatsObjColl);
      } else {
        throw new NoSuchObjectException("Column stats don't exist for table="
            + TableName.getQualified(catName, dbName, tableName) + " partition=" + partName);
      }
    }
    ret = commitTransaction();
  } finally {
    rollbackAndCleanup(ret, query);
  }
  return ret;
}
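The subtle part of this method is keeping the JDOQL filter and its declared parameter list in sync across four shapes, depending on whether colName and engine are supplied, since executeWithArray binds the values positionally. A minimal standalone sketch (the class and method names are ours; the filter strings are copied from the code above) that prints the four shapes:

// Standalone sketch reproducing the four JDOQL filter shapes built above.
public class FilterShapes {
  static String filter(boolean hasColName, boolean hasEngine) {
    if (hasColName) {
      return "partition.partitionName == t1 && dbName == t2 && tableName == t3 && "
          + "colName == t4 && catName == t5" + (hasEngine ? " && engine == t6" : "");
    }
    return "partition.partitionName == t1 && dbName == t2 && tableName == t3 && catName == t4"
        + (hasEngine ? " && engine == t5" : "");
  }

  public static void main(String[] args) {
    System.out.println(filter(true, true));   // colName + engine: six parameters
    System.out.println(filter(true, false));  // colName only: five parameters
    System.out.println(filter(false, true));  // engine only: five parameters
    System.out.println(filter(false, false)); // neither: four parameters
  }
}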
Use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.
The class DirectSqlUpdateStat, method populateInsertUpdateMap:
private void populateInsertUpdateMap(Map<PartitionInfo, ColumnStatistics> statsPartInfoMap,
    Map<PartColNameInfo, MPartitionColumnStatistics> updateMap,
    Map<PartColNameInfo, MPartitionColumnStatistics> insertMap,
    Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
  StringBuilder prefix = new StringBuilder();
  StringBuilder suffix = new StringBuilder();
  Statement statement = null;
  ResultSet rs = null;
  List<String> queries = new ArrayList<>();
  Set<PartColNameInfo> selectedParts = new HashSet<>();
  List<Long> partIdList = statsPartInfoMap.keySet().stream()
      .map(e -> e.partitionId).collect(Collectors.toList());

  // Find which (PART_ID, COLUMN_NAME) pairs already have stats rows, batching
  // the partition ids into IN clauses of bounded size.
  prefix.append("select \"PART_ID\", \"COLUMN_NAME\" from \"PART_COL_STATS\" WHERE ");
  TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, partIdList, "\"PART_ID\"", true, false);
  for (String query : queries) {
    try {
      statement = dbConn.createStatement();
      LOG.debug("Going to execute query " + query);
      rs = statement.executeQuery(query);
      while (rs.next()) {
        selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2)));
      }
    } finally {
      close(rs, statement, null);
    }
  }

  // Stats for keys that already exist go to updateMap, the rest to insertMap.
  for (Map.Entry<PartitionInfo, ColumnStatistics> entry : statsPartInfoMap.entrySet()) {
    PartitionInfo partitionInfo = entry.getKey();
    ColumnStatistics colStats = entry.getValue();
    long partId = partitionInfo.partitionId;
    ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
    for (ColumnStatisticsObj statisticsObj : colStats.getStatsObj()) {
      PartColNameInfo temp = new PartColNameInfo(partId, statisticsObj.getColName());
      if (selectedParts.contains(temp)) {
        updateMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(
            null, statsDesc, statisticsObj, colStats.getEngine()));
      } else {
        insertMap.put(temp, StatObjectConverter.convertToMPartitionColumnStatistics(
            null, statsDesc, statisticsObj, colStats.getEngine()));
      }
    }
  }
}
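In essence, populateInsertUpdateMap performs a set-membership split: incoming stats whose (PART_ID, COLUMN_NAME) key already exists in PART_COL_STATS are routed to the update map, everything else to the insert map. A self-contained toy version of that split (the class name and sample values are ours; the real code keys on PartColNameInfo and stores MPartitionColumnStatistics values):

import java.util.*;

public class InsertUpdateSplit {
  public static void main(String[] args) {
    // Keys already present in PART_COL_STATS, as gathered above by the
    // batched IN-clause select.
    Set<Map.Entry<Long, String>> existing = new HashSet<>();
    existing.add(new AbstractMap.SimpleEntry<>(1L, "page_id"));

    Map<Map.Entry<Long, String>, String> updateMap = new HashMap<>();
    Map<Map.Entry<Long, String>, String> insertMap = new HashMap<>();
    for (Map.Entry<Long, String> incoming : Arrays.asList(
        new AbstractMap.SimpleEntry<>(1L, "page_id"),    // already stored -> update
        new AbstractMap.SimpleEntry<>(2L, "page_id"))) { // new key -> insert
      (existing.contains(incoming) ? updateMap : insertMap).put(incoming, "<stats>");
    }
    System.out.println("update=" + updateMap.keySet() + " insert=" + insertMap.keySet());
  }
}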
Use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.
The class DirectSqlUpdateStat, method insertIntoPartColStatTable:
private void insertIntoPartColStatTable(Map<PartColNameInfo, MPartitionColumnStatistics> insertMap,
    long maxCsId, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
  PreparedStatement preparedStatement = null;
  int numRows = 0;
  String insert = "INSERT INTO \"PART_COL_STATS\" (\"CS_ID\", \"CAT_NAME\", \"DB_NAME\","
      + "\"TABLE_NAME\", \"PARTITION_NAME\", \"COLUMN_NAME\", \"COLUMN_TYPE\", \"PART_ID\","
      + " \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", \"DOUBLE_HIGH_VALUE\", \"DOUBLE_LOW_VALUE\","
      + " \"BIG_DECIMAL_LOW_VALUE\", \"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"BIT_VECTOR\","
      + " \"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\", \"ENGINE\") values "
      + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
  try {
    preparedStatement = dbConn.prepareStatement(insert);
    for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> entry : insertMap.entrySet()) {
      PartColNameInfo partColNameInfo = entry.getKey();
      Long partId = partColNameInfo.partitionId;
      MPartitionColumnStatistics mPartitionColumnStatistics = entry.getValue();
      preparedStatement.setLong(1, maxCsId);
      preparedStatement.setString(2, mPartitionColumnStatistics.getCatName());
      preparedStatement.setString(3, mPartitionColumnStatistics.getDbName());
      preparedStatement.setString(4, mPartitionColumnStatistics.getTableName());
      preparedStatement.setString(5, mPartitionColumnStatistics.getPartitionName());
      preparedStatement.setString(6, mPartitionColumnStatistics.getColName());
      preparedStatement.setString(7, mPartitionColumnStatistics.getColType());
      preparedStatement.setLong(8, partId);
      preparedStatement.setObject(9, mPartitionColumnStatistics.getLongLowValue());
      preparedStatement.setObject(10, mPartitionColumnStatistics.getLongHighValue());
      preparedStatement.setObject(11, mPartitionColumnStatistics.getDoubleHighValue());
      preparedStatement.setObject(12, mPartitionColumnStatistics.getDoubleLowValue());
      preparedStatement.setString(13, mPartitionColumnStatistics.getDecimalLowValue());
      preparedStatement.setString(14, mPartitionColumnStatistics.getDecimalHighValue());
      preparedStatement.setObject(15, mPartitionColumnStatistics.getNumNulls());
      preparedStatement.setObject(16, mPartitionColumnStatistics.getNumDVs());
      preparedStatement.setObject(17, mPartitionColumnStatistics.getBitVector());
      preparedStatement.setObject(18, mPartitionColumnStatistics.getAvgColLen());
      preparedStatement.setObject(19, mPartitionColumnStatistics.getMaxColLen());
      preparedStatement.setObject(20, mPartitionColumnStatistics.getNumTrues());
      preparedStatement.setObject(21, mPartitionColumnStatistics.getNumFalses());
      preparedStatement.setLong(22, mPartitionColumnStatistics.getLastAnalyzed());
      preparedStatement.setString(23, mPartitionColumnStatistics.getEngine());
      maxCsId++;
      numRows++;
      preparedStatement.addBatch();
      // Flush every maxBatchSize rows; the final partial batch is flushed below.
      if (numRows == maxBatchSize) {
        preparedStatement.executeBatch();
        numRows = 0;
      }
    }
    if (numRows != 0) {
      preparedStatement.executeBatch();
    }
  } finally {
    closeStmt(preparedStatement);
  }
}
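The insert path follows the standard JDBC batching discipline: accumulate rows with addBatch, flush with executeBatch once maxBatchSize rows have been queued, and flush once more for the remainder. Condensed into a generic helper (the class and method names are ours, and the prepared statement is assumed to have a single string parameter):

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class BatchInsert {
  // Same batching discipline as insertIntoPartColStatTable above: flush via
  // executeBatch every maxBatchSize rows, then once more for the remainder.
  static void insertAll(PreparedStatement ps, List<String> values, int maxBatchSize)
      throws SQLException {
    int numRows = 0;
    for (String v : values) {
      ps.setString(1, v);
      ps.addBatch();
      if (++numRows == maxBatchSize) {
        ps.executeBatch();
        numRows = 0;
      }
    }
    if (numRows != 0) {
      ps.executeBatch(); // final partial batch
    }
  }
}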
Use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.
The class DirectSqlUpdateStat, method updatePartColStatTable:
private void updatePartColStatTable(Map<PartColNameInfo, MPartitionColumnStatistics> updateMap,
    Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
  PreparedStatement pst = null;
  for (Map.Entry<PartColNameInfo, MPartitionColumnStatistics> entry : updateMap.entrySet()) {
    PartColNameInfo partColNameInfo = entry.getKey();
    Long partId = partColNameInfo.partitionId;
    MPartitionColumnStatistics mPartitionColumnStatistics = entry.getValue();
    String update = "UPDATE \"PART_COL_STATS\" SET ";
    update += StatObjectConverter.getUpdatedColumnSql(mPartitionColumnStatistics);
    update += " WHERE \"PART_ID\" = " + partId + " AND \"COLUMN_NAME\" = "
        + quoteString(mPartitionColumnStatistics.getColName());
    try {
      pst = dbConn.prepareStatement(update);
      StatObjectConverter.initUpdatedColumnStatement(mPartitionColumnStatistics, pst);
      LOG.debug("Going to execute update " + update);
      int numUpdate = pst.executeUpdate();
      if (numUpdate != 1) {
        throw new MetaException("Invalid state of PART_COL_STATS for PART_ID " + partId);
      }
    } finally {
      closeStmt(pst);
    }
  }
}
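updatePartColStatTable relies on the affected-row count returned by executeUpdate to assert that each (PART_ID, COLUMN_NAME) key matches exactly one row. A minimal generic sketch of that update-and-verify pattern (the class, table, and column names are placeholders of ours, not the PART_COL_STATS schema, and unlike the code above the WHERE values are bound as parameters):

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

public class SingleRowUpdate {
  // One UPDATE per key, insisting that exactly one row was touched.
  static void updateOne(Connection conn, long id, String name, long newValue)
      throws SQLException {
    String sql = "UPDATE \"STATS\" SET \"VAL\" = ? WHERE \"ID\" = ? AND \"NAME\" = ?";
    try (PreparedStatement ps = conn.prepareStatement(sql)) {
      ps.setLong(1, newValue);
      ps.setLong(2, id);
      ps.setString(3, name);
      if (ps.executeUpdate() != 1) {
        throw new SQLException("Expected exactly one row for id=" + id + " name=" + name);
      }
    }
  }
}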
Use of org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics in project hive by apache.
The class ObjectStore, method getMPartitionColumnStatistics:
private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(Table table,
    List<String> partNames, List<String> colNames, String engine) throws MetaException {
  boolean committed = false;
  try {
    openTransaction();
    // Column definitions are only validated against the table schema here.
    // TODO: we need to verify the partition columns instead.
    try {
      validateTableCols(table, colNames);
    } catch (MetaException me) {
      LOG.warn("The table does not have the same column definition as its partition.");
    }
    List<MPartitionColumnStatistics> result = Collections.emptyList();
    try (Query query = pm.newQuery(MPartitionColumnStatistics.class)) {
      String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4";
      String filter = "tableName == t1 && dbName == t2 && catName == t3 && engine == t4 && (";
      Object[] params = new Object[colNames.size() + partNames.size() + 4];
      int i = 0;
      params[i++] = table.getTableName();
      params[i++] = table.getDbName();
      params[i++] = table.isSetCatName() ? table.getCatName() : getDefaultCatalog(conf);
      params[i++] = engine;
      // OR together one partitionName predicate per partition, then one
      // colName predicate per column, keeping paramStr and params in step.
      int firstI = i;
      for (String s : partNames) {
        filter += ((i == firstI) ? "" : " || ") + "partitionName == p" + i;
        paramStr += ", java.lang.String p" + i;
        params[i++] = s;
      }
      filter += ") && (";
      firstI = i;
      for (String s : colNames) {
        filter += ((i == firstI) ? "" : " || ") + "colName == c" + i;
        paramStr += ", java.lang.String c" + i;
        params[i++] = s;
      }
      filter += ")";
      query.setFilter(filter);
      query.declareParameters(paramStr);
      query.setOrdering("partitionName ascending");
      result = (List<MPartitionColumnStatistics>) query.executeWithArray(params);
      pm.retrieveAll(result);
      result = new ArrayList<>(result);
    } catch (Exception ex) {
      LOG.error("Error retrieving statistics via jdo", ex);
      throw new MetaException(ex.getMessage());
    }
    committed = commitTransaction();
    return result;
  } finally {
    if (!committed) {
      // Note: returning from finally discards the pending return value (and any
      // in-flight exception); on failure the caller simply sees an empty list.
      rollbackTransaction();
      return Collections.emptyList();
    }
  }
}
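To see what this dynamic JDOQL construction actually produces, the loop can be run standalone: two partition names and two column names yield OR-ed partitionName and colName predicates whose parameter names (p4, p5, c6, c7) match positions in the params array. A runnable sketch (the class name and sample values are ours; the construction logic is copied from the method above):

import java.util.*;

public class JdoFilterSketch {
  // Prints the generated JDOQL filter, the declared parameter string, and the
  // positional parameter values, without needing a metastore.
  public static void main(String[] args) {
    List<String> partNames = Arrays.asList("ds=2024-01-01", "ds=2024-01-02");
    List<String> colNames = Arrays.asList("page_id", "user_id");
    String paramStr = "java.lang.String t1, java.lang.String t2, java.lang.String t3, java.lang.String t4";
    String filter = "tableName == t1 && dbName == t2 && catName == t3 && engine == t4 && (";
    Object[] params = new Object[colNames.size() + partNames.size() + 4];
    int i = 0;
    params[i++] = "web_logs";
    params[i++] = "default";
    params[i++] = "hive";
    params[i++] = "hive";
    int firstI = i;
    for (String s : partNames) {
      filter += ((i == firstI) ? "" : " || ") + "partitionName == p" + i;
      paramStr += ", java.lang.String p" + i;
      params[i++] = s;
    }
    filter += ") && (";
    firstI = i;
    for (String s : colNames) {
      filter += ((i == firstI) ? "" : " || ") + "colName == c" + i;
      paramStr += ", java.lang.String c" + i;
      params[i++] = s;
    }
    filter += ")";
    System.out.println(filter);
    System.out.println(paramStr);
    System.out.println(Arrays.toString(params));
  }
}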