use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class DirectSqlUpdateStat method populateInsertUpdateMap.
/**
 * Sorts the incoming partition column statistics into rows to update and rows to insert.
 *
 * <p>First, the (PART_ID, COLUMN_NAME) pairs already stored in PART_COL_STATS are read for
 * every partition id found in {@code statsPartInfoMap}; the lookup is issued as one or more
 * queries produced by {@code TxnUtils.buildQueryWithINClause}. Then every incoming statistics
 * object is converted with {@code StatObjectConverter.convertToMPartitionColumnStatistics} and
 * placed into {@code updateMap} if its (partition id, column name) pair was found, or into
 * {@code insertMap} otherwise.
 *
 * @param statsPartInfoMap incoming statistics, keyed by the partition they belong to
 * @param updateMap receives the converted statistics whose pair already exists in the table
 * @param insertMap receives the converted statistics whose pair does not exist yet
 * @param dbConn JDBC connection used to run the lookup queries; it is not closed here
 * @throws SQLException if creating a statement or running a lookup query fails
 * @throws MetaException declared by this method; presumably raised by the helpers it calls
 * @throws NoSuchObjectException declared by this method; presumably raised by the helpers it calls
 */
private void populateInsertUpdateMap(Map<PartitionInfo, ColumnStatistics> statsPartInfoMap, Map<PartColNameInfo, MPartitionColumnStatistics> updateMap, Map<PartColNameInfo, MPartitionColumnStatistics> insertMap, Connection dbConn) throws SQLException, MetaException, NoSuchObjectException {
    StringBuilder prefix = new StringBuilder();
    StringBuilder suffix = new StringBuilder();
    List<String> queries = new ArrayList<>();
    Set<PartColNameInfo> selectedParts = new HashSet<>();
    List<Long> partIdList = statsPartInfoMap.keySet().stream().map(e -> e.partitionId).collect(Collectors.toList());

    prefix.append("select \"PART_ID\", \"COLUMN_NAME\" from \"PART_COL_STATS\" WHERE ");
    TxnUtils.buildQueryWithINClause(conf, queries, prefix, suffix, partIdList, "\"PART_ID\"", true, false);

    for (String query : queries) {
        // Handles are scoped to one iteration so that a failure never hands the previous
        // iteration's already-closed statement/result set to close().
        Statement statement = null;
        ResultSet rs = null;
        try {
            statement = dbConn.createStatement();
            LOG.debug("Going to execute query " + query);
            rs = statement.executeQuery(query);
            while (rs.next()) {
                selectedParts.add(new PartColNameInfo(rs.getLong(1), rs.getString(2)));
            }
        } finally {
            close(rs, statement, null);
        }
    }

    for (Map.Entry<PartitionInfo, ColumnStatistics> entry : statsPartInfoMap.entrySet()) {
        long partId = entry.getKey().partitionId;
        ColumnStatistics colStats = entry.getValue();
        ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
        for (ColumnStatisticsObj statisticsObj : colStats.getStatsObj()) {
            PartColNameInfo key = new PartColNameInfo(partId, statisticsObj.getColName());
            // A pair that already exists in PART_COL_STATS is an update; anything else is an insert.
            Map<PartColNameInfo, MPartitionColumnStatistics> target = selectedParts.contains(key) ? updateMap : insertMap;
            target.put(key, StatObjectConverter.convertToMPartitionColumnStatistics(null, statsDesc, statisticsObj, colStats.getEngine()));
        }
    }
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class TestPartitionStat method createPartColStats.
/**
 * Builds a {@code ColumnStatistics} object for one partition, carrying a single statistics
 * entry for the column {@code PART_COL_NAME}.
 *
 * <p>The partition name is derived from {@code PART_COL_NAME} and {@code partValue} via
 * {@code FileUtils.makePartName}; the descriptor targets {@code DB_NAME}.{@code TABLE_NAME}
 * and the engine is set to {@code HIVE_ENGINE}.
 *
 * @param partValue partition values passed to {@code FileUtils.makePartName}
 * @param partitionStats statistics payload stored in the single "int"-typed statistics object
 * @return the populated column statistics
 */
private ColumnStatistics createPartColStats(List<String> partValue, ColumnStatisticsData partitionStats) {
    final String partitionName = FileUtils.makePartName(Collections.singletonList(PART_COL_NAME), partValue);

    // First constructor argument is false here; presumably the table-level flag - confirm
    // against ColumnStatisticsDesc.
    final ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc(false, DB_NAME, TABLE_NAME);
    descriptor.setPartName(partitionName);

    final ColumnStatistics result = new ColumnStatistics();
    result.setStatsDesc(descriptor);
    result.setEngine(HIVE_ENGINE);
    result.addToStatsObj(new ColumnStatisticsObj(PART_COL_NAME, "int", partitionStats));
    return result;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class MetaStoreDirectSql method getPartitionStats.
/**
 * Reads partition column statistics with a direct SQL query against {@code PART_COL_STATS}.
 *
 * <p>The query is batched twice: the outer batch runs over {@code colNames} and, for each
 * column batch, an inner batch runs over {@code partNames}. Each query orders its rows by
 * partition name. The collected rows are then walked once, and every run of consecutive rows
 * sharing the same partition name is turned into one {@code ColumnStatistics} via
 * {@code makeColumnStats}.
 *
 * @param catName catalog name bound to the query and set on each result descriptor
 * @param dbName database name bound to the query and set on each result descriptor
 * @param tableName table name bound to the query and set on each result descriptor
 * @param partNames partition names to fetch statistics for
 * @param colNames column names to fetch statistics for
 * @param engine engine name bound to the query and passed to {@code makeColumnStats}
 * @param enableBitVector forwarded to {@code getStatsList} when building the select list
 * @return one entry per run of consecutive rows with the same partition name; an empty list
 *         if {@code colNames} or {@code partNames} is empty
 * @throws MetaException declared by this method and by the batched query callbacks
 */
public List<ColumnStatistics> getPartitionStats(final String catName, final String dbName, final String tableName, final List<String> partNames, List<String> colNames, String engine, boolean enableBitVector) throws MetaException {
    if (colNames.isEmpty() || partNames.isEmpty()) {
        return Collections.emptyList();
    }

    final boolean doTrace = LOG.isDebugEnabled();
    // %1$s and %2$s are filled per batch with the parameter placeholders for columns and partitions.
    final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector)
        + " from " + " " + PART_COL_STATS
        + " where \"CAT_NAME\" = ? and \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and "
        + "\"COLUMN_NAME\""
        + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) "
        + " and \"ENGINE\" = ? "
        + " order by \"PARTITION_NAME\"";

    // Outer batch: one invocation per slice of column names.
    Batchable<String, Object[]> columnBatch = new Batchable<String, Object[]>() {
        @Override
        public List<Object[]> run(final List<String> columnSlice) throws MetaException {
            // Inner batch: one invocation per slice of partition names for the current column slice.
            Batchable<String, Object[]> partitionBatch = new Batchable<String, Object[]>() {
                @Override
                public List<Object[]> run(List<String> partitionSlice) throws MetaException {
                    String queryText = String.format(queryText0, makeParams(columnSlice.size()), makeParams(partitionSlice.size()));
                    long start = doTrace ? System.nanoTime() : 0;
                    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
                    try {
                        Object[] params = prepareParams(catName, dbName, tableName, partitionSlice, columnSlice, engine);
                        Object qResult = executeWithArray(query, params, queryText);
                        MetastoreDirectSqlUtils.timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
                        if (qResult != null) {
                            return MetastoreDirectSqlUtils.ensureList(qResult);
                        }
                        return Collections.emptyList();
                    } finally {
                        addQueryAfterUse(query);
                    }
                }
            };
            try {
                return Batchable.runBatched(batchSize, partNames, partitionBatch);
            } finally {
                addQueryAfterUse(partitionBatch);
            }
        }
    };

    List<ColumnStatistics> stats = new ArrayList<>(partNames.size());
    String groupPartName = null;
    int groupStart = 0;
    try {
        List<Object[]> rows = Batchable.runBatched(batchSize, colNames, columnBatch);
        final int rowCount = rows.size();
        // Runs one step past the last row so that the final group is flushed as well.
        for (int idx = 0; idx <= rowCount; ++idx) {
            final boolean pastEnd = (idx == rowCount);
            final String currentPartName = pastEnd ? null : (String) rows.get(idx)[0];
            if (!pastEnd && currentPartName.equals(groupPartName)) {
                // Still inside the current group; nothing to flush yet.
                continue;
            }
            if (groupStart != idx) {
                ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc(false, dbName, tableName);
                descriptor.setCatName(catName);
                descriptor.setPartName(groupPartName);
                stats.add(makeColumnStats(rows.subList(groupStart, idx), descriptor, 1, engine));
            }
            groupPartName = currentPartName;
            groupStart = idx;
            Deadline.checkTimeout();
        }
    } finally {
        columnBatch.closeAllQueries();
    }
    return stats;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class ObjectStore method updateTableColumnStatistics.
/**
 * Writes table-level column statistics and refreshes the table parameters that track the
 * state of those statistics, all inside one metastore transaction.
 *
 * <p>Each statistics object in {@code colStats} is converted to an
 * {@code MTableColumnStatistics} and written next to whatever existing statistics were found
 * for the same column. A copy of the table parameters is then updated with the column stats
 * state and, for transactional tables, with the basic stats state; the copy is stored back on
 * the table before the transaction is committed. The transaction is rolled back if the commit
 * did not succeed or an exception escaped.
 *
 * @param colStats statistics to store, including the descriptor naming the target table
 * @param validWriteIds write-id list forwarded to the transactional stats checks
 * @param writeId write id recorded on the table when transactional stats are supported
 * @return the new table parameters if the transaction committed, otherwise {@code null}
 * @throws MetaException if {@code verifyStatsChangeCtx} reports an error message, or raised
 *         by the helpers called here
 * @throws NoSuchObjectException declared by this method; presumably raised by the helpers it calls
 * @throws InvalidObjectException declared by this method; presumably raised by the helpers it calls
 * @throws InvalidInputException declared by this method; presumably raised by the helpers it calls
 */
@Override
public Map<String, String> updateTableColumnStatistics(ColumnStatistics colStats, String validWriteIds, long writeId) throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
    boolean committed = false;
    openTransaction();
    try {
        final List<ColumnStatisticsObj> incomingStats = colStats.getStatsObj();
        final ColumnStatisticsDesc descriptor = colStats.getStatsDesc();

        // DataNucleus objects get detached all over the place for no (real) reason.
        // So let's not use them anywhere unless absolutely necessary.
        final String catName;
        if (descriptor.isSetCatName()) {
            catName = descriptor.getCatName();
        } else {
            catName = getDefaultCatalog(conf);
        }
        final MTable mTable = ensureGetMTable(catName, descriptor.getDbName(), descriptor.getTableName());
        final Table table = convertToTable(mTable);

        // Column names are gathered up front because the existing stats are looked up for all of them at once.
        final List<String> colNames = new ArrayList<>();
        for (ColumnStatisticsObj incoming : incomingStats) {
            colNames.add(incoming.getColName());
        }

        final Map<String, MTableColumnStatistics> existingStats = getPartitionColStats(table, colNames, colStats.getEngine());
        for (ColumnStatisticsObj incoming : incomingStats) {
            MTableColumnStatistics converted = StatObjectConverter.convertToMTableColumnStatistics(mTable, descriptor, incoming, colStats.getEngine());
            writeMTableColumnStatistics(table, converted, existingStats.get(incoming.getColName()));
            // colNames is already complete; adding the name again would create duplicates.
        }

        // TODO: (HIVE-20109) ideally the col stats stats should be in colstats, not in the table!
        // Update the table properties; the table is known to exist at this point.
        final String dbName = table.getDbName();
        final String tblName = table.getTableName();
        final Map<String, String> newParams = new HashMap<>(table.getParameters());
        StatsSetupConst.setColumnStatsState(newParams, colNames);

        if (TxnUtils.isTransactionalTable(mTable.getParameters())) {
            if (areTxnStatsSupported) {
                String errorMsg = verifyStatsChangeCtx(TableName.getDbTable(dbName, tblName), mTable.getParameters(), newParams, writeId, validWriteIds, true);
                if (errorMsg != null) {
                    throw new MetaException(errorMsg);
                }
                if (!isCurrentStatsValidForTheQuery(mTable, validWriteIds, true)) {
                    // Force the flag to invalid no matter what it currently holds.
                    StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
                    LOG.info("Removed COLUMN_STATS_ACCURATE from the parameters of the table " + dbName + "." + tblName);
                }
                mTable.setWriteId(writeId);
            } else {
                StatsSetupConst.setBasicStatsState(newParams, StatsSetupConst.FALSE);
            }
        }

        mTable.setParameters(newParams);
        committed = commitTransaction();
        // TODO: similar to update...Part, this used to do "return committed;"; makes little sense.
        if (committed) {
            return newParams;
        }
        return null;
    } finally {
        if (!committed) {
            rollbackTransaction();
        }
    }
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc in project hive by apache.
the class StatObjectConverter method getTableColumnStatisticsDesc.
/**
 * Creates a table-level {@code ColumnStatisticsDesc} from a stored table column statistics
 * model object.
 *
 * <p>The catalog name, database name, table name and last-analyzed value are copied from
 * {@code mStatsObj}, and the descriptor is flagged as table level.
 *
 * @param mStatsObj model object to read the identifying fields from
 * @return a new descriptor populated from {@code mStatsObj}
 */
public static ColumnStatisticsDesc getTableColumnStatisticsDesc(MTableColumnStatistics mStatsObj) {
    final ColumnStatisticsDesc descriptor = new ColumnStatisticsDesc();
    // Identify the table the statistics belong to.
    descriptor.setCatName(mStatsObj.getCatName());
    descriptor.setDbName(mStatsObj.getDbName());
    descriptor.setTableName(mStatsObj.getTableName());
    descriptor.setLastAnalyzed(mStatsObj.getLastAnalyzed());
    // These statistics describe the table itself.
    descriptor.setIsTblLevel(true);
    return descriptor;
}
Aggregations