Use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
The class HiveAlterHandler, method updateOrGetPartitionColumnStats.
private ColumnStatistics updateOrGetPartitionColumnStats(RawStore msdb, String dbname, String tblname,
    List<String> partVals, List<FieldSchema> oldCols, Table table, Partition part, List<FieldSchema> newCols)
    throws MetaException, InvalidObjectException {
  ColumnStatistics newPartsColStats = null;
  try {
    // If newCols is not specified, fall back to the partition's current columns.
    if (newCols == null) {
      newCols = part.getSd() == null ? new ArrayList<>() : part.getSd().getCols();
    }
    String oldPartName = Warehouse.makePartName(table.getPartitionKeys(), partVals);
    String newPartName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues());
    boolean rename = !part.getDbName().equals(dbname) || !part.getTableName().equals(tblname)
        || !oldPartName.equals(newPartName);
    // No stats update is needed if the alter is neither a rename nor a change to existing columns.
    if (!rename && MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols)) {
      return newPartsColStats;
    }
    List<String> oldColNames = new ArrayList<>(oldCols.size());
    for (FieldSchema oldCol : oldCols) {
      oldColNames.add(oldCol.getName());
    }
    List<String> oldPartNames = Lists.newArrayList(oldPartName);
    List<ColumnStatistics> partsColStats = msdb.getPartitionColumnStatistics(dbname, tblname, oldPartNames, oldColNames);
    assert (partsColStats.size() <= 1);
    for (ColumnStatistics partColStats : partsColStats) {
      // At most one iteration, since stats were fetched for a single partition name.
      List<ColumnStatisticsObj> newStatsObjs = new ArrayList<>();
      List<ColumnStatisticsObj> statsObjs = partColStats.getStatsObj();
      List<String> deletedCols = new ArrayList<>();
      for (ColumnStatisticsObj statsObj : statsObjs) {
        boolean found = false;
        for (FieldSchema newCol : newCols) {
          if (statsObj.getColName().equalsIgnoreCase(newCol.getName())
              && statsObj.getColType().equalsIgnoreCase(newCol.getType())) {
            found = true;
            break;
          }
        }
        if (found) {
          if (rename) {
            msdb.deletePartitionColumnStatistics(dbname, tblname, partColStats.getStatsDesc().getPartName(),
                partVals, statsObj.getColName());
            newStatsObjs.add(statsObj);
          }
        } else {
          msdb.deletePartitionColumnStatistics(dbname, tblname, partColStats.getStatsDesc().getPartName(),
              partVals, statsObj.getColName());
          deletedCols.add(statsObj.getColName());
        }
      }
      StatsSetupConst.removeColumnStatsState(part.getParameters(), deletedCols);
      if (!newStatsObjs.isEmpty()) {
        partColStats.setStatsObj(newStatsObjs);
        newPartsColStats = partColStats;
      }
    }
  } catch (NoSuchObjectException nsoe) {
    // Ignore: getPartitionColumnStatistics does not actually throw NoSuchObjectException here.
  } catch (InvalidInputException iie) {
    throw new InvalidObjectException("Invalid input to delete partition column stats: " + iie);
  }
  return newPartsColStats;
}
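The rename check above hinges on Warehouse.makePartName, which renders partition keys and values into the canonical "k1=v1/k2=v2" form, so a changed db name, table name, or partition value surfaces as a name mismatch. A minimal, self-contained sketch of that comparison; the class name, partition keys, and values below are our own illustration, not part of the Hive source:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class PartNameDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical partition schema: (ds string, hr string).
    List<FieldSchema> keys = Arrays.asList(
        new FieldSchema("ds", "string", null),
        new FieldSchema("hr", "string", null));
    String oldName = Warehouse.makePartName(keys, Arrays.asList("2018-01-01", "10"));
    String newName = Warehouse.makePartName(keys, Arrays.asList("2018-01-02", "10"));
    // Prints ds=2018-01-01/hr=10 -> ds=2018-01-02/hr=10, rename=true.
    System.out.println(oldName + " -> " + newName + ", rename=" + !oldName.equals(newName));
  }
}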
Use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
The class HiveAlterHandler, method alterTable.
@Override
public void alterTable(RawStore msdb, Warehouse wh, String dbname, String name, Table newt,
    EnvironmentContext environmentContext, IHMSHandler handler) throws InvalidOperationException, MetaException {
  name = name.toLowerCase();
  dbname = dbname.toLowerCase();
  final boolean cascade = environmentContext != null && environmentContext.isSetProperties()
      && StatsSetupConst.TRUE.equals(environmentContext.getProperties().get(StatsSetupConst.CASCADE));
  if (newt == null) {
    throw new InvalidOperationException("New table is invalid: " + newt);
  }
  String newTblName = newt.getTableName().toLowerCase();
  String newDbName = newt.getDbName().toLowerCase();
  if (!MetaStoreUtils.validateName(newTblName, handler.getConf())) {
    throw new InvalidOperationException(newTblName + " is not a valid object name");
  }
  String validate = MetaStoreUtils.validateTblColumns(newt.getSd().getCols());
  if (validate != null) {
    throw new InvalidOperationException("Invalid column " + validate);
  }
  Path srcPath = null;
  FileSystem srcFs;
  Path destPath = null;
  FileSystem destFs = null;
  boolean success = false;
  boolean dataWasMoved = false;
  Table oldt;
  List<TransactionalMetaStoreEventListener> transactionalListeners = null;
  if (handler != null) {
    transactionalListeners = handler.getTransactionalListeners();
  }
  try {
    boolean rename = false;
    boolean isPartitionedTable = false;
    List<Partition> parts;
    // Check whether a table with the new name already exists.
    if (!newTblName.equals(name) || !newDbName.equals(dbname)) {
      if (msdb.getTable(newDbName, newTblName) != null) {
        throw new InvalidOperationException("new table " + newDbName + "." + newTblName + " already exists");
      }
      rename = true;
    }
    msdb.openTransaction();
    // Get the old table.
    oldt = msdb.getTable(dbname, name);
    if (oldt == null) {
      throw new InvalidOperationException("table " + dbname + "." + name + " doesn't exist");
    }
    if (oldt.getPartitionKeysSize() != 0) {
      isPartitionedTable = true;
    }
    // Column type compatibility should be checked only for non-views.
    if (MetastoreConf.getBoolVar(handler.getConf(), MetastoreConf.ConfVars.DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES)
        && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString())) {
      // Throws InvalidOperationException if the new column types are not
      // compatible with the current column types.
      checkColTypeChangeCompatible(oldt.getSd().getCols(), newt.getSd().getCols());
    }
    // Check that partition keys have not changed, except for virtual views;
    // however, allow the partition comments to change.
    boolean partKeysPartiallyEqual = checkPartialPartKeysEqual(oldt.getPartitionKeys(), newt.getPartitionKeys());
    if (!oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString())) {
      if (!partKeysPartiallyEqual) {
        throw new InvalidOperationException("partition keys can not be changed.");
      }
    }
    // A rename moves the table data when: 1) the table is not a virtual view,
    // 2) the new location is unchanged or empty, 3) the table is not external, and
    // 4) the table was not initially created with a specified location (checked below).
    if (rename && !oldt.getTableType().equals(TableType.VIRTUAL_VIEW.toString())
        && (oldt.getSd().getLocation().compareTo(newt.getSd().getLocation()) == 0
            || StringUtils.isEmpty(newt.getSd().getLocation()))
        && !MetaStoreUtils.isExternalTable(oldt)) {
      Database olddb = msdb.getDatabase(dbname);
      // If a table was created in a user-specified location using DDL like
      // create table tbl ... location ..., it should be treated like an external table
      // in the table rename; its data location should not be changed. We can check
      // whether the table directory was created directly under its database directory
      // to tell if it is such a table.
      srcPath = new Path(oldt.getSd().getLocation());
      String oldtRelativePath = (new Path(olddb.getLocationUri()).toUri()).relativize(srcPath.toUri()).toString();
      boolean tableInSpecifiedLoc = !oldtRelativePath.equalsIgnoreCase(name)
          && !oldtRelativePath.equalsIgnoreCase(name + Path.SEPARATOR);
      if (!tableInSpecifiedLoc) {
        srcFs = wh.getFs(srcPath);
        // Get the new location.
        Database db = msdb.getDatabase(newDbName);
        Path databasePath = constructRenamedPath(wh.getDatabasePath(db), srcPath);
        destPath = new Path(databasePath, newTblName);
        destFs = wh.getFs(destPath);
        newt.getSd().setLocation(destPath.toString());
        // Check that src and dest are on the same file system.
        if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
          throw new InvalidOperationException("table new location " + destPath
              + " is on a different file system than the old location " + srcPath
              + ". This operation is not supported");
        }
        try {
          if (destFs.exists(destPath)) {
            throw new InvalidOperationException("New location for this table " + newDbName + "." + newTblName
                + " already exists : " + destPath);
          }
          // Check that src exists (this also checks the necessary permissions), then rename src to dest.
          if (srcFs.exists(srcPath) && wh.renameDir(srcPath, destPath, true)) {
            dataWasMoved = true;
          }
        } catch (IOException | MetaException e) {
          LOG.error("Alter Table operation for " + dbname + "." + name + " failed.", e);
          throw new InvalidOperationException("Alter Table operation for " + dbname + "." + name
              + " failed to move data due to: '" + getSimpleMessage(e) + "' See hive log file for details.");
        }
      }
      if (isPartitionedTable) {
        String oldTblLocPath = srcPath.toUri().getPath();
        String newTblLocPath = dataWasMoved ? destPath.toUri().getPath() : null;
        // Update each partition's db name and table name, and also the location field in the partition.
        parts = msdb.getPartitions(dbname, name, -1);
        Map<Partition, ColumnStatistics> columnStatsNeedUpdated = new HashMap<>();
        for (Partition part : parts) {
          String oldPartLoc = part.getSd().getLocation();
          if (dataWasMoved && oldPartLoc.contains(oldTblLocPath)) {
            URI oldUri = new Path(oldPartLoc).toUri();
            String newPath = oldUri.getPath().replace(oldTblLocPath, newTblLocPath);
            Path newPartLocPath = new Path(oldUri.getScheme(), oldUri.getAuthority(), newPath);
            part.getSd().setLocation(newPartLocPath.toString());
          }
          part.setDbName(newDbName);
          part.setTableName(newTblName);
          ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, part.getValues(),
              part.getSd().getCols(), oldt, part, null);
          if (colStats != null) {
            columnStatsNeedUpdated.put(part, colStats);
          }
        }
        msdb.alterTable(dbname, name, newt);
        // alterPartitions is only needed here for changing the partition locations in a table rename.
        if (dataWasMoved) {
          int partsToProcess = parts.size();
          int partitionBatchSize = MetastoreConf.getIntVar(handler.getConf(),
              MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX);
          int batchStart = 0;
          while (partsToProcess > 0) {
            int batchEnd = Math.min(batchStart + partitionBatchSize, parts.size());
            List<Partition> partBatch = parts.subList(batchStart, batchEnd);
            int partBatchSize = partBatch.size();
            partsToProcess -= partBatchSize;
            batchStart += partBatchSize;
            List<List<String>> partValues = new ArrayList<>(partBatchSize);
            for (Partition part : partBatch) {
              partValues.add(part.getValues());
            }
            msdb.alterPartitions(newDbName, newTblName, partValues, partBatch);
          }
        }
        for (Entry<Partition, ColumnStatistics> partColStats : columnStatsNeedUpdated.entrySet()) {
          ColumnStatistics newPartColStats = partColStats.getValue();
          newPartColStats.getStatsDesc().setDbName(newDbName);
          newPartColStats.getStatsDesc().setTableName(newTblName);
          msdb.updatePartitionColumnStatistics(newPartColStats, partColStats.getKey().getValues());
        }
      } else {
        alterTableUpdateTableColumnStats(msdb, oldt, newt);
      }
    } else {
      // Operations other than a table rename.
      if (MetaStoreUtils.requireCalStats(null, null, newt, environmentContext) && !isPartitionedTable) {
        Database db = msdb.getDatabase(newDbName);
        // Update table stats. For a partitioned table, stats are updated in alterPartition().
        MetaStoreUtils.updateTableStatsFast(db, newt, wh, false, true, environmentContext, false);
      }
      if (isPartitionedTable) {
        // Currently only column-related changes can be cascaded in alter table.
        if (!MetaStoreUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) {
          parts = msdb.getPartitions(dbname, name, -1);
          for (Partition part : parts) {
            Partition oldPart = new Partition(part);
            List<FieldSchema> oldCols = part.getSd().getCols();
            part.getSd().setCols(newt.getSd().getCols());
            ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, part.getValues(),
                oldCols, oldt, part, null);
            assert (colStats == null);
            if (cascade) {
              msdb.alterPartition(dbname, name, part.getValues(), part);
            } else {
              // Update only the changed properties (stats).
              oldPart.setParameters(part.getParameters());
              msdb.alterPartition(dbname, name, part.getValues(), oldPart);
            }
          }
          msdb.alterTable(dbname, name, newt);
        } else {
          LOG.warn("Alter table not cascaded to partitions.");
          alterTableUpdateTableColumnStats(msdb, oldt, newt);
        }
      } else {
        alterTableUpdateTableColumnStats(msdb, oldt, newt);
      }
    }
    if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
      if (oldt.getDbName().equalsIgnoreCase(newt.getDbName())) {
        MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_TABLE,
            new AlterTableEvent(oldt, newt, false, true, handler), environmentContext);
      } else {
        MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.DROP_TABLE,
            new DropTableEvent(oldt, true, false, handler), environmentContext);
        MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.CREATE_TABLE,
            new CreateTableEvent(newt, true, handler), environmentContext);
        if (isPartitionedTable) {
          parts = msdb.getPartitions(newt.getDbName(), newt.getTableName(), -1);
          MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ADD_PARTITION,
              new AddPartitionEvent(newt, parts, true, handler), environmentContext);
        }
      }
    }
    // Commit the changes.
    success = msdb.commitTransaction();
  } catch (InvalidObjectException e) {
    LOG.debug("Failed to get object from Metastore ", e);
    throw new InvalidOperationException("Unable to change partition or table."
        + " Check metastore logs for detailed stack: " + e.getMessage());
  } catch (InvalidInputException e) {
    LOG.debug("Accessing Metastore failed due to invalid input ", e);
    throw new InvalidOperationException("Unable to change partition or table."
        + " Check metastore logs for detailed stack: " + e.getMessage());
  } catch (NoSuchObjectException e) {
    LOG.debug("Object not found in metastore ", e);
    throw new InvalidOperationException("Unable to change partition or table. Database " + dbname
        + " does not exist. Check metastore logs for detailed stack: " + e.getMessage());
  } finally {
    if (!success) {
      LOG.error("Failed to alter table " + dbname + "." + name);
      msdb.rollbackTransaction();
      if (dataWasMoved) {
        try {
          if (destFs.exists(destPath)) {
            if (!destFs.rename(destPath, srcPath)) {
              LOG.error("Failed to restore data from " + destPath + " to " + srcPath
                  + " in alter table failure. Manual restore is needed.");
            }
          }
        } catch (IOException e) {
          LOG.error("Failed to restore data from " + destPath + " to " + srcPath
              + " in alter table failure. Manual restore is needed.");
        }
      }
    }
  }
}
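The alterPartitions loop above walks the partition list in windows of at most BATCH_RETRIEVE_MAX entries so a rename of a heavily partitioned table does not issue one oversized metastore call. A minimal sketch of that window arithmetic, isolated into a generic helper; Batcher and inBatches are illustrative names, not Hive APIs:

import java.util.List;
import java.util.function.Consumer;

public final class Batcher {
  // Feeds items to the consumer in slices of at most batchSize elements,
  // mirroring the batchStart/partsToProcess bookkeeping in alterTable.
  public static <T> void inBatches(List<T> items, int batchSize, Consumer<List<T>> processBatch) {
    int remaining = items.size();
    int start = 0;
    while (remaining > 0) {
      int end = Math.min(start + batchSize, items.size());
      List<T> batch = items.subList(start, end); // view over [start, end)
      remaining -= batch.size();
      start += batch.size();
      processBatch.accept(batch);
    }
  }
}

In alterTable's terms, the consumer would collect the partition values of each slice and call msdb.alterPartitions on that slice.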
Use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
The class MetaStoreDirectSql, method getPartitionStats.
public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName,
    final List<String> partNames, List<String> colNames, boolean enableBitVector) throws MetaException {
  if (colNames.isEmpty() || partNames.isEmpty()) {
    return Collections.emptyList();
  }
  final boolean doTrace = LOG.isDebugEnabled();
  final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from "
      + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
      + " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
  Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
    @Override
    public List<Object[]> run(final List<String> inputColNames) throws MetaException {
      Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
        @Override
        public List<Object[]> run(List<String> inputPartNames) throws MetaException {
          String queryText = String.format(queryText0, makeParams(inputColNames.size()),
              makeParams(inputPartNames.size()));
          long start = doTrace ? System.nanoTime() : 0;
          Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
          Object qResult = executeWithArray(query, prepareParams(dbName, tableName, inputPartNames, inputColNames),
              queryText);
          timingTrace(doTrace, queryText0, start, (doTrace ? System.nanoTime() : 0));
          if (qResult == null) {
            query.closeAll();
            return Collections.emptyList();
          }
          addQueryAfterUse(query);
          return ensureList(qResult);
        }
      };
      try {
        return runBatched(partNames, b2);
      } finally {
        addQueryAfterUse(b2);
      }
    }
  };
  List<Object[]> list = runBatched(colNames, b);
  List<ColumnStatistics> result = new ArrayList<ColumnStatistics>(Math.min(list.size(), partNames.size()));
  String lastPartName = null;
  int from = 0;
  // Rows arrive ordered by partition name; fold each run of equal names into one
  // ColumnStatistics. The extra pass at i == list.size() flushes the final group.
  for (int i = 0; i <= list.size(); ++i) {
    boolean isLast = i == list.size();
    String partName = isLast ? null : (String) list.get(i)[0];
    if (!isLast && partName.equals(lastPartName)) {
      continue;
    } else if (from != i) {
      ColumnStatisticsDesc csd = new ColumnStatisticsDesc(false, dbName, tableName);
      csd.setPartName(lastPartName);
      result.add(makeColumnStats(list.subList(from, i), csd, 1));
    }
    lastPartName = partName;
    from = i;
    Deadline.checkTimeout();
  }
  b.closeAllQueries();
  return result;
}
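The grouping idiom in the final loop is worth seeing on its own: rows come back sorted by key, and one extra iteration past the end acts as a sentinel that flushes the last run. A stripped-down sketch of the same pattern; groupByFirstColumn and the String[] row type are our own illustration, not Hive code:

import java.util.ArrayList;
import java.util.List;

public final class RunGrouping {
  // Groups adjacent rows whose first column is equal, assuming rows are sorted by it.
  public static List<List<String[]>> groupByFirstColumn(List<String[]> rows) {
    List<List<String[]>> groups = new ArrayList<>();
    String lastKey = null;
    int from = 0;
    for (int i = 0; i <= rows.size(); ++i) {
      boolean isLast = i == rows.size();
      String key = isLast ? null : rows.get(i)[0];
      if (!isLast && key.equals(lastKey)) {
        continue; // still inside the current run
      } else if (from != i) {
        groups.add(new ArrayList<>(rows.subList(from, i))); // flush the run [from, i)
      }
      lastKey = key;
      from = i;
    }
    return groups;
  }
}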
Use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
The class CachedStore, method getTableColumnStatistics.
@Override
public ColumnStatistics getTableColumnStatistics(String dbName, String tblName, List<String> colNames)
    throws MetaException, NoSuchObjectException {
  dbName = StringUtils.normalizeIdentifier(dbName);
  tblName = StringUtils.normalizeIdentifier(tblName);
  if (!shouldCacheTable(dbName, tblName)) {
    return rawStore.getTableColumnStatistics(dbName, tblName, colNames);
  }
  Table table = sharedCache.getTableFromCache(dbName, tblName);
  if (table == null) {
    // The table is not yet loaded in the cache; fall back to the raw store.
    return rawStore.getTableColumnStatistics(dbName, tblName, colNames);
  }
  ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tblName);
  List<ColumnStatisticsObj> colStatObjs = sharedCache.getTableColStatsFromCache(dbName, tblName, colNames);
  return new ColumnStatistics(csd, colStatObjs);
}
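This is a cache-aside read: non-cacheable or not-yet-loaded tables fall through to the backing RawStore, while cache hits are assembled into a fresh table-level ColumnStatistics (descriptor with isTblLevel=true plus one ColumnStatisticsObj per column that has stats). A hypothetical caller showing the shape of the result; the store handle, database, table, and column names below are invented for the example:

import java.util.Arrays;
import org.apache.hadoop.hive.metastore.RawStore;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

public class StatsReadDemo {
  // "default", "web_logs", and the column names are hypothetical.
  static void dump(RawStore store) throws Exception {
    ColumnStatistics stats =
        store.getTableColumnStatistics("default", "web_logs", Arrays.asList("ip", "bytes"));
    for (ColumnStatisticsObj obj : stats.getStatsObj()) {
      // getStatsData() is a thrift union; getSetField() names the active stats kind.
      System.out.println(obj.getColName() + " -> " + obj.getStatsData().getSetField());
    }
  }
}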
Use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
The class CachedStore, method prewarm.
/**
 * Initializes the caches in SharedCache by getting the objects from the Metastore DB via
 * ObjectStore and populating the respective caches.
 *
 * @param rawStore the backing store to read from
 */
@VisibleForTesting
static void prewarm(RawStore rawStore) {
  if (isCachePrewarmed.get()) {
    return;
  }
  long startTime = System.nanoTime();
  LOG.info("Prewarming CachedStore");
  while (!isCachePrewarmed.get()) {
    // Register a generous deadline so our raw store calls do not throw,
    // since we are not going through RawStoreProxy.
    Deadline.registerIfNot(1000000);
    List<String> dbNames;
    try {
      dbNames = rawStore.getAllDatabases();
    } catch (MetaException e) {
      // Try again.
      continue;
    }
    LOG.info("Number of databases to prewarm: {}", dbNames.size());
    List<Database> databases = new ArrayList<>(dbNames.size());
    for (String dbName : dbNames) {
      try {
        databases.add(rawStore.getDatabase(dbName));
      } catch (NoSuchObjectException e) {
        // Continue with the next database.
        continue;
      }
    }
    sharedCache.populateDatabasesInCache(databases);
    LOG.debug("Databases cache is now prewarmed. Now adding tables, partitions and statistics to the cache");
    int numberOfDatabasesCachedSoFar = 0;
    for (String dbName : dbNames) {
      dbName = StringUtils.normalizeIdentifier(dbName);
      List<String> tblNames;
      try {
        tblNames = rawStore.getAllTables(dbName);
      } catch (MetaException e) {
        // Continue with the next database.
        continue;
      }
      int numberOfTablesCachedSoFar = 0;
      for (String tblName : tblNames) {
        tblName = StringUtils.normalizeIdentifier(tblName);
        if (!shouldCacheTable(dbName, tblName)) {
          continue;
        }
        Table table;
        try {
          table = rawStore.getTable(dbName, tblName);
        } catch (MetaException e) {
          // The table may have been dropped in the meantime; in that case, continue with the next table.
          continue;
        }
        List<String> colNames = MetaStoreUtils.getColumnNamesForTable(table);
        try {
          ColumnStatistics tableColStats = null;
          List<Partition> partitions = null;
          List<ColumnStatistics> partitionColStats = null;
          AggrStats aggrStatsAllPartitions = null;
          AggrStats aggrStatsAllButDefaultPartition = null;
          if (table.isSetPartitionKeys()) {
            Deadline.startTimer("getPartitions");
            partitions = rawStore.getPartitions(dbName, tblName, Integer.MAX_VALUE);
            Deadline.stopTimer();
            List<String> partNames = new ArrayList<>(partitions.size());
            for (Partition p : partitions) {
              partNames.add(Warehouse.makePartName(table.getPartitionKeys(), p.getValues()));
            }
            if (!partNames.isEmpty()) {
              // Get partition column stats for this table.
              Deadline.startTimer("getPartitionColumnStatistics");
              partitionColStats = rawStore.getPartitionColumnStatistics(dbName, tblName, partNames, colNames);
              Deadline.stopTimer();
              // Get aggregate stats for all partitions of the table, and again for all
              // partitions except the default one.
              Deadline.startTimer("getAggrPartitionColumnStatistics");
              aggrStatsAllPartitions = rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
              Deadline.stopTimer();
              // Remove the default partition from the partition names and get aggregate stats again.
              List<FieldSchema> partKeys = table.getPartitionKeys();
              String defaultPartitionValue = MetastoreConf.getVar(rawStore.getConf(), ConfVars.DEFAULTPARTITIONNAME);
              List<String> partCols = new ArrayList<>();
              List<String> partVals = new ArrayList<>();
              for (FieldSchema fs : partKeys) {
                partCols.add(fs.getName());
                partVals.add(defaultPartitionValue);
              }
              String defaultPartitionName = FileUtils.makePartName(partCols, partVals);
              partNames.remove(defaultPartitionName);
              Deadline.startTimer("getAggrPartitionColumnStatistics");
              aggrStatsAllButDefaultPartition = rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
              Deadline.stopTimer();
            }
          } else {
            Deadline.startTimer("getTableColumnStatistics");
            tableColStats = rawStore.getTableColumnStatistics(dbName, tblName, colNames);
            Deadline.stopTimer();
          }
          sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats,
              aggrStatsAllPartitions, aggrStatsAllButDefaultPartition);
        } catch (MetaException | NoSuchObjectException e) {
          // Continue with the next table.
          continue;
        }
        LOG.debug("Processed database: {}'s table: {}. Cached {} / {} tables so far.", dbName, tblName,
            ++numberOfTablesCachedSoFar, tblNames.size());
      }
      LOG.debug("Processed database: {}. Cached {} / {} databases so far.", dbName,
          ++numberOfDatabasesCachedSoFar, dbNames.size());
    }
    isCachePrewarmed.set(true);
  }
  LOG.info("CachedStore initialized");
  long endTime = System.nanoTime();
  LOG.info("Time taken in prewarming = " + (endTime - startTime) / 1000000 + "ms");
  sharedCache.completeTableCachePrewarm();
}
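prewarm brackets every potentially slow RawStore call with the metastore's Deadline timers, after registering a timeout budget once per thread via Deadline.registerIfNot, since these calls do not go through RawStoreProxy. A minimal sketch of that pattern, written as a try/finally variant; the method name and the 1000 figure are illustrative, and the assumption that the budget is in milliseconds is ours:

import org.apache.hadoop.hive.metastore.Deadline;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class DeadlineDemo {
  static void timedCall() throws MetaException {
    Deadline.registerIfNot(1000); // register the budget once per thread (assumed milliseconds)
    Deadline.startTimer("slowMetastoreCall");
    try {
      // ... RawStore work would go here ...
    } finally {
      Deadline.stopTimer();
    }
  }
}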