use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class HiveAlterHandler method alterPartition.
/**
 * Alters the metadata of a single partition, either in place or as a rename.
 *
 * <p>When {@code part_vals} is null or empty the partition is looked up by
 * {@code new_part.getValues()} and updated in place. Otherwise {@code part_vals}
 * identifies the existing partition and {@code new_part.getValues()} the values it is
 * renamed to; for tables that are not external the partition directory is renamed as
 * well, and that data move is reverted if the metastore transaction does not commit.
 *
 * @param msdb store used for all metadata reads/writes and for the transaction
 * @param wh warehouse used to resolve paths and file systems and to move directories
 * @param dbname database of the table that owns the partition
 * @param name name of the table that owns the partition
 * @param part_vals values of the existing partition for a rename; null or empty for an
 *          in-place alter
 * @param new_part the new partition definition; its DDL time parameter is set to the
 *          current time when it is missing or zero
 * @param environmentContext passed through to the stats helpers and event listeners
 * @param handler source of the transactional listeners; may be null, in which case no
 *          listener is notified
 * @return the partition as it was before the change
 * @throws InvalidOperationException if the in-place alter cannot find the table or the
 *           partition, if the rename target is on a different file system or already
 *           exists, if the source/destination cannot be accessed, or if the column
 *           statistics cannot be rewritten
 * @throws InvalidObjectException if, for a rename, the table or the old partition does
 *           not exist
 * @throws AlreadyExistsException if, for a rename, a partition with the new values
 *           already exists
 * @throws MetaException if the destination parent directory cannot be created, or is
 *           propagated from the directory rename
 */
@Override
public Partition alterPartition(final RawStore msdb, Warehouse wh, final String dbname, final String name, final List<String> part_vals, final Partition new_part, EnvironmentContext environmentContext, IHMSHandler handler) throws InvalidOperationException, InvalidObjectException, AlreadyExistsException, MetaException {
  boolean success = false;
  Partition oldPart;
  List<TransactionalMetaStoreEventListener> transactionalListeners = null;
  if (handler != null) {
    transactionalListeners = handler.getTransactionalListeners();
  }
  // Set DDL time to now if not specified
  if (new_part.getParameters() == null || new_part.getParameters().get(hive_metastoreConstants.DDL_TIME) == null || Integer.parseInt(new_part.getParameters().get(hive_metastoreConstants.DDL_TIME)) == 0) {
    new_part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(System.currentTimeMillis() / 1000));
  }
  // alter partition
  if (part_vals == null || part_vals.isEmpty()) {
    try {
      msdb.openTransaction();
      Table tbl = msdb.getTable(dbname, name);
      if (tbl == null) {
        throw new InvalidObjectException("Unable to alter partition because table or database does not exist.");
      }
      oldPart = msdb.getPartition(dbname, name, new_part.getValues());
      if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
        // if stats are same, no need to update
        if (MetaStoreUtils.isFastStatsSame(oldPart, new_part)) {
          MetaStoreUtils.updateBasicState(environmentContext, new_part.getParameters());
        } else {
          MetaStoreUtils.updatePartitionStatsFast(new_part, tbl, wh, false, true, environmentContext, false);
        }
      }
      // PartitionView does not have SD. We do not need update its column stats
      if (oldPart.getSd() != null) {
        updateOrGetPartitionColumnStats(msdb, dbname, name, new_part.getValues(), oldPart.getSd().getCols(), tbl, new_part, null);
      }
      msdb.alterPartition(dbname, name, new_part.getValues(), new_part);
      if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
        MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_PARTITION, new AlterPartitionEvent(oldPart, new_part, tbl, false, true, handler), environmentContext);
      }
      success = msdb.commitTransaction();
    } catch (InvalidObjectException e) {
      // Keep the underlying reason: the bare message alone gives the caller nothing to act on.
      LOG.warn("Alter failed", e);
      throw new InvalidOperationException("alter is not possible: " + e.getMessage());
    } catch (NoSuchObjectException e) {
      // old partition does not exist
      LOG.warn("Alter failed", e);
      throw new InvalidOperationException("alter is not possible: " + e.getMessage());
    } finally {
      if (!success) {
        msdb.rollbackTransaction();
      }
    }
    return oldPart;
  }
  // rename partition
  String oldPartLoc;
  String newPartLoc;
  Path srcPath = null;
  Path destPath = null;
  FileSystem srcFs;
  FileSystem destFs = null;
  // Tracks whether the directory was physically renamed, so a failed transaction can undo it.
  boolean dataWasMoved = false;
  try {
    msdb.openTransaction();
    Table tbl = msdb.getTable(dbname, name);
    if (tbl == null) {
      throw new InvalidObjectException("Unable to alter partition because table or database does not exist.");
    }
    try {
      oldPart = msdb.getPartition(dbname, name, part_vals);
    } catch (NoSuchObjectException e) {
      // this means there is no existing partition
      throw new InvalidObjectException("Unable to rename partition because old partition does not exist");
    }
    Partition check_part;
    try {
      check_part = msdb.getPartition(dbname, name, new_part.getValues());
    } catch (NoSuchObjectException e) {
      // this means there is no existing partition
      check_part = null;
    }
    if (check_part != null) {
      throw new AlreadyExistsException("Partition already exists:" + dbname + "." + name + "." + new_part.getValues());
    }
    // 3) rename the partition directory if it is not an external table
    if (!tbl.getTableType().equals(TableType.EXTERNAL_TABLE.toString())) {
      try {
        // if tbl location is available use it
        // else derive the tbl location from database location
        destPath = wh.getPartitionPath(msdb.getDatabase(dbname), tbl, new_part.getValues());
        destPath = constructRenamedPath(destPath, new Path(new_part.getSd().getLocation()));
      } catch (NoSuchObjectException e) {
        LOG.debug("Didn't find object in metastore ", e);
        throw new InvalidOperationException("Unable to change partition or table. Database " + dbname + " does not exist" + " Check metastore logs for detailed stack." + e.getMessage());
      }
      if (destPath != null) {
        newPartLoc = destPath.toString();
        oldPartLoc = oldPart.getSd().getLocation();
        LOG.info("srcPath:" + oldPartLoc);
        LOG.info("destPath:" + newPartLoc);
        srcPath = new Path(oldPartLoc);
        srcFs = wh.getFs(srcPath);
        destFs = wh.getFs(destPath);
        // check that src and dest are on the same file system
        if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
          throw new InvalidOperationException("New table location " + destPath + " is on a different file system than the old location " + srcPath + ". This operation is not supported.");
        }
        try {
          if (srcFs.exists(srcPath)) {
            if (newPartLoc.compareTo(oldPartLoc) != 0 && destFs.exists(destPath)) {
              throw new InvalidOperationException("New location for this table " + tbl.getDbName() + "." + tbl.getTableName() + " already exists : " + destPath);
            }
            // if destPath's parent path doesn't exist, we should mkdir it
            Path destParentPath = destPath.getParent();
            if (!wh.mkdirs(destParentPath)) {
              throw new MetaException("Unable to create path " + destParentPath);
            }
            // rename the data directory
            wh.renameDir(srcPath, destPath, true);
            LOG.info("Partition directory rename from " + srcPath + " to " + destPath + " done.");
            dataWasMoved = true;
          }
        } catch (IOException e) {
          LOG.error("Cannot rename partition directory from " + srcPath + " to " + destPath, e);
          throw new InvalidOperationException("Unable to access src or dest location for partition " + tbl.getDbName() + "." + tbl.getTableName() + " " + new_part.getValues());
        } catch (MetaException me) {
          LOG.error("Cannot rename partition directory from " + srcPath + " to " + destPath, me);
          throw me;
        }
        new_part.getSd().setLocation(newPartLoc);
      }
    } else {
      // External table: data stays where it is, so the renamed partition keeps the old location.
      new_part.getSd().setLocation(oldPart.getSd().getLocation());
    }
    if (MetaStoreUtils.requireCalStats(oldPart, new_part, tbl, environmentContext)) {
      MetaStoreUtils.updatePartitionStatsFast(new_part, tbl, wh, false, true, environmentContext, false);
    }
    String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(), new_part.getValues());
    ColumnStatistics cs = updateOrGetPartitionColumnStats(msdb, dbname, name, oldPart.getValues(), oldPart.getSd().getCols(), tbl, new_part, null);
    msdb.alterPartition(dbname, name, part_vals, new_part);
    if (cs != null) {
      // Re-attach the returned column statistics under the new partition name.
      cs.getStatsDesc().setPartName(newPartName);
      try {
        msdb.updatePartitionColumnStatistics(cs, new_part.getValues());
      } catch (InvalidInputException iie) {
        throw new InvalidOperationException("Unable to update partition stats in table rename." + iie);
      } catch (NoSuchObjectException nsoe) {
        // It is ok, ignore
      }
    }
    if (transactionalListeners != null && !transactionalListeners.isEmpty()) {
      MetaStoreListenerNotifier.notifyEvent(transactionalListeners, EventMessage.EventType.ALTER_PARTITION, new AlterPartitionEvent(oldPart, new_part, tbl, false, true, handler), environmentContext);
    }
    success = msdb.commitTransaction();
  } finally {
    if (!success) {
      LOG.error("Failed to rename a partition. Rollback transaction");
      msdb.rollbackTransaction();
      if (dataWasMoved) {
        LOG.error("Revert the data move in renaming a partition.");
        try {
          if (destFs.exists(destPath)) {
            wh.renameDir(destPath, srcPath, false);
          }
        } catch (MetaException | IOException e) {
          // Log the cause too: whoever performs the manual restore needs to know why it failed.
          LOG.error("Failed to restore partition data from " + destPath + " to " + srcPath + " in alter partition failure. Manual restore is needed.", e);
        }
      }
    }
  }
  return oldPart;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class HiveAlterHandler method alterTableUpdateTableColumnStats.
/**
 * Applies {@code newTable} over {@code oldTable} in the store and reconciles the
 * table-level column statistics with the change.
 *
 * <p>Statistics are left alone when the database name, table name and columns (as
 * judged by {@code MetaStoreUtils.columnsIncludedByNameType}) are unchanged. Otherwise
 * stats for columns that no longer match by name and type are deleted, and when the
 * table is renamed or moved the remaining stats are deleted under the old name and
 * written back under the new one after the table itself has been altered.
 *
 * @param msdb store that holds the table and its statistics
 * @param oldTable the table definition before the change
 * @param newTable the table definition after the change
 * @throws MetaException propagated from the store
 * @throws InvalidObjectException if the store rejects the statistics input
 */
@VisibleForTesting
void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTable) throws MetaException, InvalidObjectException {
  final String dbName = oldTable.getDbName().toLowerCase();
  final String tableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(oldTable.getTableName());
  final String newDbName = newTable.getDbName().toLowerCase();
  final String newTableName = org.apache.hadoop.hive.metastore.utils.StringUtils.normalizeIdentifier(newTable.getTableName());
  try {
    final List<FieldSchema> oldCols = oldTable.getSd().getCols();
    final List<FieldSchema> newCols = newTable.getSd().getCols();
    // Stats objects that must be re-created under the new db/table name.
    final List<ColumnStatisticsObj> carriedOver = new ArrayList<>();
    final boolean sameIdentity = newDbName.equals(dbName) && newTableName.equals(tableName);
    ColumnStatistics colStats = null;
    // Nothing to update if everything is the same
    boolean updateColumnStats = !(sameIdentity && MetaStoreUtils.columnsIncludedByNameType(oldCols, newCols));
    if (updateColumnStats) {
      final List<String> oldColNames = new ArrayList<>(oldCols.size());
      for (FieldSchema col : oldCols) {
        oldColNames.add(col.getName());
      }
      // Collect column stats which need to be rewritten and remove old stats
      colStats = msdb.getTableColumnStatistics(dbName, tableName, oldColNames);
      updateColumnStats = colStats != null;
      final List<ColumnStatisticsObj> existing = updateColumnStats ? colStats.getStatsObj() : null;
      if (existing != null) {
        final List<String> deletedCols = new ArrayList<>();
        for (ColumnStatisticsObj statsObj : existing) {
          // A stat survives only if a new column matches it by both name and type.
          boolean stillPresent = false;
          for (FieldSchema candidate : newCols) {
            if (statsObj.getColName().equalsIgnoreCase(candidate.getName()) && statsObj.getColType().equalsIgnoreCase(candidate.getType())) {
              stillPresent = true;
              break;
            }
          }
          if (stillPresent && sameIdentity) {
            // Column unchanged and table not renamed: the stored stat stays as it is.
            continue;
          }
          msdb.deleteTableColumnStatistics(dbName, tableName, statsObj.getColName());
          if (stillPresent) {
            carriedOver.add(statsObj);
          }
          deletedCols.add(statsObj.getColName());
        }
        StatsSetupConst.removeColumnStatsState(newTable.getParameters(), deletedCols);
      }
    }
    // Change to new table and append stats for the new table
    msdb.alterTable(dbName, tableName, newTable);
    if (updateColumnStats && !carriedOver.isEmpty()) {
      final ColumnStatisticsDesc desc = colStats.getStatsDesc();
      desc.setDbName(newDbName);
      desc.setTableName(newTableName);
      colStats.setStatsObj(carriedOver);
      msdb.updateTableColumnStatistics(colStats);
    }
  } catch (NoSuchObjectException nsoe) {
    LOG.debug("Could not find db entry." + nsoe);
  } catch (InvalidInputException e) {
    // should not happen since the input were verified before passed in
    throw new InvalidObjectException("Invalid inputs to update table column stats: " + e);
  }
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class ColStatsProcessor method persistColumnStats.
/**
 * Builds column statistics for {@code tbl} from the packed result rows and, when there
 * are any, sends them to the metastore in a single request. Used for both table-level
 * and partition-level column statistics.
 *
 * @param db Hive client that receives the statistics request
 * @param tbl table the statistics are constructed for
 * @return always 0
 * @throws HiveException declared by the helpers this method calls
 * @throws MetaException declared by the helpers this method calls
 * @throws IOException declared by the helpers this method calls
 */
public int persistColumnStats(Hive db, Table tbl) throws HiveException, MetaException, IOException {
  final List<ColumnStatistics> gathered = constructColumnStatsFromPackedRows(tbl);
  if (!gathered.isEmpty()) {
    final SetPartitionsStatsRequest statsRequest = new SetPartitionsStatsRequest(gathered);
    statsRequest.setNeedMerge(colStatDesc.isNeedMerge());
    db.setPartitionColumnStatistics(statsRequest);
  }
  return 0;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class ColumnStatsUpdateTask method constructColumnStatsFromInput.
/**
 * Builds a {@link ColumnStatistics} object for a single column from the user-supplied
 * statistic name/value pairs held in {@code work}.
 *
 * <p>The column type selects which statistics container is filled and which statistic
 * names are accepted (for example {@code numNulls}, {@code numDVs}, {@code lowValue},
 * {@code highValue} for numeric types). Values are parsed from their string form.
 *
 * @return statistics holding one stats object for the column, with a descriptor built
 *         from the database, table and partition names in {@code work}
 * @throws SemanticException if a statistic name is not accepted for the column type
 *           ("Unknown stat") or the column type is not handled ("Unsupported type")
 * @throws MetaException declared for the helpers this method calls
 */
private ColumnStatistics constructColumnStatsFromInput() throws SemanticException, MetaException {
  String dbName = work.dbName();
  String tableName = work.getTableName();
  String partName = work.getPartName();
  String colName = work.getColName();
  String columnType = work.getColType();
  // Lower-case with a fixed locale: with the default locale a Turkish/Azeri JVM maps 'I' to a
  // dotless i, so "DECIMAL(10,2)" would no longer start with "decimal".
  String lowerColumnType = columnType.toLowerCase(java.util.Locale.ROOT);
  ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
  // The grammar allows only one column per statement, so exactly one stats object is built.
  statsObj.setColName(colName);
  statsObj.setColType(columnType);
  ColumnStatisticsData statsData = new ColumnStatisticsData();
  if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") || columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) {
    LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector();
    longStats.setNumNullsIsSet(false);
    longStats.setNumDVsIsSet(false);
    longStats.setLowValueIsSet(false);
    longStats.setHighValueIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        longStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        longStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        longStats.setLowValue(Long.parseLong(value));
      } else if (fName.equals("highValue")) {
        longStats.setHighValue(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setLongStats(longStats);
  } else if (columnType.equalsIgnoreCase("double") || columnType.equalsIgnoreCase("float")) {
    DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector();
    doubleStats.setNumNullsIsSet(false);
    doubleStats.setNumDVsIsSet(false);
    doubleStats.setLowValueIsSet(false);
    doubleStats.setHighValueIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        doubleStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        doubleStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        doubleStats.setLowValue(Double.parseDouble(value));
      } else if (fName.equals("highValue")) {
        doubleStats.setHighValue(Double.parseDouble(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setDoubleStats(doubleStats);
  } else if (columnType.equalsIgnoreCase("string") || lowerColumnType.startsWith("char") || lowerColumnType.startsWith("varchar")) {
    // char(x),varchar(x) types
    StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector();
    stringStats.setMaxColLenIsSet(false);
    stringStats.setAvgColLenIsSet(false);
    stringStats.setNumNullsIsSet(false);
    stringStats.setNumDVsIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        stringStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        stringStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("avgColLen")) {
        stringStats.setAvgColLen(Double.parseDouble(value));
      } else if (fName.equals("maxColLen")) {
        stringStats.setMaxColLen(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setStringStats(stringStats);
  } else if (columnType.equalsIgnoreCase("boolean")) {
    BooleanColumnStatsData booleanStats = new BooleanColumnStatsData();
    booleanStats.setNumNullsIsSet(false);
    booleanStats.setNumTruesIsSet(false);
    booleanStats.setNumFalsesIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        booleanStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numTrues")) {
        booleanStats.setNumTrues(Long.parseLong(value));
      } else if (fName.equals("numFalses")) {
        booleanStats.setNumFalses(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setBooleanStats(booleanStats);
  } else if (columnType.equalsIgnoreCase("binary")) {
    BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
    binaryStats.setNumNullsIsSet(false);
    binaryStats.setAvgColLenIsSet(false);
    binaryStats.setMaxColLenIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        binaryStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("avgColLen")) {
        binaryStats.setAvgColLen(Double.parseDouble(value));
      } else if (fName.equals("maxColLen")) {
        binaryStats.setMaxColLen(Long.parseLong(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setBinaryStats(binaryStats);
  } else if (lowerColumnType.startsWith("decimal")) {
    // decimal(a,b) type
    DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector();
    decimalStats.setNumNullsIsSet(false);
    decimalStats.setNumDVsIsSet(false);
    decimalStats.setLowValueIsSet(false);
    decimalStats.setHighValueIsSet(false);
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        decimalStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        decimalStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        // Stored as the unscaled value's bytes plus the scale.
        BigDecimal d = new BigDecimal(value);
        decimalStats.setLowValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
      } else if (fName.equals("highValue")) {
        BigDecimal d = new BigDecimal(value);
        decimalStats.setHighValue(new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setDecimalStats(decimalStats);
  } else if (columnType.equalsIgnoreCase("date")) {
    DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector();
    Map<String, String> mapProp = work.getMapProp();
    for (Entry<String, String> entry : mapProp.entrySet()) {
      String fName = entry.getKey();
      String value = entry.getValue();
      if (fName.equals("numNulls")) {
        dateStats.setNumNulls(Long.parseLong(value));
      } else if (fName.equals("numDVs")) {
        dateStats.setNumDVs(Long.parseLong(value));
      } else if (fName.equals("lowValue")) {
        // Date high/low value is stored as long in stats DB, but allow users to set high/low
        // value using either date format (yyyy-mm-dd) or numeric format (days since epoch)
        dateStats.setLowValue(readDateValue(value));
      } else if (fName.equals("highValue")) {
        dateStats.setHighValue(readDateValue(value));
      } else {
        throw new SemanticException("Unknown stat");
      }
    }
    statsData.setDateStats(dateStats);
  } else {
    throw new SemanticException("Unsupported type");
  }
  // Every supported branch above filled statsData; attach it once here.
  statsObj.setStatsData(statsData);
  // The last argument is true when no partition name was given
  // (presumably selecting table-level stats; confirm against getColumnStatsDesc).
  ColumnStatisticsDesc statsDesc = getColumnStatsDesc(dbName, tableName, partName, partName == null);
  ColumnStatistics colStat = new ColumnStatistics();
  colStat.setStatsDesc(statsDesc);
  colStat.addToStatsObj(statsObj);
  return colStat;
}
use of org.apache.hadoop.hive.metastore.api.ColumnStatistics in project hive by apache.
the class MetaStoreDirectSql method getTableStats.
/**
 * Retrieve the column statistics for the specified columns of the table by querying
 * "TAB_COL_STATS" in batches. NULL is returned if the columns are not provided or if
 * no statistics rows are found for them.
 * @param dbName the database name of the table
 * @param tableName the table name
 * @param colNames the list of the column names
 * @return the column statistics for the specified columns, or null as described above
 * @throws MetaException
 */
public ColumnStatistics getTableStats(final String dbName, final String tableName, List<String> colNames) throws MetaException {
  if (colNames == null || colNames.isEmpty()) {
    return null;
  }
  final boolean doTrace = LOG.isDebugEnabled();
  final String queryText0 = "select " + STATS_COLLIST + " from \"TAB_COL_STATS\" " + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
  // Set once at least one query has been handed over for deferred closing, so cleanup
  // below only runs when there is something to close.
  final boolean[] hasDeferredQuery = { false };
  Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
    @Override
    public List<Object[]> run(List<String> input) throws MetaException {
      String queryText = queryText0 + makeParams(input.size()) + ")";
      // First two parameters are the db and table name, followed by one per column name.
      Object[] params = new Object[input.size() + 2];
      params[0] = dbName;
      params[1] = tableName;
      for (int i = 0; i < input.size(); ++i) {
        params[i + 2] = input.get(i);
      }
      long start = doTrace ? System.nanoTime() : 0;
      Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
      Object qResult = executeWithArray(query, params, queryText);
      timingTrace(doTrace, queryText0 + "...)", start, (doTrace ? System.nanoTime() : 0));
      if (qResult == null) {
        query.closeAll();
        return null;
      }
      // The rows are still read by the caller, so the query is closed later, not here.
      addQueryAfterUse(query);
      hasDeferredQuery[0] = true;
      return ensureList(qResult);
    }
  };
  try {
    List<Object[]> list = runBatched(colNames, b);
    // run() may return null; guard in case runBatched passes that through unchanged.
    if (list == null || list.isEmpty()) {
      return null;
    }
    ColumnStatisticsDesc csd = new ColumnStatisticsDesc(true, dbName, tableName);
    return makeColumnStats(list, csd, 0);
  } finally {
    // Close deferred queries on every exit path, including the empty-result return and
    // failures while building the result; previously those paths left them open.
    if (hasDeferredQuery[0]) {
      b.closeAllQueries();
    }
  }
}
Aggregations