Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class CachedStore, method updateUsingNotificationEvents:
@VisibleForTesting
public static long updateUsingNotificationEvents(RawStore rawStore, long lastEventId) throws Exception {
LOG.debug("updating cache using notification events starting from event id " + lastEventId);
NotificationEventRequest rqst = new NotificationEventRequest(lastEventId);
// Skip events that do not affect the metadata held by the cache.
rqst.addToEventTypeSkipList(MessageBuilder.INSERT_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.OPEN_TXN_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.COMMIT_TXN_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ABORT_TXN_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ALLOC_WRITE_ID_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ACID_WRITE_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.CREATE_FUNCTION_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.DROP_FUNCTION_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.CREATE_ISCHEMA_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ALTER_ISCHEMA_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.DROP_ISCHEMA_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ADD_SCHEMA_VERSION_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.ALTER_SCHEMA_VERSION_EVENT);
rqst.addToEventTypeSkipList(MessageBuilder.DROP_SCHEMA_VERSION_EVENT);
Deadline.startTimer("getNextNotification");
NotificationEventResponse resp = rawStore.getNextNotification(rqst);
Deadline.stopTimer();
if (resp == null || resp.getEvents() == null) {
LOG.debug("no events to process");
return lastEventId;
}
List<NotificationEvent> eventList = resp.getEvents();
LOG.debug("num events to process" + eventList.size());
for (NotificationEvent event : eventList) {
long eventId = event.getEventId();
if (eventId <= lastEventId) {
LOG.error("Event id is not valid " + lastEventId + " : " + eventId);
throw new RuntimeException(" event id is not valid " + lastEventId + " : " + eventId);
}
lastEventId = eventId;
String message = event.getMessage();
LOG.debug("Event to process " + event);
MessageDeserializer deserializer = MessageFactory.getInstance(event.getMessageFormat()).getDeserializer();
String catalogName = event.getCatName() == null ? "" : event.getCatName().toLowerCase();
String dbName = event.getDbName() == null ? "" : event.getDbName().toLowerCase();
String tableName = event.getTableName() == null ? "" : event.getTableName().toLowerCase();
if (!shouldCacheTable(catalogName, dbName, tableName)) {
continue;
}
switch(event.getEventType()) {
case MessageBuilder.ADD_PARTITION_EVENT:
AddPartitionMessage addPartMessage = deserializer.getAddPartitionMessage(message);
sharedCache.addPartitionsToCache(catalogName, dbName, tableName, addPartMessage.getPartitionObjs());
break;
case MessageBuilder.ALTER_PARTITION_EVENT:
AlterPartitionMessage alterPartitionMessage = deserializer.getAlterPartitionMessage(message);
sharedCache.alterPartitionInCache(catalogName, dbName, tableName, alterPartitionMessage.getPtnObjBefore().getValues(), alterPartitionMessage.getPtnObjAfter());
// TODO : Use the stat object stored in the alter table message to update the stats in cache.
updateStatsForAlterPart(rawStore, alterPartitionMessage.getTableObj(), catalogName, dbName, tableName, alterPartitionMessage.getPtnObjAfter());
break;
case MessageBuilder.DROP_PARTITION_EVENT:
DropPartitionMessage dropPartitionMessage = deserializer.getDropPartitionMessage(message);
for (Map<String, String> partMap : dropPartitionMessage.getPartitions()) {
sharedCache.removePartitionFromCache(catalogName, dbName, tableName, new ArrayList<>(partMap.values()));
}
break;
case MessageBuilder.CREATE_TABLE_EVENT:
CreateTableMessage createTableMessage = deserializer.getCreateTableMessage(message);
sharedCache.addTableToCache(catalogName, dbName, tableName, createTableMessage.getTableObj());
break;
case MessageBuilder.ALTER_TABLE_EVENT:
AlterTableMessage alterTableMessage = deserializer.getAlterTableMessage(message);
sharedCache.alterTableInCache(catalogName, dbName, tableName, alterTableMessage.getTableObjAfter());
// TODO : Use the stat object stored in the alter table message to update the stats in cache.
updateStatsForAlterTable(rawStore, alterTableMessage.getTableObjBefore(), alterTableMessage.getTableObjAfter(), catalogName, dbName, tableName);
break;
case MessageBuilder.DROP_TABLE_EVENT:
DropTableMessage dropTableMessage = deserializer.getDropTableMessage(message);
int batchSize = MetastoreConf.getIntVar(rawStore.getConf(), ConfVars.BATCH_RETRIEVE_OBJECTS_MAX);
String tableDnsPath = null;
Path tablePath = new Path(dropTableMessage.getTableObj().getSd().getLocation());
if (tablePath != null) {
tableDnsPath = new Warehouse(rawStore.getConf()).getDnsPath(tablePath).toString();
}
while (true) {
Map<String, String> partitionLocations = rawStore.getPartitionLocations(catalogName, dbName, tableName, tableDnsPath, batchSize);
if (partitionLocations == null || partitionLocations.isEmpty()) {
break;
}
sharedCache.removePartitionFromCache(catalogName, dbName, tableName, new ArrayList<>(partitionLocations.values()));
}
sharedCache.removeTableFromCache(catalogName, dbName, tableName);
break;
case MessageBuilder.CREATE_DATABASE_EVENT:
CreateDatabaseMessage createDatabaseMessage = deserializer.getCreateDatabaseMessage(message);
sharedCache.addDatabaseToCache(createDatabaseMessage.getDatabaseObject());
break;
case MessageBuilder.ALTER_DATABASE_EVENT:
AlterDatabaseMessage alterDatabaseMessage = deserializer.getAlterDatabaseMessage(message);
sharedCache.alterDatabaseInCache(catalogName, dbName, alterDatabaseMessage.getDbObjAfter());
break;
case MessageBuilder.DROP_DATABASE_EVENT:
sharedCache.removeDatabaseFromCache(catalogName, dbName);
break;
case MessageBuilder.CREATE_CATALOG_EVENT:
case MessageBuilder.DROP_CATALOG_EVENT:
case MessageBuilder.ALTER_CATALOG_EVENT:
// TODO : Need to add cache invalidation for catalog events
LOG.error("catalog Events are not supported for cache invalidation : " + event.getEventType());
break;
case MessageBuilder.UPDATE_TBL_COL_STAT_EVENT:
UpdateTableColumnStatMessage msg = deserializer.getUpdateTableColumnStatMessage(message);
sharedCache.alterTableAndStatsInCache(catalogName, dbName, tableName, msg.getWriteId(), msg.getColumnStatistics().getStatsObj(), msg.getParameters());
break;
case MessageBuilder.DELETE_TBL_COL_STAT_EVENT:
DeleteTableColumnStatMessage msgDel = deserializer.getDeleteTableColumnStatMessage(message);
sharedCache.removeTableColStatsFromCache(catalogName, dbName, tableName, msgDel.getColName());
break;
case MessageBuilder.UPDATE_PART_COL_STAT_EVENT:
UpdatePartitionColumnStatMessage msgPartUpdate = deserializer.getUpdatePartitionColumnStatMessage(message);
sharedCache.alterPartitionAndStatsInCache(catalogName, dbName, tableName, msgPartUpdate.getWriteId(), msgPartUpdate.getPartVals(), msgPartUpdate.getParameters(), msgPartUpdate.getColumnStatistics().getStatsObj());
break;
case MessageBuilder.DELETE_PART_COL_STAT_EVENT:
DeletePartitionColumnStatMessage msgPart = deserializer.getDeletePartitionColumnStatMessage(message);
sharedCache.removePartitionColStatsFromCache(catalogName, dbName, tableName, msgPart.getPartValues(), msgPart.getColName());
break;
case MessageBuilder.ADD_PRIMARYKEY_EVENT:
AddPrimaryKeyMessage addPrimaryKeyMessage = deserializer.getAddPrimaryKeyMessage(message);
sharedCache.addPrimaryKeysToCache(catalogName, dbName, tableName, addPrimaryKeyMessage.getPrimaryKeys());
break;
case MessageBuilder.ADD_FOREIGNKEY_EVENT:
AddForeignKeyMessage addForeignKeyMessage = deserializer.getAddForeignKeyMessage(message);
for (SQLForeignKey fk : addForeignKeyMessage.getForeignKeys()) {
// Foreign key events currently carry the PK table's db and table names, so use the FK side from each constraint instead.
sharedCache.addForeignKeysToCache(catalogName, fk.getFktable_db(), fk.getFktable_name(), Arrays.asList(fk));
}
break;
case MessageBuilder.ADD_NOTNULLCONSTRAINT_EVENT:
AddNotNullConstraintMessage notNullConstraintMessage = deserializer.getAddNotNullConstraintMessage(message);
sharedCache.addNotNullConstraintsToCache(catalogName, dbName, tableName, notNullConstraintMessage.getNotNullConstraints());
break;
case MessageBuilder.ADD_UNIQUECONSTRAINT_EVENT:
AddUniqueConstraintMessage uniqueConstraintMessage = deserializer.getAddUniqueConstraintMessage(message);
sharedCache.addUniqueConstraintsToCache(catalogName, dbName, tableName, uniqueConstraintMessage.getUniqueConstraints());
break;
case MessageBuilder.ADD_DEFAULTCONSTRAINT_EVENT:
AddDefaultConstraintMessage defaultConstraintMessage = deserializer.getAddDefaultConstraintMessage(message);
sharedCache.addDefaultConstraintsToCache(catalogName, dbName, tableName, defaultConstraintMessage.getDefaultConstraints());
break;
case MessageBuilder.ADD_CHECKCONSTRAINT_EVENT:
AddCheckConstraintMessage checkConstraintMessage = deserializer.getAddCheckConstraintMessage(message);
sharedCache.addCheckConstraintsToCache(catalogName, dbName, tableName, checkConstraintMessage.getCheckConstraints());
break;
case MessageBuilder.DROP_CONSTRAINT_EVENT:
DropConstraintMessage dropConstraintMessage = deserializer.getDropConstraintMessage(message);
sharedCache.removeConstraintFromCache(catalogName, dbName, tableName, dropConstraintMessage.getConstraint());
break;
default:
LOG.error("Event is not supported for cache invalidation : " + event.getEventType());
}
}
return lastEventId;
}
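The DROP_TABLE_EVENT branch above uses Warehouse only to normalize the dropped table's location before fetching partition locations in batches. Below is a minimal sketch of that normalization step in isolation; the helper name is illustrative and the Configuration is supplied by the caller, so this is a sketch rather than the CachedStore implementation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class DnsPathSketch {
  // Returns the fully qualified form of a table location, matching what the
  // DROP_TABLE_EVENT handler passes to getPartitionLocations().
  static String normalize(Configuration conf, String location) throws MetaException {
    Path tablePath = new Path(location);
    return new Warehouse(conf).getDnsPath(tablePath).toString();
  }
}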
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class TestMetaStoreServerUtils, method testUpdateTableStatsSlow_doesNotUpdateStats:
/**
* Verify that updateTableStatsSlow() does not calculate table statistics when
* <ol>
* <li>newDir is true</li>
* <li>Table is partitioned</li>
* <li>Stats are already present and forceRecompute isn't set</li>
* </ol>
*/
@Test
public void testUpdateTableStatsSlow_doesNotUpdateStats() throws TException {
// Create database and table
FieldSchema fs = new FieldSchema("date", "string", "date column");
List<FieldSchema> cols = Collections.singletonList(fs);
Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").build(null);
Warehouse wh = mock(Warehouse.class);
// newDir(true) => stats not updated
MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, true, false, null);
verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl);
// partitioned table => stats not updated
Table tbl1 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").setPartCols(cols).build(null);
MetaStoreServerUtils.updateTableStatsSlow(db, tbl1, wh, false, false, null);
verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl1);
// Already contains stats => stats not updated when forceRecompute isn't set
Table tbl2 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").setTableParams(paramsWithStats).build(null);
MetaStoreServerUtils.updateTableStatsSlow(db, tbl2, wh, false, false, null);
verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl2);
}
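For the complementary positive case, the mocked Warehouse would have to return some file statuses so that updateTableStatsSlow() has data to aggregate. The sketch below only shows that stubbing with Mockito (the matcher import assumes Mockito 2 or later); the file size, block size, and path are placeholders, and the stats parameters the utility then writes are not asserted here.

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Collections;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Table;

public class WarehouseStubSketch {
  // A Warehouse mock whose unpartitioned-table listing reports one 1 KB file.
  static Warehouse warehouseWithOneFile() throws Exception {
    Warehouse wh = mock(Warehouse.class);
    FileStatus status = new FileStatus(1024L, false, 1, 128L * 1024 * 1024,
        System.currentTimeMillis(), new Path("file:///warehouse/db/t/part-00000"));
    when(wh.getFileStatusesForUnpartitionedTable(any(Database.class), any(Table.class)))
        .thenReturn(Collections.singletonList(status));
    return wh;
  }
}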
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class HCatDriver, method setFSPermsNGrp:
private int setFSPermsNGrp(SessionState ss) {
Configuration conf = ss.getConf();
String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, "");
if (tblName.isEmpty()) {
tblName = conf.get("import.destination.table", "");
conf.set("import.destination.table", "");
}
String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, "");
String grp = conf.get(HCatConstants.HCAT_GROUP, null);
String permsStr = conf.get(HCatConstants.HCAT_PERMS, null);
if (tblName.isEmpty() && dbName.isEmpty()) {
// it wasn't a create db/table operation
return 0;
}
if (null == grp && null == permsStr) {
// there was no group or permission setting to begin with.
return 0;
}
FsPermission perms = FsPermission.valueOf(permsStr);
if (!tblName.isEmpty()) {
Hive db = null;
try {
db = Hive.get();
Table tbl = db.getTable(tblName);
Path tblPath = tbl.getPath();
FileSystem fs = tblPath.getFileSystem(conf);
if (null != perms) {
fs.setPermission(tblPath, perms);
}
if (null != grp) {
fs.setOwner(tblPath, null, grp);
}
return 0;
} catch (Exception e) {
ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage()));
try {
// We need to drop the table.
if (null != db) {
db.dropTable(tblName);
}
} catch (HiveException he) {
ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage()));
}
return 1;
}
} else {
// looks like a db operation
if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) {
// We don't set perms or groups for the default database dir.
return 0;
} else {
try {
Hive db = Hive.get();
Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName));
FileSystem fs = dbPath.getFileSystem(conf);
if (perms != null) {
fs.setPermission(dbPath, perms);
}
if (null != grp) {
fs.setOwner(dbPath, null, grp);
}
return 0;
} catch (Exception e) {
ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage()));
try {
Hive.get().dropDatabase(dbName);
} catch (Exception e1) {
ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage()));
}
return 1;
}
}
}
}
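A trimmed-down sketch of the database branch above, keeping only the Warehouse lookup and the permission call; the database name and permission string are supplied by the caller, and the group/owner handling and rollback logic are omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.ql.metadata.Hive;

public class DbPermsSketch {
  // permsStr must be in the symbolic form accepted by FsPermission.valueOf(),
  // e.g. "-rwxr-x---" (ten characters including the leading file-type flag).
  static void applyDbPerms(Configuration conf, String dbName, String permsStr) throws Exception {
    // Resolve the database directory through the Warehouse helper rather than
    // assuming a layout under the warehouse root.
    Path dbPath = new Warehouse(conf).getDatabasePath(Hive.get().getDatabase(dbName));
    FileSystem fs = dbPath.getFileSystem(conf);
    fs.setPermission(dbPath, FsPermission.valueOf(permsStr));
  }
}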
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class ImportSemanticAnalyzer, method prepareImport:
public static boolean prepareImport(boolean isLocationSet, boolean isExternalSet, boolean isPartSpecSet, boolean waitOnPrecursor, String parsedLocation, String parsedTableName, String parsedDbName, LinkedHashMap<String, String> parsedPartSpec, String fromLocn, EximUtil.SemanticAnalyzerWrapperContext x, Map<String, Long> dbsUpdated, Map<String, Long> tablesUpdated) throws IOException, MetaException, HiveException, URISyntaxException {
// initialize load path
URI fromURI = EximUtil.getValidatedURI(x.getConf(), stripQuotes(fromLocn));
Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
FileSystem fs = FileSystem.get(fromURI, x.getConf());
x.getInputs().add(toReadEntity(fromPath, x.getConf()));
EximUtil.ReadMetaData rv = new EximUtil.ReadMetaData();
try {
rv = EximUtil.readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
ReplicationSpec replicationSpec = rv.getReplicationSpec();
if (replicationSpec.isNoop()) {
// nothing to do here, silently return.
return false;
}
String dbname = SessionState.get().getCurrentDatabase();
if ((parsedDbName != null) && (!parsedDbName.isEmpty())) {
// If the parsed statement contained a db.tablename specification, prefer that.
dbname = parsedDbName;
}
if (dbsUpdated != null) {
dbsUpdated.put(dbname, Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
}
// Create table associated with the import
// Executed if relevant, and used to contain all the other details about the table if not.
ImportTableDesc tblDesc;
try {
tblDesc = getBaseCreateTableDescFromTable(dbname, rv.getTable());
} catch (Exception e) {
throw new HiveException(e);
}
if ((replicationSpec != null) && replicationSpec.isInReplicationScope()) {
tblDesc.setReplicationSpec(replicationSpec);
}
if (isExternalSet) {
tblDesc.setExternal(isExternalSet);
// This condition-check could have been avoided, but to honour the old
// default of not calling if it wasn't set, we retain that behaviour.
// TODO:cleanup after verification that the outer if isn't really needed here
}
if (isLocationSet) {
tblDesc.setLocation(parsedLocation);
x.getInputs().add(toReadEntity(new Path(parsedLocation), x.getConf()));
}
if ((parsedTableName != null) && (!parsedTableName.isEmpty())) {
tblDesc.setTableName(parsedTableName);
}
if (tablesUpdated != null) {
tablesUpdated.put(dbname + "." + tblDesc.getTableName(), Long.valueOf(replicationSpec.get(ReplicationSpec.KEY.EVENT_ID)));
}
List<AddPartitionDesc> partitionDescs = new ArrayList<AddPartitionDesc>();
Iterable<Partition> partitions = rv.getPartitions();
for (Partition partition : partitions) {
// TODO: this should ideally not create AddPartitionDesc per partition
AddPartitionDesc partsDesc = getBaseAddPartitionDescFromPartition(fromPath, dbname, tblDesc, partition);
partitionDescs.add(partsDesc);
}
if (isPartSpecSet) {
// The import specification asked for only a particular partition to be loaded
// We load only that, and ignore all the others.
boolean found = false;
for (Iterator<AddPartitionDesc> partnIter = partitionDescs.listIterator(); partnIter.hasNext(); ) {
AddPartitionDesc addPartitionDesc = partnIter.next();
if (!found && addPartitionDesc.getPartition(0).getPartSpec().equals(parsedPartSpec)) {
found = true;
} else {
partnIter.remove();
}
}
if (!found) {
throw new SemanticException(ErrorMsg.INVALID_PARTITION.getMsg(" - Specified partition not found in import directory"));
}
}
if (tblDesc.getTableName() == null) {
// The table name should have come either from the IMPORT statement or from the export dump.
throw new SemanticException(ErrorMsg.NEED_TABLE_SPECIFICATION.getMsg());
} else {
x.getConf().set("import.destination.table", tblDesc.getTableName());
for (AddPartitionDesc addPartitionDesc : partitionDescs) {
addPartitionDesc.setTableName(tblDesc.getTableName());
}
}
Warehouse wh = new Warehouse(x.getConf());
Table table = tableIfExists(tblDesc, x.getHive());
boolean tableExists = false;
if (table != null) {
checkTable(table, tblDesc, replicationSpec, x.getConf());
x.getLOG().debug("table " + tblDesc.getTableName() + " exists: metadata checked");
tableExists = true;
}
if (!replicationSpec.isInReplicationScope()) {
createRegularImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, table, fromURI, fs, wh, x);
} else {
createReplImportTasks(tblDesc, partitionDescs, isPartSpecSet, replicationSpec, waitOnPrecursor, table, fromURI, fs, wh, x);
}
return tableExists;
}
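The Warehouse constructed near the end of prepareImport() is handed to the task-creation methods so they can choose a location for tables and partitions that did not carry one in the dump. A hedged sketch of that kind of lookup follows; the helper name is illustrative, and the getDefaultTablePath() overloads differ between Hive releases.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.ql.metadata.Hive;

public class DefaultTablePathSketch {
  // Falls back to <database location>/<table name> for a managed table when
  // the IMPORT statement did not supply a LOCATION clause.
  static Path defaultImportLocation(Configuration conf, String dbName, String tableName) throws Exception {
    Warehouse wh = new Warehouse(conf);
    Database db = Hive.get().getDatabase(dbName);
    return wh.getDefaultTablePath(db, tableName);
  }
}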
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class DDLTask, method deleteDir:
private void deleteDir(Path dir) throws HiveException {
try {
Warehouse wh = new Warehouse(conf);
wh.deleteDir(dir, true);
} catch (MetaException e) {
throw new HiveException(e);
}
}
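A minimal caller-side sketch of the same cleanup, assuming a HiveConf is available; the directory path is a placeholder.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.MetaException;

public class DeleteDirSketch {
  public static void main(String[] args) throws MetaException {
    HiveConf conf = new HiveConf();
    // Placeholder directory left behind by a failed DDL operation.
    Path dir = new Path("/tmp/hive/scratch/failed-ddl");
    // Recursively remove the directory through the Warehouse helper, which
    // delegates to the metastore's filesystem handler.
    new Warehouse(conf).deleteDir(dir, true);
  }
}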