use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestTxnHandler method testLockSWSWSR.
public void testLockSWSWSR() throws Exception {
// Test that write blocks write but read can still acquire
LockComponent comp = new LockComponent(LockType.SHARED_WRITE, LockLevel.DB, "mydb");
List<LockComponent> components = new ArrayList<LockComponent>(1);
LockRequest req = new LockRequest(components, "me", "localhost");
LockResponse res = txnHandler.lock(req);
assertTrue(res.getState() == LockState.ACQUIRED);
comp = new LockComponent(LockType.SHARED_WRITE, LockLevel.DB, "mydb");
req = new LockRequest(components, "me", "localhost");
res = txnHandler.lock(req);
assertTrue(res.getState() == LockState.WAITING);
comp = new LockComponent(LockType.SHARED_READ, LockLevel.DB, "mydb");
req = new LockRequest(components, "me", "localhost");
res = txnHandler.lock(req);
assertTrue(res.getState() == LockState.ACQUIRED);
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestInitiator method cleanEmptyAbortedTxns.
// Test scenario: open and abort one transaction (plus a lock request), then open and abort a
// large batch of transactions, and assert on the number of transactions reported as open.
// NOTE(review): this excerpt appears to be missing statements and its closing brace; the
// notes below mark the places where the visible code does not match the stated intent.
public void cleanEmptyAbortedTxns() throws Exception {
// Test that we are cleaning aborted transactions with no components left in txn_components.
// Put one aborted transaction with an entry in txn_components to make sure we don't
// accidentally clean it too.
// NOTE(review): `t` is not read anywhere in the visible code.
Table t = newTable("default", "ceat", false);
long txnid = openTxn();
// NOTE(review): `comp` is never added to `components` in the visible code, so the request
// below is built from an empty list — confirm against the full source.
LockComponent comp = new LockComponent(LockType.SHARED_WRITE, LockLevel.TABLE, "default");
List<LockComponent> components = new ArrayList<LockComponent>(1);
// NOTE(review): `txnid` is not attached to `req` in the visible code; presumably the lock is
// meant to belong to that transaction — verify.
LockRequest req = new LockRequest(components, "me", "localhost");
// NOTE(review): `res` is not read anywhere in the visible code.
LockResponse res = txnHandler.lock(req);
txnHandler.abortTxn(new AbortTxnRequest(txnid));
// Raise the max open batch so one openTxns call can create more transactions than
// TIMED_OUT_TXN_ABORT_BATCH_SIZE, then abort all of them.
conf.setIntVar(HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50);
OpenTxnsResponse resp = txnHandler.openTxns(new OpenTxnRequest(TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50, "user", "hostname"));
txnHandler.abortTxns(new AbortTxnsRequest(resp.getTxn_ids()));
// Before cleanup: the batch plus the single transaction opened first are all reported.
GetOpenTxnsResponse openTxns = txnHandler.getOpenTxns();
Assert.assertEquals(TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50 + 1, openTxns.getOpen_txnsSize());
// NOTE(review): nothing visible between the two assertions changes state; the step that
// triggers the cleanup seems to be missing from this excerpt.
openTxns = txnHandler.getOpenTxns();
Assert.assertEquals(1, openTxns.getOpen_txnsSize());
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TxnHandler method checkLock.
/**
 * Lock acquisition is meant to be fair, so every lock can only block on some lock with smaller
 * hl_lock_ext_id by only checking earlier locks.
 * For any given SQL statement all locks required by it are grouped under single extLockId and are
 * granted all at once or all locks wait.
 * This is expected to run at READ_COMMITTED.
 * If there is a concurrent commitTxn/rollbackTxn, those can only remove rows from HIVE_LOCKS.
 * If they happen to be for the same txnid, there will be a WW conflict (in MS DB), if different txnid,
 * checkLock() will in the worst case keep locks in Waiting state a little longer.
 */
// Decides whether the locks grouped under extLockId can be granted, based on locks already
// recorded in HIVE_LOCKS with a smaller hl_lock_ext_id.
// Visible flow:
//   1. take the checkLock mutex and load the locks being checked;
//   2. if a write set was collected, probe WRITE_SET for a conflicting committed write and
//      abort the transaction when one is found;
//   3. query earlier locks on the same db / table / partition;
//   4. compare each requested lock against those via jumpTable: on a real conflict, roll back
//      to the savepoint, record the blocking lock and return; otherwise acquire all locks.
// Returns the LockResponse created at the top of the method.
// NOTE(review): this excerpt is missing many statements and closing braces (e.g. the calls
// that set the response state, commit, and release the mutex are not visible). Lines starting
// with a bare '*' look like block-comment text whose delimiters were lost.
@RetrySemantics.SafeToRetry("See @SafeToRetry")
private LockResponse checkLock(Connection dbConn, long extLockId) throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, MetaException, SQLException {
TxnStore.MutexAPI.LockHandle handle = null;
Statement stmt = null;
ResultSet rs = null;
LockResponse response = new LockResponse();
* todo: Longer term we should pass this from client somehow - this would be an optimization; once
* that is in place make sure to build and test "writeSet" below using OperationType not LockType
* With Static Partitions we assume that the query modifies exactly the partitions it locked. (not entirely
* realistic since Update/Delete may have some predicate that filters out all records out of
* some partition(s), but plausible). For DP, we acquire locks very wide (all known partitions),
* but for most queries only a fraction will actually be updated. #addDynamicPartitions() tells
* us exactly which ones were written to. Thus using this trick to kill a query early for
* DP queries may be too restrictive.
// Hard-coded to true, which makes the write-set collection condition below always false.
boolean isPartOfDynamicPartitionInsert = true;
try {
* checkLock() must be mutex'd against any other checkLock to make sure 2 conflicting locks
* are not granted by parallel checkLock() calls.
// NOTE(review): the argument list of acquireLock(...) is truncated in this excerpt.
handle = getMutexAPI().acquireLock(;
// being acquired now
List<LockInfo> locksBeingChecked = getLockInfoFromLockId(dbConn, extLockId);
LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
// Savepoint is handed to wait(dbConn, save) further down when a conflict is found.
Savepoint save = dbConn.setSavepoint();
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " + "hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " + "hl_lock_type, hl_txnid from HIVE_LOCKS where hl_db in (");
// Scratch set reused below for the db, table and partition IN-lists.
Set<String> strings = new HashSet<>(locksBeingChecked.size());
// This is the set of entities that the statement represented by extLockId wants to update
List<LockInfo> writeSet = new ArrayList<>();
// NOTE(review): the loop body that fills `strings` and `writeSet` is not visible here.
for (LockInfo info : locksBeingChecked) {
if (!isPartOfDynamicPartitionInsert && info.type == LockType.SHARED_WRITE) {
if (!writeSet.isEmpty()) {
if (writeSet.get(0).txnId == 0) {
// Write operations always start a txn
throw new IllegalStateException("Found Write lock for " + JavaUtils.lockIdToString(extLockId) + " but no txnid");
stmt = dbConn.createStatement();
// NOTE(review): the text has no leading "select"; presumably addLimitClause() supplies it — confirm.
StringBuilder sb = new StringBuilder(" ws_database, ws_table, ws_partition, " + "ws_txnid, ws_commit_id " + "from WRITE_SET where ws_commit_id >= " + writeSet.get(0).txnId + // see commitTxn() for more info on this inequality
" and (");
// One "(db and table and partition) or " clause per entity in the write set.
for (LockInfo info : writeSet) {
sb.append("(ws_database = ").append(quoteString(info.db)).append(" and ws_table = ").append(quoteString(info.table)).append(" and ws_partition ").append(info.partition == null ? "is null" : "= " + quoteString(info.partition)).append(") or ");
// nuke trailing " or "
sb.setLength(sb.length() - 4);
// 1 row is sufficient to know we have to kill the query
rs = stmt.executeQuery(sqlGenerator.addLimitClause(1, sb.toString()));
// NOTE(review): the condition is truncated in this excerpt (presumably a check that a row exists).
if ( {
* if here, it means we found an already committed txn which overlaps with the current one and
* it updated the same resource the current txn wants to update. By First-committer-wins
* rule, current txn will not be allowed to commit so may as well kill it now; This is just an
* optimization to prevent wasting cluster resources to run a query which is known to be DOA.
* {@link #commitTxn(CommitTxnRequest)} has the primary responsibility to ensure this.
* checkLock() runs at READ_COMMITTED so you could have another (Hive) txn running commitTxn()
* in parallel and thus writing to WRITE_SET. commitTxn() logic is properly mutexed to ensure
* that we don't "miss" any WW conflicts. We could've mutexed the checkLock() and commitTxn()
* as well but this reduces concurrency for very little gain.
* Note that update/delete (which runs as dynamic partition insert) acquires a lock on the table,
* but WRITE_SET has entries for actual partitions updated. Thus this optimization will "miss"
* the WW conflict but it will be caught in commitTxn() where actual partitions written are known.
* This is OK since we want 2 concurrent updates that update different sets of partitions to both commit.
// Build "db/table[/partition]" from the conflicting WRITE_SET row for the error message.
String resourceName = rs.getString(1) + '/' + rs.getString(2);
String partName = rs.getString(3);
if (partName != null) {
resourceName += '/' + partName;
String msg = "Aborting " + JavaUtils.txnIdToString(writeSet.get(0).txnId) + " since a concurrent committed transaction [" + JavaUtils.txnIdToString(rs.getLong(4)) + "," + rs.getLong(5) + "] has already updated resource '" + resourceName + "'";;
// Exactly one transaction is expected to be aborted; anything else is treated as a failure.
if (abortTxns(dbConn, Collections.singletonList(writeSet.get(0).txnId), true) != 1) {
throw new IllegalStateException(msg + " FAILED!");
throw new TxnAbortedException(msg);
close(rs, stmt, null);
// Comma-separated IN-list for hl_db.
// NOTE(review): the else branch and the append of the value itself are not visible.
boolean first = true;
for (String s : strings) {
if (first)
first = false;
query.append(", ");
// If any of the table requests are null, then I need to pull all the
// table locks for this db.
boolean sawNull = false;
for (LockInfo info : locksBeingChecked) {
if (info.table == null) {
sawNull = true;
} else {
if (!sawNull) {
query.append(" and (hl_table is null or hl_table in(");
first = true;
for (String s : strings) {
if (first)
first = false;
query.append(", ");
// If any of the partition requests are null, then I need to pull all
// partition locks for this table.
sawNull = false;
for (LockInfo info : locksBeingChecked) {
if (info.partition == null) {
sawNull = true;
} else {
if (!sawNull) {
query.append(" and (hl_partition is null or hl_partition in(");
first = true;
for (String s : strings) {
if (first)
first = false;
query.append(", ");
// Only locks with a smaller external id are considered.
query.append(" and hl_lock_ext_id < ").append(extLockId);
LOG.debug("Going to execute query <" + query.toString() + ">");
stmt = dbConn.createStatement();
rs = stmt.executeQuery(query.toString());
// Ordering of the candidate locks is defined by LockInfoComparator.
SortedSet<LockInfo> lockSet = new TreeSet<LockInfo>(new LockInfoComparator());
// NOTE(review): the loop condition is truncated in this excerpt (presumably iterates the result set).
while ( {
lockSet.add(new LockInfo(rs));
// Turn the tree set into an array so we can move back and forth easily
// in it.
LockInfo[] locks = lockSet.toArray(new LockInfo[lockSet.size()]);
if (LOG.isTraceEnabled()) {
LOG.trace("Locks to check(full): ");
for (LockInfo info : locks) {
LOG.trace(" " + info);
for (LockInfo info : locksBeingChecked) {
// then just look at the other locks.
// NOTE(review): the body of this branch is not visible; presumably it skips a lock that is
// already acquired — confirm.
if (info.state == LockState.ACQUIRED) {
*this is what makes this method @SafeToRetry
// it or not.
// Walk the earlier locks from the end of the sorted array towards the start.
for (int i = locks.length - 1; i >= 0; i--) {
// Check if we're operating on the same database, if not, move on
if (!info.db.equals(locks[i].db)) {
// check if they are operating on the same table, if not, move on.
if (info.table != null && locks[i].table != null && !info.table.equals(locks[i].table)) {
// check if they are operating on the same partition, if not, move on.
if (info.partition != null && locks[i].partition != null && !info.partition.equals(locks[i].partition)) {
// We've found something that matches what we're trying to lock,
// so figure out if we can lock it too.
// jumpTable is indexed by requested lock type, then existing lock type, then existing lock state.
LockAction lockAction = jumpTable.get(info.type).get(locks[i].type).get(locks[i].state);
LOG.debug("desired Lock: " + info + " checked Lock: " + locks[i] + " action: " + lockAction);
switch(lockAction) {
case WAIT:
if (!ignoreConflict(info, locks[i])) {
/*we acquire all locks for a given query atomically; if 1 blocks, all go into (remain) in
* Waiting state. wait() will undo any 'acquire()' which may have happened as part of
* this (metastore db) transaction and then we record which lock blocked the lock
* we were testing ('info').*/
wait(dbConn, save);
String sqlText = "update HIVE_LOCKS" + " set HL_BLOCKEDBY_EXT_ID=" + locks[i].extLockId + ", HL_BLOCKEDBY_INT_ID=" + locks[i].intLockId + " where HL_LOCK_EXT_ID=" + info.extLockId + " and HL_LOCK_INT_ID=" + info.intLockId;
LOG.debug("Executing sql: " + sqlText);
// Exactly one HIVE_LOCKS row must match the (ext id, int id) pair.
int updCnt = stmt.executeUpdate(sqlText);
if (updCnt != 1) {
shouldNeverHappen(info.txnId, info.extLockId, info.intLockId);
// NOTE(review): the commit call and the response-state assignment are not visible here.
LOG.debug("Going to commit");
LOG.debug("Lock(" + info + ") waiting for Lock(" + locks[i] + ")");
return response;
// fall through to ACQUIRE
// so exit the loop and check next lock
// if here, there were no locks that blocked any locks in 'locksBeingChecked' - acquire them all
acquire(dbConn, stmt, locksBeingChecked);
// We acquired all of the locks, so commit and return acquired.
LOG.debug("Going to commit");
} finally {
close(rs, stmt, null);
// NOTE(review): the body that releases the mutex handle is not visible in this excerpt.
if (handle != null) {
return response;
use of org.apache.hadoop.hive.metastore.api.LockResponse in project presto by prestodb.
the class ThriftHiveMetastore method lock.
public long lock(MetastoreContext metastoreContext, String databaseName, String tableName) {
try {
final LockComponent lockComponent = new LockComponent(EXCLUSIVE, LockLevel.TABLE, databaseName);
final LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), metastoreContext.getUsername(), InetAddress.getLocalHost().getHostName());
LockResponse lockResponse = stats.getLock().wrap(() -> getMetastoreClientThenCall(metastoreContext, client -> client.lock(lockRequest))).call();
LockState state = lockResponse.getState();
long lockId = lockResponse.getLockid();
final AtomicBoolean acquired = new AtomicBoolean(state.equals(ACQUIRED));
try {
if (state.equals(WAITING)) {
retry().maxAttempts(Integer.MAX_VALUE - 100).stopOnIllegalExceptions().exceptionMapper(e -> {
if (e instanceof WaitingForLockException) {
// only retry on waiting for lock exception
return e;
} else {
return new IllegalStateException(e.getMessage(), e);
}).run("lock", stats.getLock().wrap(() -> getMetastoreClientThenCall(metastoreContext, client -> {
LockResponse response = client.checkLock(new CheckLockRequest(lockId));
LockState newState = response.getState();
if (newState.equals(WAITING)) {
throw new WaitingForLockException("Waiting for lock.");
} else if (newState.equals(ACQUIRED)) {
} else {
throw new RuntimeException(String.format("Failed to acquire lock: %s",;
return null;
} finally {
if (!acquired.get()) {
unlock(metastoreContext, lockId);
if (!acquired.get()) {
throw new RuntimeException("Failed to acquire lock");
return lockId;
} catch (TException e) {
throw new PrestoException(HIVE_METASTORE_ERROR, e);
} catch (Exception e) {
throw propagate(e);
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class HiveTableOperations method acquireLock.
// Requests an EXCL_WRITE, table-level lock and, while the reported state is WAITING, polls
// checkLock with exponential backoff until the state changes or the retry budget runs out.
// Returns the lock id; throws CommitFailedException when the final state is not ACQUIRED
// (with a dedicated "Timed out" message when polling ended on WaitingForLockException).
// NOTE(review): this excerpt is missing statements and closing braces; see the notes below.
long acquireLock() throws UnknownHostException, TException, InterruptedException {
// NOTE(review): the component is built from `database` only; `tableName` (used in the error
// messages below) is not applied to it in the visible code — confirm.
final LockComponent lockComponent = new LockComponent(LockType.EXCL_WRITE, LockLevel.TABLE, database);
// NOTE(review): System.getProperty("") throws IllegalArgumentException for an empty key; the
// property name appears to have been lost in this excerpt (presumably the user name) — confirm.
final LockRequest lockRequest = new LockRequest(Lists.newArrayList(lockComponent), System.getProperty(""), InetAddress.getLocalHost().getHostName());
// NOTE(review): the receiver/call in front of the lambda is truncated in this excerpt.
LockResponse lockResponse = -> client.lock(lockRequest));
// Holder for the latest lock state, readable from the retry lambda and after the loop.
AtomicReference<LockState> state = new AtomicReference<>(lockResponse.getState());
long lockId = lockResponse.getLockid();
final long start = System.currentTimeMillis();
long duration = 0;
boolean timeout = false;
try {
if (state.get().equals(LockState.WAITING)) {
// The retry count is the usual "upper bound of retries"; the maximum number of attempts is
// actually `retries + 1`. For checking locks, the timeout is the real upper bound, so a large
// retry count is reasonable. However, with Integer.MAX_VALUE the `retries + 1` computation
// would overflow into Integer.MIN_VALUE. Hence the retry count is set conservatively to
// `Integer.MAX_VALUE - 100` so it doesn't hit any boundary issues.
Tasks.foreach(lockId).retry(Integer.MAX_VALUE - 100).exponentialBackoff(lockCheckMinWaitTime, lockCheckMaxWaitTime, lockAcquireTimeout, 1.5).throwFailureWhenFinished().onlyRetryOn(WaitingForLockException.class).run(id -> {
try {
// NOTE(review): the receiver/call in front of the lambda is truncated in this excerpt.
LockResponse response = -> client.checkLock(id));
LockState newState = response.getState();
// NOTE(review): `state` is never updated with `newState` in the visible code, so the checks
// after the loop would still see the initial state — confirm against the full source.
// Still waiting: throw so that the retry loop (onlyRetryOn above) polls again.
if (newState.equals(LockState.WAITING)) {
throw new WaitingForLockException("Waiting for lock.");
} catch (InterruptedException e) {
// Clear the interrupt status flag
// NOTE(review): the statement that clears the flag is not visible in this excerpt.
LOG.warn("Interrupted while waiting for lock.", e);
}, TException.class);
} catch (WaitingForLockException waitingForLockException) {
// Retries were exhausted while the lock was still WAITING: record how long we waited.
timeout = true;
duration = System.currentTimeMillis() - start;
} finally {
// NOTE(review): the body of this branch is not visible; presumably it releases the
// un-acquired lock — confirm.
if (!state.get().equals(LockState.ACQUIRED)) {
// timeout and do not have lock acquired
if (timeout && !state.get().equals(LockState.ACQUIRED)) {
throw new CommitFailedException("Timed out after %s ms waiting for lock on %s.%s", duration, database, tableName);
// Any other non-ACQUIRED end state is reported together with the state itself.
if (!state.get().equals(LockState.ACQUIRED)) {
throw new CommitFailedException("Could not acquire the lock on %s.%s, " + "lock request ended in state %s", database, tableName, state);
return lockId;