Use of org.apache.hadoop.hive.metastore.api.TxnAbortedException in project hive by apache.
In class TxnHandler, the method checkLock:
/**
* Lock acquisition is meant to be fair, so a lock can only be blocked by a lock with a smaller
* hl_lock_ext_id; this is enforced by only checking earlier locks.
*
* For any given SQL statement, all locks required by it are grouped under a single extLockId and are
* either granted all at once or all left waiting.
*
* This is expected to run at READ_COMMITTED.
*
* If there is a concurrent commitTxn/rollbackTxn, those can only remove rows from HIVE_LOCKS.
* If they happen to be for the same txnid, there will be a WW conflict (in the metastore DB); if for a different
* txnid, checkLock() will in the worst case keep locks in Waiting state a little longer.
*/
@RetrySemantics.SafeToRetry("See @SafeToRetry")
private LockResponse checkLock(Connection dbConn, long extLockId) throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, MetaException, SQLException {
TxnStore.MutexAPI.LockHandle handle = null;
Statement stmt = null;
ResultSet rs = null;
LockResponse response = new LockResponse();
/**
* todo: Longer term we should pass this from client somehow - this would be an optimization; once
* that is in place make sure to build and test "writeSet" below using OperationType not LockType
* With Static Partitions we assume that the query modifies exactly the partitions it locked. (not entirely
* realistic since Update/Delete may have some predicate that filters all records out of
* some partition(s), but plausible). For DP, we acquire locks very wide (all known partitions),
* but for most queries only a fraction will actually be updated. #addDynamicPartitions() tells
* us exactly which ones were written to. Thus using this trick to kill a query early for
* DP queries may be too restrictive.
*/
boolean isPartOfDynamicPartitionInsert = true;
try {
/**
* checkLock() must be mutex'd against any other checkLock to make sure 2 conflicting locks
* are not granted by parallel checkLock() calls.
*/
handle = getMutexAPI().acquireLock(MUTEX_KEY.CheckLock.name());
List<LockInfo> locksBeingChecked = getLockInfoFromLockId(dbConn, extLockId); // being acquired now
response.setLockid(extLockId);
LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
Savepoint save = dbConn.setSavepoint();
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " + "hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " + "hl_lock_type, hl_txnid from HIVE_LOCKS where hl_db in (");
Set<String> strings = new HashSet<>(locksBeingChecked.size());
// This is the set of entities that the statement represented by extLockId wants to update
List<LockInfo> writeSet = new ArrayList<>();
for (LockInfo info : locksBeingChecked) {
strings.add(info.db);
if (!isPartOfDynamicPartitionInsert && info.type == LockType.SHARED_WRITE) {
writeSet.add(info);
}
}
if (!writeSet.isEmpty()) {
if (writeSet.get(0).txnId == 0) {
// Write operations always start a txn
throw new IllegalStateException("Found Write lock for " + JavaUtils.lockIdToString(extLockId) + " but no txnid");
}
stmt = dbConn.createStatement();
StringBuilder sb = new StringBuilder(" ws_database, ws_table, ws_partition, " + "ws_txnid, ws_commit_id " + "from WRITE_SET where ws_commit_id >= " + writeSet.get(0).txnId + // see commitTxn() for more info on this inequality
" and (");
for (LockInfo info : writeSet) {
sb.append("(ws_database = ").append(quoteString(info.db)).append(" and ws_table = ").append(quoteString(info.table)).append(" and ws_partition ").append(info.partition == null ? "is null" : "= " + quoteString(info.partition)).append(") or ");
}
// nuke trailing " or "
sb.setLength(sb.length() - 4);
sb.append(")");
// 1 row is sufficient to know we have to kill the query
rs = stmt.executeQuery(sqlGenerator.addLimitClause(1, sb.toString()));
if (rs.next()) {
/**
* if here, it means we found an already committed txn which overlaps with the current one and
* it updated the same resource the current txn wants to update. By First-committer-wins
* rule, current txn will not be allowed to commit so may as well kill it now; This is just an
* optimization to prevent wasting cluster resources to run a query which is known to be DOA.
* {@link #commitTxn(CommitTxnRequest)} has the primary responsibility to ensure this.
* checkLock() runs at READ_COMMITTED so you could have another (Hive) txn running commitTxn()
* in parallel and thus writing to WRITE_SET. commitTxn() logic is properly mutexed to ensure
* that we don't "miss" any WW conflicts. We could've mutexed the checkLock() and commitTxn()
* as well but this reduces concurrency for very little gain.
* Note that update/delete (which runs as dynamic partition insert) acquires a lock on the table,
* but WRITE_SET has entries for actual partitions updated. Thus this optimization will "miss"
* the WW conflict but it will be caught in commitTxn() where actual partitions written are known.
* This is OK since we want 2 concurrent updates that update different sets of partitions to both commit.
*/
String resourceName = rs.getString(1) + '/' + rs.getString(2);
String partName = rs.getString(3);
if (partName != null) {
resourceName += '/' + partName;
}
String msg = "Aborting " + JavaUtils.txnIdToString(writeSet.get(0).txnId) + " since a concurrent committed transaction [" + JavaUtils.txnIdToString(rs.getLong(4)) + "," + rs.getLong(5) + "] has already updated resource '" + resourceName + "'";
LOG.info(msg);
if (abortTxns(dbConn, Collections.singletonList(writeSet.get(0).txnId), true) != 1) {
throw new IllegalStateException(msg + " FAILED!");
}
dbConn.commit();
throw new TxnAbortedException(msg);
}
close(rs, stmt, null);
}
boolean first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append(")");
// If any of the table requests are null, then I need to pull all the
// table locks for this db.
boolean sawNull = false;
strings.clear();
for (LockInfo info : locksBeingChecked) {
if (info.table == null) {
sawNull = true;
break;
} else {
strings.add(info.table);
}
}
if (!sawNull) {
query.append(" and (hl_table is null or hl_table in(");
first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append("))");
// If any of the partition requests are null, then I need to pull all
// partition locks for this table.
sawNull = false;
strings.clear();
for (LockInfo info : locksBeingChecked) {
if (info.partition == null) {
sawNull = true;
break;
} else {
strings.add(info.partition);
}
}
if (!sawNull) {
query.append(" and (hl_partition is null or hl_partition in(");
first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append("))");
}
}
query.append(" and hl_lock_ext_id < ").append(extLockId);
LOG.debug("Going to execute query <" + query.toString() + ">");
stmt = dbConn.createStatement();
rs = stmt.executeQuery(query.toString());
SortedSet<LockInfo> lockSet = new TreeSet<LockInfo>(new LockInfoComparator());
while (rs.next()) {
lockSet.add(new LockInfo(rs));
}
// Turn the tree set into an array so we can move back and forth easily
// in it.
LockInfo[] locks = lockSet.toArray(new LockInfo[lockSet.size()]);
if (LOG.isTraceEnabled()) {
LOG.trace("Locks to check(full): ");
for (LockInfo info : locks) {
LOG.trace(" " + info);
}
}
for (LockInfo info : locksBeingChecked) {
// If this lock has already been marked acquired, then just look at the other locks.
if (info.state == LockState.ACQUIRED) {
/**
*this is what makes this method @SafeToRetry
*/
continue;
}
// Look at everything in front of this lock to see if it should block it or not.
for (int i = locks.length - 1; i >= 0; i--) {
// Check if we're operating on the same database, if not, move on
if (!info.db.equals(locks[i].db)) {
continue;
}
// check if they are operating on the same table, if not, move on.
if (info.table != null && locks[i].table != null && !info.table.equals(locks[i].table)) {
continue;
}
// check if they are operating on the same partition, if not, move on.
if (info.partition != null && locks[i].partition != null && !info.partition.equals(locks[i].partition)) {
continue;
}
// We've found something that matches what we're trying to lock,
// so figure out if we can lock it too.
LockAction lockAction = jumpTable.get(info.type).get(locks[i].type).get(locks[i].state);
LOG.debug("desired Lock: " + info + " checked Lock: " + locks[i] + " action: " + lockAction);
switch(lockAction) {
case WAIT:
if (!ignoreConflict(info, locks[i])) {
/* We acquire all locks for a given query atomically; if one blocks, all go into (or remain in)
* the Waiting state. wait() will undo any acquire() which may have happened as part of
* this (metastore db) transaction, and then we record which lock blocked the lock
* we were testing ('info'). */
wait(dbConn, save);
String sqlText = "update HIVE_LOCKS" + " set HL_BLOCKEDBY_EXT_ID=" + locks[i].extLockId + ", HL_BLOCKEDBY_INT_ID=" + locks[i].intLockId + " where HL_LOCK_EXT_ID=" + info.extLockId + " and HL_LOCK_INT_ID=" + info.intLockId;
LOG.debug("Executing sql: " + sqlText);
int updCnt = stmt.executeUpdate(sqlText);
if (updCnt != 1) {
shouldNeverHappen(info.txnId, info.extLockId, info.intLockId);
}
LOG.debug("Going to commit");
dbConn.commit();
response.setState(LockState.WAITING);
LOG.debug("Lock(" + info + ") waiting for Lock(" + locks[i] + ")");
return response;
}
// fall through to ACQUIRE
case ACQUIRE:
break;
case KEEP_LOOKING:
continue;
}
// this lock can be acquired, so exit the loop and check the next desired lock
break;
}
}
// if here, there were no locks that blocked any of the locks in 'locksBeingChecked' - acquire them all
acquire(dbConn, stmt, locksBeingChecked);
// We acquired all of the locks, so commit and return acquired.
LOG.debug("Going to commit");
dbConn.commit();
response.setState(LockState.ACQUIRED);
} finally {
close(rs, stmt, null);
if (handle != null) {
handle.releaseLocks();
}
}
return response;
}
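For context, the TxnAbortedException thrown above reaches callers through the metastore client's lock() call. The following is a minimal, hedged sketch (not Hive source; the user, database, and table names are illustrative, and an already configured HiveConf named hiveConf is assumed) of opening a transaction, requesting a SHARED_WRITE lock, and handling the abort:
IMetaStoreClient client = new HiveMetaStoreClient(hiveConf);
long txnId = client.openTxn("example_user");
LockRequest req = new LockRequestBuilder()
    .setTransactionId(txnId)
    .setUser("example_user")
    .addLockComponent(new LockComponentBuilder()
        .setDbName("default")          // illustrative database
        .setTableName("target_table")  // illustrative table
        .setSemiShared()               // SHARED_WRITE, the lock type the WRITE_SET check above applies to
        .build())
    .build();
try {
  LockResponse resp = client.lock(req); // the server side runs checkLock()
  // resp.getState() is either ACQUIRED or WAITING
} catch (TxnAbortedException e) {
  // checkLock() aborted the txn because a concurrent committed txn already updated the same resource
}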
Use of org.apache.hadoop.hive.metastore.api.TxnAbortedException in project hive by apache.
In class TestHeartbeatTimerTask, the method testRunHeartbeatFailsTxnAbortedException:
@Test
public void testRunHeartbeatFailsTxnAbortedException() throws Exception {
TxnAbortedException exception = new TxnAbortedException();
doThrow(exception).when(mockMetaStoreClient).heartbeat(TRANSACTION_ID, LOCK_ID);
task.run();
verify(mockListener).lockFailed(LOCK_ID, TRANSACTION_ID, Arrays.asList("DB.TABLE"), exception);
}
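The call the mock stands in for is IMetaStoreClient.heartbeat(long txnid, long lockid). A brief hedged sketch of the real call and the failure path this test simulates (client, txnId and lockId are assumed to already exist):
try {
  client.heartbeat(txnId, lockId); // keeps both the txn and the lock alive
} catch (TxnAbortedException e) {
  // the txn was aborted (e.g. reaped after missed heartbeats); the streaming layer
  // reports this via LockFailureListener.lockFailed(...), as verified above
}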
Use of org.apache.hadoop.hive.metastore.api.TxnAbortedException in project hive by apache.
In class TestTxnHandler, the method testHeartbeatTxnAborted:
@Test
public void testHeartbeatTxnAborted() throws Exception {
// Test that when a transaction is aborted, the heartbeat fails
openTxn();
txnHandler.abortTxn(new AbortTxnRequest(1));
HeartbeatRequest h = new HeartbeatRequest();
h.setTxnid(1);
try {
txnHandler.heartbeat(h);
fail("Told there was a txn, when it should have been aborted.");
} catch (TxnAbortedException e) {
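// expected: heartbeating an aborted txn must fail with TxnAbortedException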
}
}
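The openTxn() helper used above is defined in TestTxnHandler; an assumed minimal equivalent (not copied from the test class) that opens a single transaction through the same TxnStore API could look like this:
private long openTxn() throws MetaException {
  // open one txn as user "me" on host "localhost" and return its id
  OpenTxnsResponse resp = txnHandler.openTxns(new OpenTxnRequest(1, "me", "localhost"));
  return resp.getTxn_ids().get(0);
}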
Use of org.apache.hadoop.hive.metastore.api.TxnAbortedException in project hive by apache.
In class TestStreaming, the method testTimeOutReaper:
/**
* Check that transactions that have not heartbeated and have timed out get properly aborted.
* @throws Exception
*/
@Test
public void testTimeOutReaper() throws Exception {
HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, dbName2, tblName2, null);
DelimitedInputWriter writer = new DelimitedInputWriter(fieldNames2, ",", endPt);
StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
TransactionBatch txnBatch = connection.fetchTransactionBatch(5, writer);
txnBatch.beginNextTransaction();
conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
// ensure the txn times out
conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 1, TimeUnit.MILLISECONDS);
AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService();
houseKeeperService.setConf(conf);
houseKeeperService.run();
try {
// should fail because the TransactionBatch timed out
txnBatch.commit();
} catch (TransactionError e) {
Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
}
txnBatch.close();
txnBatch = connection.fetchTransactionBatch(10, writer);
txnBatch.beginNextTransaction();
txnBatch.commit();
txnBatch.beginNextTransaction();
houseKeeperService.run();
try {
// should fail because the TransactionBatch timed out
txnBatch.commit();
} catch (TransactionError e) {
Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException);
}
txnBatch.close();
connection.close();
}
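When commit() fails this way, a streaming client typically discards the timed-out batch and fetches a fresh one, much as the test does. A hedged sketch of that recovery pattern (the retry policy itself is an assumption, not part of the test):
try {
  txnBatch.commit();
} catch (TransactionError e) {
  if (e.getCause() instanceof TxnAbortedException) {
    txnBatch.close();                                        // give up on the aborted batch
    txnBatch = connection.fetchTransactionBatch(10, writer); // start a fresh batch
    txnBatch.beginNextTransaction();
    // re-write and commit the records that were lost with the aborted transaction
  }
}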
Use of org.apache.hadoop.hive.metastore.api.TxnAbortedException in project hive by apache.
In class TxnHandler, the method heartbeatTxnRange:
@Override
@RetrySemantics.SafeToRetry
public HeartbeatTxnRangeResponse heartbeatTxnRange(HeartbeatTxnRangeRequest rqst) throws MetaException {
try {
Connection dbConn = null;
Statement stmt = null;
HeartbeatTxnRangeResponse rsp = new HeartbeatTxnRangeResponse();
Set<Long> nosuch = new HashSet<>();
Set<Long> aborted = new HashSet<>();
rsp.setNosuch(nosuch);
rsp.setAborted(aborted);
try {
/**
* READ_COMMITTED is sufficient since {@link #heartbeatTxn(java.sql.Connection, long)}
* only has 1 update statement in it and
* we only update existing txns, i.e. nothing can add additional txns that this operation
* would care about (which would have required SERIALIZABLE)
*/
dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
/* do the fast path first (in one statement); if it doesn't work, roll back and do the long version */
stmt = dbConn.createStatement();
List<String> queries = new ArrayList<>();
int numTxnsToHeartbeat = (int) (rqst.getMax() - rqst.getMin() + 1);
List<Long> txnIds = new ArrayList<>(numTxnsToHeartbeat);
for (long txn = rqst.getMin(); txn <= rqst.getMax(); txn++) {
txnIds.add(txn);
}
TxnUtils.buildQueryWithINClause(conf, queries, new StringBuilder("update TXNS set txn_last_heartbeat = " + getDbTime(dbConn) + " where txn_state = " + quoteChar(TXN_OPEN) + " and "), new StringBuilder(""), txnIds, "txn_id", true, false);
int updateCnt = 0;
for (String query : queries) {
LOG.debug("Going to execute update <" + query + ">");
updateCnt += stmt.executeUpdate(query);
}
if (updateCnt == numTxnsToHeartbeat) {
// fast path worked, i.e. all txns we were asked to heartbeat were Open as expected
dbConn.commit();
return rsp;
}
// if here, do the slow path so that we can return info on txns which were not in the expected state
dbConn.rollback();
for (long txn = rqst.getMin(); txn <= rqst.getMax(); txn++) {
try {
heartbeatTxn(dbConn, txn);
} catch (NoSuchTxnException e) {
nosuch.add(txn);
} catch (TxnAbortedException e) {
aborted.add(txn);
}
}
return rsp;
} catch (SQLException e) {
LOG.debug("Going to rollback");
rollbackDBConn(dbConn);
checkRetryable(dbConn, e, "heartbeatTxnRange(" + rqst + ")");
throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e));
} finally {
close(null, stmt, dbConn);
}
} catch (RetryException e) {
return heartbeatTxnRange(rqst);
}
}
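On the caller side, this range heartbeat is exposed through IMetaStoreClient.heartbeatTxnRange(min, max). A brief hedged sketch of inspecting the response (the txn id bounds are illustrative):
HeartbeatTxnRangeResponse resp = client.heartbeatTxnRange(minTxnId, maxTxnId);
for (long txn : resp.getAborted()) {
  // these txns hit TxnAbortedException internally; work done under them must be redone
}
for (long txn : resp.getNosuch()) {
  // these txn ids no longer exist (already committed/aborted and cleaned up)
}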