use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestInitiator method cleanEmptyAbortedTxns.
@Test
public void cleanEmptyAbortedTxns() throws Exception {
  // Verify that aborted transactions which have nothing left in txn_components are cleaned.
  // One aborted transaction that does have a txn_components entry is created first so we can
  // confirm it is not accidentally cleaned as well.
  final String dbName = "default";
  final String tblName = "ceat";
  newTable(dbName, tblName, false);

  // The aborted transaction that holds a lock component on the table.
  long abortedWithComponent = openTxn();
  LockComponent lockComp = new LockComponent(LockType.SHARED_WRITE, LockLevel.TABLE, dbName);
  lockComp.setTablename(tblName);
  lockComp.setOperationType(DataOperationType.UPDATE);
  List<LockComponent> lockComps = new ArrayList<>(1);
  lockComps.add(lockComp);
  LockRequest lockReq = new LockRequest(lockComps, "me", "localhost");
  lockReq.setTxnid(abortedWithComponent);
  txnHandler.lock(lockReq);
  txnHandler.abortTxn(new AbortTxnRequest(abortedWithComponent));

  // Open and immediately abort a batch that is 50 larger than the abort batch size.
  final int emptyAbortedCount = TxnStore.TIMED_OUT_TXN_ABORT_BATCH_SIZE + 50;
  conf.setIntVar(HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, emptyAbortedCount);
  OpenTxnsResponse opened = txnHandler.openTxns(new OpenTxnRequest(emptyAbortedCount, "user", "hostname"));
  txnHandler.abortTxns(new AbortTxnsRequest(opened.getTxn_ids()));

  // Before the initiator runs, every transaction created above is still reported.
  Assert.assertEquals(emptyAbortedCount + 1, txnHandler.getOpenTxns().getOpen_txnsSize());

  startInitiator();

  // Afterwards only one transaction is expected to remain.
  Assert.assertEquals(1, txnHandler.getOpenTxns().getOpen_txnsSize());
}
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestInitiator method compactPartitionHighDeltaPct.
@Test
public void compactPartitionHighDeltaPct() throws Exception {
  // Sets up a partition with one base and two deltas, commits one more update against it,
  // and expects the initiator to queue a single MAJOR compaction for that partition.
  final String dbName = "default";
  final String tblName = "cphdp";
  final String partName = "ds=today";

  Table table = newTable(dbName, tblName, true);
  Partition part = newPartition(table, "today");
  addBaseFile(table, part, 20L, 20);
  addDeltaFile(table, part, 21L, 22L, 2);
  addDeltaFile(table, part, 23L, 24L, 2);
  burnThroughTransactions(dbName, tblName, 23);

  // Open a transaction, lock the partition for UPDATE, allocate a write id and commit.
  long openedTxn = openTxn();
  LockComponent lockComp = new LockComponent(LockType.SHARED_WRITE, LockLevel.PARTITION, dbName);
  lockComp.setTablename(tblName);
  lockComp.setPartitionname(partName);
  lockComp.setOperationType(DataOperationType.UPDATE);
  List<LockComponent> lockComps = new ArrayList<>(1);
  lockComps.add(lockComp);
  LockRequest lockReq = new LockRequest(lockComps, "me", "localhost");
  lockReq.setTxnid(openedTxn);
  txnHandler.lock(lockReq);
  long allocatedWriteId = allocateWriteId(dbName, tblName, openedTxn);
  Assert.assertEquals(24, allocatedWriteId);
  txnHandler.commitTxn(new CommitTxnRequest(openedTxn));

  startInitiator();

  List<ShowCompactResponseElement> queued = txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
  Assert.assertEquals(1, queued.size());
  ShowCompactResponseElement request = queued.get(0);
  Assert.assertEquals("initiated", request.getState());
  Assert.assertEquals(tblName, request.getTablename());
  Assert.assertEquals(partName, request.getPartitionname());
  Assert.assertEquals(CompactionType.MAJOR, request.getType());
}
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestInitiator method compactPartitionTooManyDeltas.
@Test
public void compactPartitionTooManyDeltas() throws Exception {
  // Sets up a partition with one base and eleven deltas, commits one more update against
  // it, and expects the initiator to queue a single MINOR compaction for that partition.
  final String dbName = "default";
  final String tblName = "cptmd";
  final String partName = "ds=today";

  Table table = newTable(dbName, tblName, true);
  Partition part = newPartition(table, "today");
  addBaseFile(table, part, 200L, 200);
  // Eleven deltas, each covering a single id: 201 through 211.
  for (long deltaId = 201L; deltaId <= 211L; deltaId++) {
    addDeltaFile(table, part, deltaId, deltaId, 1);
  }
  burnThroughTransactions(dbName, tblName, 210);

  // Open a transaction, lock the partition for UPDATE, allocate a write id and commit.
  long openedTxn = openTxn();
  LockComponent lockComp = new LockComponent(LockType.SHARED_WRITE, LockLevel.PARTITION, dbName);
  lockComp.setTablename(tblName);
  lockComp.setPartitionname(partName);
  lockComp.setOperationType(DataOperationType.UPDATE);
  List<LockComponent> lockComps = new ArrayList<>(1);
  lockComps.add(lockComp);
  LockRequest lockReq = new LockRequest(lockComps, "me", "localhost");
  lockReq.setTxnid(openedTxn);
  txnHandler.lock(lockReq);
  long allocatedWriteId = allocateWriteId(dbName, tblName, openedTxn);
  Assert.assertEquals(211, allocatedWriteId);
  txnHandler.commitTxn(new CommitTxnRequest(openedTxn));

  startInitiator();

  List<ShowCompactResponseElement> queued = txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
  Assert.assertEquals(1, queued.size());
  ShowCompactResponseElement request = queued.get(0);
  Assert.assertEquals("initiated", request.getState());
  Assert.assertEquals(tblName, request.getTablename());
  Assert.assertEquals(partName, request.getPartitionname());
  Assert.assertEquals(CompactionType.MINOR, request.getType());
}
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TestInitiator method noCompactTableDynamicPartitioning.
@Test
public void noCompactTableDynamicPartitioning() throws Exception {
  // Same file layout as the high-delta-percentage case, but the update lock is taken at
  // TABLE level with no partition name; the initiator is expected to queue nothing.
  final String dbName = "default";
  final String tblName = "nctdp";

  Table table = newTable(dbName, tblName, true);
  Partition part = newPartition(table, "today");
  addBaseFile(table, part, 20L, 20);
  addDeltaFile(table, part, 21L, 22L, 2);
  addDeltaFile(table, part, 23L, 24L, 2);
  burnThroughTransactions(dbName, tblName, 23);

  // Open a transaction, lock the whole table for UPDATE, allocate a write id and commit.
  long openedTxn = openTxn();
  LockComponent lockComp = new LockComponent(LockType.SHARED_WRITE, LockLevel.TABLE, dbName);
  lockComp.setTablename(tblName);
  lockComp.setOperationType(DataOperationType.UPDATE);
  List<LockComponent> lockComps = new ArrayList<>(1);
  lockComps.add(lockComp);
  LockRequest lockReq = new LockRequest(lockComps, "me", "localhost");
  lockReq.setTxnid(openedTxn);
  txnHandler.lock(lockReq);
  long allocatedWriteId = allocateWriteId(dbName, tblName, openedTxn);
  Assert.assertEquals(24, allocatedWriteId);
  txnHandler.commitTxn(new CommitTxnRequest(openedTxn));

  startInitiator();

  List<ShowCompactResponseElement> queued = txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
  Assert.assertEquals(0, queued.size());
}
use of org.apache.hadoop.hive.metastore.api.LockResponse in project hive by apache.
the class TxnHandler method checkLock.
/**
* Decides whether all locks grouped under {@code extLockId} can be granted, and either marks
* them acquired or leaves them waiting.
*
* Lock acquisition is meant to be fair, so every lock can only block on some lock with smaller
* hl_lock_ext_id by only checking earlier locks.
*
* For any given SQL statement all locks required by it are grouped under single extLockId and are
* granted all at once or all locks wait.
*
* This is expected to run at READ_COMMITTED.
*
* If there is a concurrent commitTxn/rollbackTxn, those can only remove rows from HIVE_LOCKS.
* If they happen to be for the same txnid, there will be a WW conflict (in MS DB), if different txnid,
* checkLock() will in the worst case keep locks in Waiting state a little longer.
*
* Every return path of this method, and the TxnAbortedException path, calls
* {@code dbConn.commit()} first.
*
* @param dbConn connection on which HIVE_LOCKS (and, potentially, WRITE_SET) are queried and
*        updated; this method sets a savepoint on it and commits on it
* @param extLockId external lock id whose locks are to be checked
* @return a response carrying {@code extLockId} and a state of either
*         {@link LockState#WAITING} (some earlier lock blocks one of ours) or
*         {@link LockState#ACQUIRED} (nothing blocked, all locks were acquired)
* @throws TxnAbortedException if a matching WRITE_SET row is found and the requesting txn is
*         aborted (only reachable when the write set is non-empty, see the note on
*         {@code isPartOfDynamicPartitionInsert} below)
* @throws IllegalStateException if a write lock has no txnid, or aborting the txn did not
*         affect exactly one txn
* @throws SQLException on JDBC failures
* @throws NoSuchLockException declared; not thrown directly in this body, presumably propagated
*         from helpers
* @throws NoSuchTxnException declared; not thrown directly in this body, presumably propagated
*         from helpers
* @throws MetaException declared; not thrown directly in this body, presumably propagated
*         from helpers
*/
@RetrySemantics.SafeToRetry("See @SafeToRetry")
private LockResponse checkLock(Connection dbConn, long extLockId) throws NoSuchLockException, NoSuchTxnException, TxnAbortedException, MetaException, SQLException {
TxnStore.MutexAPI.LockHandle handle = null;
Statement stmt = null;
ResultSet rs = null;
LockResponse response = new LockResponse();
/**
* todo: Longer term we should pass this from client somehow - this would be an optimization; once
* that is in place make sure to build and test "writeSet" below using OperationType not LockType
* With Static Partitions we assume that the query modifies exactly the partitions it locked. (not entirely
* realistic since Update/Delete may have some predicate that filters out all records out of
* some partition(s), but plausible). For DP, we acquire locks very wide (all known partitions),
* but for most queries only a fraction will actually be updated. #addDynamicPartitions() tells
* us exactly which ones were written to. Thus using this trick to kill a query early for
* DP queries may be too restrictive.
*
* NOTE: because this flag is hard-coded to true, nothing is ever added to 'writeSet' below and
* the WRITE_SET early-abort branch is currently never executed.
*/
boolean isPartOfDynamicPartitionInsert = true;
try {
/**
* checkLock() must be mutex'd against any other checkLock to make sure 2 conflicting locks
* are not granted by parallel checkLock() calls.
*/
handle = getMutexAPI().acquireLock(MUTEX_KEY.CheckLock.name());
// the locks belonging to extLockId, i.e. the ones being acquired now
List<LockInfo> locksBeingChecked = getLockInfoFromLockId(dbConn, extLockId);
response.setLockid(extLockId);
LOG.debug("checkLock(): Setting savepoint. extLockId=" + JavaUtils.lockIdToString(extLockId));
// The savepoint is handed to wait() in the WAIT case further down.
Savepoint save = dbConn.setSavepoint();
// Start of the query that fetches potentially conflicting rows from HIVE_LOCKS; the db, table
// and partition filters are appended below.
StringBuilder query = new StringBuilder("select hl_lock_ext_id, " + "hl_lock_int_id, hl_db, hl_table, hl_partition, hl_lock_state, " + "hl_lock_type, hl_txnid from HIVE_LOCKS where hl_db in (");
// Reused scratch set: first holds distinct db names, then table names, then partition names.
Set<String> strings = new HashSet<>(locksBeingChecked.size());
// This the set of entities that the statement represented by extLockId wants to update
List<LockInfo> writeSet = new ArrayList<>();
for (LockInfo info : locksBeingChecked) {
strings.add(info.db);
if (!isPartOfDynamicPartitionInsert && info.type == LockType.SHARED_WRITE) {
writeSet.add(info);
}
}
if (!writeSet.isEmpty()) {
if (writeSet.get(0).txnId == 0) {
// Write operation always start a txn
throw new IllegalStateException("Found Write lock for " + JavaUtils.lockIdToString(extLockId) + " but no txnid");
}
stmt = dbConn.createStatement();
// Column list and predicate only; the text is passed through sqlGenerator.addLimitClause()
// below before being executed.
StringBuilder sb = new StringBuilder(" ws_database, ws_table, ws_partition, " + "ws_txnid, ws_commit_id " + "from WRITE_SET where ws_commit_id >= " + writeSet.get(0).txnId + // see commitTxn() for more info on this inequality
" and (");
for (LockInfo info : writeSet) {
sb.append("(ws_database = ").append(quoteString(info.db)).append(" and ws_table = ").append(quoteString(info.table)).append(" and ws_partition ").append(info.partition == null ? "is null" : "= " + quoteString(info.partition)).append(") or ");
}
// nuke trailing " or "
sb.setLength(sb.length() - 4);
sb.append(")");
// 1 row is sufficient to know we have to kill the query
rs = stmt.executeQuery(sqlGenerator.addLimitClause(1, sb.toString()));
if (rs.next()) {
/**
* if here, it means we found an already committed txn which overlaps with the current one and
* it updated the same resource the current txn wants to update. By First-committer-wins
* rule, current txn will not be allowed to commit so may as well kill it now; This is just an
* optimization to prevent wasting cluster resources to run a query which is known to be DOA.
* {@link #commitTxn(CommitTxnRequest)} has the primary responsibility to ensure this.
* checkLock() runs at READ_COMMITTED so you could have another (Hive) txn running commitTxn()
* in parallel and thus writing to WRITE_SET. commitTxn() logic is properly mutexed to ensure
* that we don't "miss" any WW conflicts. We could've mutexed the checkLock() and commitTxn()
* as well but this reduces concurrency for very little gain.
* Note that update/delete (which runs as dynamic partition insert) acquires a lock on the table,
* but WRITE_SET has entries for actual partitions updated. Thus this optimization will "miss"
* the WW conflict but it will be caught in commitTxn() where actual partitions written are known.
* This is OK since we want 2 concurrent updates that update different sets of partitions to both commit.
*/
// Build "db/table[/partition]" for the message from the first three selected columns.
String resourceName = rs.getString(1) + '/' + rs.getString(2);
String partName = rs.getString(3);
if (partName != null) {
resourceName += '/' + partName;
}
String msg = "Aborting " + JavaUtils.txnIdToString(writeSet.get(0).txnId) + " since a concurrent committed transaction [" + JavaUtils.txnIdToString(rs.getLong(4)) + "," + rs.getLong(5) + "] has already updated resource '" + resourceName + "'";
LOG.info(msg);
// Exactly one txn is expected to be aborted; anything else is treated as an internal error.
if (abortTxns(dbConn, Collections.singletonList(writeSet.get(0).txnId), true) != 1) {
throw new IllegalStateException(msg + " FAILED!");
}
// Commit the abort before reporting it to the caller.
dbConn.commit();
throw new TxnAbortedException(msg);
}
close(rs, stmt, null);
}
// Append the comma-separated, single-quoted db names to the "hl_db in (" clause.
// NOTE(review): names are wrapped in single quotes by hand here (no quoteString(), no
// escaping), unlike the WRITE_SET query above - confirm names can never contain a quote.
boolean first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append(")");
// If any of the table requests are null, then I need to pull all the
// table locks for this db.
boolean sawNull = false;
strings.clear();
for (LockInfo info : locksBeingChecked) {
if (info.table == null) {
sawNull = true;
break;
} else {
strings.add(info.table);
}
}
if (!sawNull) {
// Every requested lock names a table: restrict to those tables plus rows with a null table.
query.append(" and (hl_table is null or hl_table in(");
first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append("))");
// If any of the partition requests are null, then I need to pull all
// partition locks for this table.
sawNull = false;
strings.clear();
for (LockInfo info : locksBeingChecked) {
if (info.partition == null) {
sawNull = true;
break;
} else {
strings.add(info.partition);
}
}
if (!sawNull) {
// Every requested lock names a partition: restrict to those partitions plus rows with a
// null partition.
query.append(" and (hl_partition is null or hl_partition in(");
first = true;
for (String s : strings) {
if (first)
first = false;
else
query.append(", ");
query.append('\'');
query.append(s);
query.append('\'');
}
query.append("))");
}
}
// Fairness: only rows with a smaller external lock id than ours are fetched.
query.append(" and hl_lock_ext_id < ").append(extLockId);
LOG.debug("Going to execute query <" + query.toString() + ">");
stmt = dbConn.createStatement();
rs = stmt.executeQuery(query.toString());
// Rows are collected into a set ordered by LockInfoComparator.
SortedSet<LockInfo> lockSet = new TreeSet<LockInfo>(new LockInfoComparator());
while (rs.next()) {
lockSet.add(new LockInfo(rs));
}
// Turn the tree set into an array so we can move back and forth easily
// in it.
LockInfo[] locks = lockSet.toArray(new LockInfo[lockSet.size()]);
if (LOG.isTraceEnabled()) {
LOG.trace("Locks to check(full): ");
for (LockInfo info : locks) {
LOG.trace(" " + info);
}
}
for (LockInfo info : locksBeingChecked) {
// If this lock is already acquired there is nothing to decide for it;
// then just look at the other locks.
if (info.state == LockState.ACQUIRED) {
/**
*this is what makes this method @SafeToRetry
*/
continue;
}
// Walk the earlier locks from the end of the sorted array towards the front to decide
// whether we can acquire it or not.
for (int i = locks.length - 1; i >= 0; i--) {
// Check if we're operating on the same database, if not, move on
if (!info.db.equals(locks[i].db)) {
continue;
}
// check if they are operating on the same table, if not, move on.
// (a null table on either side is treated as overlapping)
if (info.table != null && locks[i].table != null && !info.table.equals(locks[i].table)) {
continue;
}
// check if they are operating on the same partition, if not, move on.
// (a null partition on either side is treated as overlapping)
if (info.partition != null && locks[i].partition != null && !info.partition.equals(locks[i].partition)) {
continue;
}
// We've found something that matches what we're trying to lock,
// so figure out if we can lock it too.
// jumpTable is indexed by (requested lock type, existing lock type, existing lock state).
LockAction lockAction = jumpTable.get(info.type).get(locks[i].type).get(locks[i].state);
LOG.debug("desired Lock: " + info + " checked Lock: " + locks[i] + " action: " + lockAction);
switch(lockAction) {
case WAIT:
if (!ignoreConflict(info, locks[i])) {
/*we acquire all locks for a given query atomically; if 1 blocks, all go into (remain) in
* Waiting state. wait() will undo any 'acquire()' which may have happened as part of
* this (metastore db) transaction and then we record which lock blocked the lock
* we were testing ('info').*/
wait(dbConn, save);
String sqlText = "update HIVE_LOCKS" + " set HL_BLOCKEDBY_EXT_ID=" + locks[i].extLockId + ", HL_BLOCKEDBY_INT_ID=" + locks[i].intLockId + " where HL_LOCK_EXT_ID=" + info.extLockId + " and HL_LOCK_INT_ID=" + info.intLockId;
LOG.debug("Executing sql: " + sqlText);
int updCnt = stmt.executeUpdate(sqlText);
// Exactly one HIVE_LOCKS row must match (ext id, int id); otherwise report an internal error.
if (updCnt != 1) {
shouldNeverHappen(info.txnId, info.extLockId, info.intLockId);
}
LOG.debug("Going to commit");
dbConn.commit();
response.setState(LockState.WAITING);
LOG.debug("Lock(" + info + ") waiting for Lock(" + locks[i] + ")");
return response;
}
// fall through to ACQUIRE
case ACQUIRE:
break;
case KEEP_LOOKING:
// 'continue' here advances the enclosing for-loop to the next earlier lock.
continue;
}
// Reached only for ACQUIRE (or a WAIT whose conflict is ignored): this lock is not blocked,
// so exit the loop and check next lock
break;
}
}
// if here, there were no locks that blocked any locks in 'locksBeingChecked' - acquire them all
acquire(dbConn, stmt, locksBeingChecked);
// We acquired all of the locks, so commit and return acquired.
LOG.debug("Going to commit");
dbConn.commit();
response.setState(LockState.ACQUIRED);
} finally {
// Always release JDBC resources and the CheckLock mutex, on success or failure.
close(rs, stmt, null);
if (handle != null) {
handle.releaseLocks();
}
}
return response;
}
Aggregations