Use of org.apache.hadoop.hive.metastore.api.TxnType in project hive by apache.
The class TxnHandler, method abortTxns.
@Override
@RetrySemantics.Idempotent
public void abortTxns(AbortTxnsRequest rqst) throws MetaException {
  List<Long> txnIds = rqst.getTxn_ids();
  try {
    Connection dbConn = null;
    Statement stmt = null;
    try {
      dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
      stmt = dbConn.createStatement();
      List<String> queries = new ArrayList<>();
      StringBuilder prefix = new StringBuilder("SELECT \"TXN_ID\", \"TXN_TYPE\" from \"TXNS\" where \"TXN_STATE\" = ")
          .append(TxnStatus.OPEN)
          .append(" and \"TXN_TYPE\" != ")
          .append(TxnType.READ_ONLY.getValue())
          .append(" and ");
      TxnUtils.buildQueryWithINClause(conf, queries, prefix, new StringBuilder(), txnIds, "\"TXN_ID\"", false, false);
      Map<Long, TxnType> nonReadOnlyTxns = new HashMap<>();
      for (String query : queries) {
        LOG.debug("Going to execute query<" + query + ">");
        try (ResultSet rs = stmt.executeQuery(sqlGenerator.addForUpdateClause(query))) {
          while (rs.next()) {
            TxnType txnType = TxnType.findByValue(rs.getInt(2));
            nonReadOnlyTxns.put(rs.getLong(1), txnType);
          }
        }
      }
      int numAborted = abortTxns(dbConn, txnIds, false, false);
      if (numAborted != txnIds.size()) {
        LOG.warn("Abort Transactions command only aborted " + numAborted + " out of " + txnIds.size()
            + " transactions. It's possible that the other " + (txnIds.size() - numAborted)
            + " transactions have been aborted or committed, or the transaction ids are invalid.");
      }
      if (transactionalListeners != null) {
        for (Long txnId : txnIds) {
          MetaStoreListenerNotifier.notifyEventWithDirectSql(transactionalListeners,
              EventMessage.EventType.ABORT_TXN,
              new AbortTxnEvent(txnId, nonReadOnlyTxns.getOrDefault(txnId, TxnType.READ_ONLY)),
              dbConn, sqlGenerator);
        }
      }
      LOG.debug("Going to commit");
      dbConn.commit();
    } catch (SQLException e) {
      LOG.debug("Going to rollback: ", e);
      rollbackDBConn(dbConn);
      checkRetryable(e, "abortTxns(" + rqst + ")");
      throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e));
    } finally {
      closeStmt(stmt);
      closeDbConn(dbConn);
    }
  } catch (RetryException e) {
    abortTxns(rqst);
  }
}
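
For orientation, here is a minimal caller-side sketch of how this endpoint is typically reached (a sketch, assuming a metastore reachable through a default HiveConf; IMetaStoreClient.abortTxns wraps the AbortTxnsRequest handled above):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

public class AbortTxnsExample {
  public static void main(String[] args) throws Exception {
    IMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    // The txn ids below are placeholders. Already-closed or invalid ids are
    // tolerated: TxnHandler.abortTxns only logs a warning when fewer rows
    // than requested end up aborted.
    List<Long> txnIds = Arrays.asList(101L, 102L, 103L);
    client.abortTxns(txnIds);
    client.close();
  }
}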
Use of org.apache.hadoop.hive.metastore.api.TxnType in project hive by apache.
The class TxnHandler, method openTxns.
private List<Long> openTxns(Connection dbConn, OpenTxnRequest rqst) throws SQLException, MetaException {
  int numTxns = rqst.getNum_txns();
  // Make sure the user has not requested an insane amount of txns.
  int maxTxns = MetastoreConf.getIntVar(conf, ConfVars.TXN_MAX_OPEN_BATCH);
  if (numTxns > maxTxns) {
    numTxns = maxTxns;
  }
  List<PreparedStatement> insertPreparedStmts = null;
  TxnType txnType = rqst.isSetTxn_type() ? rqst.getTxn_type() : TxnType.DEFAULT;
  boolean isReplayedReplTxn = txnType == TxnType.REPL_CREATED;
  boolean isHiveReplTxn = rqst.isSetReplPolicy() && txnType == TxnType.DEFAULT;
  try {
    if (isReplayedReplTxn) {
      assert rqst.isSetReplPolicy();
      List<Long> targetTxnIdList = getTargetTxnIdList(rqst.getReplPolicy(), rqst.getReplSrcTxnIds(), dbConn);
      if (!targetTxnIdList.isEmpty()) {
        if (targetTxnIdList.size() != rqst.getReplSrcTxnIds().size()) {
          LOG.warn("target txn id number " + targetTxnIdList.toString()
              + " is not matching with source txn id number " + rqst.getReplSrcTxnIds().toString());
        }
        LOG.info("Target transactions " + targetTxnIdList.toString() + " are present for repl policy :"
            + rqst.getReplPolicy() + " and Source transaction id : " + rqst.getReplSrcTxnIds().toString());
        return targetTxnIdList;
      }
    }
    long minOpenTxnId = 0;
    if (useMinHistoryLevel) {
      minOpenTxnId = getMinOpenTxnIdWaterMark(dbConn);
    }
    List<Long> txnIds = new ArrayList<>(numTxns);
    /*
     * getGeneratedKeys is not supported by every DBMS after executing a multi-line insert,
     * but it is supported by every DBMS we use for a single-line insert, even if the metadata
     * says otherwise. If getGeneratedKeys is not supported, we first insert a random batchId
     * into the TXN_META_INFO field, then the keys are selected back with that batchId.
     */
    boolean genKeySupport = dbProduct.supportsGetGeneratedKeys();
    genKeySupport = genKeySupport || (numTxns == 1);
    String insertQuery = String.format(TXNS_INSERT_QRY, getEpochFn(dbProduct), getEpochFn(dbProduct));
    LOG.debug("Going to execute insert <" + insertQuery + ">");
    try (PreparedStatement ps = dbConn.prepareStatement(insertQuery, new String[] { "TXN_ID" })) {
      String state = genKeySupport ? TxnStatus.OPEN.getSqlConst() : TXN_TMP_STATE;
      if (numTxns == 1) {
        ps.setString(1, state);
        ps.setString(2, rqst.getUser());
        ps.setString(3, rqst.getHostname());
        ps.setInt(4, txnType.getValue());
        txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, false));
      } else {
        for (int i = 0; i < numTxns; ++i) {
          ps.setString(1, state);
          ps.setString(2, rqst.getUser());
          ps.setString(3, rqst.getHostname());
          ps.setInt(4, txnType.getValue());
          ps.addBatch();
          if ((i + 1) % maxBatchSize == 0) {
            txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, true));
          }
        }
        if (numTxns % maxBatchSize != 0) {
          txnIds.addAll(executeTxnInsertBatchAndExtractGeneratedKeys(dbConn, genKeySupport, ps, true));
        }
      }
    }
    assert txnIds.size() == numTxns;
    addTxnToMinHistoryLevel(dbConn, txnIds, minOpenTxnId);
    if (isReplayedReplTxn) {
      List<String> rowsRepl = new ArrayList<>(numTxns);
      List<String> params = Collections.singletonList(rqst.getReplPolicy());
      List<List<String>> paramsList = new ArrayList<>(numTxns);
      for (int i = 0; i < numTxns; i++) {
        rowsRepl.add("?," + rqst.getReplSrcTxnIds().get(i) + "," + txnIds.get(i));
        paramsList.add(params);
      }
      insertPreparedStmts = sqlGenerator.createInsertValuesPreparedStmt(dbConn,
          "\"REPL_TXN_MAP\" (\"RTM_REPL_POLICY\", \"RTM_SRC_TXN_ID\", \"RTM_TARGET_TXN_ID\")",
          rowsRepl, paramsList);
      for (PreparedStatement pst : insertPreparedStmts) {
        pst.execute();
      }
    }
    if (transactionalListeners != null && !isHiveReplTxn) {
      MetaStoreListenerNotifier.notifyEventWithDirectSql(transactionalListeners,
          EventMessage.EventType.OPEN_TXN, new OpenTxnEvent(txnIds, txnType), dbConn, sqlGenerator);
    }
    return txnIds;
  } finally {
    if (insertPreparedStmts != null) {
      for (PreparedStatement pst : insertPreparedStmts) {
        pst.close();
      }
    }
  }
}
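
The insert loop above flushes a batch at every multiple of maxBatchSize and once more for the remainder. A self-contained sketch of that JDBC pattern (the helper name and single-column table are illustrative, not from the Hive codebase):

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public final class JdbcBatchInsert {
  // Mirrors openTxns' batching: addBatch() per row, executeBatch() at each
  // multiple of maxBatchSize, and a final executeBatch() for the remainder.
  static void insertAll(Connection conn, List<String> rows, int maxBatchSize) throws SQLException {
    try (PreparedStatement ps = conn.prepareStatement("INSERT INTO t (col) VALUES (?)")) {
      for (int i = 0; i < rows.size(); i++) {
        ps.setString(1, rows.get(i));
        ps.addBatch();
        if ((i + 1) % maxBatchSize == 0) {
          ps.executeBatch(); // flush a full batch
        }
      }
      if (rows.size() % maxBatchSize != 0) {
        ps.executeBatch(); // flush the final, partial batch
      }
    }
  }
}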
Use of org.apache.hadoop.hive.metastore.api.TxnType in project hive by apache.
The class TxnHandler, method abortTxn.
@Override
@RetrySemantics.Idempotent
public void abortTxn(AbortTxnRequest rqst) throws NoSuchTxnException, MetaException, TxnAbortedException {
  long txnid = rqst.getTxnid();
  long sourceTxnId = -1;
  boolean isReplayedReplTxn = TxnType.REPL_CREATED.equals(rqst.getTxn_type());
  boolean isHiveReplTxn = rqst.isSetReplPolicy() && TxnType.DEFAULT.equals(rqst.getTxn_type());
  try {
    Connection dbConn = null;
    Statement stmt = null;
    try {
      lockInternal();
      dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
      stmt = dbConn.createStatement();
      if (isReplayedReplTxn) {
        assert (rqst.isSetReplPolicy());
        sourceTxnId = rqst.getTxnid();
        List<Long> targetTxnIds = getTargetTxnIdList(rqst.getReplPolicy(), Collections.singletonList(sourceTxnId), dbConn);
        if (targetTxnIds.isEmpty()) {
          // Idempotent case where txn was already closed or abort txn event received without
          // corresponding open txn event.
          LOG.info("Target txn id is missing for source txn id : " + sourceTxnId + " and repl policy " + rqst.getReplPolicy());
          return;
        }
        assert targetTxnIds.size() == 1;
        txnid = targetTxnIds.get(0);
      }
      TxnType txnType = getOpenTxnTypeAndLock(stmt, txnid);
      if (txnType == null) {
        TxnStatus status = findTxnState(txnid, stmt);
        if (status == TxnStatus.ABORTED) {
          if (isReplayedReplTxn) {
            // in case of replication, idempotency is taken care of by getTargetTxnId
            LOG.warn("Invalid state ABORTED for transactions started using replication replay task");
            deleteReplTxnMapEntry(dbConn, sourceTxnId, rqst.getReplPolicy());
          }
          LOG.info("abortTxn(" + JavaUtils.txnIdToString(txnid) + ") requested by it is already " + TxnStatus.ABORTED);
          return;
        }
        raiseTxnUnexpectedState(status, txnid);
      }
      abortTxns(dbConn, Collections.singletonList(txnid), true, isReplayedReplTxn);
      if (isReplayedReplTxn) {
        deleteReplTxnMapEntry(dbConn, sourceTxnId, rqst.getReplPolicy());
      }
      if (transactionalListeners != null && !isHiveReplTxn) {
        MetaStoreListenerNotifier.notifyEventWithDirectSql(transactionalListeners,
            EventMessage.EventType.ABORT_TXN, new AbortTxnEvent(txnid, txnType), dbConn, sqlGenerator);
      }
      LOG.debug("Going to commit");
      dbConn.commit();
    } catch (SQLException e) {
      LOG.debug("Going to rollback: ", e);
      rollbackDBConn(dbConn);
      checkRetryable(e, "abortTxn(" + rqst + ")");
      throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e));
    } finally {
      close(null, stmt, dbConn);
      unlockInternal();
    }
  } catch (RetryException e) {
    abortTxn(rqst);
  }
}
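
A typical single-transaction client flow that lands in this method (a sketch, assuming a default HiveConf; IMetaStoreClient.rollbackTxn sends the AbortTxnRequest processed above):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

public class AbortSingleTxnExample {
  public static void main(String[] args) throws Exception {
    IMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    long txnId = client.openTxn("hive-user"); // opens with TxnType.DEFAULT
    try {
      // ... transactional work would go here; this example always aborts ...
    } finally {
      client.rollbackTxn(txnId); // drives TxnHandler.abortTxn(AbortTxnRequest)
    }
    client.close();
  }
}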
Use of org.apache.hadoop.hive.metastore.api.TxnType in project hive by apache.
The class TxnHandler, method commitTxn.
/**
 * Concurrency/isolation notes:
 * This is mutexed with {@link #openTxns(OpenTxnRequest)} and other {@link #commitTxn(CommitTxnRequest)}
 * operations using select4update on NEXT_TXN_ID. Also, mutexes on the TXNS table for specific txnid:X;
 * see more notes below.
 * In order to prevent lost updates, we need to determine if any 2 transactions overlap. Each txn
 * is viewed as an interval [M,N]. M is the txnid and N is taken from the same NEXT_TXN_ID sequence
 * so that we can compare the commit time of txn T with the start time of txn S. This sequence can be
 * thought of as a logical time counter. If S.commitTime < T.startTime, T and S do NOT overlap.
 *
 * Motivating example:
 * Suppose we have multi-statement transactions T and S, both of which are attempting x = x + 1.
 * In order to prevent the lost update problem, the non-overlapping txns must lock in the snapshot
 * that they read appropriately. In particular, if txns do not overlap, then one follows the other
 * (assuming they write the same entity), and thus the 2nd must see changes of the 1st. We ensure
 * this by locking in the snapshot after the
 * {@link #openTxns(OpenTxnRequest)} call is made (see org.apache.hadoop.hive.ql.Driver.acquireLocksAndOpenTxn)
 * and mutexing openTxn() with commit(). In other words, once S.commit() starts we must ensure
 * that txn T, which will be considered a later txn, locks in a snapshot that includes the result
 * of S's commit (assuming no other txns).
 * As a counterexample, suppose we have S[3,3] and T[4,4] (commitId=txnid means no other transactions
 * were running in parallel). If T and S both locked in the same snapshot (for example the commit of
 * txnid:2, which is possible if commitTxn() and openTxns() are not mutexed),
 * 'x' would be updated to the same value by both, i.e. a lost update.
 */
@Override
@RetrySemantics.Idempotent("No-op if already committed")
public void commitTxn(CommitTxnRequest rqst) throws NoSuchTxnException, TxnAbortedException, MetaException {
  char isUpdateDelete = 'N';
  long txnid = rqst.getTxnid();
  long sourceTxnId = -1;
  boolean isReplayedReplTxn = TxnType.REPL_CREATED.equals(rqst.getTxn_type());
  boolean isHiveReplTxn = rqst.isSetReplPolicy() && TxnType.DEFAULT.equals(rqst.getTxn_type());
  try {
    Connection dbConn = null;
    Statement stmt = null;
    Long commitId = null;
    try {
      lockInternal();
      dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
      stmt = dbConn.createStatement();
      if (rqst.isSetReplLastIdInfo()) {
        updateReplId(dbConn, rqst.getReplLastIdInfo());
      }
      if (isReplayedReplTxn) {
        assert (rqst.isSetReplPolicy());
        sourceTxnId = rqst.getTxnid();
        List<Long> targetTxnIds = getTargetTxnIdList(rqst.getReplPolicy(), Collections.singletonList(sourceTxnId), dbConn);
        if (targetTxnIds.isEmpty()) {
          // Idempotent case where txn was already closed or commit txn event received without
          // corresponding open txn event.
          LOG.info("Target txn id is missing for source txn id : " + sourceTxnId + " and repl policy " + rqst.getReplPolicy());
          return;
        }
        assert targetTxnIds.size() == 1;
        txnid = targetTxnIds.get(0);
      }
      /**
       * Runs at READ_COMMITTED with S4U on the TXNS row for "txnid". S4U ensures that no other
       * operation can change this txn (such as acquiring locks). lock() and commitTxn()
       * should not normally run concurrently (for the same txn), but could due to bugs in the
       * client, which could then corrupt internal transaction manager state. Also competes with abortTxn().
       */
      TxnType txnType = getOpenTxnTypeAndLock(stmt, txnid);
      if (txnType == null) {
        // if here, txn was not found (in expected state)
        TxnStatus actualTxnStatus = findTxnState(txnid, stmt);
        if (actualTxnStatus == TxnStatus.COMMITTED) {
          if (isReplayedReplTxn) {
            // in case of replication, idempotency is taken care of by getTargetTxnId
            LOG.warn("Invalid state COMMITTED for transactions started using replication replay task");
          }
          /**
           * This makes the operation idempotent
           * (assume that this is most likely due to retry logic)
           */
          LOG.info("Nth commitTxn(" + JavaUtils.txnIdToString(txnid) + ") msg");
          return;
        }
        raiseTxnUnexpectedState(actualTxnStatus, txnid);
      }
      String conflictSQLSuffix = "FROM \"TXN_COMPONENTS\" WHERE \"TC_TXNID\"=" + txnid
          + " AND \"TC_OPERATION_TYPE\" IN (" + OperationType.UPDATE + "," + OperationType.DELETE + ")";
      long tempCommitId = generateTemporaryId();
      if (txnType == TxnType.SOFT_DELETE || txnType == TxnType.COMPACTION) {
        acquireTxnLock(stmt, false);
        commitId = getHighWaterMark(stmt);
      } else if (txnType != TxnType.READ_ONLY && !isReplayedReplTxn) {
        String writeSetInsertSql = "INSERT INTO \"WRITE_SET\" (\"WS_DATABASE\", \"WS_TABLE\", \"WS_PARTITION\","
            + " \"WS_TXNID\", \"WS_COMMIT_ID\", \"WS_OPERATION_TYPE\")"
            + " SELECT DISTINCT \"TC_DATABASE\", \"TC_TABLE\", \"TC_PARTITION\", \"TC_TXNID\", "
            + tempCommitId + ", \"TC_OPERATION_TYPE\" ";
        if (isUpdateOrDelete(stmt, conflictSQLSuffix)) {
          isUpdateDelete = 'Y';
          // if here, the currently committing txn performed an update/delete and we should check for a WW conflict
          /**
           * "select distinct" is used below because
           * 1. once we get to multi-statement txns, we only care to record that something was updated once
           * 2. if {@link #addDynamicPartitions(AddDynamicPartitions)} is retried by the caller it may create
           * duplicate entries in TXN_COMPONENTS,
           * but we want to add a PK on WRITE_SET, which won't have unique rows w/o this distinct,
           * even if it includes all of its columns.
           *
           * First insert into WRITE_SET using a temporary commitId, which will be updated in a separate call,
           * see: {@link #updateWSCommitIdAndCleanUpMetadata(Statement, long, TxnType, Long, long)}.
           * This should decrease the scope of the S4U lock on the NEXT_TXN_ID table.
           */
          Savepoint undoWriteSetForCurrentTxn = dbConn.setSavepoint();
          stmt.executeUpdate(writeSetInsertSql + (useMinHistoryLevel ? conflictSQLSuffix
              : "FROM \"TXN_COMPONENTS\" WHERE \"TC_TXNID\"=" + txnid
                  + " AND \"TC_OPERATION_TYPE\" <> " + OperationType.COMPACT));
          /**
           * This S4U will mutex with other commitTxn() and openTxns().
           * -1 below makes txn intervals look like [3,3] [4,4] if all txns are serial.
           * Note: it's possible for several txns to have the same commit id. Suppose 3 txns start
           * at the same time and no new txns start until all 3 commit.
           * We could've incremented the sequence for commitId as well, but it doesn't add anything functionally.
           */
          acquireTxnLock(stmt, false);
          commitId = getHighWaterMark(stmt);
          if (!rqst.isExclWriteEnabled()) {
            /**
             * See if there are any overlapping txns that wrote the same element, i.e. have a conflict.
             * Since the entire commit operation is mutexed wrt other start/commit ops,
             * committed.ws_commit_id <= current.ws_commit_id for all txns;
             * thus if committed.ws_commit_id < current.ws_txnid, the transactions do NOT overlap.
             * For example, [17,20] is committed, [6,80] is being committed right now - these overlap;
             * [17,20] committed and [21,21] committing now - these do not overlap;
             * [17,18] committed and [18,19] committing now - these overlap (here 18 started while 17 was still running).
             */
            try (ResultSet rs = checkForWriteConflict(stmt, txnid)) {
              if (rs.next()) {
                // found a conflict, so let's abort the txn
                String committedTxn = "[" + JavaUtils.txnIdToString(rs.getLong(1)) + "," + rs.getLong(2) + "]";
                StringBuilder resource = new StringBuilder(rs.getString(3)).append("/").append(rs.getString(4));
                String partitionName = rs.getString(5);
                if (partitionName != null) {
                  resource.append('/').append(partitionName);
                }
                String msg = "Aborting [" + JavaUtils.txnIdToString(txnid) + "," + commitId + "]"
                    + " due to a write conflict on " + resource + " committed by " + committedTxn
                    + " " + rs.getString(7) + "/" + rs.getString(8);
                // remove WRITE_SET info for current txn since it's about to abort
                dbConn.rollback(undoWriteSetForCurrentTxn);
                LOG.info(msg);
                // todo: should make abortTxns() write something into TXNS.TXN_META_INFO about this
                if (abortTxns(dbConn, Collections.singletonList(txnid), false, isReplayedReplTxn) != 1) {
                  throw new IllegalStateException(msg + " FAILED!");
                }
                dbConn.commit();
                throw new TxnAbortedException(msg);
              }
            }
          }
        } else if (!useMinHistoryLevel) {
          stmt.executeUpdate(writeSetInsertSql + "FROM \"TXN_COMPONENTS\" WHERE \"TC_TXNID\"=" + txnid
              + " AND \"TC_OPERATION_TYPE\" <> " + OperationType.COMPACT);
          commitId = getHighWaterMark(stmt);
        }
      } else {
        /*
         * The current txn didn't update/delete anything (it may have inserted), so just proceed with the commit.
         *
         * We only care about the commit id for write txns, so for RO (when supported) txns we don't
         * have to mutex on NEXT_TXN_ID.
         * Consider: if an RO txn comes after a W txn, then RO's openTxns() will be mutexed with W's
         * commitTxn() because both do S4U on NEXT_TXN_ID, and thus RO will see the result of the W txn.
         * If RO < W, then there is no reads-from relationship.
         * In the replication flow we don't expect any write-write conflict, as it should have been handled at the source.
         */
        assert true;
      }
      if (txnType != TxnType.READ_ONLY && !isReplayedReplTxn) {
        moveTxnComponentsToCompleted(stmt, txnid, isUpdateDelete);
      } else if (isReplayedReplTxn) {
        if (rqst.isSetWriteEventInfos()) {
          String sql = String.format(COMPL_TXN_COMPONENTS_INSERT_QUERY, txnid, quoteChar(isUpdateDelete));
          try (PreparedStatement pstmt = dbConn.prepareStatement(sql)) {
            int insertCounter = 0;
            for (WriteEventInfo writeEventInfo : rqst.getWriteEventInfos()) {
              pstmt.setString(1, writeEventInfo.getDatabase());
              pstmt.setString(2, writeEventInfo.getTable());
              pstmt.setString(3, writeEventInfo.getPartition());
              pstmt.setLong(4, writeEventInfo.getWriteId());
              pstmt.addBatch();
              insertCounter++;
              if (insertCounter % maxBatchSize == 0) {
                LOG.debug("Executing a batch of <" + sql + "> queries. Batch size: " + maxBatchSize);
                pstmt.executeBatch();
              }
            }
            if (insertCounter % maxBatchSize != 0) {
              LOG.debug("Executing a batch of <" + sql + "> queries. Batch size: " + insertCounter % maxBatchSize);
              pstmt.executeBatch();
            }
          }
        }
        deleteReplTxnMapEntry(dbConn, sourceTxnId, rqst.getReplPolicy());
      }
      updateWSCommitIdAndCleanUpMetadata(stmt, txnid, txnType, commitId, tempCommitId);
      removeTxnsFromMinHistoryLevel(dbConn, ImmutableList.of(txnid));
      if (rqst.isSetKeyValue()) {
        updateKeyValueAssociatedWithTxn(rqst, stmt);
      }
      if (!isHiveReplTxn) {
        createCommitNotificationEvent(dbConn, txnid, txnType);
      }
      LOG.debug("Going to commit");
      dbConn.commit();
      if (MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON)) {
        Metrics.getOrCreateCounter(MetricsConstants.TOTAL_NUM_COMMITTED_TXNS).inc();
      }
    } catch (SQLException e) {
      LOG.debug("Going to rollback: ", e);
      rollbackDBConn(dbConn);
      checkRetryable(e, "commitTxn(" + rqst + ")");
      throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e));
    } finally {
      close(null, stmt, dbConn);
      unlockInternal();
    }
  } catch (RetryException e) {
    commitTxn(rqst);
  }
}
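
The interval reasoning in the Javadoc above reduces to a one-line predicate; here is an illustrative, self-contained rendering of it (the class and method names are invented for the example, not part of Hive):

final class TxnInterval {
  final long startId;  // txn id, drawn from NEXT_TXN_ID at open
  final long commitId; // drawn from the same logical counter at commit

  TxnInterval(long startId, long commitId) {
    this.startId = startId;
    this.commitId = commitId;
  }

  // A committed txn conflicts with this committing txn only if it committed
  // at or after this txn started: if committed.commitId < this.startId the
  // two do not overlap, matching the rule in the Javadoc.
  boolean overlaps(TxnInterval committed) {
    return committed.commitId >= this.startId;
  }

  public static void main(String[] args) {
    TxnInterval committing = new TxnInterval(18, 19);
    System.out.println(committing.overlaps(new TxnInterval(17, 20))); // true: 18 started while [17,20] ran
    TxnInterval committing2 = new TxnInterval(21, 21);
    System.out.println(committing2.overlaps(new TxnInterval(17, 20))); // false: 20 < 21, no overlap
  }
}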
Use of org.apache.hadoop.hive.metastore.api.TxnType in project hive by apache.
The class TxnHandler, method openTxns.
/**
 * Retry-by-caller note:
 * Worst case, it will leave an open txn which will time out.
 */
@Override
@RetrySemantics.Idempotent
public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
  if (!tooManyOpenTxns && numOpenTxns.get() >= maxOpenTxns) {
    tooManyOpenTxns = true;
  }
  if (tooManyOpenTxns) {
    if (numOpenTxns.get() < maxOpenTxns * 0.9) {
      tooManyOpenTxns = false;
    } else {
      LOG.warn("Maximum allowed number of open transactions (" + maxOpenTxns + ") has been "
          + "reached. Current number of open transactions: " + numOpenTxns);
      throw new MetaException("Maximum allowed number of open transactions has been reached. "
          + "See hive.max.open.txns.");
    }
  }
  int numTxns = rqst.getNum_txns();
  if (numTxns <= 0) {
    throw new MetaException("Invalid input for number of txns: " + numTxns);
  }
  try {
    Connection dbConn = null;
    Statement stmt = null;
    try {
      /*
       * To make {@link #getOpenTxns()}/{@link #getOpenTxnsInfo()} work correctly, this operation must ensure
       * that, looking at the TXNS table, every open transaction can be identified below a given High Water Mark.
       * One way to do it would be to serialize the openTxns call with an S4U lock, but that would cause
       * performance degradation under high transaction load.
       * To enable parallel openTxn calls, we define a time period (TXN_OPENTXN_TIMEOUT), consider every
       * transaction missing from the TXNS table in that period open, and prevent opening transactions outside
       * the period.
       * Example: At t[0] there is one open transaction in the TXNS table, T[1].
       * T[2] acquires the next sequence at t[1] but only commits into the TXNS table at t[10].
       * T[3] acquires its sequence at t[2], and commits into the TXNS table at t[3].
       * Then T[3] calculates its snapshot at t[4] and puts T[1] and also T[2] in the snapshot's
       * open transaction list: T[1] because it is presented as open in TXNS,
       * T[2] because it is a missing sequence.
       *
       * In the current design, there can be several metastore instances running in a given Warehouse.
       * This makes ideas like reserving a range of IDs to save trips to the DB impossible. For example,
       * a client may go to MS1 and start a transaction with ID 500 to update a particular row.
       * Now the same client will start another transaction, except it ends up on MS2 and may get
       * transaction ID 400 and update the same row. Now the merge that happens to materialize the snapshot
       * on read will think the version of the row from transaction ID 500 is the latest one.
       *
       * Longer term, we can consider running Active-Passive MS (at least wrt ACID operations). This
       * setup could support a write-through cache for added performance.
       */
      dbConn = getDbConn(Connection.TRANSACTION_READ_COMMITTED);
      stmt = dbConn.createStatement();
      /*
       * openTxn and commitTxn must be mutexed when committing a non-read-only transaction.
       * This is achieved by requesting a shared table lock here, and an exclusive one at commit.
       * Since table locks work in Derby, we don't need the lockInternal call here.
       * Example: Suppose we have two transactions with an update like x = x + 1.
       * We have T[3,3] that was using a value from a snapshot with T[2,2]. If we allow committing T[3,3]
       * and opening T[4] in parallel, it is possible that T[4] will be using the value from a snapshot
       * with T[2,2], and we will have a lost update problem.
       */
      acquireTxnLock(stmt, true);
      // Measure the time from acquiring the sequence value till committing into the TXNS table
      StopWatch generateTransactionWatch = new StopWatch();
      generateTransactionWatch.start();
      List<Long> txnIds = openTxns(dbConn, rqst);
      LOG.debug("Going to commit");
      dbConn.commit();
      generateTransactionWatch.stop();
      long elapsedMillis = generateTransactionWatch.getTime(TimeUnit.MILLISECONDS);
      TxnType txnType = rqst.isSetTxn_type() ? rqst.getTxn_type() : TxnType.DEFAULT;
      if (txnType != TxnType.READ_ONLY && elapsedMillis >= openTxnTimeOutMillis) {
        /*
         * The commit was too slow; we cannot allow this to continue (except if it is read only,
         * since that cannot cause dirty reads).
         * When calculating the snapshot for a given transaction, we look back for possible open transactions
         * (that are not yet committed in the TXNS table) for the TXN_OPENTXN_TIMEOUT period.
         * We cannot allow a write transaction that was slower than TXN_OPENTXN_TIMEOUT to continue,
         * because there can be other transactions running that didn't consider this transactionId open;
         * this could cause dirty reads.
         */
        LOG.error("OpenTxnTimeOut exceeded commit duration {}, deleting transactionIds: {}", elapsedMillis, txnIds);
        deleteInvalidOpenTransactions(dbConn, txnIds);
        dbConn.commit();
        /*
         * We do not throw RetryException directly, so as not to circumvent the max retry limit
         */
        throw new SQLException("OpenTxnTimeOut exceeded", MANUAL_RETRY);
      }
      return new OpenTxnsResponse(txnIds);
    } catch (SQLException e) {
      LOG.debug("Going to rollback: ", e);
      rollbackDBConn(dbConn);
      checkRetryable(e, "openTxns(" + rqst + ")");
      throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e));
    } finally {
      close(null, stmt, dbConn);
    }
  } catch (RetryException e) {
    return openTxns(rqst);
  }
}
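
And a caller-side sketch for the batch-open path (a sketch, assuming a default HiveConf; IMetaStoreClient.openTxns builds the OpenTxnRequest handled above):

import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;

public class OpenTxnsExample {
  public static void main(String[] args) throws Exception {
    IMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    // Request a batch of 10 txns; the server silently caps the batch at
    // ConfVars.TXN_MAX_OPEN_BATCH, as shown in openTxns above.
    List<Long> txnIds = client.openTxns("hive-user", 10).getTxn_ids();
    System.out.println("Opened transactions: " + txnIds);
    client.close();
  }
}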