use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
the class CommitLogSegmentManagerCDCTest method testSegmentFlaggingOnCreation.
@Test
public void testSegmentFlaggingOnCreation() throws Throwable {
    CommitLogSegmentManagerCDC cdcMgr = (CommitLogSegmentManagerCDC) CommitLog.instance.segmentManager;
    String ct = createTable("CREATE TABLE %s (idx int, data text, primary key(idx)) WITH cdc=true;");
    int origSize = DatabaseDescriptor.getCDCSpaceInMB();
    try {
        DatabaseDescriptor.setCDCSpaceInMB(16);
        TableMetadata ccfm = Keyspace.open(keyspace()).getColumnFamilyStore(ct).metadata();
        // Spin until we hit CDC capacity and make sure we get a WriteTimeout
        try {
            for (int i = 0; i < 1000; i++) {
                new RowUpdateBuilder(ccfm, 0, i).add("data", randomizeBuffer(DatabaseDescriptor.getCommitLogSegmentSize() / 3)).build().apply();
            }
            Assert.fail("Expected WriteTimeoutException from full CDC but did not receive it.");
        } catch (WriteTimeoutException e) {
            // expected, do nothing
        }
        expectCurrentCDCState(CDCState.FORBIDDEN);
        CommitLog.instance.forceRecycleAllSegments();
        cdcMgr.awaitManagementTasksCompletion();
        new File(DatabaseDescriptor.getCDCLogLocation()).listFiles()[0].delete();
        cdcMgr.updateCDCTotalSize();
        // Confirm the CDC size update flips the flag on the active segment
        expectCurrentCDCState(CDCState.PERMITTED);
        // Clear out archived CDC files
        for (File f : new File(DatabaseDescriptor.getCDCLogLocation()).listFiles()) {
            FileUtils.deleteWithConfirm(f);
        }
        // Set space to 0, confirm newly allocated segments are FORBIDDEN
        DatabaseDescriptor.setCDCSpaceInMB(0);
        CommitLog.instance.forceRecycleAllSegments();
        CommitLog.instance.segmentManager.awaitManagementTasksCompletion();
        expectCurrentCDCState(CDCState.FORBIDDEN);
    } finally {
        DatabaseDescriptor.setCDCSpaceInMB(origSize);
    }
}
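The test asserts segment state through an expectCurrentCDCState helper that this excerpt does not show. A minimal sketch of what it might look like, assuming the segment manager exposes its active segment via allocatingFrom() and the segment exposes getCDCState():

// Sketch only: allocatingFrom() and getCDCState() are assumed accessors.
private void expectCurrentCDCState(CDCState expectedState) {
    CDCState currentState = CommitLog.instance.segmentManager.allocatingFrom().getCDCState();
    Assert.assertEquals("Received unexpected CDCState on current allocatingFrom segment.",
                        expectedState, currentState);
}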
use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
the class CommitLogSegmentManagerCDCTest method testCDCWriteTimeout.
@Test
public void testCDCWriteTimeout() throws Throwable {
    createTable("CREATE TABLE %s (idx int, data text, primary key(idx)) WITH cdc=true;");
    CommitLogSegmentManagerCDC cdcMgr = (CommitLogSegmentManagerCDC) CommitLog.instance.segmentManager;
    TableMetadata cfm = currentTableMetadata();
    // Confirm that the logic checking whether we can allocate new CDC segments works
    Integer originalCDCSize = DatabaseDescriptor.getCDCSpaceInMB();
    try {
        DatabaseDescriptor.setCDCSpaceInMB(32);
        // Spin until we hit CDC capacity and make sure we get a WriteTimeout
        try {
            // Should trigger on anything < 20:1 compression ratio during compressed test
            for (int i = 0; i < 100; i++) {
                new RowUpdateBuilder(cfm, 0, i).add("data", randomizeBuffer(DatabaseDescriptor.getCommitLogSegmentSize() / 3)).build().apply();
            }
            Assert.fail("Expected WriteTimeoutException from full CDC but did not receive it.");
        } catch (WriteTimeoutException e) {
            // expected, do nothing
        }
        expectCurrentCDCState(CDCState.FORBIDDEN);
        // Confirm we can create a non-cdc table and write to it even while at cdc capacity
        createTable("CREATE TABLE %s (idx int, data text, primary key(idx)) WITH cdc=false;");
        execute("INSERT INTO %s (idx, data) VALUES (1, '1');");
        // Confirm that, on flush+recycle, we see files show up in cdc_raw
        Keyspace.open(keyspace()).getColumnFamilyStore(currentTable()).forceBlockingFlush();
        CommitLog.instance.forceRecycleAllSegments();
        cdcMgr.awaitManagementTasksCompletion();
        Assert.assertTrue("Expected files to be moved to overflow.", getCDCRawCount() > 0);
        // Simulate a CDC consumer reading files then deleting them
        for (File f : new File(DatabaseDescriptor.getCDCLogLocation()).listFiles())
            FileUtils.deleteWithConfirm(f);
        // Update the size tracker to reflect the deleted files; this should flip the flag on the current allocatingFrom segment to allow writes
        cdcMgr.updateCDCTotalSize();
        expectCurrentCDCState(CDCState.PERMITTED);
    } finally {
        DatabaseDescriptor.setCDCSpaceInMB(originalCDCSize);
    }
}
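The getCDCRawCount helper is likewise not shown in this excerpt. A plausible sketch, assuming it simply counts the files currently sitting in the configured CDC log location:

// Sketch only: assumes cdc_raw contents can be counted straight off the CDC log directory.
private int getCDCRawCount() {
    return new File(DatabaseDescriptor.getCDCLogLocation()).listFiles().length;
}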
use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
the class Keyspace method applyInternal.
/**
* This method appends a row to the global CommitLog, then updates memtables and indexes.
*
* @param mutation the row to write. Must not be modified after calling apply, since commitlog append
* may happen concurrently, depending on the CL Executor type.
* @param writeCommitLog false to disable commitlog append entirely
* @param updateIndexes false to disable index updates (used by CollationController "defragmenting")
* @param isDroppable true if this should throw WriteTimeoutException when it cannot acquire the lock within write_request_timeout_in_ms
* @param isDeferrable true if the caller is not waiting for the future to complete, so the future may be deferred
*/
private CompletableFuture<?> applyInternal(final Mutation mutation, final boolean writeCommitLog, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, CompletableFuture<?> future) {
    if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
        throw new RuntimeException("Testing write failures");
    Lock[] locks = null;
    boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
    if (requiresViewUpdate) {
        mutation.viewLockAcquireStart.compareAndSet(0L, System.currentTimeMillis());
        // the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
        Collection<TableId> tableIds = mutation.getTableIds();
        Iterator<TableId> idIterator = tableIds.iterator();
        locks = new Lock[tableIds.size()];
        for (int i = 0; i < tableIds.size(); i++) {
            TableId tableId = idIterator.next();
            int lockKey = Objects.hash(mutation.key().getKey(), tableId);
            while (true) {
                Lock lock = null;
                if (TEST_FAIL_MV_LOCKS_COUNT == 0)
                    lock = ViewManager.acquireLockFor(lockKey);
                else
                    TEST_FAIL_MV_LOCKS_COUNT--;
                if (lock == null) {
                    // throw a WriteTimeoutException only if the request is droppable
                    if (isDroppable && (System.currentTimeMillis() - mutation.createdAt) > DatabaseDescriptor.getWriteRpcTimeout()) {
                        for (int j = 0; j < i; j++)
                            locks[j].unlock();
                        logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
                        Tracing.trace("Could not acquire MV lock");
                        if (future != null) {
                            future.completeExceptionally(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
                            return future;
                        } else
                            throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
                    } else if (isDeferrable) {
                        for (int j = 0; j < i; j++)
                            locks[j].unlock();
                        // This view update can't happen right now, so rather than keeping this thread busy
                        // we re-apply ourselves to the queue and try again later
                        final CompletableFuture<?> mark = future;
                        StageManager.getStage(Stage.MUTATION).execute(() -> applyInternal(mutation, writeCommitLog, true, isDroppable, true, mark));
                        return future;
                    } else {
                        // Retry the lock on this thread; deferring a non-deferrable mutation could deadlock the
                        // MutationStage, with all workers blocked waiting for futures which will never be processed
                        try {
                            // Wait a little bit before retrying to lock
                            Thread.sleep(10);
                        } catch (InterruptedException e) {
                            // Just continue
                        }
                        continue;
                    }
                } else {
                    locks[i] = lock;
                }
                break;
            }
        }
        long acquireTime = System.currentTimeMillis() - mutation.viewLockAcquireStart.get();
        // Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
        if (isDroppable) {
            for (TableId tableId : tableIds)
                columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, TimeUnit.MILLISECONDS);
        }
    }
    int nowInSec = FBUtilities.nowInSeconds();
    try (OpOrder.Group opGroup = writeOrder.start()) {
        // write the mutation to the commitlog and memtables
        CommitLogPosition commitLogPosition = null;
        if (writeCommitLog) {
            Tracing.trace("Appending to commitlog");
            commitLogPosition = CommitLog.instance.add(mutation);
        }
        for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
            ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
            if (cfs == null) {
                logger.error("Attempting to mutate non-existent table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
                continue;
            }
            AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
            if (requiresViewUpdate) {
                try {
                    Tracing.trace("Creating materialized view mutations from base table replica");
                    viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, writeCommitLog, baseComplete);
                } catch (Throwable t) {
                    JVMStabilityInspector.inspectThrowable(t);
                    logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
                    throw t;
                }
            }
            Tracing.trace("Adding to {} memtable", upd.metadata().name);
            UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, opGroup, nowInSec) : UpdateTransaction.NO_OP;
            cfs.apply(upd, indexTransaction, opGroup, commitLogPosition);
            if (requiresViewUpdate)
                baseComplete.set(System.currentTimeMillis());
        }
        if (future != null) {
            future.complete(null);
        }
        return future;
    } finally {
        if (locks != null) {
            for (Lock lock : locks)
                if (lock != null)
                    lock.unlock();
        }
    }
}
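For context on how the WriteTimeoutException thrown above reaches callers: a synchronously applied mutation funnels into applyInternal, so the VIEW-typed timeout can be caught directly. A hedged sketch (the logging and rethrow policy here are illustrative, not taken from Cassandra):

// Illustrative sketch: Mutation.apply() eventually reaches Keyspace.applyInternal, so the
// caller sees the VIEW write timeout thrown when MV locks cannot be acquired in time.
try {
    mutation.apply();
} catch (WriteTimeoutException e) {
    // For the MV lock path above: writeType == WriteType.VIEW, received == 0, blockFor == 1
    logger.warn("Write timed out ({}): acked {}/{}", e.writeType, e.received, e.blockFor);
    throw e;
}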
use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
the class StorageProxy method doPaxos.
/**
* Performs the Paxos rounds for a given proposal, retrying when preempted until the timeout.
*
* <p>The main 'configurable' of this method is the {@code createUpdateProposal} method: it is called by the method
* once a ballot has been successfully 'prepared' to generate the update to 'propose' (and commit if the proposal is
* successful). That method also generates the result that the whole method will return. Note that due to retrying,
* {@code createUpdateProposal} may be called multiple times and does not have to return the same result each time.
*
* @param metadata the table to update with Paxos.
* @param key the partition updated.
* @param consistencyForPaxos the serial consistency of the operation (either {@link ConsistencyLevel#SERIAL} or
* {@link ConsistencyLevel#LOCAL_SERIAL}).
* @param consistencyForReplayCommits the consistency for the commit phase of "replayed" in-progress operations.
* @param consistencyForCommit the consistency for the commit phase of _this_ operation update.
* @param queryStartNanoTime the nano time for the start of the query this is part of. This is the base time for
* timeouts.
* @param casMetrics the metrics to update for this operation.
* @param createUpdateProposal method called after a successful 'prepare' phase to obtain 1) the actual update of
* this operation and 2) the result that the whole method should return. This can return {@code null} in the
* special case where, after having "prepared" (and thus potentially replayed in-progress updates), we don't want
* to propose anything (the whole method then returns {@code null}).
* @return the second element of the pair returned by {@code createUpdateProposal} (for the last call of that method
* if that method is called multiple times due to retries).
*/
private static RowIterator doPaxos(TableMetadata metadata, DecoratedKey key, ConsistencyLevel consistencyForPaxos, ConsistencyLevel consistencyForReplayCommits, ConsistencyLevel consistencyForCommit, long queryStartNanoTime, CASClientRequestMetrics casMetrics, Supplier<Pair<PartitionUpdate, RowIterator>> createUpdateProposal) throws UnavailableException, IsBootstrappingException, RequestFailureException, RequestTimeoutException, InvalidRequestException {
    int contentions = 0;
    Keyspace keyspace = Keyspace.open(metadata.keyspace);
    AbstractReplicationStrategy latestRs = keyspace.getReplicationStrategy();
    try {
        consistencyForPaxos.validateForCas();
        consistencyForReplayCommits.validateForCasCommit(latestRs);
        consistencyForCommit.validateForCasCommit(latestRs);
        long timeoutNanos = DatabaseDescriptor.getCasContentionTimeout(NANOSECONDS);
        while (nanoTime() - queryStartNanoTime < timeoutNanos) {
            // for simplicity, we'll do a single liveness check at the start of each attempt
            ReplicaPlan.ForPaxosWrite replicaPlan = ReplicaPlans.forPaxos(keyspace, key, consistencyForPaxos);
            latestRs = replicaPlan.replicationStrategy();
            PaxosBallotAndContention pair = beginAndRepairPaxos(queryStartNanoTime, key, metadata, replicaPlan, consistencyForPaxos, consistencyForReplayCommits, casMetrics);
            final UUID ballot = pair.ballot;
            contentions += pair.contentions;
            Pair<PartitionUpdate, RowIterator> proposalPair = createUpdateProposal.get();
            // See method javadoc: null here is code for "stop here and return null".
            if (proposalPair == null)
                return null;
            Commit proposal = Commit.newProposal(ballot, proposalPair.left);
            Tracing.trace("CAS precondition is met; proposing client-requested updates for {}", ballot);
            if (proposePaxos(proposal, replicaPlan, true, queryStartNanoTime)) {
                // We skip committing the update when it is empty; as empty updates are fairly common
                // (serial reads and non-applying CAS propose them), this is worth bothering.
                if (!proposal.update.isEmpty())
                    commitPaxos(proposal, consistencyForCommit, true, queryStartNanoTime);
                RowIterator result = proposalPair.right;
                if (result != null)
                    Tracing.trace("CAS did not apply");
                else
                    Tracing.trace("CAS applied successfully");
                return result;
            }
            Tracing.trace("Paxos proposal not accepted (pre-empted by a higher ballot)");
            contentions++;
            Uninterruptibles.sleepUninterruptibly(ThreadLocalRandom.current().nextInt(100), TimeUnit.MILLISECONDS);
            // continue to retry
        }
    } catch (CasWriteTimeoutException e) {
        // Might be thrown by beginAndRepairPaxos. In that case, any contention that happened within that
        // method and led up to the timeout was not accounted for in our local 'contentions' variable, so we
        // add it now to keep the contention recorded in the finally block correct.
        contentions += e.contentions;
        throw e;
    } catch (WriteTimeoutException e) {
        // Might be thrown by proposePaxos or commitPaxos
        throw new CasWriteTimeoutException(e.writeType, e.consistency, e.received, e.blockFor, contentions);
    } finally {
        recordCasContention(metadata, key, casMetrics, contentions);
    }
    throw new CasWriteTimeoutException(WriteType.CAS, consistencyForPaxos, 0, consistencyForPaxos.blockFor(latestRs), contentions);
}
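To make the createUpdateProposal contract concrete: the supplier returns the update to propose on the left and the caller-visible result on the right, where a non-null right-hand side signals "did not apply" (see the tracing above). A sketch under assumed names: readCurrentValues, conditionsMatch, and buildUpdate are hypothetical helpers; metadata and key stand for the method parameters.

// Sketch only: the three helpers are hypothetical, not Cassandra APIs.
Supplier<Pair<PartitionUpdate, RowIterator>> createUpdateProposal = () -> {
    FilteredPartition current = readCurrentValues();
    if (!conditionsMatch(current))
        // Propose an empty update but hand the current row back: "CAS did not apply"
        return Pair.create(PartitionUpdate.emptyUpdate(metadata, key), current.rowIterator());
    // Propose the real update; a null result signals "CAS applied"
    return Pair.create(buildUpdate(current), null);
};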
use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
the class StorageProxy method proposePaxos.
/**
* Propose the {@param proposal} according to the {@param replicaPlan}.
* When {@param backoffIfPartial} is true, the proposer backs off when seeing the proposal accepted by some but not a quorum.
* The result of the corresponding CAS is uncertain, as the accepted proposal may or may not be spread to other nodes in later rounds.
*/
private static boolean proposePaxos(Commit proposal, ReplicaPlan.ForPaxosWrite replicaPlan, boolean backoffIfPartial, long queryStartNanoTime) throws WriteTimeoutException, CasWriteUnknownResultException {
    ProposeCallback callback = new ProposeCallback(replicaPlan.contacts().size(), replicaPlan.requiredParticipants(), !backoffIfPartial, replicaPlan.consistencyLevel(), queryStartNanoTime);
    Message<Commit> message = Message.out(PAXOS_PROPOSE_REQ, proposal);
    for (Replica replica : replicaPlan.contacts()) {
        if (replica.isSelf()) {
            PAXOS_PROPOSE_REQ.stage.execute(() -> {
                try {
                    Message<Boolean> response = message.responseWith(doPropose(proposal));
                    callback.onResponse(response);
                } catch (Exception ex) {
                    logger.error("Failed paxos propose locally", ex);
                }
            });
        } else {
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), callback);
        }
    }
    callback.await();
    if (callback.isSuccessful())
        return true;
    if (backoffIfPartial && !callback.isFullyRefused())
        throw new CasWriteUnknownResultException(replicaPlan.consistencyLevel(), callback.getAcceptCount(), replicaPlan.requiredParticipants());
    return false;
}
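callback.await() is where the WriteTimeoutException in this method's signature can originate: if enough accepts do not arrive within the write timeout, the callback gives up. An illustrative sketch of that shape, not the actual ProposeCallback internals; latch, consistencyLevel, and requiredParticipants stand for assumed fields of the callback.

// Illustrative sketch only: blocks on a latch and surfaces a CAS-typed
// WriteTimeoutException when too few acceptors respond in time.
public void await() throws WriteTimeoutException {
    try {
        if (!latch.await(DatabaseDescriptor.getWriteRpcTimeout(), TimeUnit.MILLISECONDS))
            throw new WriteTimeoutException(WriteType.CAS, consistencyLevel, getAcceptCount(), requiredParticipants);
    } catch (InterruptedException e) {
        throw new AssertionError(e);
    }
}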