Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
The class StorageProxy, method mutateMV.
/**
* Use this method to have these Mutations applied
* across all replicas.
*
* @param mutations the mutations to be applied across the replicas
* @param writeCommitLog if commitlog should be written
* @param baseComplete time from epoch in ms that the local base mutation was(or will be) completed
* @param queryStartNanoTime the value of nanoTime() when the query started to be processed
*/
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
Tracing.trace("Determining replicas for mutation");
final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
long startTime = nanoTime();
try {
// if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
final UUID batchUUID = UUIDGen.getTimeUUID();
if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
} else {
List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
// non-local mutations rely on the base mutation commit-log entry for eventual consistency
Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
// Since the base -> view replication is 1:1, we only need to store the batchlog locally
ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
// add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
for (Mutation mutation : mutations) {
if (hasLocalMutation(mutation))
writeMetrics.localRequests.mark();
else
writeMetrics.remoteRequests.mark();
String keyspaceName = mutation.getKeyspaceName();
Token tk = mutation.key().getToken();
AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
// if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
if (!pairedEndpoint.isPresent()) {
if (pendingReplicas.isEmpty())
logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission)," + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
continue;
}
// when the paired endpoint is the local node and there are no pending endpoints, apply the view mutation locally; otherwise do a remote write so the view mutation also reaches any pending endpoint
if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
try {
mutation.apply(writeCommitLog);
nonLocalMutations.remove(mutation);
// won't trigger cleanup
cleanup.decrement();
} catch (Exception exc) {
logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
throw exc;
}
} else {
ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
}
}
// Apply to local batchlog memtable in this thread
if (!nonLocalMutations.isEmpty())
BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
// Perform remote writes
if (!wrappers.isEmpty())
asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
}
} finally {
viewWriteMetrics.addNano(nanoTime() - startTime);
}
}
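A minimal caller-side sketch, not taken from Cassandra itself: the class name, the use of System.nanoTime() (real call sites use Cassandra's own clock utilities), and the error handling are illustrative assumptions. Only the StorageProxy.mutateMV call and the public exception fields (received, blockFor, writeType) are grounded in the code shown above.

import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.cassandra.db.Mutation;
import org.apache.cassandra.exceptions.OverloadedException;
import org.apache.cassandra.exceptions.UnavailableException;
import org.apache.cassandra.exceptions.WriteTimeoutException;
import org.apache.cassandra.service.StorageProxy;

public final class ViewWriteExample
{
    // Assumes the caller has already built its base-table mutations; how that happens is out of scope here.
    static void applyViewWrites(ByteBuffer dataKey, Collection<Mutation> mutations)
    {
        // baseComplete starts at Long.MAX_VALUE and is set once the local base write finishes,
        // matching how Keyspace.applyInternal passes it down.
        AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
        long queryStartNanoTime = System.nanoTime();
        try
        {
            StorageProxy.mutateMV(dataKey, mutations, true, baseComplete, queryStartNanoTime);
        }
        catch (WriteTimeoutException e)
        {
            // received, blockFor and writeType are public fields on the exception;
            // the wrapped view handlers above use WriteType.BATCH.
            System.err.printf("View write timed out (%s): %d/%d acks%n", e.writeType, e.received, e.blockFor);
        }
        catch (UnavailableException | OverloadedException e)
        {
            System.err.println("View write could not be attempted: " + e.getMessage());
        }
    }
}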
Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
The class StorageProxy, method readWithPaxos.
private static PartitionIterator readWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, ClientState state, long queryStartNanoTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException {
assert state != null;
if (group.queries.size() > 1)
throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time");
long start = nanoTime();
SinglePartitionReadCommand command = group.queries.get(0);
TableMetadata metadata = command.metadata();
DecoratedKey key = command.partitionKey();
// calculate blockFor before repairing any paxos round, to avoid the replication strategy being altered in between.
int blockForRead = consistencyLevel.blockFor(Keyspace.open(metadata.keyspace).getReplicationStrategy());
PartitionIterator result = null;
try {
final ConsistencyLevel consistencyForReplayCommitsOrFetch = consistencyLevel == ConsistencyLevel.LOCAL_SERIAL ? ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.QUORUM;
try {
// Commit an empty update to make sure all in-progress updates that should be finished first are, _and_
// that no other in-progress update can get resurrected.
Supplier<Pair<PartitionUpdate, RowIterator>> updateProposer = Paxos.getPaxosVariant() == Config.PaxosVariant.v1_without_linearizable_reads ? () -> null : () -> Pair.create(PartitionUpdate.emptyUpdate(metadata, key), null);
// When replaying, we commit at quorum/local quorum, as we want to be sure the following read (done at
// quorum/local_quorum) sees any replayed updates. Our own update is however empty, and those don't even
// get committed due to an optimization described in doPaxos/beginAndRepairPaxos, so the commit
// consistency is irrelevant (we use ANY just to emphasize that we don't wait on our commit).
doPaxos(metadata, key, consistencyLevel, consistencyForReplayCommitsOrFetch, ConsistencyLevel.ANY, start, casReadMetrics, updateProposer);
} catch (WriteTimeoutException e) {
throw new ReadTimeoutException(consistencyLevel, 0, blockForRead, false);
} catch (WriteFailureException e) {
throw new ReadFailureException(consistencyLevel, e.received, e.blockFor, false, e.failureReasonByEndpoint);
}
result = fetchRows(group.queries, consistencyForReplayCommitsOrFetch, queryStartNanoTime);
} catch (UnavailableException e) {
readMetrics.unavailables.mark();
casReadMetrics.unavailables.mark();
readMetricsForLevel(consistencyLevel).unavailables.mark();
logRequestException(e, group.queries);
throw e;
} catch (ReadTimeoutException e) {
readMetrics.timeouts.mark();
casReadMetrics.timeouts.mark();
readMetricsForLevel(consistencyLevel).timeouts.mark();
logRequestException(e, group.queries);
throw e;
} catch (ReadAbortException e) {
readMetrics.markAbort(e);
casReadMetrics.markAbort(e);
readMetricsForLevel(consistencyLevel).markAbort(e);
throw e;
} catch (ReadFailureException e) {
readMetrics.failures.mark();
casReadMetrics.failures.mark();
readMetricsForLevel(consistencyLevel).failures.mark();
throw e;
} finally {
long latency = nanoTime() - start;
readMetrics.addNano(latency);
casReadMetrics.addNano(latency);
readMetricsForLevel(consistencyLevel).addNano(latency);
Keyspace.open(metadata.keyspace).getColumnFamilyStore(metadata.name).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS);
}
return result;
}
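Two details carry most of the weight in this method: the ternary that maps LOCAL_SERIAL to LOCAL_QUORUM (and SERIAL to QUORUM) for replaying commits and fetching rows, and the catch blocks that re-surface paxos write errors as read errors. Below is a standalone sketch of just those two pieces; the class and method names are made up for illustration, while the constructors and public exception fields are exactly the ones used in the method above.

import org.apache.cassandra.db.ConsistencyLevel;
import org.apache.cassandra.exceptions.ReadFailureException;
import org.apache.cassandra.exceptions.ReadTimeoutException;
import org.apache.cassandra.exceptions.WriteFailureException;
import org.apache.cassandra.exceptions.WriteTimeoutException;

final class SerialReadHelpers
{
    // Mirrors the ternary in readWithPaxos: LOCAL_SERIAL reads replay/fetch at LOCAL_QUORUM,
    // SERIAL reads at QUORUM, so the follow-up read is guaranteed to see any replayed commits.
    static ConsistencyLevel consistencyForReplayCommitsOrFetch(ConsistencyLevel serialCl)
    {
        return serialCl == ConsistencyLevel.LOCAL_SERIAL ? ConsistencyLevel.LOCAL_QUORUM
                                                         : ConsistencyLevel.QUORUM;
    }

    // Mirrors the catch blocks: a timeout or failure while finishing in-progress paxos rounds
    // surfaces to the client as a read-level error, because the client issued a read.
    static RuntimeException asReadError(ConsistencyLevel serialCl, int blockForRead, RuntimeException paxosError)
    {
        if (paxosError instanceof WriteTimeoutException)
            return new ReadTimeoutException(serialCl, 0, blockForRead, false);
        if (paxosError instanceof WriteFailureException)
        {
            WriteFailureException wfe = (WriteFailureException) paxosError;
            return new ReadFailureException(serialCl, wfe.received, wfe.blockFor, false, wfe.failureReasonByEndpoint);
        }
        return paxosError;
    }
}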
Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.
The class Keyspace, method applyInternal.
/**
* This method appends a row to the global CommitLog, then updates memtables and indexes.
*
* @param mutation the row to write. Must not be modified after calling apply, since commitlog append
* may happen concurrently, depending on the CL Executor type.
* @param makeDurable if true, don't return unless write has been made durable
* @param updateIndexes false to disable index updates (used by CollationController "defragmenting")
* @param isDroppable true if this should throw WriteTimeoutException if it does not acquire lock within write_request_timeout
* @param isDeferrable true if caller is not waiting for future to complete, so that future may be deferred
*/
private Future<?> applyInternal(final Mutation mutation, final boolean makeDurable, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, Promise<?> future) {
if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
throw new RuntimeException("Testing write failures");
Lock[] locks = null;
boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
if (requiresViewUpdate) {
mutation.viewLockAcquireStart.compareAndSet(0L, currentTimeMillis());
// the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
Collection<TableId> tableIds = mutation.getTableIds();
Iterator<TableId> idIterator = tableIds.iterator();
locks = new Lock[tableIds.size()];
for (int i = 0; i < tableIds.size(); i++) {
TableId tableId = idIterator.next();
int lockKey = Objects.hash(mutation.key().getKey(), tableId);
while (true) {
Lock lock = null;
if (TEST_FAIL_MV_LOCKS_COUNT == 0)
lock = ViewManager.acquireLockFor(lockKey);
else
TEST_FAIL_MV_LOCKS_COUNT--;
if (lock == null) {
// throw WTE only if request is droppable
if (isDroppable && (approxTime.isAfter(mutation.approxCreatedAtNanos + DatabaseDescriptor.getWriteRpcTimeout(NANOSECONDS)))) {
for (int j = 0; j < i; j++) locks[j].unlock();
if (logger.isTraceEnabled())
logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
Tracing.trace("Could not acquire MV lock");
if (future != null) {
future.tryFailure(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
return future;
} else
throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
} else if (isDeferrable) {
for (int j = 0; j < i; j++) locks[j].unlock();
// This view update can't happen right now, so rather than keep this thread busy
// we re-apply ourselves to the queue and try again later
Stage.MUTATION.execute(() -> applyInternal(mutation, makeDurable, true, isDroppable, true, future));
return future;
} else {
// non-deferrable mutation: retry the lock on this thread, since deferring here could leave all MutationStage workers blocked waiting on futures that will never be processed
try {
// Wait a little bit before retrying to lock
Thread.sleep(10);
} catch (InterruptedException e) {
throw new UncheckedInterruptedException(e);
}
continue;
}
} else {
locks[i] = lock;
}
break;
}
}
long acquireTime = currentTimeMillis() - mutation.viewLockAcquireStart.get();
// Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
if (isDroppable) {
for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, MILLISECONDS);
}
}
int nowInSec = FBUtilities.nowInSeconds();
try (WriteContext ctx = getWriteHandler().beginWrite(mutation, makeDurable)) {
for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
if (cfs == null) {
logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
continue;
}
AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
if (requiresViewUpdate) {
try {
Tracing.trace("Creating materialized view mutations from base table replica");
viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, makeDurable, baseComplete);
} catch (Throwable t) {
JVMStabilityInspector.inspectThrowable(t);
logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
throw t;
}
}
UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, ctx, nowInSec) : UpdateTransaction.NO_OP;
cfs.getWriteHandler().write(upd, ctx, indexTransaction);
if (requiresViewUpdate)
baseComplete.set(currentTimeMillis());
}
if (future != null) {
future.trySuccess(null);
}
return future;
} finally {
if (locks != null) {
for (Lock lock : locks) if (lock != null)
lock.unlock();
}
}
}
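Stripped of the Cassandra-specific pieces (ViewManager lock striping, deferral to Stage.MUTATION, metrics), the lock-acquisition loop above is a try-lock-all-or-back-off pattern. The following is a generic sketch of that shape in plain java.util.concurrent terms, assuming nothing about Cassandra internals; it only illustrates the logic of acquiring every lock, and on a failed tryLock releasing what is held before timing out or retrying.

import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Lock;

final class TryLockAllExample
{
    // Try to acquire every lock in order. On any failure, release the locks already held,
    // then either give up (if past the deadline) or sleep briefly and retry, mirroring the
    // droppable / non-deferrable branches of applyInternal.
    static void lockAllOrTimeout(List<Lock> locks, long deadlineNanos) throws InterruptedException, TimeoutException
    {
        while (true)
        {
            int acquired = 0;
            boolean ok = true;
            for (Lock lock : locks)
            {
                if (lock.tryLock())
                {
                    acquired++;
                }
                else
                {
                    ok = false;
                    break;
                }
            }
            if (ok)
                return; // caller is responsible for unlocking all locks when done

            // release what we hold before deciding what to do next
            for (int i = 0; i < acquired; i++)
                locks.get(i).unlock();

            if (System.nanoTime() > deadlineNanos)
                throw new TimeoutException("could not acquire all locks before the deadline");

            TimeUnit.MILLISECONDS.sleep(10); // back off a little, then retry
        }
    }
}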