Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
In the class StorageProxy, method sendMessagesToNonlocalDC.
/*
 * Send the message to one replica in targets (chosen at random), and have it forward the message to the others in its DC
 */
private static void sendMessagesToNonlocalDC(Message<? extends IMutation> message, EndpointsForToken targets, AbstractWriteResponseHandler<IMutation> handler) {
    final Replica target;
    if (targets.size() > 1) {
        target = targets.get(ThreadLocalRandom.current().nextInt(0, targets.size()));
        EndpointsForToken forwardToReplicas = targets.filter(r -> r != target, targets.size());
        for (Replica replica : forwardToReplicas) {
            MessagingService.instance().callbacks.addWithExpiration(handler, message, replica, handler.replicaPlan.consistencyLevel(), true);
            logger.trace("Adding FWD message to {}@{}", message.id(), replica);
        }
        // starting with 4.0, use the same message id for all replicas
        long[] messageIds = new long[forwardToReplicas.size()];
        Arrays.fill(messageIds, message.id());
        message = message.withForwardTo(new ForwardingInfo(forwardToReplicas.endpointList(), messageIds));
    } else {
        target = targets.get(0);
    }
    MessagingService.instance().sendWriteWithCallback(message, target, handler, true);
    logger.trace("Sending message to {}@{}", message.id(), target);
}
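
To make the forwarding contract concrete, here is a minimal, self-contained sketch of the same idea in plain Java. Endpoint, ForwardingPlan and planRemoteDcWrite are hypothetical stand-ins, not Cassandra APIs: one target in the remote DC is chosen at random to receive the message directly, and the remaining targets are recorded so that node can forward the message on, all under one shared message id (mirroring the "same message id for all replicas" note above).

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

// Hypothetical stand-ins for illustration only; not Cassandra's Replica/ForwardingInfo.
public class ForwardingSketch {
    record Endpoint(String address) { }
    record ForwardingPlan(Endpoint forwarder, List<Endpoint> forwardTo, long sharedMessageId) { }

    // Pick one target at random to receive the write directly; the rest are listed in the
    // plan and all share the same message id, as the 4.0 comment above describes.
    static ForwardingPlan planRemoteDcWrite(List<Endpoint> targets, long messageId) {
        if (targets.size() == 1)
            return new ForwardingPlan(targets.get(0), List.of(), messageId);
        Endpoint forwarder = targets.get(ThreadLocalRandom.current().nextInt(targets.size()));
        List<Endpoint> forwardTo = new ArrayList<>(targets);
        forwardTo.remove(forwarder);
        return new ForwardingPlan(forwarder, forwardTo, messageId);
    }

    public static void main(String[] args) {
        List<Endpoint> remoteDc = List.of(new Endpoint("10.0.1.1"), new Endpoint("10.0.1.2"), new Endpoint("10.0.1.3"));
        System.out.println(planRemoteDcWrite(remoteDc, 42L));
    }
}

In the real method the forwarding list travels inside the message itself via withForwardTo(new ForwardingInfo(...)), and a callback is registered per forwarded replica up front so the coordinator can still expire each forwarded write individually.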
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
In the class StorageProxy, method commitPaxos.
private static void commitPaxos(Commit proposal, ConsistencyLevel consistencyLevel, boolean allowHints, long queryStartNanoTime) throws WriteTimeoutException {
    boolean shouldBlock = consistencyLevel != ConsistencyLevel.ANY;
    Keyspace keyspace = Keyspace.open(proposal.update.metadata().keyspace);
    Token tk = proposal.update.partitionKey().getToken();
    AbstractWriteResponseHandler<Commit> responseHandler = null;
    // NOTE: this ReplicaPlan is a lie, this usage of ReplicaPlan could do with being clarified - the selected() collection is essentially (I think) never used
    ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forWrite(keyspace, consistencyLevel, tk, ReplicaPlans.writeAll);
    if (shouldBlock) {
        AbstractReplicationStrategy rs = replicaPlan.replicationStrategy();
        responseHandler = rs.getWriteResponseHandler(replicaPlan, null, WriteType.SIMPLE, queryStartNanoTime);
    }
    Message<Commit> message = Message.outWithFlag(PAXOS_COMMIT_REQ, proposal, MessageFlag.CALL_BACK_ON_FAILURE);
    for (Replica replica : replicaPlan.liveAndDown()) {
        InetAddressAndPort destination = replica.endpoint();
        checkHintOverload(replica);
        if (replicaPlan.isAlive(replica)) {
            if (shouldBlock) {
                if (replica.isSelf())
                    commitPaxosLocal(replica, message, responseHandler);
                else
                    MessagingService.instance().sendWriteWithCallback(message, replica, responseHandler, allowHints && shouldHint(replica));
            } else {
                MessagingService.instance().send(message, destination);
            }
        } else {
            if (responseHandler != null) {
                responseHandler.expired();
            }
            if (allowHints && shouldHint(replica)) {
                submitHint(proposal.makeMutation(), replica, null);
            }
        }
    }
    if (shouldBlock)
        responseHandler.get();
}
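
The per-replica branching above can be read as a small decision table: when the consistency level requires blocking, live replicas get the commit with a callback (or a local apply on the coordinator itself), CL.ANY sends without tracking responses, and dead replicas are counted as expired and, if allowed, hinted. The sketch below is illustrative only; Replica and Action here are hypothetical stand-ins, not Cassandra types.

import java.util.List;

// Illustrative sketch only: this Replica is a hypothetical stand-in, not Cassandra's class.
public class PaxosCommitDispatchSketch {
    record Replica(String endpoint, boolean alive, boolean self) { }

    enum Action { APPLY_LOCALLY, SEND_WITH_CALLBACK, SEND_NO_CALLBACK, EXPIRE_AND_MAYBE_HINT }

    // Mirrors the branching in commitPaxos: a dead replica is counted as expired (and possibly
    // hinted), CL.ANY fires and forgets, and blocking levels track responses, applying locally
    // when the coordinator is itself a replica.
    static Action decide(Replica replica, boolean shouldBlock) {
        if (!replica.alive())
            return Action.EXPIRE_AND_MAYBE_HINT;
        if (!shouldBlock)
            return Action.SEND_NO_CALLBACK;
        return replica.self() ? Action.APPLY_LOCALLY : Action.SEND_WITH_CALLBACK;
    }

    public static void main(String[] args) {
        List<Replica> liveAndDown = List.of(new Replica("10.0.0.1", true, true),
                                            new Replica("10.0.0.2", true, false),
                                            new Replica("10.0.0.3", false, false));
        liveAndDown.forEach(r -> System.out.println(r.endpoint() + " -> " + decide(r, true)));
    }
}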
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
In the class StorageProxy, method preparePaxos.
private static PrepareCallback preparePaxos(Commit toPrepare, ReplicaPlan.ForPaxosWrite replicaPlan, long queryStartNanoTime) throws WriteTimeoutException {
    PrepareCallback callback = new PrepareCallback(toPrepare.update.partitionKey(), toPrepare.update.metadata(), replicaPlan.requiredParticipants(), replicaPlan.consistencyLevel(), queryStartNanoTime);
    Message<Commit> message = Message.out(PAXOS_PREPARE_REQ, toPrepare);
    boolean hasLocalRequest = false;
    for (Replica replica : replicaPlan.contacts()) {
        if (replica.isSelf()) {
            hasLocalRequest = true;
            PAXOS_PREPARE_REQ.stage.execute(() -> {
                try {
                    callback.onResponse(message.responseWith(doPrepare(toPrepare)));
                } catch (Exception ex) {
                    logger.error("Failed paxos prepare locally", ex);
                }
            });
        } else {
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), callback);
        }
    }
    if (hasLocalRequest)
        writeMetrics.localRequests.mark();
    else
        writeMetrics.remoteRequests.mark();
    callback.await();
    return callback;
}
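
The prepare round blocks in callback.await() until it has heard from replicaPlan.requiredParticipants() replicas, whether the responses arrive from the local stage or from remote callbacks. Below is a rough, hypothetical sketch of just that counting behaviour; the real PrepareCallback does more (it also inspects the promises and recent commits it receives), and none of these names are Cassandra APIs.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

// Hypothetical sketch of waiting for enough prepare responses; not Cassandra's PrepareCallback.
public class PreparePromiseSketch {
    private final CountDownLatch responses;

    PreparePromiseSketch(int requiredParticipants) {
        this.responses = new CountDownLatch(requiredParticipants);
    }

    // Invoked once per prepare response, whether it came from the local stage or a remote callback.
    void onResponse() {
        responses.countDown();
    }

    // Returns true once requiredParticipants responses have arrived within the timeout.
    boolean await(long timeoutMillis) throws InterruptedException {
        return responses.await(timeoutMillis, TimeUnit.MILLISECONDS);
    }

    public static void main(String[] args) throws InterruptedException {
        PreparePromiseSketch callback = new PreparePromiseSketch(2); // e.g. a quorum of 3 replicas
        callback.onResponse();
        callback.onResponse();
        System.out.println("required participants responded: " + callback.await(100));
    }
}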
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
In the class StorageProxy, method mutateMV.
/**
 * Use this method to have these Mutations applied
 * across all replicas.
 *
 * @param dataKey the partition key of the base table mutation, used to locate the paired view replica
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog whether the commit log should be written
 * @param baseComplete time from epoch in ms that the local base mutation was (or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                if (!pairedEndpoint.isPresent()) {
                    if (pendingReplicas.isEmpty())
logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission)," + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
continue;
}
// write so the view mutation is sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
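
The per-mutation routing in the loop above reduces to three outcomes, summarised in the hypothetical sketch below (the type and method names are illustrative, not Cassandra APIs): with no paired view replica the mutation only stays in the local batchlog for later replay; when the paired replica is this node, the node has joined the ring and there are no pending replicas, the view update is applied directly; everything else goes through a batched remote write.

// Hypothetical decision table for routing a single view mutation; not Cassandra code.
public class ViewMutationRoutingSketch {
    enum Route { LOCAL_BATCHLOG_ONLY, APPLY_LOCALLY, REMOTE_BATCHED_WRITE }

    // Mirrors the per-mutation branching in mutateMV above.
    static Route route(boolean hasPairedEndpoint, boolean pairedIsSelf, boolean isJoined, boolean hasPendingReplicas) {
        if (!hasPairedEndpoint)
            return Route.LOCAL_BATCHLOG_ONLY;   // rely on local batchlog replay during range movements
        if (pairedIsSelf && isJoined && !hasPendingReplicas)
            return Route.APPLY_LOCALLY;         // paired view replica is this node and nothing is pending
        return Route.REMOTE_BATCHED_WRITE;      // ordinary write, so pending endpoints also get the update
    }

    public static void main(String[] args) {
        System.out.println(route(false, false, true, false)); // LOCAL_BATCHLOG_ONLY
        System.out.println(route(true, true, true, false));   // APPLY_LOCALLY
        System.out.println(route(true, true, true, true));    // REMOTE_BATCHED_WRITE
    }
}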
Use of org.apache.cassandra.locator.Replica in project cassandra by apache.
In the class StorageProxy, method mutateCounter.
/**
 * Handle a counter mutation on the coordinator host.
 *
 * A counter mutation needs to first be applied to a replica (which we'll call the leader for the mutation) before being
 * replicated to the other endpoints. To achieve this, there are two cases:
 * 1) the coordinator host is a replica: we apply the update locally and replicate through
 * applyCounterMutationOnCoordinator;
 * 2) the coordinator is not a replica: we forward the (counter) mutation to a chosen replica (which will proceed through
 * applyCounterMutationOnLeader upon receipt) and wait for its acknowledgment.
 *
 * Implementation note: we check whether we can fulfill the CL on the coordinator host even if it is not a replica, to allow a
 * quicker response and because the WriteResponseHandlers don't make it easy to send back an error. We also always gather
 * the write latencies at the coordinator node to make the gathering point similar to the case of standard writes.
 */
public static AbstractWriteResponseHandler<IMutation> mutateCounter(CounterMutation cm, String localDataCenter, long queryStartNanoTime) throws UnavailableException, OverloadedException {
    Replica replica = findSuitableReplica(cm.getKeyspaceName(), cm.key(), localDataCenter, cm.consistency());
    if (replica.isSelf()) {
        return applyCounterMutationOnCoordinator(cm, localDataCenter, queryStartNanoTime);
    } else {
        // Exit now if we can't fulfill the CL here instead of forwarding to the leader replica
        String keyspaceName = cm.getKeyspaceName();
        Keyspace keyspace = Keyspace.open(keyspaceName);
        Token tk = cm.key().getToken();
        // we build this ONLY to perform the sufficiency check that happens on construction
        ReplicaPlans.forWrite(keyspace, cm.consistency(), tk, ReplicaPlans.writeAll);
        // Forward the actual update to the chosen leader replica
        AbstractWriteResponseHandler<IMutation> responseHandler = new WriteResponseHandler<>(ReplicaPlans.forForwardingCounterWrite(keyspace, tk, replica), WriteType.COUNTER, queryStartNanoTime);
        Tracing.trace("Enqueuing counter update to {}", replica);
        Message message = Message.outWithFlag(Verb.COUNTER_MUTATION_REQ, cm, MessageFlag.CALL_BACK_ON_FAILURE);
        MessagingService.instance().sendWriteWithCallback(message, replica, responseHandler, false);
        return responseHandler;
    }
}
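
findSuitableReplica (not shown in this excerpt) picks the counter leader; roughly, it prefers a live replica in the coordinator's own datacenter and otherwise falls back to a remote one. The sketch below is a simplified, hypothetical illustration of that preference in plain Java; the real method also filters on RPC readiness, lets the snitch rank remote candidates, and picks a random local replica rather than the first.

import java.util.List;
import java.util.Optional;

// Hypothetical illustration only; this Replica and pickLeader are not Cassandra APIs.
public class CounterLeaderSketch {
    record Replica(String endpoint, String datacenter, boolean alive) { }

    // Prefer a live replica in the local datacenter, otherwise fall back to any live replica.
    static Optional<Replica> pickLeader(List<Replica> replicas, String localDatacenter) {
        List<Replica> live = replicas.stream().filter(Replica::alive).toList();
        return live.stream()
                   .filter(r -> r.datacenter().equals(localDatacenter))
                   .findFirst()
                   .or(() -> live.stream().findFirst());
    }

    public static void main(String[] args) {
        List<Replica> replicas = List.of(new Replica("10.0.0.1", "dc2", true),
                                         new Replica("10.0.0.2", "dc1", false),
                                         new Replica("10.0.0.3", "dc1", true));
        System.out.println(pickLeader(replicas, "dc1")); // the live dc1 replica, 10.0.0.3
    }
}

If the chosen leader is the coordinator itself, mutateCounter applies and replicates the update locally via applyCounterMutationOnCoordinator; otherwise it first verifies the consistency level is achievable and then forwards the counter mutation to the leader with a callback, as shown above.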