Use of org.apache.cassandra.locator.EndpointsForToken in project cassandra by apache.
The class StorageProxy, method sendMessagesToNonlocalDC.
/*
 * Send the message to one replica of targets (picked at random when there is more than one),
 * and have it forward the message to the others in its DC
 */
private static void sendMessagesToNonlocalDC(Message<? extends IMutation> message, EndpointsForToken targets, AbstractWriteResponseHandler<IMutation> handler) {
    final Replica target;
    if (targets.size() > 1) {
        target = targets.get(ThreadLocalRandom.current().nextInt(0, targets.size()));
        EndpointsForToken forwardToReplicas = targets.filter(r -> r != target, targets.size());
        for (Replica replica : forwardToReplicas) {
            MessagingService.instance().callbacks.addWithExpiration(handler, message, replica, handler.replicaPlan.consistencyLevel(), true);
            logger.trace("Adding FWD message to {}@{}", message.id(), replica);
        }
        // starting with 4.0, use the same message id for all replicas
        long[] messageIds = new long[forwardToReplicas.size()];
        Arrays.fill(messageIds, message.id());
        message = message.withForwardTo(new ForwardingInfo(forwardToReplicas.endpointList(), messageIds));
    } else {
        target = targets.get(0);
    }
    MessagingService.instance().sendWriteWithCallback(message, target, handler, true);
    logger.trace("Sending message to {}@{}", message.id(), target);
}
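The pattern here is to contact a single replica in the remote datacenter and attach a forwarding list to the message, so that one replica fans the write out within its own DC and the coordinator pays for only one cross-DC hop. The following is a minimal, self-contained sketch of the same pick-one-and-forward selection using plain java.util collections; the Endpoint type and all names are illustrative stand-ins, not Cassandra's own classes.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ThreadLocalRandom;

public class ForwardingSketch {
    // Illustrative stand-in for a remote-DC replica endpoint (not Cassandra's Replica).
    record Endpoint(String address) {}

    // Pick one target in the remote DC at random and compute the forwarding list.
    static void sendToRemoteDc(List<Endpoint> targets) {
        Endpoint target = targets.size() > 1
                        ? targets.get(ThreadLocalRandom.current().nextInt(targets.size()))
                        : targets.get(0);
        List<Endpoint> forwardTo = new ArrayList<>(targets);
        forwardTo.remove(target);
        // A real implementation would attach forwardTo to the outgoing message and register
        // a callback per forwarded replica before sending to 'target'.
        System.out.println("send to " + target + ", forward to " + forwardTo);
    }

    public static void main(String[] args) {
        sendToRemoteDc(List.of(new Endpoint("10.0.1.1"), new Endpoint("10.0.1.2"), new Endpoint("10.0.1.3")));
    }
}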
Use of org.apache.cassandra.locator.EndpointsForToken in project cassandra by apache.
The class StorageProxy, method mutateMV.
/**
 * Use this method to have these Mutations applied
 * across all replicas.
 *
 * @param dataKey the partition key of the base table update these view mutations were derived from
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog if commitlog should be written
 * @param baseComplete time from epoch in ms that the local base mutation was (or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                if (!pairedEndpoint.isPresent()) {
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission), " + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
                    continue;
                }
                // When the local node is the paired endpoint we can just apply the mutation locally,
                // unless there are pending replicas, in which case we want to do an ordinary
                // write so the view mutation is sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
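The core of mutateMV is a per-mutation routing decision: if no paired view replica can be found, the mutation is kept only in the local batchlog for later replay; if the paired replica is this node and there are no pending replicas, the mutation is applied locally and its batchlog entry is skipped; otherwise it is handed to a remote write handler. Below is a simplified decision sketch of that branching, using made-up types and omitting details such as the isJoined check, purely for illustration.

import java.util.Optional;

public class ViewRoutingSketch {
    enum Route { APPLY_LOCALLY, BATCHLOG_ONLY, REMOTE_WRITE }

    // Hypothetical inputs mirroring the checks in mutateMV: the paired view replica (if any),
    // whether that replica is this node, and whether any pending replicas exist for the token.
    static Route route(Optional<String> pairedEndpoint, boolean pairedIsSelf, boolean hasPendingReplicas) {
        if (pairedEndpoint.isEmpty())
            return Route.BATCHLOG_ONLY;   // likely a range movement; replay from the batchlog later
        if (pairedIsSelf && !hasPendingReplicas)
            return Route.APPLY_LOCALLY;   // the 1:1 base -> view pairing lands on this node
        return Route.REMOTE_WRITE;        // send through a write handler (a batchlog copy is kept)
    }

    public static void main(String[] args) {
        System.out.println(route(Optional.of("10.0.0.5"), true, false));  // APPLY_LOCALLY
        System.out.println(route(Optional.empty(), false, false));        // BATCHLOG_ONLY
        System.out.println(route(Optional.of("10.0.0.7"), false, true));  // REMOTE_WRITE
    }
}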
Use of org.apache.cassandra.locator.EndpointsForToken in project cassandra by apache.
The class StorageProxy, method syncWriteBatchedMutations.
private static void syncWriteBatchedMutations(List<WriteResponseHandlerWrapper> wrappers, Stage stage) throws WriteTimeoutException, OverloadedException {
    String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    for (WriteResponseHandlerWrapper wrapper : wrappers) {
        EndpointsForToken sendTo = wrapper.handler.replicaPlan.liveAndDown();
        // TODO: CASSANDRA-14549
        Replicas.temporaryAssertFull(sendTo);
        sendToHintedReplicas(wrapper.mutation, wrapper.handler.replicaPlan.withContact(sendTo), wrapper.handler, localDataCenter, stage);
    }
    for (WriteResponseHandlerWrapper wrapper : wrappers)
        wrapper.handler.get();
}
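The two loops matter: every write is dispatched before any handler is waited on, so the batched mutations travel to their replicas in parallel and the blocking cost is roughly the slowest write rather than the sum of all of them. A small sketch of that dispatch-then-await shape with CompletableFuture is below; writeAsync is a hypothetical stand-in for the send plus its response handler.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;

public class DispatchThenAwaitSketch {
    // Hypothetical async write; stands in for sendToHintedReplicas plus its response handler.
    static CompletableFuture<Void> writeAsync(String mutation) {
        return CompletableFuture.runAsync(() -> System.out.println("applied " + mutation));
    }

    static void syncWriteAll(List<String> mutations) {
        List<CompletableFuture<Void>> pending = new ArrayList<>();
        // First loop: dispatch everything without blocking.
        for (String m : mutations)
            pending.add(writeAsync(m));
        // Second loop: only now block, so the writes overlap in flight.
        for (CompletableFuture<Void> f : pending)
            f.join();
    }

    public static void main(String[] args) {
        syncWriteAll(List.of("m1", "m2", "m3"));
    }
}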
Use of org.apache.cassandra.locator.EndpointsForToken in project cassandra by apache.
The class AbstractWriteResponseHandler, method maybeTryAdditionalReplicas.
/**
 * Cheap Quorum backup. If we failed to reach quorum with our initial (full) nodes, reach out to other nodes.
 */
public void maybeTryAdditionalReplicas(IMutation mutation, WritePerformer writePerformer, String localDC) {
    EndpointsForToken uncontacted = replicaPlan.liveUncontacted();
    if (uncontacted.isEmpty())
        return;
    long timeout = MAX_VALUE;
    List<ColumnFamilyStore> cfs = mutation.getTableIds().stream().map(instance::getColumnFamilyStoreInstance).collect(toList());
    for (ColumnFamilyStore cf : cfs)
        timeout = min(timeout, cf.additionalWriteLatencyNanos);
    // no latency information, or we're overloaded
    if (timeout > mutation.getTimeout(NANOSECONDS))
        return;
    try {
        if (!condition.await(timeout, NANOSECONDS)) {
            for (ColumnFamilyStore cf : cfs)
                cf.metric.additionalWrites.inc();
            writePerformer.apply(mutation, replicaPlan.withContact(uncontacted), (AbstractWriteResponseHandler<IMutation>) this, localDC);
        }
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    }
}
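The idea is a bounded hedge: wait for the normal acknowledgements up to the table's observed additional-write latency, and only if that window elapses without the completion condition being signalled do we spend extra work contacting the replicas left out of the initial plan. A stripped-down sketch of that timeout-then-hedge flow with a CountDownLatch is below; the field and parameter names are illustrative, not the handler's real members.

import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class HedgedWriteSketch {
    // Stand-in for the handler's completion condition (counted down when enough acks arrive).
    private final CountDownLatch acks = new CountDownLatch(1);

    void maybeTryAdditionalReplicas(List<String> uncontacted, long additionalWriteLatencyNanos, long requestTimeoutNanos) throws InterruptedException {
        if (uncontacted.isEmpty())
            return;
        // No useful latency estimate (or it exceeds the request timeout): hedging would not help.
        if (additionalWriteLatencyNanos > requestTimeoutNanos)
            return;
        // Wait for the initial replicas up to the learned latency budget, then hedge.
        if (!acks.await(additionalWriteLatencyNanos, TimeUnit.NANOSECONDS))
            uncontacted.forEach(r -> System.out.println("hedged write to " + r));
    }

    public static void main(String[] args) throws InterruptedException {
        new HedgedWriteSketch().maybeTryAdditionalReplicas(List.of("replica-3"), TimeUnit.MILLISECONDS.toNanos(5), TimeUnit.SECONDS.toNanos(2));
    }
}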
Use of org.apache.cassandra.locator.EndpointsForToken in project cassandra by apache.
The class AbstractReadExecutor, method executeAsync.
/**
 * Send the initial set of requests
 */
public void executeAsync() {
    EndpointsForToken selected = replicaPlan().contacts();
    EndpointsForToken fullDataRequests = selected.filter(Replica::isFull, initialDataRequestCount);
    makeFullDataRequests(fullDataRequests);
    makeTransientDataRequests(selected.filterLazily(Replica::isTransient));
    makeDigestRequests(selected.filterLazily(r -> r.isFull() && !fullDataRequests.contains(r)));
}
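executeAsync splits the contacted replicas three ways: the first initialDataRequestCount full replicas receive full data reads, transient replicas receive transient data reads, and the remaining full replicas receive digest-only reads. Below is a plain-collections sketch of that partitioning; the Replica record here is a made-up stand-in for Cassandra's class, with 'full' mirroring Replica::isFull versus Replica::isTransient.

import java.util.List;
import java.util.stream.Collectors;

public class ReadPartitionSketch {
    // Illustrative replica; not Cassandra's org.apache.cassandra.locator.Replica.
    record Replica(String address, boolean full) {}

    static void partition(List<Replica> contacts, int initialDataRequestCount) {
        // Full data reads go to the first initialDataRequestCount full replicas.
        List<Replica> fullData = contacts.stream()
                                         .filter(Replica::full)
                                         .limit(initialDataRequestCount)
                                         .collect(Collectors.toList());
        // Transient replicas always get (transient) data reads, never digests.
        List<Replica> transientData = contacts.stream().filter(r -> !r.full()).collect(Collectors.toList());
        // Remaining full replicas get cheap digest-only reads for comparison.
        List<Replica> digests = contacts.stream()
                                        .filter(r -> r.full() && !fullData.contains(r))
                                        .collect(Collectors.toList());
        System.out.println("full data: " + fullData + ", transient: " + transientData + ", digests: " + digests);
    }

    public static void main(String[] args) {
        partition(List.of(new Replica("a", true), new Replica("b", true), new Replica("c", false)), 1);
    }
}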