use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.
The class BlockingReadRepairTest, method remoteDCTest.
/**
* For DC-local consistency levels, no-op mutations and responses from remote DCs should not affect the effective blockFor
*/
@Test
public void remoteDCTest() throws Exception {
    Map<Replica, Mutation> repairs = new HashMap<>();
    repairs.put(replica1, mutation(cell1));

    Replica remote1 = ReplicaUtils.full(InetAddressAndPort.getByName("10.0.0.1"));
    Replica remote2 = ReplicaUtils.full(InetAddressAndPort.getByName("10.0.0.2"));
    repairs.put(remote1, mutation(cell1));

    EndpointsForRange participants = EndpointsForRange.of(replica1, replica2, remote1, remote2);
    ReplicaPlan.ForTokenWrite writePlan = repairPlan(replicaPlan(ks, ConsistencyLevel.LOCAL_QUORUM, participants));
    InstrumentedReadRepairHandler handler = createRepairHandler(repairs, writePlan);
    handler.sendInitialRepairs();

    Assert.assertEquals(2, handler.mutationsSent.size());
    Assert.assertTrue(handler.mutationsSent.containsKey(replica1.endpoint()));
    Assert.assertTrue(handler.mutationsSent.containsKey(remote1.endpoint()));

    Assert.assertEquals(1, handler.waitingOn());
    Assert.assertFalse(getCurrentRepairStatus(handler));

    handler.ack(remote1.endpoint());
    Assert.assertEquals(1, handler.waitingOn());
    Assert.assertFalse(getCurrentRepairStatus(handler));

    handler.ack(replica1.endpoint());
    Assert.assertEquals(0, handler.waitingOn());
    Assert.assertTrue(getCurrentRepairStatus(handler));
}
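The assertions above follow from how the handler computes its effective blockFor for a DC-local level. A minimal arithmetic sketch of that reasoning, assuming (as the fixture names and the assertions imply) that replica1 and replica2 are the local-DC participants and that replica2 needs no repair; the variable names are illustrative, not project code.

    // Illustrative arithmetic only, not code from BlockingReadRepairTest.
    int localReplicas = 2;                            // replica1, replica2; remote1/remote2 sit in a remote DC
    int localQuorum = (localReplicas / 2) + 1;        // LOCAL_QUORUM over the local DC => 2
    int noRepairNeeded = 1;                           // replica2 gets a no-op mutation, so it counts as satisfied
    int waitingOn = localQuorum - noRepairNeeded;     // => 1, matching handler.waitingOn()
    // remote1's ack leaves waitingOn at 1 (remote DC); only replica1's ack drops it to 0.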
use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.
The class StorageProxy, method performWrite.
/**
* Perform the write of a mutation given a WritePerformer.
* Gather the list of write endpoints, apply locally and/or forward the mutation to
* said write endpoints (delegated to the actual WritePerformer) and wait for the
* responses based on consistency level.
*
* @param mutation the mutation to be applied
* @param consistencyLevel the consistency level for the write operation
* @param performer the WritePerformer in charge of applying the mutation
* given the list of write endpoints (either standardWritePerformer for
* standard writes or counterWritePerformer for counter writes).
* @param callback an optional callback to be run if and when the write is
* successful.
* @param queryStartNanoTime the value of nanoTime() when the query started to be processed
*/
public static AbstractWriteResponseHandler<IMutation> performWrite(IMutation mutation,
                                                                   ConsistencyLevel consistencyLevel,
                                                                   String localDataCenter,
                                                                   WritePerformer performer,
                                                                   Runnable callback,
                                                                   WriteType writeType,
                                                                   long queryStartNanoTime) {
    String keyspaceName = mutation.getKeyspaceName();
    Keyspace keyspace = Keyspace.open(keyspaceName);
    Token tk = mutation.key().getToken();

    ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forWrite(keyspace, consistencyLevel, tk, ReplicaPlans.writeNormal);
    AbstractReplicationStrategy rs = replicaPlan.replicationStrategy();
    AbstractWriteResponseHandler<IMutation> responseHandler = rs.getWriteResponseHandler(replicaPlan, callback, writeType, queryStartNanoTime);

    performer.apply(mutation, replicaPlan, responseHandler, localDataCenter);
    return responseHandler;
}
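A hedged usage sketch (not taken from the project's callers): how this method might be driven from inside StorageProxy and then waited on. standardWritePerformer is the performer named in the javadoc above; the mutation, localDataCenter and statically imported nanoTime() are assumed to be in scope.

    // Illustrative wiring only; assumptions noted above.
    AbstractWriteResponseHandler<IMutation> handler =
        performWrite(mutation,                    // the IMutation to apply
                     ConsistencyLevel.QUORUM,     // consistency level for the write
                     localDataCenter,             // e.g. DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter()
                     standardWritePerformer,      // standard (non-counter) write path
                     null,                        // no completion callback
                     WriteType.SIMPLE,
                     nanoTime());                 // queryStartNanoTime
    handler.get();   // blocks until the consistency level is met, or throws WriteTimeoutException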
use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.
The class StorageProxy, method commitPaxos.
private static void commitPaxos(Commit proposal, ConsistencyLevel consistencyLevel, boolean allowHints, long queryStartNanoTime) throws WriteTimeoutException {
    boolean shouldBlock = consistencyLevel != ConsistencyLevel.ANY;
    Keyspace keyspace = Keyspace.open(proposal.update.metadata().keyspace);
    Token tk = proposal.update.partitionKey().getToken();

    AbstractWriteResponseHandler<Commit> responseHandler = null;
    // NOTE: this ReplicaPlan is a lie, this usage of ReplicaPlan could do with being clarified
    // - the selected() collection is essentially (I think) never used
    ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forWrite(keyspace, consistencyLevel, tk, ReplicaPlans.writeAll);
    if (shouldBlock) {
        AbstractReplicationStrategy rs = replicaPlan.replicationStrategy();
        responseHandler = rs.getWriteResponseHandler(replicaPlan, null, WriteType.SIMPLE, queryStartNanoTime);
    }

    Message<Commit> message = Message.outWithFlag(PAXOS_COMMIT_REQ, proposal, MessageFlag.CALL_BACK_ON_FAILURE);
    for (Replica replica : replicaPlan.liveAndDown()) {
        InetAddressAndPort destination = replica.endpoint();
        checkHintOverload(replica);

        if (replicaPlan.isAlive(replica)) {
            if (shouldBlock) {
                if (replica.isSelf())
                    commitPaxosLocal(replica, message, responseHandler);
                else
                    MessagingService.instance().sendWriteWithCallback(message, replica, responseHandler, allowHints && shouldHint(replica));
            } else {
                MessagingService.instance().send(message, destination);
            }
        } else {
            if (responseHandler != null) {
                responseHandler.expired();
            }
            if (allowHints && shouldHint(replica)) {
                submitHint(proposal.makeMutation(), replica, null);
            }
        }
    }

    if (shouldBlock)
        responseHandler.get();
}
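For comparison with the ReplicaPlans.writeNormal selector used in performWrite above, here is a minimal sketch of building a plan with ReplicaPlans.writeAll and inspecting liveness, as commitPaxos does. The keyspace name and token are placeholders and the snippet is illustrative, not project code; only forWrite, liveAndDown and isAlive are taken from the usage above.

    // Assumed diagnostic snippet.
    Keyspace keyspace = Keyspace.open("some_keyspace");                        // placeholder keyspace
    Token tk = DatabaseDescriptor.getPartitioner().getMinimumToken();          // placeholder token
    ReplicaPlan.ForTokenWrite plan = ReplicaPlans.forWrite(keyspace, ConsistencyLevel.QUORUM, tk, ReplicaPlans.writeAll);
    for (Replica replica : plan.liveAndDown())
        logger.debug("paxos commit target {} alive={}", replica.endpoint(), plan.isAlive(replica));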
use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.
The class StorageProxy, method mutateMV.
/**
* Use this method to have these Mutations applied
* across all replicas.
*
* @param mutations the mutations to be applied across the replicas
* @param writeCommitLog whether the commitlog should be written
* @param baseComplete time from epoch in ms that the local base mutation was (or will be) completed
* @param queryStartNanoTime the value of nanoTime() when the query started to be processed
*/
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime)
        throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();

    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);

            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));

            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();

                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);

                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                if (!pairedEndpoint.isPresent()) {
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " +
                                    "to this node. There is probably a range movement happening (move or decommission), " +
                                    "but this node hasn't updated its ring metadata yet. Adding mutation to " +
                                    "local batchlog to be replayed later.", mutation.key());
                    continue;
                }

                // when the local node is the paired endpoint and there are no pending replicas, apply the view mutation
                // locally; otherwise do an ordinary write so the view mutation is also sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})",
                                     mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }

            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);

            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
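The per-mutation branching above reduces to three outcomes. A condensed, hedged restatement as a standalone sketch; the enum and helper method are illustrative and not part of Cassandra.

    // Illustrative only: summarizes the decision made for each base mutation in mutateMV.
    enum ViewWriteRoute { BATCHLOG_REPLAY_ONLY, LOCAL_APPLY, REMOTE_BATCH }

    static ViewWriteRoute routeViewMutation(Optional<Replica> pairedEndpoint, boolean joined, boolean hasPendingReplicas) {
        if (!pairedEndpoint.isPresent())
            return ViewWriteRoute.BATCHLOG_REPLAY_ONLY;            // likely a range movement; rely on batchlog replay
        if (pairedEndpoint.get().isSelf() && joined && !hasPendingReplicas)
            return ViewWriteRoute.LOCAL_APPLY;                     // apply locally and decrement the batchlog cleanup counter
        return ViewWriteRoute.REMOTE_BATCH;                        // wrap in a batch response handler and write remotely
    }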
use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.
The class RangeCommandIterator, method query.
/**
* Queries the provided sub-range.
*
* @param replicaPlan the replica plan covering the sub-range to query.
* @param isFirst in the case where multiple queries are sent in parallel, whether this is the first query in
* the batch or not. The reason it matters is that when paging queries, the command (more specifically the
* {@code DataLimits}) may have "state" information, and that state may only be valid for the first query (in
* that it's the query that "continues" whatever we previously queried).
*/
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand rangeCommand = command.forSubRange(replicaPlan.range(), isFirst);

    // If enabled, request repaired data tracking info from full replicas, but
    // only if there are multiple full replicas to compare results from.
    boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled()
                                  && replicaPlan.contacts().filter(Replica::isFull).size() > 1;

    ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> readRepair = ReadRepair.create(command, sharedReplicaPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> resolver = new DataResolver<>(rangeCommand, sharedReplicaPlan, readRepair, queryStartNanoTime, trackRepairedStatus);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> handler = new ReadCallback<>(resolver, rangeCommand, sharedReplicaPlan, queryStartNanoTime);

    if (replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf()) {
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(rangeCommand, handler, trackRepairedStatus));
    } else {
        for (Replica replica : replicaPlan.contacts()) {
            Tracing.trace("Enqueuing request to {}", replica);
            ReadCommand command = replica.isFull() ? rangeCommand : rangeCommand.copyAsTransientQuery(replica);
            Message<ReadCommand> message = command.createMessage(trackRepairedStatus && replica.isFull());
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
        }
    }

    return new SingleRangeResponse(resolver, handler, readRepair);
}
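The trackRepairedStatus condition above can be read as a small predicate. A hedged restatement as a standalone helper; the method name is illustrative, while the calls it makes are exactly the ones used in the snippet.

    // Illustrative helper only, not part of RangeCommandIterator.
    static boolean shouldTrackRepairedStatus(ReplicaPlan.ForRangeRead replicaPlan) {
        // Repaired-data tracking is only worthwhile when at least two full replicas
        // are contacted, so their repaired-data digests can be compared.
        return DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled()
               && replicaPlan.contacts().filter(Replica::isFull).size() > 1;
    }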