use of org.apache.cassandra.locator.Replica in project cassandra by apache.
the class BlockingPartitionRepair method sendInitialRepairs.
/**
 * Dispatches every mutation currently held in {@code pendingRepairs} to its target replica.
 *
 * The send time is recorded in {@code mutationsSentTime} before anything goes out. For each entry the
 * send is traced, the mutation is sent with the READ_REPAIR_REQ verb, the owning table's
 * {@code readRepairRequests} metric is marked and a diagnostic event is published. A destination
 * that {@code shouldBlockOn} rejects is dropped from {@code pendingRepairs} right after its mutation
 * has been sent.
 *
 * All destinations must be full replicas: this is asserted up front for the whole key set and
 * checked again per entry through {@code Preconditions.checkArgument}.
 */
public void sendInitialRepairs() {
    mutationsSentTime = nanoTime();
    Replicas.assertFull(pendingRepairs.keySet());
    for (Map.Entry<Replica, Mutation> pending : pendingRepairs.entrySet()) {
        final Replica target = pending.getKey();
        Preconditions.checkArgument(target.isFull(), "Can't send repairs to transient replicas: %s", target);
        final Mutation repair = pending.getValue();
        // resolved before sending, so a failure here prevents the send for this entry
        final TableId table = extractUpdate(repair).metadata().id;
        Tracing.trace("Sending read-repair-mutation to {}", target);
        // a dedicated verb is used so that timeouts do not result in hints being written
        sendRR(Message.out(READ_REPAIR_REQ, repair), target.endpoint());
        ColumnFamilyStore.metricsFor(table).readRepairRequests.mark();
        final boolean awaited = shouldBlockOn.test(target.endpoint());
        if (!awaited) {
            // NOTE(review): this removes from the map being iterated; presumably pendingRepairs
            // is a concurrent map that tolerates it - confirm against the field declaration
            pendingRepairs.remove(target);
        }
        ReadRepairDiagnostics.sendInitialRepair(this, target.endpoint(), repair);
    }
}
use of org.apache.cassandra.locator.Replica in project cassandra by apache.
the class BlockingPartitionRepair method maybeSendAdditionalWrites.
/**
 * Speculatively widens a blocking read repair when acks for the initial repair mutations look
 * unlikely to all arrive in time.
 *
 * If the repairs are not acknowledged by the deadline, all still-unacked mutations are merged into
 * a single update and sent to the live replicas that have not yet taken part in the read repair
 * read / write cycle. Acks from those replicas are accepted in place of acks for the initial
 * mutations, provided the number of acks received equals the number of repair mutations sent.
 * This keeps a misbehaving node from failing a quorum read while still guaranteeing monotonic
 * quorum reads.
 *
 * @param timeout     how long after the initial mutations were sent to wait before speculating
 * @param timeoutUnit the unit of {@code timeout}
 */
public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) {
    // deadline = send time of the initial mutations (converted from nanos to timeoutUnit) + timeout
    final long deadline = timeout + timeoutUnit.convert(mutationsSentTime, TimeUnit.NANOSECONDS);
    if (awaitRepairsUntil(deadline, timeoutUnit)) {
        return;
    }

    final EndpointsForToken extraTargets = writePlan.liveUncontacted();
    if (extraTargets.isEmpty()) {
        return;
    }

    final PartitionUpdate unacked = mergeUnackedUpdates();
    if (unacked == null) {
        // nothing is left to merge; presumably the outstanding responses arrived between the
        // speculation timeout and the call that collects the unacked mutations
        return;
    }

    ReadRepairMetrics.speculatedWrite.mark();

    // one cached mutation per messaging-version index, so replicas on the same version share it
    final Mutation[] mutationsByVersion = new Mutation[msgVersionIdx(MessagingService.current_version) + 1];
    for (Replica target : extraTargets) {
        final int slot = msgVersionIdx(MessagingService.instance().versions.get(target.endpoint()));
        Mutation speculative = mutationsByVersion[slot];
        if (speculative == null) {
            speculative = BlockingReadRepairs.createRepairMutation(unacked, writePlan.consistencyLevel(), target.endpoint(), true);
            mutationsByVersion[slot] = speculative;
            if (speculative == null) {
                // the mutation is too large to send; the slot stays empty, so creation is
                // attempted again for the next replica that maps to this version
                ReadRepairDiagnostics.speculatedWriteOversized(this, target.endpoint());
                continue;
            }
        }
        Tracing.trace("Sending speculative read-repair-mutation to {}", target);
        sendRR(Message.out(READ_REPAIR_REQ, speculative), target.endpoint());
        ReadRepairDiagnostics.speculatedWrite(this, target.endpoint(), speculative);
    }
}
use of org.apache.cassandra.locator.Replica in project cassandra by apache.
the class RowIteratorMergeListener method close.
/**
 * Turns the accumulated repair builders into one mutation per write-plan contact and hands the
 * resulting map to {@code readRepair.repairPartition}. Returns without doing anything when no
 * slot of {@code repairs} is populated.
 */
public void close() {
    // find the first populated slot; reaching the end means there is nothing to repair
    int firstRepair = 0;
    while (firstRepair < repairs.length && repairs[firstRepair] == null) {
        firstRepair++;
    }
    if (firstRepair == repairs.length) {
        return;
    }

    // when buildFullDiff is set, the last slot is built into the update used for replicas that
    // have no read-plan source id
    final int lastSlot = repairs.length - 1;
    final PartitionUpdate fullDiffRepair = (buildFullDiff && repairs[lastSlot] != null)
                                           ? repairs[lastSlot].build()
                                           : null;

    final Map<Replica, Mutation> mutations = Maps.newHashMapWithExpectedSize(writePlan.contacts().size());

    // source ids are stored 1-based (position in the read plan + 1)
    final ObjectIntHashMap<InetAddressAndPort> sourceIds = new ObjectIntHashMap<>(((repairs.length + 1) * 4) / 3);
    for (int pos = 0; pos < readPlan.contacts().size(); ++pos) {
        sourceIds.put(readPlan.contacts().get(pos).endpoint(), 1 + pos);
    }

    for (Replica target : writePlan.contacts()) {
        final int sourceId = sourceIds.get(target.endpoint());
        PartitionUpdate update = null;
        if (sourceId < 1) {
            // no id recorded for this endpoint (presumably the map yields 0 for absent keys)
            update = fullDiffRepair;
        } else if (repairs[sourceId - 1] != null) {
            update = repairs[sourceId - 1].build();
        }

        final Mutation mutation = BlockingReadRepairs.createRepairMutation(update, readPlan.consistencyLevel(), target.endpoint(), false);
        // a null result means there is nothing to send to this replica
        if (mutation != null) {
            mutations.put(target, mutation);
        }
    }

    readRepair.repairPartition(partitionKey, mutations, writePlan);
}
use of org.apache.cassandra.locator.Replica in project cassandra by apache.
the class AbstractReadRepair method startRepair.
/**
 * Starts a read repair by issuing a read to every contact of the replica plan.
 *
 * The digestResolver argument is only forwarded to the diagnostic event; it does not drive the
 * reads, because read requests are re-sent to all participants.
 *
 * @param digestResolver resolver of the preceding digest round, passed on to diagnostics
 * @param resultConsumer handed to the {@code DigestRepair} stored in {@code digestRepair}
 */
public void startRepair(DigestResolver<E, P> digestResolver, Consumer<PartitionIterator> resultConsumer) {
    getRepairMeter().mark();

    /*
     * With repaired data tracking enabled, each replica builds a digest from the data it reads out of
     * repaired SSTables, and the coordinator compares those digests to spot divergence between the
     * replicas' repaired datasets. Here an SSTable counts as repaired if it is marked repaired or
     * belongs to a pending repair session that has been committed. Alongside the digest, the ids of
     * pending but not yet committed repair sessions are recorded and returned to the coordinator;
     * this helps cut down false positives caused by lagging compaction, which can leave sstables of
     * committed sessions in the pending state for a while.
     */
    final boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForPartitionReadsEnabled();

    // A full data read is performed to resolve the correct response (and repair nodes as needed)
    final DataResolver<E, P> dataResolver = new DataResolver<>(command, replicaPlan, this, queryStartNanoTime, trackRepairedStatus);
    final ReadCallback<E, P> callback = new ReadCallback<>(dataResolver, command, replicaPlan, queryStartNanoTime);
    digestRepair = new DigestRepair<>(dataResolver, callback, resultConsumer);

    // trackRepairedStatus travels with each read so that, if enabled, additional info about
    // repaired data is requested from any full replicas
    for (Replica contact : replicaPlan().contacts())
        sendReadCommand(contact, callback, false, trackRepairedStatus);

    ReadRepairDiagnostics.startRepair(this, replicaPlan(), digestResolver);
}
use of org.apache.cassandra.locator.Replica in project cassandra by apache.
the class AbstractReadRepair method maybeSendAdditionalReads.
/**
 * Sends one speculative read to a not-yet-contacted replica when the reads of the current repair
 * have not completed within {@code cfs.sampleReadLatencyNanos}.
 *
 * Nothing happens if no repair has been started, if {@code shouldSpeculate()} declines, if the
 * read callback's await returns true within the wait, or if the replica plan has no uncontacted
 * candidate. Only valid for {@code SinglePartitionReadCommand}; any other command fails the
 * state check.
 */
public void maybeSendAdditionalReads() {
    Preconditions.checkState(command instanceof SinglePartitionReadCommand, "maybeSendAdditionalReads can only be called for SinglePartitionReadCommand");

    // read the field once so the same repair instance is used for the whole method
    final DigestRepair<E, P> current = digestRepair;
    if (current == null) {
        return;
    }
    if (!shouldSpeculate()) {
        return;
    }
    // waits for up to the sampled read latency; a true result means no extra read is needed
    if (current.readCallback.await(cfs.sampleReadLatencyNanos, NANOSECONDS)) {
        return;
    }

    // any uncontacted candidate is acceptable
    final Replica extra = replicaPlan().firstUncontactedCandidate(candidate -> true);
    if (extra == null) {
        return;
    }

    replicaPlan.addToContacts(extra);
    sendReadCommand(extra, current.readCallback, true, false);
    ReadRepairMetrics.speculatedRead.mark();
    ReadRepairDiagnostics.speculatedRead(this, extra.endpoint(), replicaPlan());
}
Aggregations