Example 1 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class BlockingPartitionRepair method sendInitialRepairs.

public void sendInitialRepairs() {
    mutationsSentTime = nanoTime();
    Replicas.assertFull(pendingRepairs.keySet());
    for (Map.Entry<Replica, Mutation> entry : pendingRepairs.entrySet()) {
        Replica destination = entry.getKey();
        Preconditions.checkArgument(destination.isFull(), "Can't send repairs to transient replicas: %s", destination);
        Mutation mutation = entry.getValue();
        TableId tableId = extractUpdate(mutation).metadata().id;
        Tracing.trace("Sending read-repair-mutation to {}", destination);
        // use a separate verb here to avoid writing hints on timeouts
        sendRR(Message.out(READ_REPAIR_REQ, mutation), destination.endpoint());
        ColumnFamilyStore.metricsFor(tableId).readRepairRequests.mark();
        if (!shouldBlockOn.test(destination.endpoint()))
            pendingRepairs.remove(destination);
        ReadRepairDiagnostics.sendInitialRepair(this, destination.endpoint(), mutation);
    }
}
Also used: TableId (org.apache.cassandra.schema.TableId), Mutation (org.apache.cassandra.db.Mutation), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), Replica (org.apache.cassandra.locator.Replica)
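
Note that the loop above removes entries from pendingRepairs while iterating its entrySet(); that works because the map is presumably backed by the ConcurrentHashMap listed among the classes above, whose iterators are weakly consistent. Below is a minimal, self-contained sketch of the same pattern; the Replica and Mutation records, addresses, and print statements are stand-ins for illustration, not Cassandra's types or messaging layer.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Predicate;

public class InitialRepairSketch {
    // Hypothetical stand-ins for org.apache.cassandra.locator.Replica and org.apache.cassandra.db.Mutation.
    record Replica(String endpoint, boolean isFull) {}
    record Mutation(String payload) {}

    // Mirrors the shape of sendInitialRepairs(): reject transient replicas, "send" each pending
    // repair, then drop the entries we will not block on. Removing entries during iteration is
    // safe because ConcurrentHashMap iterators are weakly consistent.
    static void sendInitialRepairs(ConcurrentHashMap<Replica, Mutation> pendingRepairs,
                                   Predicate<String> shouldBlockOn) {
        for (Map.Entry<Replica, Mutation> entry : pendingRepairs.entrySet()) {
            Replica destination = entry.getKey();
            if (!destination.isFull())
                throw new IllegalArgumentException("Can't send repairs to transient replicas: " + destination);
            System.out.println("sending " + entry.getValue().payload() + " to " + destination.endpoint());
            if (!shouldBlockOn.test(destination.endpoint()))
                pendingRepairs.remove(destination);
        }
    }

    public static void main(String[] args) {
        ConcurrentHashMap<Replica, Mutation> pending = new ConcurrentHashMap<>();
        pending.put(new Replica("10.0.0.1:7000", true), new Mutation("m1"));
        pending.put(new Replica("10.0.0.2:7000", true), new Mutation("m2"));
        sendInitialRepairs(pending, endpoint -> endpoint.startsWith("10.0.0.1"));
        System.out.println("still waiting on acks from: " + pending.keySet());
    }
}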

Example 2 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class BlockingPartitionRepair method maybeSendAdditionalWrites.

/**
 * If it looks like we might not receive acks for all the repair mutations we sent out, combine all
 * the unacked mutations and send them to the minority of nodes not involved in the read repair data
 * read / write cycle. We will accept acks from them in lieu of acks from the initial mutations sent
 * out, so long as we receive the same number of acks as repair mutations transmitted. This prevents
 * misbehaving nodes from killing a quorum read, while continuing to guarantee monotonic quorum reads.
 */
public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) {
    if (awaitRepairsUntil(timeout + timeoutUnit.convert(mutationsSentTime, TimeUnit.NANOSECONDS), timeoutUnit))
        return;
    EndpointsForToken newCandidates = writePlan.liveUncontacted();
    if (newCandidates.isEmpty())
        return;
    PartitionUpdate update = mergeUnackedUpdates();
    if (update == null)
        // the final response was received between the speculation timeout and the call to get unacked mutations.
        return;
    ReadRepairMetrics.speculatedWrite.mark();
    Mutation[] versionedMutations = new Mutation[msgVersionIdx(MessagingService.current_version) + 1];
    for (Replica replica : newCandidates) {
        int versionIdx = msgVersionIdx(MessagingService.instance().versions.get(replica.endpoint()));
        Mutation mutation = versionedMutations[versionIdx];
        if (mutation == null) {
            mutation = BlockingReadRepairs.createRepairMutation(update, writePlan.consistencyLevel(), replica.endpoint(), true);
            versionedMutations[versionIdx] = mutation;
        }
        if (mutation == null) {
            // the mutation is too large to send.
            ReadRepairDiagnostics.speculatedWriteOversized(this, replica.endpoint());
            continue;
        }
        Tracing.trace("Sending speculative read-repair-mutation to {}", replica);
        sendRR(Message.out(READ_REPAIR_REQ, mutation), replica.endpoint());
        ReadRepairDiagnostics.speculatedWrite(this, replica.endpoint(), mutation);
    }
}
Also used: EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken), Mutation (org.apache.cassandra.db.Mutation), Replica (org.apache.cassandra.locator.Replica), PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate)
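
Because the candidate replicas may be on different messaging versions, the method above builds at most one repair mutation per version and reuses it through the versionedMutations array. A stripped-down sketch of that lazy per-version caching follows; the replicaVersionIdx list, versionCount parameter, and builder function are made up for illustration and do not correspond to Cassandra APIs.

import java.util.List;
import java.util.function.IntFunction;

public class VersionedMutationSketch {
    // Lazily build one payload per messaging-version index and reuse it for every replica on
    // that version, mirroring how maybeSendAdditionalWrites() fills versionedMutations.
    static String[] buildPerVersion(List<Integer> replicaVersionIdx, int versionCount,
                                    IntFunction<String> builder) {
        String[] versioned = new String[versionCount];
        for (int idx : replicaVersionIdx) {
            if (versioned[idx] == null)
                versioned[idx] = builder.apply(idx);   // built lazily, then reused for this version
            System.out.println("would send " + versioned[idx] + " for version index " + idx);
        }
        return versioned;
    }

    public static void main(String[] args) {
        // three replicas, two of them on the same messaging version
        buildPerVersion(List.of(0, 1, 1), 2, idx -> "mutation-serialized-for-v" + idx);
    }
}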

Example 3 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class RowIteratorMergeListener method close.

public void close() {
    boolean hasRepairs = false;
    for (int i = 0; !hasRepairs && i < repairs.length; ++i) hasRepairs = repairs[i] != null;
    if (!hasRepairs)
        return;
    PartitionUpdate fullDiffRepair = null;
    if (buildFullDiff && repairs[repairs.length - 1] != null)
        fullDiffRepair = repairs[repairs.length - 1].build();
    Map<Replica, Mutation> mutations = Maps.newHashMapWithExpectedSize(writePlan.contacts().size());
    ObjectIntHashMap<InetAddressAndPort> sourceIds = new ObjectIntHashMap<>(((repairs.length + 1) * 4) / 3);
    // record each read source's position, offset by one so that a lookup of 0 means "not a read source"
    for (int i = 0; i < readPlan.contacts().size(); ++i) sourceIds.put(readPlan.contacts().get(i).endpoint(), 1 + i);
    for (Replica replica : writePlan.contacts()) {
        PartitionUpdate update = null;
        int i = -1 + sourceIds.get(replica.endpoint());
        if (i < 0)
            // this write target took no part in the data read, so it gets the full diff
            update = fullDiffRepair;
        else if (repairs[i] != null)
            update = repairs[i].build();
        Mutation mutation = BlockingReadRepairs.createRepairMutation(update, readPlan.consistencyLevel(), replica.endpoint(), false);
        if (mutation == null)
            // nothing to repair on this replica
            continue;
        mutations.put(replica, mutation);
    }
    readRepair.repairPartition(partitionKey, mutations, writePlan);
}
Also used: InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort), ObjectIntHashMap (com.carrotsearch.hppc.ObjectIntHashMap), Mutation (org.apache.cassandra.db.Mutation), Replica (org.apache.cassandra.locator.Replica), PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate)
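
The sourceIds map above stores each read source's position offset by one, so the zero that the primitive-int map returns for an absent key becomes i = -1, which routes replicas that did not serve the read to the full-diff repair. The same trick expressed with plain JDK collections; the addresses and messages here are invented for illustration.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SourceIndexSketch {
    public static void main(String[] args) {
        List<String> readSources = List.of("10.0.0.1", "10.0.0.2");   // replicas that served the read
        Map<String, Integer> sourceIds = new HashMap<>();
        for (int i = 0; i < readSources.size(); i++)
            sourceIds.put(readSources.get(i), 1 + i);                 // store i + 1, never 0

        for (String writeTarget : List.of("10.0.0.2", "10.0.0.3")) {  // replicas in the write plan
            int i = -1 + sourceIds.getOrDefault(writeTarget, 0);      // absent key -> i == -1
            if (i < 0)
                System.out.println(writeTarget + " was not a read source: send the full diff");
            else
                System.out.println(writeTarget + " gets the per-source repair at index " + i);
        }
    }
}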

Example 4 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class AbstractReadRepair method startRepair.

// digestResolver isn't used here because we resend read requests to all participants
public void startRepair(DigestResolver<E, P> digestResolver, Consumer<PartitionIterator> resultConsumer) {
    getRepairMeter().mark();
    /*
         * When repaired data tracking is enabled, a digest will be created from data read from repaired SSTables.
         * The digests from each replica can then be compared on the coordinator to detect any divergence in their
         * repaired datasets. In this context, an SSTable is considered repaired if it is marked repaired or has a 
         * pending repair session which has been committed. In addition to the digest, a set of ids for any pending but 
         * as yet uncommitted repair sessions is recorded and returned to the coordinator. This is to help reduce false 
         * positives caused by compaction lagging which can leave sstables from committed sessions in the pending state
         * for a time.
         */
    boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForPartitionReadsEnabled();
    // Do a full data read to resolve the correct response (and repair any node that needs it)
    DataResolver<E, P> resolver = new DataResolver<>(command, replicaPlan, this, queryStartNanoTime, trackRepairedStatus);
    ReadCallback<E, P> readCallback = new ReadCallback<>(resolver, command, replicaPlan, queryStartNanoTime);
    digestRepair = new DigestRepair<>(resolver, readCallback, resultConsumer);
    // if enabled, request additional info about repaired data from any full replicas
    for (Replica replica : replicaPlan().contacts()) {
        sendReadCommand(replica, readCallback, false, trackRepairedStatus);
    }
    ReadRepairDiagnostics.startRepair(this, replicaPlan(), digestResolver);
}
Also used: ReadCallback (org.apache.cassandra.service.reads.ReadCallback), Replica (org.apache.cassandra.locator.Replica), DataResolver (org.apache.cassandra.service.reads.DataResolver)
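
The block comment above describes repaired data tracking: each replica digests the data it read from repaired SSTables so the coordinator can detect divergence between the replicas' repaired datasets. The following is a purely conceptual sketch of that comparison; the hashing scheme, row encoding, and addresses are invented for illustration and are not Cassandra's digest format.

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

public class RepairedDigestSketch {
    // Hash whatever a replica read from its repaired sstables; the coordinator then compares
    // the hashes, and any mismatch means the replicas' repaired datasets disagree.
    static byte[] repairedDataDigest(List<String> repairedRows) throws Exception {
        MessageDigest md = MessageDigest.getInstance("MD5");
        for (String row : repairedRows)
            md.update(row.getBytes(StandardCharsets.UTF_8));
        return md.digest();
    }

    public static void main(String[] args) throws Exception {
        Map<String, List<String>> perReplica = Map.of(
                "10.0.0.1", List.of("k1:v1", "k2:v2"),
                "10.0.0.2", List.of("k1:v1", "k2:STALE"));
        byte[] reference = null;
        for (var e : perReplica.entrySet()) {
            byte[] digest = repairedDataDigest(e.getValue());
            if (reference == null)
                reference = digest;                        // first replica sets the baseline
            else if (!Arrays.equals(reference, digest))
                System.out.println("repaired data mismatch detected at " + e.getKey());
        }
    }
}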

Example 5 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class AbstractReadRepair method maybeSendAdditionalReads.

public void maybeSendAdditionalReads() {
    Preconditions.checkState(command instanceof SinglePartitionReadCommand, "maybeSendAdditionalReads can only be called for SinglePartitionReadCommand");
    DigestRepair<E, P> repair = digestRepair;
    if (repair == null)
        return;
    if (shouldSpeculate() && !repair.readCallback.await(cfs.sampleReadLatencyNanos, NANOSECONDS)) {
        Replica uncontacted = replicaPlan().firstUncontactedCandidate(replica -> true);
        if (uncontacted == null)
            return;
        replicaPlan.addToContacts(uncontacted);
        sendReadCommand(uncontacted, repair.readCallback, true, false);
        ReadRepairMetrics.speculatedRead.mark();
        ReadRepairDiagnostics.speculatedRead(this, uncontacted.endpoint(), replicaPlan());
    }
}
Also used: SinglePartitionReadCommand (org.apache.cassandra.db.SinglePartitionReadCommand), Replica (org.apache.cassandra.locator.Replica)
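
maybeSendAdditionalReads() waits on the read callback for a sampled latency threshold (cfs.sampleReadLatencyNanos) and, if the initially contacted replicas have not all answered, sends the read to one extra candidate. A minimal sketch of that speculation pattern, with a CountDownLatch standing in for ReadCallback; the latch, candidate list, threshold, and "send" below are stand-ins, not Cassandra APIs.

import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class SpeculativeReadSketch {
    // If the initial reads have not all acked within the threshold, pick the first uncontacted
    // candidate and send it the read as well.
    static void maybeSpeculate(CountDownLatch initialReads, long thresholdNanos,
                               List<String> uncontactedCandidates) throws InterruptedException {
        if (initialReads.await(thresholdNanos, TimeUnit.NANOSECONDS))
            return;                                        // every contacted replica answered in time
        if (uncontactedCandidates.isEmpty())
            return;                                        // nobody left to speculate against
        String candidate = uncontactedCandidates.get(0);
        System.out.println("sending speculative read to " + candidate);
    }

    public static void main(String[] args) throws InterruptedException {
        CountDownLatch twoOutstandingReads = new CountDownLatch(2);   // no replies arrive in this demo
        maybeSpeculate(twoOutstandingReads, TimeUnit.MILLISECONDS.toNanos(5), List.of("10.0.0.3"));
    }
}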

Aggregations

Replica (org.apache.cassandra.locator.Replica) 69
InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort) 24
Token (org.apache.cassandra.dht.Token) 22
Test (org.junit.Test) 20
HashMap (java.util.HashMap) 18
Mutation (org.apache.cassandra.db.Mutation) 15
EndpointsByReplica (org.apache.cassandra.locator.EndpointsByReplica) 15
Range (org.apache.cassandra.dht.Range) 14
RangesAtEndpoint (org.apache.cassandra.locator.RangesAtEndpoint) 13
TokenMetadata (org.apache.cassandra.locator.TokenMetadata) 12
Keyspace (org.apache.cassandra.db.Keyspace) 11
AbstractReplicationStrategy (org.apache.cassandra.locator.AbstractReplicationStrategy) 11
EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange) 11
Replica.fullReplica (org.apache.cassandra.locator.Replica.fullReplica) 10
Collection (java.util.Collection) 8
Map (java.util.Map) 8
EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken) 8
ReplicaPlan (org.apache.cassandra.locator.ReplicaPlan) 8
ArrayList (java.util.ArrayList) 7
HashSet (java.util.HashSet) 6