Search in sources :

Example 6 with ReplicaPlan

use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.

the class BlockingReadRepairTest method remoteDCTest.

/**
 * Verifies that, for a dc-local consistency level, acks coming back from replicas in a remote dc
 * (and no-op mutations) do not reduce the number of responses the repair handler is still blocking on.
 */
@Test
public void remoteDCTest() throws Exception {
    // Two extra replicas that take part in the plan alongside replica1/replica2.
    // NOTE(review): presumably these addresses resolve to a remote dc in the test snitch - confirm against the fixture.
    Replica remoteA = ReplicaUtils.full(InetAddressAndPort.getByName("10.0.0.1"));
    Replica remoteB = ReplicaUtils.full(InetAddressAndPort.getByName("10.0.0.2"));

    // One repair mutation for replica1 and one for the first remote replica.
    Map<Replica, Mutation> pendingRepairs = new HashMap<>();
    pendingRepairs.put(replica1, mutation(cell1));
    pendingRepairs.put(remoteA, mutation(cell1));

    EndpointsForRange allReplicas = EndpointsForRange.of(replica1, replica2, remoteA, remoteB);
    ReplicaPlan.ForTokenWrite plan = repairPlan(replicaPlan(ks, ConsistencyLevel.LOCAL_QUORUM, allReplicas));
    InstrumentedReadRepairHandler repairHandler = createRepairHandler(pendingRepairs, plan);

    repairHandler.sendInitialRepairs();

    // Both repairs are sent out, but the handler only waits on a single response.
    Assert.assertEquals(2, repairHandler.mutationsSent.size());
    Assert.assertTrue(repairHandler.mutationsSent.containsKey(replica1.endpoint()));
    Assert.assertTrue(repairHandler.mutationsSent.containsKey(remoteA.endpoint()));
    Assert.assertEquals(1, repairHandler.waitingOn());
    Assert.assertFalse(getCurrentRepairStatus(repairHandler));

    // An ack from the remote replica must leave the outstanding count untouched.
    repairHandler.ack(remoteA.endpoint());
    Assert.assertEquals(1, repairHandler.waitingOn());
    Assert.assertFalse(getCurrentRepairStatus(repairHandler));

    // The ack from replica1 is the one that completes the repair.
    repairHandler.ack(replica1.endpoint());
    Assert.assertEquals(0, repairHandler.waitingOn());
    Assert.assertTrue(getCurrentRepairStatus(repairHandler));
}
Also used : ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) HashMap(java.util.HashMap) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Mutation(org.apache.cassandra.db.Mutation) Replica(org.apache.cassandra.locator.Replica) Test(org.junit.Test)

Example 7 with ReplicaPlan

use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.

the class StorageProxy method performWrite.

/**
 * Performs the write of a single mutation through the supplied {@code WritePerformer}.
 * A write replica plan is computed for the mutation's token, a response handler is obtained
 * from the plan's replication strategy, and the actual application/forwarding of the mutation
 * is delegated to the performer. The handler is returned so the caller can wait on it.
 *
 * @param mutation the mutation to be applied
 * @param consistencyLevel the consistency level for the write operation
 * @param localDataCenter the local datacenter name, handed through to the performer
 * @param performer the WritePerformer in charge of applying the mutation
 * given the replica plan (either standardWritePerformer for
 * standard writes or counterWritePerformer for counter writes).
 * @param callback an optional callback passed to the write response handler
 * @param writeType the type of write, passed to the write response handler
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 * @return the response handler created for this write
 */
public static AbstractWriteResponseHandler<IMutation> performWrite(IMutation mutation, ConsistencyLevel consistencyLevel, String localDataCenter, WritePerformer performer, Runnable callback, WriteType writeType, long queryStartNanoTime) {
    Keyspace ks = Keyspace.open(mutation.getKeyspaceName());
    Token token = mutation.key().getToken();

    ReplicaPlan.ForTokenWrite plan = ReplicaPlans.forWrite(ks, consistencyLevel, token, ReplicaPlans.writeNormal);
    AbstractWriteResponseHandler<IMutation> handler =
            plan.replicationStrategy().getWriteResponseHandler(plan, callback, writeType, queryStartNanoTime);

    // The performer decides how the mutation reaches the replicas in the plan.
    performer.apply(mutation, plan, handler, localDataCenter);
    return handler;
}
Also used : ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) IMutation(org.apache.cassandra.db.IMutation) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy)

Example 8 with ReplicaPlan

use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.

the class StorageProxy method commitPaxos.

/**
 * Sends a Paxos commit for {@code proposal} to every replica (live or down) of the proposal's token.
 * Unless the consistency level is {@code ANY}, the call blocks on a write response handler until it completes.
 *
 * @param proposal the commit to send
 * @param consistencyLevel the consistency level of the commit; {@code ANY} means fire-and-forget
 * @param allowHints whether hints may be written for replicas that qualify for hinting
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 * @throws WriteTimeoutException declared by the signature; presumably raised while waiting on the handler
 */
private static void commitPaxos(Commit proposal, ConsistencyLevel consistencyLevel, boolean allowHints, long queryStartNanoTime) throws WriteTimeoutException {
    final boolean blocking = consistencyLevel != ConsistencyLevel.ANY;

    Keyspace ks = Keyspace.open(proposal.update.metadata().keyspace);
    Token token = proposal.update.partitionKey().getToken();

    // NOTE: this ReplicaPlan is not an accurate plan; its selected() collection is (apparently) never
    // consulted here, and this usage could do with being clarified.
    ReplicaPlan.ForTokenWrite plan = ReplicaPlans.forWrite(ks, consistencyLevel, token, ReplicaPlans.writeAll);

    // A response handler only exists when we intend to wait for acknowledgements.
    AbstractWriteResponseHandler<Commit> handler = null;
    if (blocking) {
        AbstractReplicationStrategy strategy = plan.replicationStrategy();
        handler = strategy.getWriteResponseHandler(plan, null, WriteType.SIMPLE, queryStartNanoTime);
    }

    Message<Commit> commitMessage = Message.outWithFlag(PAXOS_COMMIT_REQ, proposal, MessageFlag.CALL_BACK_ON_FAILURE);

    for (Replica replica : plan.liveAndDown()) {
        InetAddressAndPort target = replica.endpoint();
        checkHintOverload(replica);

        if (!plan.isAlive(replica)) {
            // Dead replica: let the handler know no response is coming, and hint if permitted.
            if (handler != null) {
                handler.expired();
            }
            if (allowHints && shouldHint(replica)) {
                submitHint(proposal.makeMutation(), replica, null);
            }
            continue;
        }

        if (!blocking) {
            // Fire-and-forget: no callback is registered.
            MessagingService.instance().send(commitMessage, target);
            continue;
        }

        if (replica.isSelf()) {
            commitPaxosLocal(replica, commitMessage, handler);
        } else {
            MessagingService.instance().sendWriteWithCallback(commitMessage, replica, handler, allowHints && shouldHint(replica));
        }
    }

    if (blocking) {
        handler.get();
    }
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Replica(org.apache.cassandra.locator.Replica)

Example 9 with ReplicaPlan

use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.

the class StorageProxy method mutateMV.

/**
 * Use this method to have these (materialized view) Mutations applied
 * across all replicas.
 * <p>
 * While the node is starting, joining or moving, every mutation is simply stored in the local
 * batchlog. Otherwise each mutation is either applied locally (when this node is the paired view
 * replica and there are no pending replicas), queued for an asynchronous write to its paired
 * replica, or left in the local batchlog when no paired replica can be determined.
 * The elapsed time is always recorded in {@code viewWriteMetrics}.
 *
 * @param dataKey the partition key used to compute the base token for pairing view replicas
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog if commitlog should be written
 * @param baseComplete time from epoch in ms that the local base mutation was(or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            // one wrapper per mutation that has to be written to another replica
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            // (starts as a copy of all mutations; locally applied ones are removed in the loop below)
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            // cleanup is created with one count per mutation and asynchronously removes the batch from the batchlog when run
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                // (the mutation stays in nonLocalMutations, which is stored in the batchlog after the loop)
                if (!pairedEndpoint.isPresent()) {
                    // NOTE(review): the concatenated message has no space after "decommission)," - the log reads "...decommission),but this node..."
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission)," + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
                    continue;
                }
                // When this node is the paired endpoint, has joined, and there are no pending replicas, apply the
                // mutation locally; otherwise do an ordinary write so the view mutation is sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        // log which mutation failed, then propagate the original exception unchanged
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    // target the single paired replica plus any pending replicas for this token
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        // record elapsed time whether the write path succeeded or threw
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) ArrayList(java.util.ArrayList) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) Replica(org.apache.cassandra.locator.Replica) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) ReplicaLayout(org.apache.cassandra.locator.ReplicaLayout) BatchlogCleanup(org.apache.cassandra.service.BatchlogResponseHandler.BatchlogCleanup) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Mutation(org.apache.cassandra.db.Mutation) 
CounterMutation(org.apache.cassandra.db.CounterMutation) IMutation(org.apache.cassandra.db.IMutation) UUID(java.util.UUID) HashSet(java.util.HashSet)

Example 10 with ReplicaPlan

use of org.apache.cassandra.locator.ReplicaPlan in project cassandra by apache.

the class RangeCommandIterator method query.

/**
 * Issues the read for a single sub-range and returns the pieces needed to collect its result.
 *
 * @param replicaPlan the replica plan for the sub-range to query; it supplies both the range and
 * the replicas to contact.
 * @param isFirst in the case where multiple queries are sent in parallel, whether this is the first query of
 * that batch or not. It matters because, when paging queries, the command (more specifically the
 * {@code DataLimits}) may carry "state" information that is only valid for the first query (the one
 * that "continues" whatever was previously queried).
 * @return a {@code SingleRangeResponse} built from the resolver, the read callback and the read repair
 */
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand subRangeCommand = command.forSubRange(replicaPlan.range(), isFirst);

    // Repaired data tracking is requested only when the feature is enabled and more than
    // one full replica is contacted, so that there is something to compare results against.
    boolean trackRepaired = false;
    if (DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled()) {
        trackRepaired = replicaPlan.contacts().filter(Replica::isFull).size() > 1;
    }

    ReplicaPlan.SharedForRangeRead sharedPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> repair =
            ReadRepair.create(command, sharedPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> dataResolver =
            new DataResolver<>(subRangeCommand, sharedPlan, repair, queryStartNanoTime, trackRepaired);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> callback =
            new ReadCallback<>(dataResolver, subRangeCommand, sharedPlan, queryStartNanoTime);

    // A lone contact that is this node is served on the READ stage without going through messaging.
    boolean localOnly = replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf();
    if (localOnly) {
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(subRangeCommand, callback, trackRepaired));
        return new SingleRangeResponse(dataResolver, callback, repair);
    }

    for (Replica target : replicaPlan.contacts()) {
        Tracing.trace("Enqueuing request to {}", target);

        // Non-full replicas receive a transient-query copy of the command.
        ReadCommand outgoing;
        if (target.isFull()) {
            outgoing = subRangeCommand;
        } else {
            outgoing = subRangeCommand.copyAsTransientQuery(target);
        }

        Message<ReadCommand> request = outgoing.createMessage(trackRepaired && target.isFull());
        MessagingService.instance().sendWithCallback(request, target.endpoint(), callback);
    }
    return new SingleRangeResponse(dataResolver, callback, repair);
}
Also used : ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) StorageProxy(org.apache.cassandra.service.StorageProxy) ReadCommand(org.apache.cassandra.db.ReadCommand) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) Replica(org.apache.cassandra.locator.Replica) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) ReadCallback(org.apache.cassandra.service.reads.ReadCallback) DataResolver(org.apache.cassandra.service.reads.DataResolver)

Aggregations

ReplicaPlan (org.apache.cassandra.locator.ReplicaPlan)14 Keyspace (org.apache.cassandra.db.Keyspace)7 AbstractReplicationStrategy (org.apache.cassandra.locator.AbstractReplicationStrategy)6 EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken)6 IMutation (org.apache.cassandra.db.IMutation)5 Replica (org.apache.cassandra.locator.Replica)5 Token (org.apache.cassandra.dht.Token)4 ArrayList (java.util.ArrayList)3 UUID (java.util.UUID)3 ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore)3 ConsistencyLevel (org.apache.cassandra.db.ConsistencyLevel)3 Mutation (org.apache.cassandra.db.Mutation)3 ReadCommand (org.apache.cassandra.db.ReadCommand)3 UnavailableException (org.apache.cassandra.exceptions.UnavailableException)3 List (java.util.List)2 CounterMutation (org.apache.cassandra.db.CounterMutation)2 DecoratedKey (org.apache.cassandra.db.DecoratedKey)2 PartitionRangeReadCommand (org.apache.cassandra.db.PartitionRangeReadCommand)2 PartitionIterator (org.apache.cassandra.db.partitions.PartitionIterator)2 CasWriteTimeoutException (org.apache.cassandra.exceptions.CasWriteTimeoutException)2