Search in sources :

Example 36 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method sendMessagesToNonlocalDC.

/**
 * Sends {@code message} directly to one replica out of {@code targets} and has that replica
 * forward it to the remaining ones.
 *
 * When there is more than one target, the direct recipient is chosen at random; every other
 * replica is registered with the messaging callbacks for {@code handler} and listed in the
 * forwarding info attached to the message. Otherwise the message goes, unmodified, to the
 * replica at index 0.
 *
 * @param message the write to deliver
 * @param targets the replicas that must receive the write
 * @param handler the response handler registered for the direct send and for each forwarded replica
 */
private static void sendMessagesToNonlocalDC(Message<? extends IMutation> message, EndpointsForToken targets, AbstractWriteResponseHandler<IMutation> handler) {
    final Replica target;
    if (targets.size() <= 1) {
        // nothing to forward to: deliver straight to the only replica
        target = targets.get(0);
    } else {
        target = targets.get(ThreadLocalRandom.current().nextInt(0, targets.size()));
        // everyone except the direct recipient gets the message via forwarding
        EndpointsForToken forwardees = targets.filter(candidate -> candidate != target, targets.size());
        for (Replica forwardee : forwardees) {
            MessagingService.instance().callbacks.addWithExpiration(handler, message, forwardee, handler.replicaPlan.consistencyLevel(), true);
            logger.trace("Adding FWD message to {}@{}", message.id(), forwardee);
        }
        // starting with 4.0, use the same message id for all replicas
        long[] forwardedIds = new long[forwardees.size()];
        Arrays.fill(forwardedIds, message.id());
        ForwardingInfo forwardingInfo = new ForwardingInfo(forwardees.endpointList(), forwardedIds);
        message = message.withForwardTo(forwardingInfo);
    }
    MessagingService.instance().sendWriteWithCallback(message, target, handler, true);
    logger.trace("Sending message to {}@{}", message.id(), target);
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Replica(org.apache.cassandra.locator.Replica) ForwardingInfo(org.apache.cassandra.net.ForwardingInfo)

Example 37 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method commitPaxos.

/**
 * Sends the Paxos commit for {@code proposal} to every replica (live or down) of the write plan.
 *
 * For any consistency level other than {@code ANY} a write response handler is created and this
 * method waits on it before returning; for {@code ANY} the commit is sent without a callback.
 * Replicas the plan reports as not alive are marked expired on the handler (if any) and, when
 * hints are allowed and applicable, get a hint instead.
 *
 * @param proposal the commit to distribute
 * @param consistencyLevel the consistency level used to build the write plan
 * @param allowHints whether hints may be written for replicas that qualify
 * @param queryStartNanoTime passed through to the write response handler
 * @throws WriteTimeoutException declared by this method; raised from code not visible here
 */
private static void commitPaxos(Commit proposal, ConsistencyLevel consistencyLevel, boolean allowHints, long queryStartNanoTime) throws WriteTimeoutException {
    final boolean waitForAcks = consistencyLevel != ConsistencyLevel.ANY;
    Keyspace ks = Keyspace.open(proposal.update.metadata().keyspace);
    Token token = proposal.update.partitionKey().getToken();
    // NOTE: this ReplicaPlan is a lie, this usage of ReplicaPlan could do with being clarified - the selected() collection is essentially (I think) never used
    ReplicaPlan.ForTokenWrite plan = ReplicaPlans.forWrite(ks, consistencyLevel, token, ReplicaPlans.writeAll);
    // a handler only exists when we are going to wait on it
    AbstractWriteResponseHandler<Commit> ackHandler = null;
    if (waitForAcks) {
        AbstractReplicationStrategy strategy = plan.replicationStrategy();
        ackHandler = strategy.getWriteResponseHandler(plan, null, WriteType.SIMPLE, queryStartNanoTime);
    }
    Message<Commit> commitMessage = Message.outWithFlag(PAXOS_COMMIT_REQ, proposal, MessageFlag.CALL_BACK_ON_FAILURE);
    for (Replica replica : plan.liveAndDown()) {
        InetAddressAndPort endpoint = replica.endpoint();
        checkHintOverload(replica);
        if (!plan.isAlive(replica)) {
            // replica is down: account for it on the handler and fall back to a hint if permitted
            if (ackHandler != null) {
                ackHandler.expired();
            }
            if (allowHints && shouldHint(replica)) {
                submitHint(proposal.makeMutation(), replica, null);
            }
            continue;
        }
        if (!waitForAcks) {
            MessagingService.instance().send(commitMessage, endpoint);
        } else if (replica.isSelf()) {
            commitPaxosLocal(replica, commitMessage, ackHandler);
        } else {
            MessagingService.instance().sendWriteWithCallback(commitMessage, replica, ackHandler, allowHints && shouldHint(replica));
        }
    }
    if (waitForAcks) {
        ackHandler.get();
    }
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Replica(org.apache.cassandra.locator.Replica)

Example 38 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method preparePaxos.

/**
 * Sends a Paxos prepare request for {@code toPrepare} to every contact of the replica plan and
 * waits on the resulting callback.
 *
 * A contact that is this node is handled by running {@code doPrepare} on the stage of
 * {@code PAXOS_PREPARE_REQ} and feeding the response to the callback; a failure there is only
 * logged. All other contacts receive the request through the messaging service. The write
 * metrics record one local request if any contact was this node, otherwise one remote request.
 *
 * @param toPrepare the commit to prepare
 * @param replicaPlan supplies the contacts, the required participant count and the consistency level
 * @param queryStartNanoTime passed through to the callback
 * @return the callback, after {@code await()} has returned
 * @throws WriteTimeoutException declared by this method; raised from code not visible here
 */
private static PrepareCallback preparePaxos(Commit toPrepare, ReplicaPlan.ForPaxosWrite replicaPlan, long queryStartNanoTime) throws WriteTimeoutException {
    PrepareCallback prepareCallback = new PrepareCallback(toPrepare.update.partitionKey(), toPrepare.update.metadata(), replicaPlan.requiredParticipants(), replicaPlan.consistencyLevel(), queryStartNanoTime);
    Message<Commit> prepareRequest = Message.out(PAXOS_PREPARE_REQ, toPrepare);
    boolean sawLocalContact = false;
    for (Replica contact : replicaPlan.contacts()) {
        if (!contact.isSelf()) {
            MessagingService.instance().sendWithCallback(prepareRequest, contact.endpoint(), prepareCallback);
            continue;
        }
        sawLocalContact = true;
        PAXOS_PREPARE_REQ.stage.execute(() -> {
            try {
                prepareCallback.onResponse(prepareRequest.responseWith(doPrepare(toPrepare)));
            } catch (Exception ex) {
                logger.error("Failed paxos prepare locally", ex);
            }
        });
    }
    if (!sawLocalContact) {
        writeMetrics.remoteRequests.mark();
    } else {
        writeMetrics.localRequests.mark();
    }
    prepareCallback.await();
    return prepareCallback;
}
Also used : Replica(org.apache.cassandra.locator.Replica) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException)

Example 39 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method mutateMV.

/**
 * Use this method to have these materialized-view Mutations applied
 * across all replicas.
 *
 * While this node is starting, joining or moving, the whole set of mutations is only stored in the
 * local batchlog. Otherwise each mutation is either applied locally (when this node is the paired
 * view replica, has joined the ring and there are no pending replicas) or wrapped in a response
 * handler for an asynchronous write; mutations without a paired replica are skipped. Every mutation
 * that was not applied locally is stored in the local batchlog before the asynchronous writes start.
 * The elapsed time is recorded in {@code viewWriteMetrics} whether or not an exception is thrown.
 *
 * @param dataKey key from which the base token is computed (via the ring partitioner)
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog if commitlog should be written
 * @param baseComplete time from epoch in ms that the local base mutation was(or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            // (starts as a copy of all mutations; entries are removed as they are applied locally below)
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            // the same level is passed twice to wrapViewBatchResponseHandler below
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            // cleanup is sized to the full mutation count and, per its callback, removes the batch from the batchlog
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                // (the mutation stays in nonLocalMutations, which is stored after the loop)
                if (!pairedEndpoint.isPresent()) {
                    // NOTE(review): the concatenated message has no space between "(move or decommission)," and "but"
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission)," + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
                    continue;
                }
                // When this node is the paired endpoint and has joined we can apply the mutation locally,
                // unless there are pending endpoints, in which case we do an ordinary
                // write so the view mutation is sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        // log which mutation failed, then propagate the original exception unchanged
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    // target the paired endpoint plus any pending replicas for this token
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        // always record the time spent here, including when an exception propagates
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) ArrayList(java.util.ArrayList) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) Replica(org.apache.cassandra.locator.Replica) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) ReplicaLayout(org.apache.cassandra.locator.ReplicaLayout) BatchlogCleanup(org.apache.cassandra.service.BatchlogResponseHandler.BatchlogCleanup) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Mutation(org.apache.cassandra.db.Mutation) 
CounterMutation(org.apache.cassandra.db.CounterMutation) IMutation(org.apache.cassandra.db.IMutation) UUID(java.util.UUID) HashSet(java.util.HashSet)

Example 40 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method mutateCounter.

/**
 * Handle counter mutation on the coordinator host.
 *
 * A counter mutation needs to first be applied to a replica (that we'll call the leader for the mutation) before being
 * replicated to the other endpoints. To achieve this, there are two cases:
 *   1) the coordinator host is a replica: we proceed to applying the update locally and replicate through
 *   applyCounterMutationOnCoordinator
 *   2) the coordinator is not a replica: we forward the (counter)mutation to a chosen replica (that will proceed through
 *   applyCounterMutationOnLeader upon receive) and wait for its acknowledgment.
 *
 * Implementation note: We check if we can fulfill the CL on the coordinator host even if it is not a replica to allow
 * quicker response and because the WriteResponseHandlers don't make it easy to send back an error. We also always gather
 * the write latencies at the coordinator node to make gathering point similar to the case of standard writes.
 *
 * @param cm the counter mutation to apply
 * @param localDataCenter passed to replica selection and to the local apply path
 * @param queryStartNanoTime passed through to the response handler / local apply path
 * @return the handler returned by applyCounterMutationOnCoordinator when the chosen replica is this node,
 *         otherwise the handler registered for the forwarded request
 * @throws UnavailableException declared by this method; presumably raised by the write-plan sufficiency check
 *         or replica selection - confirm against those helpers
 * @throws OverloadedException declared by this method; raised from code not visible here
 */
public static AbstractWriteResponseHandler<IMutation> mutateCounter(CounterMutation cm, String localDataCenter, long queryStartNanoTime) throws UnavailableException, OverloadedException {
    Replica replica = findSuitableReplica(cm.getKeyspaceName(), cm.key(), localDataCenter, cm.consistency());
    if (replica.isSelf()) {
        return applyCounterMutationOnCoordinator(cm, localDataCenter, queryStartNanoTime);
    }
    // Exit now if we can't fulfill the CL here instead of forwarding to the leader replica
    String keyspaceName = cm.getKeyspaceName();
    Keyspace keyspace = Keyspace.open(keyspaceName);
    Token tk = cm.key().getToken();
    // we build this ONLY to perform the sufficiency check that happens on construction
    ReplicaPlans.forWrite(keyspace, cm.consistency(), tk, ReplicaPlans.writeAll);
    // Forward the actual update to the chosen leader replica
    AbstractWriteResponseHandler<IMutation> responseHandler = new WriteResponseHandler<>(ReplicaPlans.forForwardingCounterWrite(keyspace, tk, replica), WriteType.COUNTER, queryStartNanoTime);
    Tracing.trace("Enqueuing counter update to {}", replica);
    // parameterized (was a raw Message) so the payload type is checked like the other requests built in this class
    Message<CounterMutation> message = Message.outWithFlag(Verb.COUNTER_MUTATION_REQ, cm, MessageFlag.CALL_BACK_ON_FAILURE);
    MessagingService.instance().sendWriteWithCallback(message, replica, responseHandler, false);
    return responseHandler;
}
Also used : IMutation(org.apache.cassandra.db.IMutation) Message(org.apache.cassandra.net.Message) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) Replica(org.apache.cassandra.locator.Replica)

Aggregations

Replica (org.apache.cassandra.locator.Replica)69 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)24 Token (org.apache.cassandra.dht.Token)22 Test (org.junit.Test)20 HashMap (java.util.HashMap)18 Mutation (org.apache.cassandra.db.Mutation)15 EndpointsByReplica (org.apache.cassandra.locator.EndpointsByReplica)15 Range (org.apache.cassandra.dht.Range)14 RangesAtEndpoint (org.apache.cassandra.locator.RangesAtEndpoint)13 TokenMetadata (org.apache.cassandra.locator.TokenMetadata)12 Keyspace (org.apache.cassandra.db.Keyspace)11 AbstractReplicationStrategy (org.apache.cassandra.locator.AbstractReplicationStrategy)11 EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange)11 Replica.fullReplica (org.apache.cassandra.locator.Replica.fullReplica)10 Collection (java.util.Collection)8 Map (java.util.Map)8 EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken)8 ReplicaPlan (org.apache.cassandra.locator.ReplicaPlan)8 ArrayList (java.util.ArrayList)7 HashSet (java.util.HashSet)6