Search in sources :

Example 11 with EndpointsForToken

Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project.

From the class StorageProxy, method sendMessagesToNonlocalDC.

/*
 * Deliver the mutation to one randomly chosen replica in the remote DC and have that
 * replica forward it to the other replicas within its own datacenter, saving
 * cross-DC network hops.
 */
private static void sendMessagesToNonlocalDC(Message<? extends IMutation> message, EndpointsForToken targets, AbstractWriteResponseHandler<IMutation> handler) {
    final Replica coordinator;
    if (targets.size() <= 1) {
        // single replica: nothing to forward, just send directly
        coordinator = targets.get(0);
    } else {
        // pick the remote-DC coordinator at random to spread forwarding load
        coordinator = targets.get(ThreadLocalRandom.current().nextInt(0, targets.size()));
        // every other replica is reached via forwarding from the coordinator;
        // identity comparison is intentional — we exclude the exact chosen Replica
        EndpointsForToken forwardTargets = targets.filter(replica -> replica != coordinator, targets.size());
        // register response callbacks locally before the message is dispatched
        for (Replica forwardTarget : forwardTargets) {
            MessagingService.instance().callbacks.addWithExpiration(handler, message, forwardTarget, handler.replicaPlan.consistencyLevel(), true);
            logger.trace("Adding FWD message to {}@{}", message.id(), forwardTarget);
        }
        // starting with 4.0, use the same message id for all replicas
        long[] ids = new long[forwardTargets.size()];
        Arrays.fill(ids, message.id());
        message = message.withForwardTo(new ForwardingInfo(forwardTargets.endpointList(), ids));
    }
    MessagingService.instance().sendWriteWithCallback(message, coordinator, handler, true);
    logger.trace("Sending message to {}@{}", message.id(), coordinator);
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Replica(org.apache.cassandra.locator.Replica) ForwardingInfo(org.apache.cassandra.net.ForwardingInfo)

Example 12 with EndpointsForToken

Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project.

From the class StorageProxy, method mutateMV.

/**
 * Use this method to have these Mutations applied
 * across all replicas.
 *
 * Materialized-view path: each base mutation is paired with (at most) one view replica.
 * Local paired writes are applied synchronously in this thread; remote paired writes go
 * through the batchlog so they can be replayed if delivery fails.
 *
 * @param dataKey partition key of the base mutation, used to derive the base token
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog if commitlog should be written
 * @param baseComplete time from epoch in ms that the local base mutation was(or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency;
            // mutations applied locally below are removed from this set so they skip the batchlog
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            // base -> view replication is pairwise, so CL.ONE per paired endpoint is sufficient
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the BL locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                if (!pairedEndpoint.isPresent()) {
                    // FIX: added the missing space after "decommission)," — previously the two
                    // concatenated fragments rendered as "decommission),but" in the log output
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission), " + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
                    continue;
                }
                // write so the view mutation is sent to the pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        // apply the paired view update locally and synchronously
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    // remote (or pending) paired endpoint: wrap with a batchlog-backed response handler
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) ArrayList(java.util.ArrayList) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Token(org.apache.cassandra.dht.Token) Replica(org.apache.cassandra.locator.Replica) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) ReplicaLayout(org.apache.cassandra.locator.ReplicaLayout) BatchlogCleanup(org.apache.cassandra.service.BatchlogResponseHandler.BatchlogCleanup) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Mutation(org.apache.cassandra.db.Mutation) 
CounterMutation(org.apache.cassandra.db.CounterMutation) IMutation(org.apache.cassandra.db.IMutation) UUID(java.util.UUID) HashSet(java.util.HashSet)

Example 13 with EndpointsForToken

Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project.

From the class StorageProxy, method syncWriteBatchedMutations.

/**
 * Dispatches every batched mutation to its replicas, then blocks until each
 * handler has satisfied its consistency requirement (or throws on timeout).
 */
private static void syncWriteBatchedMutations(List<WriteResponseHandlerWrapper> wrappers, Stage stage) throws WriteTimeoutException, OverloadedException {
    String localDc = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    // first pass: fire off all the writes without waiting
    for (WriteResponseHandlerWrapper wrapper : wrappers) {
        EndpointsForToken replicas = wrapper.handler.replicaPlan.liveAndDown();
        // TODO: CASSANDRA-14549
        Replicas.temporaryAssertFull(replicas);
        sendToHintedReplicas(wrapper.mutation, wrapper.handler.replicaPlan.withContact(replicas), wrapper.handler, localDc, stage);
    }
    // second pass: wait for every handler so the writes overlap in flight
    for (WriteResponseHandlerWrapper wrapper : wrappers) {
        wrapper.handler.get();
    }
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken)

Example 14 with EndpointsForToken

Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project.

From the class AbstractWriteResponseHandler, method maybeTryAdditionalReplicas.

/**
 * Cheap Quorum backup.  If we failed to reach quorum with our initial (full) nodes, reach out to other nodes.
 */
public void maybeTryAdditionalReplicas(IMutation mutation, WritePerformer writePerformer, String localDC) {
    EndpointsForToken backups = replicaPlan.liveUncontacted();
    if (backups.isEmpty())
        return;
    List<ColumnFamilyStore> stores = mutation.getTableIds().stream().map(instance::getColumnFamilyStoreInstance).collect(toList());
    // wait only as long as the fastest table's observed additional-write latency
    long waitNanos = MAX_VALUE;
    for (ColumnFamilyStore store : stores)
        waitNanos = min(waitNanos, store.additionalWriteLatencyNanos);
    // no latency information, or we're overloaded
    if (waitNanos > mutation.getTimeout(NANOSECONDS))
        return;
    try {
        if (condition.await(waitNanos, NANOSECONDS))
            return; // quorum reached in time; no backup writes needed
        for (ColumnFamilyStore store : stores)
            store.metric.additionalWrites.inc();
        writePerformer.apply(mutation, replicaPlan.withContact(backups), (AbstractWriteResponseHandler<IMutation>) this, localDC);
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    }
}
Also used : EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) IMutation(org.apache.cassandra.db.IMutation) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException)

Example 15 with EndpointsForToken

Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project.

From the class AbstractReadExecutor, method executeAsync.

/**
 * Fires the initial round of read requests: full-data reads to the first
 * {@code initialDataRequestCount} full replicas, transient-data reads to
 * transient replicas, and digest reads to the remaining full replicas.
 */
public void executeAsync() {
    EndpointsForToken contacts = replicaPlan().contacts();
    EndpointsForToken dataReads = contacts.filter(Replica::isFull, initialDataRequestCount);
    makeFullDataRequests(dataReads);
    makeTransientDataRequests(contacts.filterLazily(Replica::isTransient));
    // the rest of the full replicas only confirm via digest
    makeDigestRequests(contacts.filterLazily(replica -> replica.isFull() && !dataReads.contains(replica)));
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Stage(org.apache.cassandra.concurrent.Stage) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) NANOSECONDS(java.util.concurrent.TimeUnit.NANOSECONDS) LoggerFactory(org.slf4j.LoggerFactory) ReadCommand(org.apache.cassandra.db.ReadCommand) DuplicateRowChecker(org.apache.cassandra.db.transform.DuplicateRowChecker) Message(org.apache.cassandra.net.Message) Iterables.all(com.google.common.collect.Iterables.all) DecoratedKey(org.apache.cassandra.db.DecoratedKey) ReadRepair(org.apache.cassandra.service.reads.repair.ReadRepair) ReplicaCollection(org.apache.cassandra.locator.ReplicaCollection) SinglePartitionReadCommand(org.apache.cassandra.db.SinglePartitionReadCommand) ReplicaPlans(org.apache.cassandra.locator.ReplicaPlans) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) Keyspace(org.apache.cassandra.db.Keyspace) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) LocalReadRunnable(org.apache.cassandra.service.StorageProxy.LocalReadRunnable) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) MessagingService(org.apache.cassandra.net.MessagingService) Logger(org.slf4j.Logger) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) Tracing(org.apache.cassandra.tracing.Tracing) Collectors(java.util.stream.Collectors) Replica(org.apache.cassandra.locator.Replica) TraceState(org.apache.cassandra.tracing.TraceState) List(java.util.List) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) Preconditions(com.google.common.base.Preconditions) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) Replica(org.apache.cassandra.locator.Replica)

Aggregations

EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken)21 Test (org.junit.Test)9 SinglePartitionReadCommand (org.apache.cassandra.db.SinglePartitionReadCommand)8 PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate)8 Token (org.apache.cassandra.dht.Token)8 Replica (org.apache.cassandra.locator.Replica)6 ReplicaPlan (org.apache.cassandra.locator.ReplicaPlan)5 Keyspace (org.apache.cassandra.db.Keyspace)4 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)4 ReplicaLayout (org.apache.cassandra.locator.ReplicaLayout)4 UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException)4 List (java.util.List)3 UUID (java.util.UUID)3 Collectors (java.util.stream.Collectors)3 DatabaseDescriptor (org.apache.cassandra.config.DatabaseDescriptor)3 ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore)3 ConsistencyLevel (org.apache.cassandra.db.ConsistencyLevel)3 IMutation (org.apache.cassandra.db.IMutation)3 Mutation (org.apache.cassandra.db.Mutation)3 ReadFailureException (org.apache.cassandra.exceptions.ReadFailureException)3