Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project: method maybeSendAdditionalWrites of class BlockingPartitionRepair.
/**
 * If it looks like we might not receive acks for all the repair mutations we sent out, combine all
 * the unacked mutations and send them to the minority of nodes not involved in the read repair data
 * read / write cycle. We will accept acks from them in lieu of acks from the initial mutations sent
 * out, so long as we receive the same number of acks as repair mutations transmitted. This prevents
 * misbehaving nodes from killing a quorum read, while continuing to guarantee monotonic quorum reads
 *
 * @param timeout     how long (measured from the time the original repair mutations were sent) to
 *                    wait for acks before speculatively contacting additional replicas
 * @param timeoutUnit the unit of {@code timeout}
 */
public void maybeSendAdditionalWrites(long timeout, TimeUnit timeoutUnit) {
    // Block until either all repair acks arrive or the deadline (send time + timeout) passes.
    // If everything was acked in time, there is nothing to speculate on.
    if (awaitRepairsUntil(timeout + timeoutUnit.convert(mutationsSentTime, TimeUnit.NANOSECONDS), timeoutUnit))
        return;
    // Live replicas that were NOT part of the original read-repair write plan.
    EndpointsForToken newCandidates = writePlan.liveUncontacted();
    if (newCandidates.isEmpty())
        return;
    // Merge every still-unacked update into a single repair payload.
    PartitionUpdate update = mergeUnackedUpdates();
    if (update == null)
        // everything was acked between the await above and the
        // timeout and call to get unacked mutation.
        return;
    ReadRepairMetrics.speculatedWrite.mark();
    // Cache one serialized mutation per messaging version so each version is built at most once.
    Mutation[] versionedMutations = new Mutation[msgVersionIdx(MessagingService.current_version) + 1];
    for (Replica replica : newCandidates) {
        int versionIdx = msgVersionIdx(MessagingService.instance().versions.get(replica.endpoint()));
        Mutation mutation = versionedMutations[versionIdx];
        if (mutation == null) {
            // First replica seen at this messaging version: build (and cache) the mutation.
            mutation = BlockingReadRepairs.createRepairMutation(update, writePlan.consistencyLevel(), replica.endpoint(), true);
            versionedMutations[versionIdx] = mutation;
        }
        // NOTE: a second null check is required — createRepairMutation returns null when
        // the mutation is too large to send.
        if (mutation == null) {
            ReadRepairDiagnostics.speculatedWriteOversized(this, replica.endpoint());
            continue;
        }
        Tracing.trace("Sending speculative read-repair-mutation to {}", replica);
        sendRR(Message.out(READ_REPAIR_REQ, mutation), replica.endpoint());
        ReadRepairDiagnostics.speculatedWrite(this, replica.endpoint(), mutation);
    }
}
Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project: method hintMutation of class StorageProxy.
/**
 * Submits hints for the given mutation to every replica (live or down) of its token
 * that passes the {@link StorageProxy#shouldHint} check.
 */
private static void hintMutation(Mutation mutation) {
    Token hintToken = mutation.key().getToken();
    Keyspace keyspace = Keyspace.open(mutation.getKeyspaceName());
    // local writes can timeout, but cannot be dropped (see LocalMutationRunnable and CASSANDRA-6510),
    // so there is no need to hint or retry.
    EndpointsForToken allReplicas = ReplicaLayout.forTokenWriteLiveAndDown(keyspace, hintToken).all();
    EndpointsForToken hintTargets = allReplicas.filter(StorageProxy::shouldHint);
    submitHint(mutation, hintTargets, null);
}
Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project: method getViewNaturalEndpoint of class ViewUtils.
/**
 * Calculate the natural endpoint for the view.
 *
 * The view natural endpoint is the endpoint which has the same cardinality as this node in the replication factor.
 * The cardinality is the number at which this node would store a piece of data, given the change in replication
 * factor. If the keyspace's replication strategy is a NetworkTopologyStrategy, we filter the ring to contain only
 * nodes in the local datacenter when calculating cardinality.
 *
 * For example, if we have the following ring:
 * {@code A, T1 -> B, T2 -> C, T3 -> A}
 *
 * For the token T1, at RF=1, A would be included, so A's cardinality for T1 is 1. For the token T1, at RF=2, B would
 * be included, so B's cardinality for token T1 is 2. For token T3, at RF = 2, A would be included, so A's cardinality
 * for T3 is 2.
 *
 * For a view whose base token is T1 and whose view token is T3, the pairings between the nodes would be:
 * A writes to C (A's cardinality is 1 for T1, and C's cardinality is 1 for T3)
 * B writes to A (B's cardinality is 2 for T1, and A's cardinality is 2 for T3)
 * C writes to B (C's cardinality is 3 for T1, and B's cardinality is 3 for T3)
 *
 * @return Optional.empty() if this method is called using a base token which does not belong to this replica
 */
public static Optional<Replica> getViewNaturalEndpoint(AbstractReplicationStrategy replicationStrategy, Token baseToken, Token viewToken) {
    String localDc = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    EndpointsForToken baseNatural = replicationStrategy.getNaturalReplicasForToken(baseToken);
    EndpointsForToken viewNatural = replicationStrategy.getNaturalReplicasForToken(viewToken);

    // If this node is itself a natural replica for the view token, it simply pairs with itself.
    Optional<Replica> self = Iterables.tryFind(viewNatural, Replica::isSelf).toJavaUtil();
    if (self.isPresent())
        return self;

    // We only select replicas from our own DC
    // TODO: this is poor encapsulation, leaking implementation details of replication strategy
    Predicate<Replica> inLocalDc =
        replica -> !(replicationStrategy instanceof NetworkTopologyStrategy)
                   || DatabaseDescriptor.getEndpointSnitch().getDatacenter(replica).equals(localDc);

    // We have to remove any endpoint which is shared between the base and the view, as it will select itself
    // and throw off the counts otherwise.
    EndpointsForToken pairableBase =
        baseNatural.filter(replica -> !viewNatural.endpoints().contains(replica.endpoint()) && inLocalDc.test(replica));
    EndpointsForToken pairableView =
        viewNatural.filter(replica -> !baseNatural.endpoints().contains(replica.endpoint()) && inLocalDc.test(replica));

    // After removing shared endpoints, both sides must pair one-to-one by position.
    assert pairableBase.size() == pairableView.size() : "Replication strategy should have the same number of endpoints for the base and the view";

    // Find this node's position (cardinality) among the remaining base replicas.
    int selfIdx = -1;
    for (int idx = 0; idx < pairableBase.size(); idx++) {
        if (pairableBase.get(idx).isSelf()) {
            selfIdx = idx;
            break;
        }
    }

    if (selfIdx < 0)
        // This node is not a base replica of this key, so we return empty
        return Optional.empty();

    // Pair with the view replica occupying the same position.
    return Optional.of(pairableView.get(selfIdx));
}
Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project: method recomputeTopology of class KeyspaceActions.
/**
 * Rebuilds the Topology snapshot from the current replication strategy, token metadata and
 * pending ranges, resolving each replica's address to its node number.
 */
private Topology recomputeTopology() {
    AbstractReplicationStrategy replication = strategy();
    Map<InetSocketAddress, Integer> nodeNumberByAddress =
        Cluster.getUniqueAddressLookup(cluster, instance -> instance.config().num());

    int keyCount = primaryKeys.length;
    int[][] replicasForKey = new int[keyCount][];
    int[][] pendingReplicasForKey = new int[keyCount][];

    for (int keyIdx = 0; keyIdx < keyCount; ++keyIdx) {
        Token token = new Murmur3Partitioner().getToken(Int32Type.instance.decompose(primaryKeys[keyIdx]));

        // Natural replicas for this key, as node numbers.
        replicasForKey[keyIdx] = replication.calculateNaturalReplicas(token, tokenMetadata)
                                            .endpointList()
                                            .stream()
                                            .mapToInt(nodeNumberByAddress::get)
                                            .toArray();

        // Pending replicas (ongoing range movements), or an empty array when there are none.
        PendingRangeMaps pendingRanges = tokenMetadata.getPendingRanges(keyspace);
        EndpointsForToken pending = pendingRanges == null ? null : pendingRanges.pendingEndpointsFor(token);
        pendingReplicasForKey[keyIdx] = pending == null
                                        ? new int[0]
                                        : pending.endpointList().stream().mapToInt(nodeNumberByAddress::get).toArray();
    }

    int[] membersOfRing = joined.toArray();
    long[] membersOfRingTokens = IntStream.of(membersOfRing).mapToLong(nodeLookup::tokenOf).toArray();
    return new Topology(primaryKeys, membersOfRing, membersOfRingTokens, membersOfQuorum(),
                        currentRf.clone(), quorumRf(), replicasForKey, pendingReplicasForKey);
}
Use of org.apache.cassandra.locator.EndpointsForToken in the Apache Cassandra project: method newTestWriteEndpointsDuringMove of class MoveTest.
/*
 * Test whether write endpoints is correct when the node is moving. Uses
 * StorageService.onChange and does not manipulate token metadata directly.
 */
@Test
public void newTestWriteEndpointsDuringMove() throws Exception {
    StorageService ss = StorageService.instance;
    final int RING_SIZE = 10;
    // index of the moving node
    final int MOVING_NODE = 3;
    TokenMetadata tmd = ss.getTokenMetadata();
    VersionedValue.VersionedValueFactory valueFactory = new VersionedValue.VersionedValueFactory(partitioner);
    ArrayList<Token> endpointTokens = new ArrayList<Token>();
    ArrayList<Token> keyTokens = new ArrayList<Token>();
    List<InetAddressAndPort> hosts = new ArrayList<>();
    List<UUID> hostIds = new ArrayList<UUID>();
    // Populate a RING_SIZE-node ring; fills the token/host/hostId lists as a side effect.
    Util.createInitialRing(ss, partitioner, endpointTokens, keyTokens, hosts, hostIds, RING_SIZE);
    // Snapshot, for every key token, the ring-order walk of endpoints before the move starts.
    Map<Token, List<InetAddressAndPort>> expectedEndpoints = new HashMap<>();
    for (Token token : keyTokens) {
        List<InetAddressAndPort> endpoints = new ArrayList<>();
        Iterator<Token> tokenIter = TokenMetadata.ringIterator(tmd.sortedTokens(), token, false);
        while (tokenIter.hasNext()) {
            endpoints.add(tmd.getEndpoint(tokenIter.next()));
        }
        expectedEndpoints.put(token, endpoints);
    }
    // node MOVING_NODE should move to this token
    Token newToken = positionToken(MOVING_NODE);
    // Announce the move via gossip state, the same path a real moving node takes.
    ss.onChange(hosts.get(MOVING_NODE), ApplicationState.STATUS, valueFactory.moving(newToken));
    // Pending-range calculation is async; wait until it settles before asserting.
    PendingRangeCalculatorService.instance.blockUntilFinished();
    assertTrue(tmd.isMoving(hosts.get(MOVING_NODE)));
    AbstractReplicationStrategy strategy;
    for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces()) {
        strategy = getStrategy(keyspaceName, tmd);
        // NOTE(review): NetworkTopologyStrategy keyspaces are skipped; only simple strategies are checked here.
        if (strategy instanceof NetworkTopologyStrategy)
            continue;
        int numMoved = 0;
        for (Token token : keyTokens) {
            int replicationFactor = strategy.getReplicationFactor().allReplicas;
            // Write endpoints = natural replicas plus any pending endpoints caused by the move.
            EndpointsForToken actual = tmd.getWriteEndpoints(token, keyspaceName, strategy.calculateNaturalReplicas(token, tmd.cloneOnlyTokenMap()).forToken(token));
            // The first replicationFactor endpoints in ring order are the expected natural replicas.
            HashSet<InetAddressAndPort> expected = new HashSet<>();
            for (int i = 0; i < replicationFactor; i++) {
                expected.add(expectedEndpoints.get(token).get(i));
            }
            if (expected.size() == actual.size()) {
                // No pending endpoint for this token: write set equals the natural set.
                assertEquals("mismatched endpoint sets", expected, actual.endpoints());
            } else {
                // Extra write endpoint: the moving node must be included while the move is in flight.
                expected.add(hosts.get(MOVING_NODE));
                assertEquals("mismatched endpoint sets", expected, actual.endpoints());
                numMoved++;
            }
        }
        // Exactly one key token's write set should be affected by moving a single node.
        assertEquals("mismatched number of moved token", 1, numMoved);
    }
    // moving endpoint back to the normal state
    ss.onChange(hosts.get(MOVING_NODE), ApplicationState.STATUS, valueFactory.normal(Collections.singleton(newToken)));
}
Aggregations