Use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.
The class RangeRelocator, method calculateRangesToStreamWithEndpoints.
/**
 * Calculates the endpoints that the current ranges need to be streamed to, if any streaming is needed.
 * In some situations a node will keep handling a current range as part of its new ranges, in which case
 * nothing has to be streamed for it.
 */
public static RangesByEndpoint calculateRangesToStreamWithEndpoints(RangesAtEndpoint streamRanges, AbstractReplicationStrategy strat, TokenMetadata tmdBefore, TokenMetadata tmdAfter) {
    RangesByEndpoint.Builder endpointRanges = new RangesByEndpoint.Builder();
    for (Replica toStream : streamRanges) {
        // If the range we are sending is full only send it to the new full replica
        // There will also be a new transient replica we need to send the data to, but not
        // the repaired data
        EndpointsForRange oldEndpoints = strat.calculateNaturalReplicas(toStream.range().right, tmdBefore);
        EndpointsForRange newEndpoints = strat.calculateNaturalReplicas(toStream.range().right, tmdAfter);
        logger.debug("Need to stream {}, current endpoints {}, new endpoints {}", toStream, oldEndpoints, newEndpoints);
        for (Replica newEndpoint : newEndpoints) {
            Replica oldEndpoint = oldEndpoints.byEndpoint().get(newEndpoint.endpoint());
            // Nothing to do
            if (newEndpoint.equals(oldEndpoint))
                continue;
            // Completely new range for this endpoint
            if (oldEndpoint == null) {
                if (toStream.isTransient() && newEndpoint.isFull())
                    throw new AssertionError(String.format("Need to stream %s, but only have %s which is transient and not full", newEndpoint, toStream));
                for (Range<Token> intersection : newEndpoint.range().intersectionWith(toStream.range())) {
                    endpointRanges.put(newEndpoint.endpoint(), newEndpoint.decorateSubrange(intersection));
                }
            } else {
                Set<Range<Token>> subsToStream = Collections.singleton(toStream.range());
                // First subtract what we already have
                if (oldEndpoint.isFull() == newEndpoint.isFull() || oldEndpoint.isFull())
                    subsToStream = toStream.range().subtract(oldEndpoint.range());
                // Now we only stream what is still replicated
                subsToStream.stream()
                            .flatMap(range -> range.intersectionWith(newEndpoint.range()).stream())
                            .forEach(tokenRange -> endpointRanges.put(newEndpoint.endpoint(), newEndpoint.decorateSubrange(tokenRange)));
            }
        }
    }
    return endpointRanges.build();
}
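The decision of what to stream reduces to plain Range<Token> arithmetic: subtract what the old replica already held, then intersect with what the new replica will hold. The following standalone sketch is not part of the Cassandra source; the class name StreamRangeMathSketch and the token values are invented for illustration, and it assumes only the Range.subtract and Range.intersectionWith calls used above plus Murmur3Partitioner.LongToken.

import java.util.Set;

import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;

public class StreamRangeMathSketch {
    public static void main(String[] args) {
        Range<Token> toStream   = range(0, 100);  // range the moving node has to hand off
        Range<Token> oldReplica = range(0, 40);   // part the target endpoint already replicates
        Range<Token> newReplica = range(20, 100); // part the target endpoint will replicate after the move

        // "First subtract what we already have"
        Set<Range<Token>> remaining = toStream.subtract(oldReplica);

        // "Now we only stream what is still replicated"
        remaining.stream()
                 .flatMap(r -> r.intersectionWith(newReplica).stream())
                 .forEach(r -> System.out.println("would stream " + r)); // prints (40,100]
    }

    // Helper local to this sketch: build a range from two raw Murmur3 token values.
    private static Range<Token> range(long left, long right) {
        return new Range<>(new Murmur3Partitioner.LongToken(left), new Murmur3Partitioner.LongToken(right));
    }
}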
Use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.
The class RangeCommandIterator, method query.
/**
 * Queries the provided sub-range.
 *
 * @param replicaPlan the replica plan for the sub-range to query.
 * @param isFirst in the case where multiple queries are sent in parallel, whether this is the first query of
 * that batch. This matters because, when paging, the command (more specifically its {@code DataLimits}) may
 * carry "state" information, and that state may only be valid for the first query (in that it is the query
 * that "continues" whatever was previously queried).
 */
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand rangeCommand = command.forSubRange(replicaPlan.range(), isFirst);
    // If enabled, request repaired data tracking info from full replicas, but
    // only if there are multiple full replicas to compare results from.
    boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled() && replicaPlan.contacts().filter(Replica::isFull).size() > 1;
    ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> readRepair = ReadRepair.create(command, sharedReplicaPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> resolver = new DataResolver<>(rangeCommand, sharedReplicaPlan, readRepair, queryStartNanoTime, trackRepairedStatus);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> handler = new ReadCallback<>(resolver, rangeCommand, sharedReplicaPlan, queryStartNanoTime);
    if (replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf()) {
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(rangeCommand, handler, trackRepairedStatus));
    } else {
        for (Replica replica : replicaPlan.contacts()) {
            Tracing.trace("Enqueuing request to {}", replica);
            ReadCommand command = replica.isFull() ? rangeCommand : rangeCommand.copyAsTransientQuery(replica);
            Message<ReadCommand> message = command.createMessage(trackRepairedStatus && replica.isFull());
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
        }
    }
    return new SingleRangeResponse(resolver, handler, readRepair);
}
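The trackRepairedStatus check above treats EndpointsForRange as a filterable replica collection. Below is a minimal sketch of that check in isolation, assuming the static factories Replica.fullReplica, Replica.transientReplica, EndpointsForRange.of and InetAddressAndPort.getByName behave as in recent Cassandra versions; the addresses, the token range and the class name FullReplicaCountSketch are made up for illustration.

import java.net.UnknownHostException;

import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.locator.EndpointsForRange;
import org.apache.cassandra.locator.InetAddressAndPort;
import org.apache.cassandra.locator.Replica;

public class FullReplicaCountSketch {
    public static void main(String[] args) throws UnknownHostException {
        Range<Token> range = new Range<>(new Murmur3Partitioner.LongToken(0),
                                         new Murmur3Partitioner.LongToken(100));
        // One full and one transient replica of the same range.
        EndpointsForRange replicas = EndpointsForRange.of(
                Replica.fullReplica(InetAddressAndPort.getByName("127.0.0.1"), range),
                Replica.transientReplica(InetAddressAndPort.getByName("127.0.0.2"), range));

        // Mirrors the trackRepairedStatus condition: only one full replica here, so no tracking.
        boolean multipleFullReplicas = replicas.filter(Replica::isFull).size() > 1;
        System.out.println("track repaired data? " + multipleFullReplicas);
    }
}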
Use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.
The class MoveTest, method assertMaps.
private void assertMaps(Map<Range<Token>, EndpointsForRange> expected, PendingRangeMaps actual) {
    int sizeOfActual = 0;
    Iterator<Map.Entry<Range<Token>, EndpointsForRange.Builder>> iterator = actual.iterator();
    while (iterator.hasNext()) {
        Map.Entry<Range<Token>, EndpointsForRange.Builder> actualEntry = iterator.next();
        assertNotNull(expected.get(actualEntry.getKey()));
        assertEquals(ImmutableSet.copyOf(expected.get(actualEntry.getKey())), ImmutableSet.copyOf(actualEntry.getValue()));
        sizeOfActual++;
    }
    assertEquals(expected.size(), sizeOfActual);
}
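The helper walks the PendingRangeMaps iterator and checks that each pending range maps to exactly the expected replica set. The hypothetical test method below could sit next to assertMaps inside the same test class; it is a sketch rather than actual MoveTest code, it assumes PendingRangeMaps.addPendingRange(Range, Replica) and EndpointsForRange.of exist with these signatures, and the address and token values are arbitrary.

@Test
public void assertMapsRoundTripSketch() throws UnknownHostException {
    // Hypothetical sketch: one pending range owned by one full replica on both sides.
    Range<Token> pending = new Range<>(new Murmur3Partitioner.LongToken(0),
                                       new Murmur3Partitioner.LongToken(100));
    Replica replica = Replica.fullReplica(InetAddressAndPort.getByName("127.0.0.3"), pending);

    PendingRangeMaps actual = new PendingRangeMaps();
    actual.addPendingRange(pending, replica);

    Map<Range<Token>, EndpointsForRange> expected =
            Collections.singletonMap(pending, EndpointsForRange.of(replica));

    // Passes: same single range, same single replica on both sides.
    assertMaps(expected, actual);
}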
Use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.
The class DataResolverTest, method testResolveComplexDelete.
@Test
public void testResolveComplexDelete() {
    EndpointsForRange replicas = makeReplicas(2);
    ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build();
    TestableReadRepair readRepair = new TestableReadRepair(cmd);
    DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, nanoTime());
    long[] ts = { 100, 200 };

    Row.Builder builder = BTreeRow.unsortedBuilder();
    builder.newRow(Clustering.EMPTY);
    builder.addComplexDeletion(m, new DeletionTime(ts[0] - 1, nowInSec));
    builder.addCell(mapCell(0, 0, ts[0]));
    InetAddressAndPort peer1 = replicas.get(0).endpoint();
    resolver.preprocess(response(cmd, peer1, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));

    builder.newRow(Clustering.EMPTY);
    DeletionTime expectedCmplxDelete = new DeletionTime(ts[1] - 1, nowInSec);
    builder.addComplexDeletion(m, expectedCmplxDelete);
    Cell<?> expectedCell = mapCell(1, 1, ts[1]);
    builder.addCell(expectedCell);
    InetAddressAndPort peer2 = replicas.get(1).endpoint();
    resolver.preprocess(response(cmd, peer2, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));

    try (PartitionIterator data = resolver.resolve()) {
        try (RowIterator rows = Iterators.getOnlyElement(data)) {
            Row row = Iterators.getOnlyElement(rows);
            assertColumns(row, "m");
            Assert.assertNull(row.getCell(m, CellPath.create(bb(0))));
            Assert.assertNotNull(row.getCell(m, CellPath.create(bb(1))));
        }
    }

    Mutation mutation = readRepair.getForEndpoint(peer1);
    Iterator<Row> rowIter = mutation.getPartitionUpdate(cfm2).iterator();
    assertTrue(rowIter.hasNext());
    Row row = rowIter.next();
    assertFalse(rowIter.hasNext());
    ComplexColumnData cd = row.getComplexColumnData(m);
    assertEquals(Collections.singleton(expectedCell), Sets.newHashSet(cd));
    assertEquals(expectedCmplxDelete, cd.complexDeletion());
    Assert.assertNull(readRepair.sent.get(peer2));
}
Use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.
The class DataResolverTest, method testResolveWithBothEmpty.
@Test
public void testResolveWithBothEmpty() {
    EndpointsForRange replicas = makeReplicas(2);
    TestableReadRepair readRepair = new TestableReadRepair(command);
    DataResolver resolver = new DataResolver(command, plan(replicas, ALL), readRepair, nanoTime());
    resolver.preprocess(response(command, replicas.get(0).endpoint(), EmptyIterators.unfilteredPartition(cfm)));
    resolver.preprocess(response(command, replicas.get(1).endpoint(), EmptyIterators.unfilteredPartition(cfm)));
    try (PartitionIterator data = resolver.resolve()) {
        assertFalse(data.hasNext());
    }
    assertTrue(readRepair.sent.isEmpty());
}
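Both DataResolverTest examples obtain their replicas from makeReplicas(2), a helper that is not shown on this page. A plausible shape for such a helper is sketched below; this is an assumption rather than the actual test code, and it relies on EndpointsForRange.builder, Replica.fullReplica and Murmur3Partitioner.instance.getMinimumToken behaving as in recent Cassandra versions.

// Hypothetical makeReplicas-style helper (NOT the actual DataResolverTest code):
// build an EndpointsForRange of `count` full replicas of the full ring, one per loopback address.
private static EndpointsForRange makeReplicasSketch(int count) throws UnknownHostException {
    Token min = Murmur3Partitioner.instance.getMinimumToken();
    Range<Token> fullRange = new Range<>(min, min); // (min, min] wraps around and covers the whole ring
    EndpointsForRange.Builder builder = EndpointsForRange.builder(fullRange);
    for (int i = 0; i < count; i++)
        builder.add(Replica.fullReplica(InetAddressAndPort.getByName("127.0.0." + (i + 1)), fullRange));
    return builder.build();
}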