Search in sources :

Example 21 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

the class RangeRelocator method calculateRangesToStreamWithEndpoints.

/**
 * Determines, for every replica in {@code streamRanges}, which endpoints must receive which
 * sub-ranges once replica placement moves from {@code tmdBefore} to {@code tmdAfter}.
 * In some situations a node keeps handling its current ranges as part of its new ranges, so
 * only the portion an endpoint does not already hold is scheduled for streaming.
 *
 * @param streamRanges the replicas (ranges) that are candidates for streaming
 * @param strat        replication strategy used to calculate natural replicas
 * @param tmdBefore    token metadata used to compute the current replicas of each range
 * @param tmdAfter     token metadata used to compute the new replicas of each range
 * @return the sub-ranges to stream, keyed by the endpoint that should receive them
 * @throws AssertionError if a full replica is needed on an endpoint that had no replica before,
 *                        but the range being streamed is only transient
 */
public static RangesByEndpoint calculateRangesToStreamWithEndpoints(RangesAtEndpoint streamRanges, AbstractReplicationStrategy strat, TokenMetadata tmdBefore, TokenMetadata tmdAfter) {
    RangesByEndpoint.Builder rangesPerTarget = new RangesByEndpoint.Builder();
    for (Replica source : streamRanges) {
        // A full range is only sent to the new full replica. A new transient replica will also
        // need the data, but not the repaired part of it.
        EndpointsForRange replicasBefore = strat.calculateNaturalReplicas(source.range().right, tmdBefore);
        EndpointsForRange replicasAfter = strat.calculateNaturalReplicas(source.range().right, tmdAfter);
        logger.debug("Need to stream {}, current endpoints {}, new endpoints {}", source, replicasBefore, replicasAfter);
        for (Replica target : replicasAfter) {
            Replica previous = replicasBefore.byEndpoint().get(target.endpoint());

            // The endpoint already replicates exactly this: nothing to stream.
            if (target.equals(previous))
                continue;

            if (previous == null) {
                // The endpoint had no replica before, so the range is entirely new to it.
                if (source.isTransient() && target.isFull())
                    throw new AssertionError(String.format("Need to stream %s, but only have %s which is transient and not full", target, source));
                for (Range<Token> overlap : target.range().intersectionWith(source.range())) {
                    rangesPerTarget.put(target.endpoint(), target.decorateSubrange(overlap));
                }
                continue;
            }

            // Drop what the endpoint already holds, unless it is being promoted from transient
            // to full, in which case the whole range is kept as a candidate.
            Set<Range<Token>> candidates;
            if (previous.isFull() || !target.isFull()) {
                candidates = source.range().subtract(previous.range());
            } else {
                candidates = Collections.singleton(source.range());
            }

            // Of the candidates, stream only what the endpoint still replicates afterwards.
            for (Range<Token> candidate : candidates) {
                for (Range<Token> overlap : candidate.intersectionWith(target.range())) {
                    rangesPerTarget.put(target.endpoint(), target.decorateSubrange(overlap));
                }
            }
        }
    }
    return rangesPerTarget.build();
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Range(org.apache.cassandra.dht.Range) Multimap(com.google.common.collect.Multimap) Future(java.util.concurrent.Future) Token(org.apache.cassandra.dht.Token) TokenMetadata(org.apache.cassandra.locator.TokenMetadata) Pair(org.apache.cassandra.utils.Pair) RangeStreamer(org.apache.cassandra.dht.RangeStreamer) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) StreamOperation(org.apache.cassandra.streaming.StreamOperation) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Logger(org.slf4j.Logger) FBUtilities(org.apache.cassandra.utils.FBUtilities) Collection(java.util.Collection) Set(java.util.Set) RangesByEndpoint(org.apache.cassandra.locator.RangesByEndpoint) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) Replica(org.apache.cassandra.locator.Replica) StreamState(org.apache.cassandra.streaming.StreamState) List(java.util.List) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) StreamPlan(org.apache.cassandra.streaming.StreamPlan) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) VisibleForTesting(com.google.common.annotations.VisibleForTesting) FailureDetector(org.apache.cassandra.gms.FailureDetector) Collections(java.util.Collections) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Replica(org.apache.cassandra.locator.Replica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) RangesByEndpoint(org.apache.cassandra.locator.RangesByEndpoint)

Example 22 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

the class RangeCommandIterator method query.

/**
 * Queries the provided sub-range.
 *
 * @param replicaPlan the replica plan for the sub-range to query.
 * @param isFirst when multiple queries are sent in parallel, whether this is the first query of
 * that batch or not. It matters because, when paging queries, the command (more specifically the
 * {@code DataLimits}) may have "state" information, and that state may only be valid for the first
 * query (in that it is the query that "continues" whatever was previously queried).
 * @return a response wrapping the resolver, the read callback and the read repair created for
 * this sub-range.
 */
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand subRangeCommand = command.forSubRange(replicaPlan.range(), isFirst);

    // Repaired data tracking is only requested from full replicas, and only when it is enabled
    // and there is more than one full replica whose results can be compared.
    boolean trackRepaired = false;
    if (DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled())
        trackRepaired = replicaPlan.contacts().filter(Replica::isFull).size() > 1;

    ReplicaPlan.SharedForRangeRead sharedPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> repair = ReadRepair.create(command, sharedPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> dataResolver = new DataResolver<>(subRangeCommand, sharedPlan, repair, queryStartNanoTime, trackRepaired);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> callback = new ReadCallback<>(dataResolver, subRangeCommand, sharedPlan, queryStartNanoTime);

    boolean onlySelf = replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf();
    if (!onlySelf) {
        for (Replica replica : replicaPlan.contacts()) {
            Tracing.trace("Enqueuing request to {}", replica);
            // Transient replicas get a transient copy of the command.
            ReadCommand outgoing;
            if (replica.isFull()) {
                outgoing = subRangeCommand;
            } else {
                outgoing = subRangeCommand.copyAsTransientQuery(replica);
            }
            Message<ReadCommand> message = outgoing.createMessage(trackRepaired && replica.isFull());
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), callback);
        }
    } else {
        // The single contact is this node: run the read on the READ stage instead of messaging.
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(subRangeCommand, callback, trackRepaired));
    }
    return new SingleRangeResponse(dataResolver, callback, repair);
}
Also used : ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) StorageProxy(org.apache.cassandra.service.StorageProxy) ReadCommand(org.apache.cassandra.db.ReadCommand) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) Replica(org.apache.cassandra.locator.Replica) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) ReadCallback(org.apache.cassandra.service.reads.ReadCallback) DataResolver(org.apache.cassandra.service.reads.DataResolver)

Example 23 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

the class MoveTest method assertMaps.

/**
 * Asserts that every entry iterated from {@code actual} has a key present in {@code expected}
 * with the same set of elements, and that both hold the same number of entries.
 *
 * @param expected the expected endpoints, keyed by token range
 * @param actual   the pending range maps under test
 */
private void assertMaps(Map<Range<Token>, EndpointsForRange> expected, PendingRangeMaps actual) {
    int seen = 0;
    for (Iterator<Map.Entry<Range<Token>, EndpointsForRange.Builder>> it = actual.iterator(); it.hasNext(); seen++) {
        Map.Entry<Range<Token>, EndpointsForRange.Builder> entry = it.next();
        EndpointsForRange wanted = expected.get(entry.getKey());
        assertNotNull(wanted);
        // Compare as sets so that element order does not matter.
        assertEquals(ImmutableSet.copyOf(wanted), ImmutableSet.copyOf(entry.getValue()));
    }
    assertEquals(expected.size(), seen);
}
Also used : EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) BigIntegerToken(org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken) Token(org.apache.cassandra.dht.Token) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Range(org.apache.cassandra.dht.Range) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) RangesByEndpoint(org.apache.cassandra.locator.RangesByEndpoint)

Example 24 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

the class DataResolverTest method testResolveComplexDelete.

/**
 * Feeds the resolver two responses, each carrying a complex deletion and one cell for column
 * {@code m}, the second with the higher timestamps. Checks that the resolved row only exposes
 * the second cell, that the first peer is sent a repair mutation containing the second cell and
 * complex deletion, and that nothing is recorded as sent to the second peer.
 */
@Test
public void testResolveComplexDelete() {
    EndpointsForRange replicas = makeReplicas(2);
    InetAddressAndPort firstPeer = replicas.get(0).endpoint();
    InetAddressAndPort secondPeer = replicas.get(1).endpoint();

    ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build();
    TestableReadRepair repair = new TestableReadRepair(cmd);
    DataResolver dataResolver = new DataResolver(cmd, plan(replicas, ALL), repair, nanoTime());

    long olderTs = 100;
    long newerTs = 200;
    Row.Builder rowBuilder = BTreeRow.unsortedBuilder();

    // Response from the first peer: deletion and cell at the lower timestamp.
    rowBuilder.newRow(Clustering.EMPTY);
    rowBuilder.addComplexDeletion(m, new DeletionTime(olderTs - 1, nowInSec));
    rowBuilder.addCell(mapCell(0, 0, olderTs));
    dataResolver.preprocess(response(cmd, firstPeer, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, rowBuilder.build()))));

    // Response from the second peer: deletion and cell at the higher timestamp.
    rowBuilder.newRow(Clustering.EMPTY);
    DeletionTime expectedCmplxDelete = new DeletionTime(newerTs - 1, nowInSec);
    rowBuilder.addComplexDeletion(m, expectedCmplxDelete);
    Cell<?> expectedCell = mapCell(1, 1, newerTs);
    rowBuilder.addCell(expectedCell);
    dataResolver.preprocess(response(cmd, secondPeer, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, rowBuilder.build()))));

    // Resources close in reverse declaration order, i.e. the row iterator before the partition iterator.
    try (PartitionIterator partitions = dataResolver.resolve();
         RowIterator resolvedRows = Iterators.getOnlyElement(partitions)) {
        Row resolved = Iterators.getOnlyElement(resolvedRows);
        assertColumns(resolved, "m");
        Assert.assertNull(resolved.getCell(m, CellPath.create(bb(0))));
        Assert.assertNotNull(resolved.getCell(m, CellPath.create(bb(1))));
    }

    // The first peer must be repaired with exactly one row holding the expected cell and deletion.
    Mutation repairMutation = repair.getForEndpoint(firstPeer);
    Iterator<Row> repairedRows = repairMutation.getPartitionUpdate(cfm2).iterator();
    assertTrue(repairedRows.hasNext());
    Row repairedRow = repairedRows.next();
    assertFalse(repairedRows.hasNext());
    ComplexColumnData complexData = repairedRow.getComplexColumnData(m);
    assertEquals(Collections.singleton(expectedCell), Sets.newHashSet(complexData));
    assertEquals(expectedCmplxDelete, complexData.complexDeletion());

    // Nothing is recorded as sent to the second peer.
    Assert.assertNull(repair.sent.get(secondPeer));
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) TestableReadRepair(org.apache.cassandra.service.reads.repair.TestableReadRepair) DeletionTime(org.apache.cassandra.db.DeletionTime) ReadCommand(org.apache.cassandra.db.ReadCommand) ComplexColumnData(org.apache.cassandra.db.rows.ComplexColumnData) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) RowIterator(org.apache.cassandra.db.rows.RowIterator) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) BTreeRow(org.apache.cassandra.db.rows.BTreeRow) Row(org.apache.cassandra.db.rows.Row) Mutation(org.apache.cassandra.db.Mutation) Test(org.junit.Test)

Example 25 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

the class DataResolverTest method testResolveWithBothEmpty.

/**
 * Resolves two empty responses and checks that the result has no partitions and that the
 * read repair recorded nothing as sent.
 */
@Test
public void testResolveWithBothEmpty() {
    EndpointsForRange replicas = makeReplicas(2);
    TestableReadRepair repair = new TestableReadRepair(command);
    DataResolver dataResolver = new DataResolver(command, plan(replicas, ALL), repair, nanoTime());

    // Both replicas answer with an empty partition iterator.
    for (int i = 0; i < 2; i++) {
        dataResolver.preprocess(response(command, replicas.get(i).endpoint(), EmptyIterators.unfilteredPartition(cfm)));
    }

    try (PartitionIterator resolved = dataResolver.resolve()) {
        assertFalse(resolved.hasNext());
    }
    assertTrue(repair.sent.isEmpty());
}
Also used : TestableReadRepair(org.apache.cassandra.service.reads.repair.TestableReadRepair) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Test(org.junit.Test)

Aggregations

EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange)44 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)36 Test (org.junit.Test)32 Mutation (org.apache.cassandra.db.Mutation)18 PartitionIterator (org.apache.cassandra.db.partitions.PartitionIterator)16 UnfilteredPartitionIterator (org.apache.cassandra.db.partitions.UnfilteredPartitionIterator)16 ByteBuffer (java.nio.ByteBuffer)14 RowUpdateBuilder (org.apache.cassandra.db.RowUpdateBuilder)13 BTreeRow (org.apache.cassandra.db.rows.BTreeRow)10 Row (org.apache.cassandra.db.rows.Row)10 Replica (org.apache.cassandra.locator.Replica)10 RowIterator (org.apache.cassandra.db.rows.RowIterator)9 DeletionTime (org.apache.cassandra.db.DeletionTime)8 HashMap (java.util.HashMap)6 Token (org.apache.cassandra.dht.Token)6 TestableReadRepair (org.apache.cassandra.service.reads.repair.TestableReadRepair)6 RangeTombstone (org.apache.cassandra.db.RangeTombstone)5 ReadCommand (org.apache.cassandra.db.ReadCommand)4 EndpointsByReplica (org.apache.cassandra.locator.EndpointsByReplica)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3