
Example 1 with ReadCommand

use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.

the class AbstractReadExecutor method makeRequests.

private void makeRequests(ReadCommand readCommand, Iterable<InetAddress> endpoints) {
    boolean hasLocalEndpoint = false;
    for (InetAddress endpoint : endpoints) {
        if (StorageProxy.canDoLocalRequest(endpoint)) {
            hasLocalEndpoint = true;
            continue;
        }
        if (traceState != null)
            traceState.trace("reading {} from {}", readCommand.isDigestQuery() ? "digest" : "data", endpoint);
        logger.trace("reading {} from {}", readCommand.isDigestQuery() ? "digest" : "data", endpoint);
        MessageOut<ReadCommand> message = readCommand.createMessage();
        MessagingService.instance().sendRRWithFailure(message, endpoint, handler);
    }
    // We delay the local (potentially blocking) read till the end to avoid stalling remote requests.
    if (hasLocalEndpoint) {
        logger.trace("reading {} locally", readCommand.isDigestQuery() ? "digest" : "data");
        // 'command' is the enclosing AbstractReadExecutor's ReadCommand field, not the readCommand parameter
        StageManager.getStage(Stage.READ).maybeExecuteImmediately(new LocalReadRunnable(command, handler));
    }
}
Also used : ReadCommand(org.apache.cassandra.db.ReadCommand) SinglePartitionReadCommand(org.apache.cassandra.db.SinglePartitionReadCommand) LocalReadRunnable(org.apache.cassandra.service.StorageProxy.LocalReadRunnable) InetAddress(java.net.InetAddress)
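
For context, this helper is driven by the executor's request fan-out: in the 3.x-era code shown here, AbstractReadExecutor.executeAsync() sends one full data request and turns the remaining replicas into digest-only reads via ReadCommand.copy().setIsDigestQuery(true). The sketch below is a simplified, hypothetical stand-in for that (private) logic, shown only to illustrate where the isDigestQuery flag logged above comes from; it is not the real class.

import java.net.InetAddress;
import java.util.List;
import org.apache.cassandra.db.ReadCommand;

// Simplified stand-in for AbstractReadExecutor's fan-out (illustrative only).
final class ReadFanOutSketch {
    private final ReadCommand command;

    ReadFanOutSketch(ReadCommand command) {
        this.command = command;
    }

    void executeAsync(List<InetAddress> replicas) {
        // one replica serves the full data read...
        makeRequests(command, replicas.subList(0, 1));
        // ...the rest return only digests (checksums) for comparison
        if (replicas.size() > 1)
            makeRequests(command.copy().setIsDigestQuery(true), replicas.subList(1, replicas.size()));
    }

    private void makeRequests(ReadCommand readCommand, Iterable<InetAddress> endpoints) {
        // body as in Example 1 above
    }
}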

Example 2 with ReadCommand

use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.

the class DataResolverTest method testResolveDeletedCollection.

@Test
public void testResolveDeletedCollection() {
    EndpointsForRange replicas = makeReplicas(2);
    ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build();
    TestableReadRepair readRepair = new TestableReadRepair(cmd);
    DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, nanoTime());
    long[] ts = { 100, 200 };
    Row.Builder builder = BTreeRow.unsortedBuilder();
    builder.newRow(Clustering.EMPTY);
    builder.addComplexDeletion(m, new DeletionTime(ts[0] - 1, nowInSec));
    builder.addCell(mapCell(0, 0, ts[0]));
    InetAddressAndPort peer1 = replicas.get(0).endpoint();
    resolver.preprocess(response(cmd, peer1, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));
    builder.newRow(Clustering.EMPTY);
    DeletionTime expectedCmplxDelete = new DeletionTime(ts[1] - 1, nowInSec);
    builder.addComplexDeletion(m, expectedCmplxDelete);
    InetAddressAndPort peer2 = replicas.get(1).endpoint();
    resolver.preprocess(response(cmd, peer2, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));
    try (PartitionIterator data = resolver.resolve()) {
        assertFalse(data.hasNext());
    }
    Mutation mutation = readRepair.getForEndpoint(peer1);
    Iterator<Row> rowIter = mutation.getPartitionUpdate(cfm2).iterator();
    assertTrue(rowIter.hasNext());
    Row row = rowIter.next();
    assertFalse(rowIter.hasNext());
    ComplexColumnData cd = row.getComplexColumnData(m);
    assertEquals(Collections.emptySet(), Sets.newHashSet(cd));
    assertEquals(expectedCmplxDelete, cd.complexDeletion());
    Assert.assertNull(readRepair.sent.get(peer2));
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) TestableReadRepair(org.apache.cassandra.service.reads.repair.TestableReadRepair) DeletionTime(org.apache.cassandra.db.DeletionTime) ReadCommand(org.apache.cassandra.db.ReadCommand) ComplexColumnData(org.apache.cassandra.db.rows.ComplexColumnData) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) BTreeRow(org.apache.cassandra.db.rows.BTreeRow) Row(org.apache.cassandra.db.rows.Row) Mutation(org.apache.cassandra.db.Mutation) Test(org.junit.Test)
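
Why peer2's response wins here: a complex (collection) deletion shadows every cell of that column whose timestamp is less than or equal to its markedForDeleteAt. A minimal sketch of that comparison, reusing the test's own fixtures (mapCell, nowInSec) and therefore only meaningful inside DataResolverTest:

// peer2's complex deletion: markedForDeleteAt = ts[1] - 1 = 199
DeletionTime complexDeletion = new DeletionTime(199, nowInSec);
// peer1's map cell was written at ts[0] = 100
Cell<?> cell = mapCell(0, 0, 100);
// 100 <= 199, so the deletion shadows the cell and the resolved row is empty
assert complexDeletion.deletes(cell);

This is also why read repair sends a mutation to peer1 (it must learn the newer deletion) but nothing to peer2, as the final assertions check.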

Example 3 with ReadCommand

use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.

the class ThrottledUnfilteredIteratorTest method testThrottledIteratorWithRangeDeletions.

@Test
public void testThrottledIteratorWithRangeDeletions() throws Exception {
    Keyspace keyspace = Keyspace.open(KSNAME);
    ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(CFNAME);
    // Inserting data
    String key = "k1";
    UpdateBuilder builder;
    builder = UpdateBuilder.create(cfs.metadata(), key).withTimestamp(0);
    for (int i = 0; i < 40; i += 2) builder.newRow(i).add("val", i);
    builder.applyUnsafe();
    new RowUpdateBuilder(cfs.metadata(), 1, key).addRangeTombstone(10, 22).build().applyUnsafe();
    cfs.forceBlockingFlush();
    builder = UpdateBuilder.create(cfs.metadata(), key).withTimestamp(2);
    for (int i = 1; i < 40; i += 2) builder.newRow(i).add("val", i);
    builder.applyUnsafe();
    new RowUpdateBuilder(cfs.metadata(), 3, key).addRangeTombstone(19, 27).build().applyUnsafe();
    // We don't flush, so that we test with a range tombstone both in the memtable and in an sstable
    // Queries by name
    int[] live = new int[] { 4, 9, 11, 17, 28 };
    int[] dead = new int[] { 12, 19, 21, 24, 27 };
    AbstractReadCommandBuilder.PartitionRangeBuilder cmdBuilder = Util.cmd(cfs);
    ReadCommand cmd = cmdBuilder.build();
    for (int batchSize = 2; batchSize <= 40; batchSize++) {
        List<UnfilteredRowIterator> unfilteredRowIterators = new LinkedList<>();
        try (ReadExecutionController executionController = cmd.executionController();
            UnfilteredPartitionIterator iterator = cmd.executeLocally(executionController)) {
            assertTrue(iterator.hasNext());
            Iterator<UnfilteredRowIterator> throttled = ThrottledUnfilteredIterator.throttle(iterator, batchSize);
            while (throttled.hasNext()) {
                UnfilteredRowIterator next = throttled.next();
                ImmutableBTreePartition materializedPartition = ImmutableBTreePartition.create(next);
                int unfilteredCount = Iterators.size(materializedPartition.unfilteredIterator());
                System.out.println("batchsize " + batchSize + " unfilteredCount " + unfilteredCount + " materializedPartition " + materializedPartition);
                if (throttled.hasNext()) {
                    if (unfilteredCount != batchSize) {
                        // when there is an extra unfiltered, it must be a close-bound marker
                        assertEquals(batchSize + 1, unfilteredCount);
                        Unfiltered last = Iterators.getLast(materializedPartition.unfilteredIterator());
                        assertTrue(last.isRangeTombstoneMarker());
                        RangeTombstoneMarker marker = (RangeTombstoneMarker) last;
                        assertFalse(marker.isBoundary());
                        assertTrue(marker.isClose(false));
                    }
                } else {
                    // only the last batch can be smaller than batchSize
                    assertTrue(unfilteredCount <= batchSize + 1);
                }
                unfilteredRowIterators.add(materializedPartition.unfilteredIterator());
            }
            assertFalse(iterator.hasNext());
        }
        // Verify throttled data after merge
        Partition partition = ImmutableBTreePartition.create(UnfilteredRowIterators.merge(unfilteredRowIterators));
        int nowInSec = FBUtilities.nowInSeconds();
        for (int i : live) assertTrue("Row " + i + " should be live", partition.getRow(Clustering.make(ByteBufferUtil.bytes((i)))).hasLiveData(nowInSec, cfs.metadata().enforceStrictLiveness()));
        for (int i : dead) assertFalse("Row " + i + " shouldn't be live", partition.getRow(Clustering.make(ByteBufferUtil.bytes((i)))).hasLiveData(nowInSec, cfs.metadata().enforceStrictLiveness()));
    }
}
Also used : Partition(org.apache.cassandra.db.partitions.Partition) ImmutableBTreePartition(org.apache.cassandra.db.partitions.ImmutableBTreePartition) AbstractReadCommandBuilder(org.apache.cassandra.db.AbstractReadCommandBuilder) ReadCommand(org.apache.cassandra.db.ReadCommand) UpdateBuilder(org.apache.cassandra.UpdateBuilder) RowUpdateBuilder(org.apache.cassandra.db.RowUpdateBuilder) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) AbstractUnfilteredPartitionIterator(org.apache.cassandra.db.partitions.AbstractUnfilteredPartitionIterator) LinkedList(java.util.LinkedList) ReadExecutionController(org.apache.cassandra.db.ReadExecutionController) Keyspace(org.apache.cassandra.db.Keyspace) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Test(org.junit.Test)
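
As a usage note, ThrottledUnfilteredIterator.throttle() re-chunks one large partition into bounded batches, each a self-contained UnfilteredRowIterator that repeats the partition key and deletion info, and each possibly carrying one extra close-bound marker (the batchSize + 1 case asserted above). A minimal consumption sketch under the same setup (cmd built via Util.cmd(cfs), batch size of 100 chosen arbitrarily):

try (ReadExecutionController controller = cmd.executionController();
     UnfilteredPartitionIterator partitions = cmd.executeLocally(controller)) {
    Iterator<UnfilteredRowIterator> throttled = ThrottledUnfilteredIterator.throttle(partitions, 100);
    while (throttled.hasNext()) {
        try (UnfilteredRowIterator batch = throttled.next()) {
            // each batch can be serialized, flushed, or streamed independently
        }
    }
}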

Example 4 with ReadCommand

use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.

the class StorageProxy method readRegular.

@SuppressWarnings("resource")
private static PartitionIterator readRegular(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, long queryStartNanoTime) throws UnavailableException, ReadFailureException, ReadTimeoutException {
    long start = nanoTime();
    try {
        PartitionIterator result = fetchRows(group.queries, consistencyLevel, queryStartNanoTime);
        // Note that the only difference between the commands in a group must be the partition key on which
        // they apply.
        boolean enforceStrictLiveness = group.queries.get(0).metadata().enforceStrictLiveness();
        // If we have more than one command, then despite each read command honoring the limit, the total result
        // might not honor it and so we should enforce it
        if (group.queries.size() > 1)
            result = group.limits().filter(result, group.nowInSec(), group.selectsFullPartition(), enforceStrictLiveness);
        return result;
    } catch (UnavailableException e) {
        readMetrics.unavailables.mark();
        readMetricsForLevel(consistencyLevel).unavailables.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadTimeoutException e) {
        readMetrics.timeouts.mark();
        readMetricsForLevel(consistencyLevel).timeouts.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadAbortException e) {
        recordReadRegularAbort(consistencyLevel, e);
        throw e;
    } catch (ReadFailureException e) {
        readMetrics.failures.mark();
        readMetricsForLevel(consistencyLevel).failures.mark();
        throw e;
    } finally {
        long latency = nanoTime() - start;
        readMetrics.addNano(latency);
        readMetricsForLevel(consistencyLevel).addNano(latency);
        // TODO avoid giving every command the same latency number.  Can fix this in CASSANDRA-5329
        for (ReadCommand command : group.queries) Keyspace.openAndGetStore(command.metadata()).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS);
    }
}
Also used : ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) SinglePartitionReadCommand(org.apache.cassandra.db.SinglePartitionReadCommand) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) ReadCommand(org.apache.cassandra.db.ReadCommand) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException)
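
The group-level filter in the try block matters because limits are enforced per command. A hypothetical illustration: SELECT * FROM t WHERE pk IN ('a', 'b', 'c') LIMIT 10 builds a Group of three SinglePartitionReadCommands, each of which honors LIMIT 10 on its own partition, so the concatenated result can hold up to 30 rows. The coordinator therefore re-applies the group limit over the merged iterator, but only when there is more than one command; with a single command, the per-command limit already is the group limit:

// re-quoted from the method above, with the hypothetical numbers in mind
if (group.queries.size() > 1)
    result = group.limits().filter(result, group.nowInSec(), group.selectsFullPartition(), enforceStrictLiveness);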

Example 5 with ReadCommand

use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.

the class RangeCommandIterator method query.

/**
 * Queries the provided sub-range.
 *
 * @param replicaPlan the replica plan for the sub-range to query.
 * @param isFirst in the case where multiple queries are sent in parallel, whether this is the first query of
 * the batch or not. The reason it matters is that when paging queries, the command (more specifically the
 * {@code DataLimits}) may have "state" information, and that state may only be valid for the first query (in
 * that it's the query that "continues" whatever was previously queried).
 */
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand rangeCommand = command.forSubRange(replicaPlan.range(), isFirst);
    // If enabled, request repaired data tracking info from full replicas, but
    // only if there are multiple full replicas to compare results from.
    boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled() && replicaPlan.contacts().filter(Replica::isFull).size() > 1;
    ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> readRepair = ReadRepair.create(command, sharedReplicaPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> resolver = new DataResolver<>(rangeCommand, sharedReplicaPlan, readRepair, queryStartNanoTime, trackRepairedStatus);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> handler = new ReadCallback<>(resolver, rangeCommand, sharedReplicaPlan, queryStartNanoTime);
    if (replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf()) {
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(rangeCommand, handler, trackRepairedStatus));
    } else {
        for (Replica replica : replicaPlan.contacts()) {
            Tracing.trace("Enqueuing request to {}", replica);
            ReadCommand command = replica.isFull() ? rangeCommand : rangeCommand.copyAsTransientQuery(replica);
            Message<ReadCommand> message = command.createMessage(trackRepairedStatus && replica.isFull());
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
        }
    }
    return new SingleRangeResponse(resolver, handler, readRepair);
}
Also used : ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) PartitionRangeReadCommand(org.apache.cassandra.db.PartitionRangeReadCommand) StorageProxy(org.apache.cassandra.service.StorageProxy) ReadCommand(org.apache.cassandra.db.ReadCommand) Replica(org.apache.cassandra.locator.Replica) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) ReadCallback(org.apache.cassandra.service.reads.ReadCallback) DataResolver(org.apache.cassandra.service.reads.DataResolver)
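
To make the isFirst contract from the javadoc concrete: only the first sub-range query of a batch may consume the paging state carried in the command's DataLimits, so isFirst is true exactly once. A hypothetical driver loop in the spirit of RangeCommandIterator's batching (subRangePlans and responses are illustrative names, not fields of the real class):

boolean isFirst = true;
for (ReplicaPlan.ForRangeRead subRangePlan : subRangePlans) {
    // command.forSubRange(range, isFirst) above strips the paging state
    // from the DataLimits for every query after the first
    responses.add(query(subRangePlan, isFirst));
    isFirst = false;
}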

Aggregations

ReadCommand (org.apache.cassandra.db.ReadCommand): 13
UnfilteredPartitionIterator (org.apache.cassandra.db.partitions.UnfilteredPartitionIterator): 6
PartitionIterator (org.apache.cassandra.db.partitions.PartitionIterator): 5
EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange): 5
InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort): 5
Test (org.junit.Test): 5
DeletionTime (org.apache.cassandra.db.DeletionTime): 4
SinglePartitionReadCommand (org.apache.cassandra.db.SinglePartitionReadCommand): 4
BTreeRow (org.apache.cassandra.db.rows.BTreeRow): 4
ComplexColumnData (org.apache.cassandra.db.rows.ComplexColumnData): 4
Row (org.apache.cassandra.db.rows.Row): 4
TestableReadRepair (org.apache.cassandra.service.reads.repair.TestableReadRepair): 4
Mutation (org.apache.cassandra.db.Mutation): 3
RowIterator (org.apache.cassandra.db.rows.RowIterator): 3
ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore): 2
PartitionRangeReadCommand (org.apache.cassandra.db.PartitionRangeReadCommand): 2
Replica (org.apache.cassandra.locator.Replica): 2
StorageProxy (org.apache.cassandra.service.StorageProxy): 2
LocalReadRunnable (org.apache.cassandra.service.StorageProxy.LocalReadRunnable): 2
InetAddress (java.net.InetAddress): 1