Use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.
The class AbstractReadExecutor, method makeRequests:
private void makeRequests(ReadCommand readCommand, Iterable<InetAddress> endpoints) {
    boolean hasLocalEndpoint = false;
    for (InetAddress endpoint : endpoints) {
        if (StorageProxy.canDoLocalRequest(endpoint)) {
            hasLocalEndpoint = true;
            continue;
        }
        if (traceState != null)
            traceState.trace("reading {} from {}", readCommand.isDigestQuery() ? "digest" : "data", endpoint);
        logger.trace("reading {} from {}", readCommand.isDigestQuery() ? "digest" : "data", endpoint);
        MessageOut<ReadCommand> message = readCommand.createMessage();
        MessagingService.instance().sendRRWithFailure(message, endpoint, handler);
    }
    // We delay the local (potentially blocking) read till the end to avoid stalling remote requests.
    if (hasLocalEndpoint) {
        logger.trace("reading {} locally", readCommand.isDigestQuery() ? "digest" : "data");
        // 'command' here is the executor's ReadCommand field.
        StageManager.getStage(Stage.READ).maybeExecuteImmediately(new LocalReadRunnable(command, handler));
    }
}
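The ordering is the point of this snippet: remote messages go out first, and the local, potentially blocking read is submitted last. Below is a minimal, self-contained sketch of that same pattern; Endpoint, sendAsync and readLocally are hypothetical stand-ins, not the Cassandra messaging or stage API.

import java.util.List;

class LocalLastDispatchSketch {
    interface Endpoint { boolean isLocal(); }

    void dispatch(List<Endpoint> endpoints) {
        boolean hasLocalEndpoint = false;
        for (Endpoint endpoint : endpoints) {
            if (endpoint.isLocal()) {
                hasLocalEndpoint = true; // remember the local replica, but defer its read
                continue;
            }
            sendAsync(endpoint); // cheap, non-blocking remote send
        }
        // Run the potentially blocking local read only after all remote requests are
        // in flight, so slow local I/O never delays the remote replicas.
        if (hasLocalEndpoint)
            readLocally();
    }

    void sendAsync(Endpoint endpoint) { /* hypothetical remote send */ }
    void readLocally() { /* hypothetical local read */ }
}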
Use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.
The class DataResolverTest, method testResolveDeletedCollection:
@Test
public void testResolveDeletedCollection() {
    EndpointsForRange replicas = makeReplicas(2);
    ReadCommand cmd = Util.cmd(cfs2, dk).withNowInSeconds(nowInSec).build();
    TestableReadRepair readRepair = new TestableReadRepair(cmd);
    DataResolver resolver = new DataResolver(cmd, plan(replicas, ALL), readRepair, nanoTime());
    long[] ts = { 100, 200 };
    // peer1 returns a complex (collection) deletion at ts[0] - 1 plus one live map cell written at ts[0].
    Row.Builder builder = BTreeRow.unsortedBuilder();
    builder.newRow(Clustering.EMPTY);
    builder.addComplexDeletion(m, new DeletionTime(ts[0] - 1, nowInSec));
    builder.addCell(mapCell(0, 0, ts[0]));
    InetAddressAndPort peer1 = replicas.get(0).endpoint();
    resolver.preprocess(response(cmd, peer1, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));
    // peer2 returns only a newer complex deletion at ts[1] - 1, which shadows peer1's cell.
    builder.newRow(Clustering.EMPTY);
    DeletionTime expectedCmplxDelete = new DeletionTime(ts[1] - 1, nowInSec);
    builder.addComplexDeletion(m, expectedCmplxDelete);
    InetAddressAndPort peer2 = replicas.get(1).endpoint();
    resolver.preprocess(response(cmd, peer2, iter(PartitionUpdate.singleRowUpdate(cfm2, dk, builder.build()))));
    // The newer deletion wins, so the resolved result is empty.
    try (PartitionIterator data = resolver.resolve()) {
        assertFalse(data.hasNext());
    }
    // Read repair: peer1 must receive the newer complex deletion (and no cells); peer2 is already up to date.
    Mutation mutation = readRepair.getForEndpoint(peer1);
    Iterator<Row> rowIter = mutation.getPartitionUpdate(cfm2).iterator();
    assertTrue(rowIter.hasNext());
    Row row = rowIter.next();
    assertFalse(rowIter.hasNext());
    ComplexColumnData cd = row.getComplexColumnData(m);
    assertEquals(Collections.emptySet(), Sets.newHashSet(cd));
    assertEquals(expectedCmplxDelete, cd.complexDeletion());
    Assert.assertNull(readRepair.sent.get(peer2));
}
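The test leans on standard timestamp reconciliation: peer2's complex deletion at ts[1] - 1 is newer than everything peer1 returned, so the merged result is empty and only peer1 needs a repair mutation. A small, self-contained sketch of that comparison rule (plain Java stand-ins, not Cassandra's DeletionTime or cell classes):

class DeletionReconcileSketch {
    // Simplified stand-in for a complex (collection) deletion: only the timestamp matters here.
    record ComplexDeletion(long markedForDeleteAt) { }

    // A cell survives only if it was written strictly after the deletion; the test
    // writes its cell at ts[0] and peer2 deletes at ts[1] - 1, so the cell is shadowed.
    static boolean isLive(long cellTimestamp, ComplexDeletion deletion) {
        return cellTimestamp > deletion.markedForDeleteAt();
    }

    // Reconciling two replicas' deletions keeps the newer one, which is what makes
    // peer2's response win and what the repair mutation to peer1 carries.
    static ComplexDeletion reconcile(ComplexDeletion a, ComplexDeletion b) {
        return a.markedForDeleteAt() >= b.markedForDeleteAt() ? a : b;
    }

    public static void main(String[] args) {
        ComplexDeletion peer1 = new ComplexDeletion(99);  // ts[0] - 1
        ComplexDeletion peer2 = new ComplexDeletion(199); // ts[1] - 1
        ComplexDeletion merged = reconcile(peer1, peer2);
        System.out.println(isLive(100, merged));          // false: the ts[0] cell is shadowed
    }
}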
Use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.
The class ThrottledUnfilteredIteratorTest, method testThrottledIteratorWithRangeDeletions:
@Test
public void testThrottledIteratorWithRangeDeletions() throws Exception {
    Keyspace keyspace = Keyspace.open(KSNAME);
    ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(CFNAME);
    // Inserting data
    String key = "k1";
    UpdateBuilder builder;
    builder = UpdateBuilder.create(cfs.metadata(), key).withTimestamp(0);
    for (int i = 0; i < 40; i += 2) builder.newRow(i).add("val", i);
    builder.applyUnsafe();
    new RowUpdateBuilder(cfs.metadata(), 1, key).addRangeTombstone(10, 22).build().applyUnsafe();
    cfs.forceBlockingFlush();
    builder = UpdateBuilder.create(cfs.metadata(), key).withTimestamp(2);
    for (int i = 1; i < 40; i += 2) builder.newRow(i).add("val", i);
    builder.applyUnsafe();
    new RowUpdateBuilder(cfs.metadata(), 3, key).addRangeTombstone(19, 27).build().applyUnsafe();
    // We don't flush again, so we test with a range tombstone both in the memtable and in an sstable
    // Expected live and shadowed rows
    int[] live = new int[] { 4, 9, 11, 17, 28 };
    int[] dead = new int[] { 12, 19, 21, 24, 27 };
    AbstractReadCommandBuilder.PartitionRangeBuilder cmdBuilder = Util.cmd(cfs);
    ReadCommand cmd = cmdBuilder.build();
    for (int batchSize = 2; batchSize <= 40; batchSize++) {
        List<UnfilteredRowIterator> unfilteredRowIterators = new LinkedList<>();
        try (ReadExecutionController executionController = cmd.executionController();
             UnfilteredPartitionIterator iterator = cmd.executeLocally(executionController)) {
            assertTrue(iterator.hasNext());
            Iterator<UnfilteredRowIterator> throttled = ThrottledUnfilteredIterator.throttle(iterator, batchSize);
            while (throttled.hasNext()) {
                UnfilteredRowIterator next = throttled.next();
                ImmutableBTreePartition materializedPartition = ImmutableBTreePartition.create(next);
                int unfilteredCount = Iterators.size(materializedPartition.unfilteredIterator());
                System.out.println("batchsize " + batchSize + " unfilteredCount " + unfilteredCount + " materializedPartition " + materializedPartition);
                if (throttled.hasNext()) {
                    if (unfilteredCount != batchSize) {
                        // when there is an extra unfiltered, it must be a close-bound marker
                        assertEquals(batchSize + 1, unfilteredCount);
                        Unfiltered last = Iterators.getLast(materializedPartition.unfilteredIterator());
                        assertTrue(last.isRangeTombstoneMarker());
                        RangeTombstoneMarker marker = (RangeTombstoneMarker) last;
                        assertFalse(marker.isBoundary());
                        assertTrue(marker.isClose(false));
                    }
                } else {
                    // only the last batch can be smaller than batchSize
                    assertTrue(unfilteredCount <= batchSize + 1);
                }
                unfilteredRowIterators.add(materializedPartition.unfilteredIterator());
            }
            assertFalse(iterator.hasNext());
        }
        // Verify throttled data after merge
        Partition partition = ImmutableBTreePartition.create(UnfilteredRowIterators.merge(unfilteredRowIterators));
        int nowInSec = FBUtilities.nowInSeconds();
        for (int i : live)
            assertTrue("Row " + i + " should be live", partition.getRow(Clustering.make(ByteBufferUtil.bytes((i)))).hasLiveData(nowInSec, cfs.metadata().enforceStrictLiveness()));
        for (int i : dead)
            assertFalse("Row " + i + " shouldn't be live", partition.getRow(Clustering.make(ByteBufferUtil.bytes((i)))).hasLiveData(nowInSec, cfs.metadata().enforceStrictLiveness()));
    }
}
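The assertions capture the throttling contract under test: every batch except the last carries batchSize unfiltereds, plus at most one extra close-bound marker when a range tombstone straddles a batch boundary. The sketch below shows plain fixed-size batching of an iterator, without the tombstone bookkeeping, and is not the real ThrottledUnfilteredIterator:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

class BatchingSketch {
    // Split a source iterator into consecutive batches of at most batchSize items.
    // The real ThrottledUnfilteredIterator additionally closes and re-opens range
    // tombstone markers at batch boundaries, which is why a batch in the test may
    // carry one extra close-bound marker (batchSize + 1).
    static <T> Iterator<List<T>> batches(Iterator<T> source, int batchSize) {
        return new Iterator<>() {
            public boolean hasNext() { return source.hasNext(); }
            public List<T> next() {
                List<T> batch = new ArrayList<>(batchSize);
                while (source.hasNext() && batch.size() < batchSize)
                    batch.add(source.next());
                return batch;
            }
        };
    }

    public static void main(String[] args) {
        Iterator<Integer> source = List.of(1, 2, 3, 4, 5, 6, 7).iterator();
        batches(source, 3).forEachRemaining(System.out::println); // [1, 2, 3] [4, 5, 6] [7]
    }
}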
Use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.
The class StorageProxy, method readRegular:
@SuppressWarnings("resource")
private static PartitionIterator readRegular(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, long queryStartNanoTime) throws UnavailableException, ReadFailureException, ReadTimeoutException {
    long start = nanoTime();
    try {
        PartitionIterator result = fetchRows(group.queries, consistencyLevel, queryStartNanoTime);
        // Note that the only difference between the commands in a group must be the partition key on which
        // they apply.
        boolean enforceStrictLiveness = group.queries.get(0).metadata().enforceStrictLiveness();
        // If we have more than one command, then despite each read command honoring the limit, the total result
        // might not honor it and so we should enforce it.
        if (group.queries.size() > 1)
            result = group.limits().filter(result, group.nowInSec(), group.selectsFullPartition(), enforceStrictLiveness);
        return result;
    } catch (UnavailableException e) {
        readMetrics.unavailables.mark();
        readMetricsForLevel(consistencyLevel).unavailables.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadTimeoutException e) {
        readMetrics.timeouts.mark();
        readMetricsForLevel(consistencyLevel).timeouts.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadAbortException e) {
        recordReadRegularAbort(consistencyLevel, e);
        throw e;
    } catch (ReadFailureException e) {
        readMetrics.failures.mark();
        readMetricsForLevel(consistencyLevel).failures.mark();
        throw e;
    } finally {
        long latency = nanoTime() - start;
        readMetrics.addNano(latency);
        readMetricsForLevel(consistencyLevel).addNano(latency);
        // TODO avoid giving every command the same latency number. Can fix this in CASSANDRA-5329
        for (ReadCommand command : group.queries)
            Keyspace.openAndGetStore(command.metadata()).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS);
    }
}
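The group-level filter exists because each single-partition command enforces the row limit on its own; once several partitions' results are concatenated, the total can exceed it. A tiny, self-contained illustration of that reasoning (plain Java, with an assumed per-query limit of 10 rows):

import java.util.List;
import java.util.stream.Stream;

class GroupLimitSketch {
    public static void main(String[] args) {
        int limit = 10;
        // Two IN-clause partitions, each already trimmed to the limit by its own command.
        List<Integer> partitionA = Stream.iterate(0, i -> i + 1).limit(limit).toList();
        List<Integer> partitionB = Stream.iterate(100, i -> i + 1).limit(limit).toList();

        // Concatenated, the group now holds 20 rows, double the requested limit, which is
        // why readRegular re-applies group.limits().filter(...) when group.queries.size() > 1.
        List<Integer> concatenated = Stream.concat(partitionA.stream(), partitionB.stream()).toList();
        System.out.println(concatenated.size()); // 20

        List<Integer> filtered = concatenated.stream().limit(limit).toList();
        System.out.println(filtered.size());     // 10
    }
}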
Use of org.apache.cassandra.db.ReadCommand in project cassandra by apache.
The class RangeCommandIterator, method query:
/**
 * Queries the provided sub-range.
 *
 * @param replicaPlan the replica plan for the sub-range to query.
 * @param isFirst in the case where multiple queries are sent in parallel, whether this is the first query of
 * that batch or not. The reason it matters is that when paging queries, the command (more specifically its
 * {@code DataLimits}) may carry "state" information, and that state may only be valid for the first query (in
 * that it's the query that "continues" whatever we've previously queried).
 */
private SingleRangeResponse query(ReplicaPlan.ForRangeRead replicaPlan, boolean isFirst) {
    PartitionRangeReadCommand rangeCommand = command.forSubRange(replicaPlan.range(), isFirst);
    // If enabled, request repaired data tracking info from full replicas, but
    // only if there are multiple full replicas to compare results from.
    boolean trackRepairedStatus = DatabaseDescriptor.getRepairedDataTrackingForRangeReadsEnabled() && replicaPlan.contacts().filter(Replica::isFull).size() > 1;
    ReplicaPlan.SharedForRangeRead sharedReplicaPlan = ReplicaPlan.shared(replicaPlan);
    ReadRepair<EndpointsForRange, ReplicaPlan.ForRangeRead> readRepair = ReadRepair.create(command, sharedReplicaPlan, queryStartNanoTime);
    DataResolver<EndpointsForRange, ReplicaPlan.ForRangeRead> resolver = new DataResolver<>(rangeCommand, sharedReplicaPlan, readRepair, queryStartNanoTime, trackRepairedStatus);
    ReadCallback<EndpointsForRange, ReplicaPlan.ForRangeRead> handler = new ReadCallback<>(resolver, rangeCommand, sharedReplicaPlan, queryStartNanoTime);
    if (replicaPlan.contacts().size() == 1 && replicaPlan.contacts().get(0).isSelf()) {
        Stage.READ.execute(new StorageProxy.LocalReadRunnable(rangeCommand, handler, trackRepairedStatus));
    } else {
        for (Replica replica : replicaPlan.contacts()) {
            Tracing.trace("Enqueuing request to {}", replica);
            // Transient replicas are sent a stripped-down copy of the command; full replicas get the original.
            ReadCommand command = replica.isFull() ? rangeCommand : rangeCommand.copyAsTransientQuery(replica);
            Message<ReadCommand> message = command.createMessage(trackRepairedStatus && replica.isFull());
            MessagingService.instance().sendWithCallback(message, replica.endpoint(), handler);
        }
    }
    return new SingleRangeResponse(resolver, handler, readRepair);
}
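The isFirst flag matters because a paged command's DataLimits can carry continuation state, and only the first sub-range query legitimately continues the previous page; replaying that state on every parallel sub-range would skip rows. The sketch below illustrates the idea with a hypothetical PagingState holder rather than the real DataLimits:

import java.util.List;

class SubRangePagingSketch {
    // Hypothetical stand-ins: a paging cursor and a sub-range query that may carry it.
    record PagingState(int rowsAlreadyReturned) { }
    record SubRangeQuery(String range, PagingState state) { }

    // Only the first sub-range "continues" the previous page; the rest start fresh,
    // mirroring command.forSubRange(replicaPlan.range(), isFirst) above.
    static SubRangeQuery forSubRange(String range, boolean isFirst, PagingState state) {
        return new SubRangeQuery(range, isFirst ? state : null);
    }

    public static void main(String[] args) {
        PagingState state = new PagingState(50);
        List<String> ranges = List.of("(a,b]", "(b,c]", "(c,d]");
        for (int i = 0; i < ranges.size(); i++)
            System.out.println(forSubRange(ranges.get(i), i == 0, state));
        // Only "(a,b]" keeps the paging state; "(b,c]" and "(c,d]" get null.
    }
}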