Use of org.apache.cassandra.index.internal.IndexEntry in project cassandra by apache.
The class CompositesSearcher, method filterStaleEntries.
/**
 * Filters the base-table data read for one partition against the index entries that led to it,
 * dropping data whose index entry is stale and deleting the stale index entries.
 * <p>
 * We assume all rows in dataIter belong to the same partition.
 *
 * @param dataIter   the base-table data read for the partition the entries point at
 * @param indexValue the indexed value, handed to {@code index.isStale} to decide staleness
 * @param entries    the index entries that produced {@code dataIter}; for a non-static index they
 *                   are expected to be in clustering order (asserted in {@code findEntry})
 * @param writeOp    handed through to {@code deleteAllEntries}
 * @param nowInSec   handed to {@code index.isStale} and {@code deleteAllEntries}
 * @return for a static column index, either {@code dataIter} itself or an iterator with no rows
 *         if the entry is stale; otherwise {@code dataIter} wrapped in a transformation that
 *         drops stale rows and deletes the stale entries when the partition is closed
 */
private UnfilteredRowIterator filterStaleEntries(UnfilteredRowIterator dataIter, final ByteBuffer indexValue, final List<IndexEntry> entries, final OpOrder.Group writeOp, final int nowInSec) {
    // collect stale index entries and delete them when we close this iterator
    final List<IndexEntry> staleEntries = new ArrayList<>();

    // If there is a partition level deletion in the base table, we need to filter
    // any index entries which would be shadowed by it.
    if (!dataIter.partitionLevelDeletion().isLive()) {
        DeletionTime deletion = dataIter.partitionLevelDeletion();
        entries.forEach(e -> {
            if (deletion.deletes(e.timestamp))
                staleEntries.add(e);
        });
    }

    UnfilteredRowIterator iteratorToReturn = null;
    if (isStaticColumn()) {
        if (entries.size() != 1)
            throw new AssertionError("A partition should have at most one index within a static column index");

        iteratorToReturn = dataIter;
        if (index.isStale(dataIter.staticRow(), indexValue, nowInSec)) {
            // The entry is stale, we return no rows in this partition.
            staleEntries.addAll(entries);
            iteratorToReturn = UnfilteredRowIterators.noRowsIterator(dataIter.metadata(), dataIter.partitionKey(), Rows.EMPTY_STATIC_ROW, dataIter.partitionLevelDeletion(), dataIter.isReverseOrder());
            // The replacement iterator was built from values already read out of dataIter and does
            // not wrap it, so the caller will never see (or close) dataIter: close it here.
            dataIter.close();
        }
        // Nothing is deferred in the static case: stale entries are deleted right away.
        deleteAllEntries(staleEntries, writeOp, nowInSec);
    } else {
        ClusteringComparator comparator = dataIter.metadata().comparator;

        class Transform extends Transformation {
            // Position of the next not-yet-consumed element of 'entries'.
            private int entriesIdx;

            @Override
            public Row applyToRow(Row row) {
                // findEntry must run for every row, even live ones: it advances entriesIdx and
                // records the entries it skips over as stale.
                IndexEntry entry = findEntry(row.clustering());
                if (!index.isStale(row, indexValue, nowInSec))
                    return row;

                staleEntries.add(entry);
                return null;
            }

            private IndexEntry findEntry(Clustering clustering) {
                assert entriesIdx < entries.size();
                while (entriesIdx < entries.size()) {
                    IndexEntry entry = entries.get(entriesIdx++);
                    // The entries are in clustering order. So that the requested entry should be the
                    // next entry, the one at 'entriesIdx'. However, we can have stale entries, entries
                    // that have no corresponding row in the base table typically because of a range
                    // tombstone or partition level deletion. Delete such stale entries.
                    int cmp = comparator.compare(entry.indexedEntryClustering, clustering);
                    // a positive result would mean entries are not in clustering order, which shouldn't happen
                    assert cmp <= 0;
                    if (cmp == 0)
                        return entry;
                    else
                        staleEntries.add(entry);
                }
                // entries correspond to the rows we've queried, so we shouldn't have a row that has no corresponding entry.
                throw new AssertionError();
            }

            @Override
            public void onPartitionClose() {
                deleteAllEntries(staleEntries, writeOp, nowInSec);
            }
        }
        iteratorToReturn = Transformation.apply(dataIter, new Transform());
    }

    return iteratorToReturn;
}
Use of org.apache.cassandra.index.internal.IndexEntry in project cassandra by apache.
The class CompositesSearcher, method queryDataFromIndex.
/**
 * Turns the hits read from the index into an iterator over the corresponding base-table
 * partitions, with stale hits filtered out by {@code filterStaleEntries}.
 *
 * @param indexKey            the key of the index partition the hits were read from; its raw key
 *                            is passed to {@code filterStaleEntries} as the indexed value
 * @param indexHits           the rows read from the index; closed when the returned iterator is closed
 * @param command             the read command whose metadata, column filter, row filter and
 *                            {@code nowInSec} are used for the base-table reads
 * @param executionController the controller used to run the base-table reads
 * @return an iterator yielding one non-empty row iterator per base-table partition
 */
protected UnfilteredPartitionIterator queryDataFromIndex(final DecoratedKey indexKey, final RowIterator indexHits, final ReadCommand command, final ReadExecutionController executionController) {
    assert indexHits.staticRow() == Rows.EMPTY_STATIC_ROW;

    return new UnfilteredPartitionIterator() {
        // Index hit that has been decoded but not yet folded into a base-table read.
        private IndexEntry pendingEntry;
        // Partition produced by prepareNext() and not yet handed out by next().
        private UnfilteredRowIterator pendingPartition;

        public TableMetadata metadata() {
            return command.metadata();
        }

        public boolean hasNext() {
            return prepareNext();
        }

        public UnfilteredRowIterator next() {
            if (pendingPartition == null)
                prepareNext();

            UnfilteredRowIterator result = pendingPartition;
            pendingPartition = null;
            return result;
        }

        // Decodes the next index hit, or returns null once the index hits are exhausted.
        private IndexEntry decodeNextHit() {
            return indexHits.hasNext() ? index.decodeEntry(indexKey, indexHits.next()) : null;
        }

        private boolean prepareNext() {
            // A partition is already buffered: nothing to do until next() consumes it.
            if (pendingPartition != null)
                return true;

            for (;;) {
                if (pendingEntry == null) {
                    if (!indexHits.hasNext())
                        return false;
                    pendingEntry = index.decodeEntry(indexKey, indexHits.next());
                }

                DecoratedKey partitionKey = index.baseCfs.decorateKey(pendingEntry.indexedKey);
                List<IndexEntry> entries = new ArrayList<>();
                SinglePartitionReadCommand dataCmd;

                if (!isStaticColumn()) {
                    // Gather all index hits belonging to the same partition and query the data for those hits.
                    // TODO: it's much more efficient to do 1 read for all hits to the same partition than doing
                    // 1 read per index hit. However, this basically means materializing all hits for a partition
                    // in memory so we should consider adding some paging mechanism. However, index hits should
                    // be relatively small so it's much better than the previous code that was materializing all
                    // *data* for a given partition.
                    BTreeSet.Builder<Clustering> clusterings = BTreeSet.builder(index.baseCfs.getComparator());
                    while (pendingEntry != null && partitionKey.getKey().equals(pendingEntry.indexedKey)) {
                        // We've queried a slice of the index, but some hits may not match some of the clustering column constraints
                        if (isMatchingEntry(partitionKey, pendingEntry, command)) {
                            clusterings.add(pendingEntry.indexedEntryClustering);
                            entries.add(pendingEntry);
                        }
                        pendingEntry = decodeNextHit();
                    }

                    // Because we've eliminated entries that don't match the clustering columns, it's possible we added nothing
                    if (clusterings.isEmpty())
                        continue;

                    // Query the gathered index hits. We still need to filter stale hits from the resulting query.
                    ClusteringIndexNamesFilter filter = new ClusteringIndexNamesFilter(clusterings.build(), false);
                    dataCmd = SinglePartitionReadCommand.create(index.baseCfs.metadata(), command.nowInSec(), command.columnFilter(), command.rowFilter(), DataLimits.NONE, partitionKey, filter);
                } else {
                    // If the index is on a static column, we just need to do a full read on the partition.
                    // Note that we want to re-use the command.columnFilter() in case of future change.
                    dataCmd = SinglePartitionReadCommand.create(index.baseCfs.metadata(), command.nowInSec(), command.columnFilter(), RowFilter.NONE, DataLimits.NONE, partitionKey, new ClusteringIndexSliceFilter(Slices.ALL, false));
                    entries.add(pendingEntry);
                    pendingEntry = decodeNextHit();
                }

                // We close right away if empty, and if it's assigned to the pending partition it will be
                // closed either by the caller of next(), or through closing this iterator if that comes first.
                @SuppressWarnings("resource")
                UnfilteredRowIterator dataIter = filterStaleEntries(dataCmd.queryMemtableAndDisk(index.baseCfs, executionController),
                                                                    indexKey.getKey(),
                                                                    entries,
                                                                    executionController.writeOpOrderGroup(),
                                                                    command.nowInSec());
                if (!dataIter.isEmpty()) {
                    pendingPartition = dataIter;
                    return true;
                }

                // Nothing survived the filtering: release the iterator and move on to the next hits.
                dataIter.close();
            }
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }

        public void close() {
            indexHits.close();
            if (pendingPartition != null)
                pendingPartition.close();
        }
    };
}
Aggregations