Use of org.apache.cassandra.db.index.SecondaryIndex in project eiger by wlloyd.
In class ColumnFamilyStore, method maybeSwitchMemtable:
/** flush the given memtable and swap in a new one for its CFS, if it hasn't been frozen already. threadsafe. */
public Future<?> maybeSwitchMemtable(Memtable oldMemtable, final boolean writeCommitLog) {
    if (oldMemtable.isFrozen()) {
        logger.debug("memtable is already frozen; another thread must be flushing it");
        return null;
    }
    /*
     * If we can get the writelock, that means no new updates can come in and
     * all ongoing updates to memtables have completed. We can get the tail
     * of the log and use it as the starting position for log replay on recovery.
     *
     * This is why Table.switchLock needs to be global instead of per-Table:
     * we need to schedule discardCompletedSegments calls in the same order as their
     * contexts (commitlog position) were read, even though the flush executor
     * is multithreaded.
     */
    Table.switchLock.writeLock().lock();
    try {
        if (oldMemtable.isFrozen()) {
            logger.debug("memtable is already frozen; another thread must be flushing it");
            return null;
        }
        assert getMemtableThreadSafe() == oldMemtable;
        final ReplayPosition ctx = writeCommitLog ? CommitLog.instance.getContext() : ReplayPosition.NONE;
        logger.debug("flush position is {}", ctx);
        // submit the memtable for any indexed sub-cfses, and our own
        final List<ColumnFamilyStore> icc = new ArrayList<ColumnFamilyStore>();
        // don't assume that this.memtable is dirty; forceFlush can bring us here during index build even if it is not
        for (ColumnFamilyStore cfs : concatWithIndexes()) {
            Memtable mt = cfs.getMemtableThreadSafe();
            if (!mt.isClean() && !mt.isFrozen()) {
                // We need to freeze indexes too because they can be concurrently flushed too (#3547)
                mt.freeze();
                icc.add(cfs);
            }
        }
        final CountDownLatch latch = new CountDownLatch(icc.size());
        for (ColumnFamilyStore cfs : icc) {
            Memtable memtable = cfs.data.switchMemtable();
            logger.info("Enqueuing flush of {}", memtable);
            memtable.flushAndSignal(latch, flushWriter, ctx);
        }
        if (memtableSwitchCount == Integer.MAX_VALUE)
            memtableSwitchCount = 0;
        memtableSwitchCount++;
        // submit the post-flush work to a second executor so discardCompletedSegments runs in flush order,
        // while keeping the wait-for-flush (future.get) out of anything latency-sensitive
        return postFlushExecutor.submit(new WrappedRunnable() {
            public void runMayThrow() throws InterruptedException, IOException {
                latch.await();
                if (!icc.isEmpty()) {
                    for (SecondaryIndex index : indexManager.getIndexesNotBackedByCfs()) {
                        // flush any non-cfs backed indexes
                        logger.info("Flushing SecondaryIndex {}", index);
                        index.forceBlockingFlush();
                    }
                }
                if (writeCommitLog) {
                    // if we're not writing to the commit log, we are replaying the log, so marking
                    // the log header with "you can discard anything written before the context" is not valid
                    CommitLog.instance.discardCompletedSegments(metadata.cfId, ctx);
                }
            }
        });
    } finally {
        Table.switchLock.writeLock().unlock();
    }
}
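Because maybeSwitchMemtable returns null when another thread has already frozen the memtable, a caller has to handle that case before blocking on the future. A minimal sketch of such a caller, assuming only the public signature shown above (the helper name blockingFlush is ours, not the project's):

static void blockingFlush(ColumnFamilyStore cfs) throws ExecutionException, InterruptedException {
    // returns null if the memtable was already frozen by a concurrent flush
    Future<?> flush = cfs.maybeSwitchMemtable(cfs.getMemtableThreadSafe(), true);
    if (flush != null)
        flush.get(); // waits for flushAndSignal's latch and the commitlog segment discard
}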
Use of org.apache.cassandra.db.index.SecondaryIndex in project eiger by wlloyd.
In class KeysSearcher, method highestSelectivityPredicate:
private IndexExpression highestSelectivityPredicate(List<IndexExpression> clause) {
    IndexExpression best = null;
    int bestMeanCount = Integer.MAX_VALUE;
    for (IndexExpression expression : clause) {
        // skip columns belonging to a different index type
        if (!columns.contains(expression.column_name))
            continue;
        SecondaryIndex index = indexManager.getIndexForColumn(expression.column_name);
        if (index == null || (expression.op != IndexOperator.EQ))
            continue;
        int columns = index.getIndexCfs().getMeanColumns();
        if (columns < bestMeanCount) {
            best = expression;
            bestMeanCount = columns;
        }
    }
    return best;
}
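The heuristic is purely statistical: among the indexed EQ clauses, it picks the one whose index rows are narrowest on average, since fewer mean columns per index row suggests a more selective predicate. A hedged illustration with made-up column names and statistics:

// Hypothetical clauses; "state" and "userid" are illustrative, not from the project.
IndexExpression byState = new IndexExpression(ByteBufferUtil.bytes("state"), IndexOperator.EQ, ByteBufferUtil.bytes("CA"));
IndexExpression byUserId = new IndexExpression(ByteBufferUtil.bytes("userid"), IndexOperator.EQ, ByteBufferUtil.bytes("42"));
// If the userid index averages, say, 2 entries per index row while the state index
// averages 50,000, highestSelectivityPredicate(Arrays.asList(byState, byUserId))
// returns byUserId, and the index scan is driven off that clause.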
Use of org.apache.cassandra.db.index.SecondaryIndex in project eiger by wlloyd.
In class CleanupTest, method testCleanupWithIndexes:
@Test
public void testCleanupWithIndexes() throws IOException, ExecutionException, InterruptedException {
    Table table = Table.open(TABLE1);
    ColumnFamilyStore cfs = table.getColumnFamilyStore(CF1);
    assertEquals(cfs.indexManager.getIndexedColumns().iterator().next(), COLUMN);
    List<Row> rows;
    // insert data and verify we get it back w/ range query
    fillCF(cfs, LOOPS);
    rows = Util.getRangeSlice(cfs);
    assertEquals(LOOPS, rows.size());
    SecondaryIndex index = cfs.indexManager.getIndexForColumn(COLUMN);
    long start = System.currentTimeMillis();
    while (!index.isIndexBuilt(COLUMN) && System.currentTimeMillis() < start + 10000)
        Thread.sleep(10);
    // verify we get it back w/ index query too
    IndexExpression expr = new IndexExpression(COLUMN, IndexOperator.EQ, VALUE);
    List<IndexExpression> clause = Arrays.asList(expr);
    IFilter filter = new IdentityQueryFilter();
    IPartitioner p = StorageService.getPartitioner();
    Range<RowPosition> range = Util.range("", "");
    rows = table.getColumnFamilyStore(CF1).search(clause, range, Integer.MAX_VALUE, filter);
    assertEquals(LOOPS, rows.size());
    // we don't allow cleanup when the local host has no range, to avoid wiping out all data when a node has not joined the ring.
    // So to make sure cleanup erases everything here, we give the localhost the tiniest possible range.
    TokenMetadata tmd = StorageService.instance.getTokenMetadata();
    byte[] tk1 = new byte[1], tk2 = new byte[1];
    tk1[0] = 2;
    tk2[0] = 1;
    tmd.updateNormalToken(new BytesToken(tk1), InetAddress.getByName("127.0.0.1"));
    tmd.updateNormalToken(new BytesToken(tk2), InetAddress.getByName("127.0.0.2"));
    CompactionManager.instance.performCleanup(cfs, new NodeId.OneShotRenewer());
    // row data should be gone
    rows = Util.getRangeSlice(cfs);
    assertEquals(0, rows.size());
    // not only should it be gone but there should be no data on disk, not even tombstones
    assert cfs.getSSTables().isEmpty();
    // 2ary indexes should return no results, too (although tombstones won't be gone until compacted)
    rows = cfs.search(clause, range, Integer.MAX_VALUE, filter);
    assertEquals(0, rows.size());
}
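The busy-wait on isIndexBuilt above is a pattern other index tests need too; extracted as a helper it might look like this (waitForIndexBuild is our name, not the project's):

static boolean waitForIndexBuild(SecondaryIndex index, ByteBuffer column, long timeoutMillis) throws InterruptedException {
    long start = System.currentTimeMillis();
    // poll every 10ms until the index reports itself built or the timeout elapses
    while (!index.isIndexBuilt(column) && System.currentTimeMillis() < start + timeoutMillis)
        Thread.sleep(10);
    return index.isIndexBuilt(column);
}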
Use of org.apache.cassandra.db.index.SecondaryIndex in project eiger by wlloyd.
In class ColumnFamilyStoreTest, method testIndexCreate:
@Test
public void testIndexCreate() throws IOException, ConfigurationException, InterruptedException, ExecutionException {
    Table table = Table.open("Keyspace1");
    // create a row and update the birthdate value, test that the index query fetches the new version
    RowMutation rm;
    rm = new RowMutation("Keyspace1", ByteBufferUtil.bytes("k1"));
    rm.add(new QueryPath("Indexed2", null, ByteBufferUtil.bytes("birthdate")), ByteBufferUtil.bytes(1L), 1);
    rm.apply();
    ColumnFamilyStore cfs = table.getColumnFamilyStore("Indexed2");
    ColumnDefinition old = cfs.metadata.getColumn_metadata().get(ByteBufferUtil.bytes("birthdate"));
    ColumnDefinition cd = new ColumnDefinition(old.name, old.getValidator(), IndexType.KEYS, null, "birthdate_index");
    Future<?> future = cfs.indexManager.addIndexedColumn(cd);
    future.get();
    // we had a bug (CASSANDRA-2244) where the index would get created but not flushed -- check for that
    assert cfs.indexManager.getIndexForColumn(cd.name).getIndexCfs().getSSTables().size() > 0;
    queryBirthdate(table);
    // validate that drop clears it out & rebuild works (CASSANDRA-2320)
    SecondaryIndex indexedCfs = cfs.indexManager.getIndexForColumn(ByteBufferUtil.bytes("birthdate"));
    cfs.indexManager.removeIndexedColumn(ByteBufferUtil.bytes("birthdate"));
    assert !indexedCfs.isIndexBuilt(ByteBufferUtil.bytes("birthdate"));
    // rebuild & re-query
    future = cfs.indexManager.addIndexedColumn(cd);
    future.get();
    queryBirthdate(table);
}
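The queryBirthdate helper is not shown on this page. Judging from the index query in the cleanup test above, a sketch of what it plausibly does (our reconstruction, labeled hypothetical, not the project's code):

// Hypothetical reconstruction of the elided helper, mirroring the search pattern in testCleanupWithIndexes.
static void queryBirthdate(Table table) {
    IndexExpression expr = new IndexExpression(ByteBufferUtil.bytes("birthdate"), IndexOperator.EQ, ByteBufferUtil.bytes(1L));
    List<IndexExpression> clause = Arrays.asList(expr);
    IFilter filter = new IdentityQueryFilter();
    Range<RowPosition> range = Util.range("", "");
    List<Row> rows = table.getColumnFamilyStore("Indexed2").search(clause, range, Integer.MAX_VALUE, filter);
    assert rows.size() == 1 : rows.size(); // the single row written above with birthdate = 1L
}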
Use of org.apache.cassandra.db.index.SecondaryIndex in project eiger by wlloyd.
In class KeysSearcher, method getIndexedIterator:
public ColumnFamilyStore.AbstractScanIterator getIndexedIterator(final AbstractBounds<RowPosition> range, final ExtendedFilter filter) {
    // Start with the most-restrictive indexed clause, then apply remaining clauses
    // to each row matching that clause.
    // TODO: allow merge join instead of just one index + loop
    final IndexExpression primary = highestSelectivityPredicate(filter.getClause());
    final SecondaryIndex index = indexManager.getIndexForColumn(primary.column_name);
    if (logger.isDebugEnabled())
        logger.debug("Primary scan clause is " + baseCfs.getComparator().getString(primary.column_name));
    assert index != null;
    final DecoratedKey indexKey = indexManager.getIndexKeyFor(primary.column_name, primary.value);
    /*
     * XXX: If the range requested is a token range, we'll have to start at the beginning (and stop at the end) of
     * the indexed row, unfortunately (which will be inefficient), because we have no way to intuit the smallest
     * possible key having a given token. A fix would be to actually store the token along with the key in the
     * indexed row.
     */
    final ByteBuffer startKey = range.left instanceof DecoratedKey ? ((DecoratedKey) range.left).key : ByteBufferUtil.EMPTY_BYTE_BUFFER;
    final ByteBuffer endKey = range.right instanceof DecoratedKey ? ((DecoratedKey) range.right).key : ByteBufferUtil.EMPTY_BYTE_BUFFER;
    return new ColumnFamilyStore.AbstractScanIterator() {
        private ByteBuffer lastSeenKey = startKey;
        private Iterator<IColumn> indexColumns;
        private final QueryPath path = new QueryPath(baseCfs.columnFamily);
        private int columnsRead = Integer.MAX_VALUE;

        protected Row computeNext() {
            int meanColumns = Math.max(index.getIndexCfs().getMeanColumns(), 1);
            // We shouldn't fetch only 1 row, as that would make paging buggy when the first row doesn't satisfy all clauses
            int rowsPerQuery = Math.max(Math.min(filter.maxRows(), filter.maxColumns() / meanColumns), 2);
            while (true) {
                if (indexColumns == null || !indexColumns.hasNext()) {
                    if (columnsRead < rowsPerQuery) {
                        logger.debug("Read only {} (< {}) last page through, must be done", columnsRead, rowsPerQuery);
                        return endOfData();
                    }
                    if (logger.isDebugEnabled())
                        logger.debug(String.format("Scanning index %s starting with %s", expressionString(primary), index.getBaseCfs().metadata.getKeyValidator().getString(startKey)));
                    QueryFilter indexFilter = QueryFilter.getSliceFilter(indexKey, new QueryPath(index.getIndexCfs().getColumnFamilyName()), lastSeenKey, endKey, false, rowsPerQuery);
                    ColumnFamily indexRow = index.getIndexCfs().getColumnFamily(indexFilter);
                    logger.debug("fetched {}", indexRow);
                    if (indexRow == null) {
                        logger.debug("no data, all done");
                        return endOfData();
                    }
                    Collection<IColumn> sortedColumns = indexRow.getSortedColumns();
                    columnsRead = sortedColumns.size();
                    indexColumns = sortedColumns.iterator();
                    IColumn firstColumn = sortedColumns.iterator().next();
                    // Paging is racy, so it is possible the first column of a page is not the last seen one.
                    if (lastSeenKey != startKey && lastSeenKey.equals(firstColumn.name())) {
                        // skip the row we already saw w/ the last page of results
                        indexColumns.next();
                        columnsRead--;
                        logger.debug("Skipping {}", baseCfs.getComparator().getString(firstColumn.name()));
                    } else if (range instanceof Range && indexColumns.hasNext() && firstColumn.equals(startKey)) {
                        // skip key excluded by range
                        indexColumns.next();
                        columnsRead--;
                        logger.debug("Skipping first key as range excludes it");
                    }
                }
                while (indexColumns.hasNext()) {
                    IColumn column = indexColumns.next();
                    lastSeenKey = column.name();
                    if (column.isMarkedForDelete()) {
                        logger.debug("skipping {}", column.name());
                        continue;
                    }
                    DecoratedKey dk = baseCfs.partitioner.decorateKey(lastSeenKey);
                    if (!range.right.isMinimum(baseCfs.partitioner) && range.right.compareTo(dk) < 0) {
                        logger.debug("Reached end of assigned scan range");
                        return endOfData();
                    }
                    if (!range.contains(dk)) {
                        logger.debug("Skipping entry {} outside of assigned scan range", dk.token);
                        continue;
                    }
                    logger.debug("Returning index hit for {}", dk);
                    ColumnFamily data = baseCfs.getColumnFamily(new QueryFilter(dk, path, filter.initialFilter()));
                    // While the column family we'll get in the end should contain the primary clause column,
                    // the initialFilter may not have found it, so data can be null
                    if (data == null)
                        data = ColumnFamily.create(baseCfs.metadata);
                    return new Row(dk, data);
                }
            }
        }

        public void close() throws IOException {
        }
    };
}
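The iterator only guarantees the primary clause; whoever consumes it still has to verify the remaining expressions against each returned row. A hedged sketch of such a driver loop (satisfiesRemainingClauses is a hypothetical recheck helper; the real search-side code is not shown on this page):

private List<Row> collectIndexHits(AbstractBounds<RowPosition> range, ExtendedFilter filter) throws IOException {
    List<Row> rows = new ArrayList<Row>();
    ColumnFamilyStore.AbstractScanIterator iter = getIndexedIterator(range, filter);
    try {
        while (iter.hasNext()) {
            Row row = iter.next();
            // the index scan only verified the primary clause; recheck the rest
            if (satisfiesRemainingClauses(row, filter)) // hypothetical recheck helper
                rows.add(row);
            if (rows.size() >= filter.maxRows())
                break;
        }
    } finally {
        iter.close();
    }
    return rows;
}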