use of org.apache.cassandra.db.filter.QueryPath in project eiger by wlloyd.
the class HintedHandOffManager method deliverHintsToEndpointInternal.
// Replays the hints stored for the given endpoint: looks up the hint row keyed by the
// endpoint token, pages through its columns, deserializes and sends each stored mutation,
// deletes each hint once it has been sent, and finally compacts the hint store if any
// rows were replayed.
// NOTE(review): this listing appears to have lost lines during extraction (the right-hand
// side of the hintStore assignment, the receivers of several logging calls, some statement
// bodies and all closing braces) - confirm every detail against the original source file.
private void deliverHintsToEndpointInternal(InetAddress endpoint) throws IOException, DigestMismatchException, InvalidRequestException, InterruptedException {
ColumnFamilyStore hintStore =;
if (hintStore.isEmpty())
// nothing to do, don't confuse users by logging a no-op handoff
logger_.debug("Checking remote({}) schema before delivering hints", endpoint);
// waited receives the value returned by waitForSchemaAgreement; it is compared against 0
// below to decide whether an extra staggering sleep is needed.
int waited;
try {
waited = waitForSchemaAgreement(endpoint);
} catch (TimeoutException e) {
// (if we had to wait, then gossiper randomness took care of that for us already.)
if (waited == 0) {
// use a 'rounded' sleep interval because of a strange bug with windows: CASSANDRA-3375
// nextInt(2000) * 30 gives a multiple of 30 (assuming java.util.Random semantics,
// somewhere in 0..59970); the debug message below logs the value as milliseconds.
int sleep = FBUtilities.threadLocalRandom().nextInt(2000) * 30;
logger_.debug("Sleeping {}ms to stagger hint delivery", sleep);
// Re-check liveness right before delivery; the message indicates delivery is aborted
// when the endpoint is no longer alive.
if (!FailureDetector.instance.isAlive(endpoint)) {"Endpoint {} died before hint delivery, aborting", endpoint);
// 1. Get the key of the endpoint we need to handoff
// 2. For each column, deserialize the mutation and send it to the endpoint
// 3. Delete the subcolumn if the write was successful
// 4. Force a flush
// 5. Do major compaction to clean up all deletes etc.
// find the hints for the node using its token.
Token<?> token = StorageService.instance.getTokenMetadata().getToken(endpoint);"Started hinted handoff for token: {} with IP: {}", token, endpoint);
// The serialized token is the row key of the hint row; it is also passed to deleteHint.
ByteBuffer tokenBytes = StorageService.getPartitioner().getTokenFactory().toByteArray(token);
DecoratedKey<?> epkey = StorageService.getPartitioner().decorateKey(tokenBytes);
// rowsReplayed gates the final compaction and is reported in the closing log message.
// NOTE(review): no increment of rowsReplayed is visible in this listing - confirm.
int rowsReplayed = 0;
// startColumn is the paging cursor: each slice query starts from the last hint name seen.
ByteBuffer startColumn = ByteBufferUtil.EMPTY_BYTE_BUFFER;
int pageSize = PAGE_SIZE;
// read less columns (mutations) per page if they are very large
// The > 0 check keeps the division by getMeanColumns() below from dividing by zero.
if (hintStore.getMeanColumns() > 0) {
int averageColumnSize = (int) (hintStore.getMeanRowSize() / hintStore.getMeanColumns());
// NOTE(review): averageColumnSize is truncated to int and can be 0 when the mean row
// size is smaller than the mean column count; the division on the next line would then
// throw ArithmeticException - consider guarding against averageColumnSize <= 0.
pageSize = Math.min(PAGE_SIZE, DatabaseDescriptor.getInMemoryCompactionLimit() / averageColumnSize);
// page size of 1 does not allow actual paging b/c of >= behavior on startColumn
pageSize = Math.max(2, pageSize);
logger_.debug("average hinted-row column size is {}; using pageSize of {}", averageColumnSize, pageSize);
// Outer loop: one iteration per page of hints. The delivery label lets the timeout
// handler below stop all further paging with a labeled break.
delivery: while (true) {
QueryFilter filter = QueryFilter.getSliceFilter(epkey, new QueryPath(HINTS_CF), startColumn, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, pageSize);
ColumnFamily hintsPage = ColumnFamilyStore.removeDeleted(hintStore.getColumnFamily(filter), Integer.MAX_VALUE);
if (pagingFinished(hintsPage, startColumn))
// Inner loop: one iteration per hint in the page. The page label lets the
// incompatible-hint branch skip straight to the next hint with a labeled continue.
page: for (IColumn hint : hintsPage.getSortedColumns()) {
startColumn =;
for (IColumn subColumn : hint.getSubColumns()) {
// both 0.8 and 1.0 column names are UTF8 strings, so this check is safe
if (ByteBufferUtil.string( {
logger_.debug("0.8-style hint found. This should have been taken care of by purgeIncompatibleHints");
deleteHint(tokenBytes,, hint.maxTimestamp());
continue page;
// Each hint carries four named subcolumns; all four are asserted present below
// (the asserts are only evaluated when the JVM runs with assertions enabled).
IColumn versionColumn = hint.getSubColumn(ByteBufferUtil.bytes("version"));
IColumn tableColumn = hint.getSubColumn(ByteBufferUtil.bytes("table"));
IColumn keyColumn = hint.getSubColumn(ByteBufferUtil.bytes("key"));
IColumn mutationColumn = hint.getSubColumn(ByteBufferUtil.bytes("mutation"));
assert versionColumn != null;
assert tableColumn != null;
assert keyColumn != null;
assert mutationColumn != null;
// Deserialize the stored mutation using the serialization version recorded in the hint.
DataInputStream in = new DataInputStream(ByteBufferUtil.inputStream(mutationColumn.value()));
RowMutation rm = RowMutation.serializer().deserialize(in, ByteBufferUtil.toInt(versionColumn.value()));
try {
// deleteHint is only reached when sendMutation returns without throwing.
sendMutation(endpoint, rm);
deleteHint(tokenBytes,, hint.maxTimestamp());
} catch (TimeoutException e) {"Timed out replaying hints to %s; aborting further deliveries", endpoint));
break delivery;
if (rowsReplayed > 0) {
try {
// get() waits on the result of the submitted maximal compaction; any failure is
// rethrown unchecked.
CompactionManager.instance.submitMaximal(hintStore, Integer.MAX_VALUE).get();
} catch (Exception e) {
throw new RuntimeException(e);
}"Finished hinted handoff of %s rows to endpoint %s", rowsReplayed, endpoint));
use of org.apache.cassandra.db.filter.QueryPath in project eiger by wlloyd.
the class RowMutation method hintFor.
/**
* Returns a mutation representing a hint to be sent to <code>address</code>
* as soon as it becomes available.
* The format is the following:
* HintsColumnFamily: { // cf
* <dest token>: { // key
* <uuid>: { // super-column
* table: <table> // columns
* key: <key>
* mutation: <mutation>
* version: <version>
* }
* }
* }
*/
// Builds a RowMutation against the system table, keyed by the given token, that stores
// the supplied mutation as a hint. Four columns (mutation, version, table, key) are added
// under a freshly generated time-UUID in HintedHandOffManager.HINTS_CF.
// NOTE(review): the closing brace of this method is missing from this listing.
public static RowMutation hintFor(RowMutation mutation, ByteBuffer token) throws IOException {
RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, token);
// One time-UUID per hint; it is the middle component of every QueryPath below.
ByteBuffer hintId = ByteBuffer.wrap(UUIDGen.getTimeUUIDBytes());
// determine the TTL for the RowMutation
// this is set at the smallest GCGraceSeconds for any of the CFs in the RM
// this ensures that deletes aren't "undone" by delivery of an old hint
// If the mutation has no column families the loop never runs and ttl stays Integer.MAX_VALUE.
int ttl = Integer.MAX_VALUE;
for (ColumnFamily cf : mutation.getColumnFamilies()) ttl = Math.min(ttl, cf.metadata().getGcGraceSeconds());
// serialized RowMutation
// NOTE(review): every rm.add below calls System.currentTimeMillis() separately, so the
// timestamps written for the four columns can differ; the meaning of the two timestamp
// arguments and the trailing null is defined by RowMutation.add, which is not visible here.
QueryPath path = new QueryPath(HintedHandOffManager.HINTS_CF, hintId, ByteBufferUtil.bytes("mutation"));
rm.add(path, ByteBuffer.wrap(mutation.getSerializedBuffer(MessagingService.version_)), System.currentTimeMillis(), System.currentTimeMillis(), ttl, null);
// serialization version
path = new QueryPath(HintedHandOffManager.HINTS_CF, hintId, ByteBufferUtil.bytes("version"));
rm.add(path, ByteBufferUtil.bytes(MessagingService.version_), System.currentTimeMillis(), System.currentTimeMillis(), ttl, null);
// table
path = new QueryPath(HintedHandOffManager.HINTS_CF, hintId, ByteBufferUtil.bytes("table"));
rm.add(path, ByteBufferUtil.bytes(mutation.getTable()), System.currentTimeMillis(), System.currentTimeMillis(), ttl, null);
// key
path = new QueryPath(HintedHandOffManager.HINTS_CF, hintId, ByteBufferUtil.bytes("key"));
rm.add(path, mutation.key(), System.currentTimeMillis(), System.currentTimeMillis(), ttl, null);
return rm;
use of org.apache.cassandra.db.filter.QueryPath in project eiger by wlloyd.
the class SystemTable method removeToken.
/**
* Remove the stored token that is being used by another node.
*/
// Builds a deletion for the column named by the serialized form of the given token, in
// STATUS_CF under RING_KEY of the system table, timestamped with LamportClock.getVersion().
// The method is static synchronized, so calls are serialized on the enclosing class monitor.
public static synchronized void removeToken(Token token) {
IPartitioner p = StorageService.getPartitioner();
RowMutation rm = new RowMutation(Table.SYSTEM_TABLE, RING_KEY);
// The null second argument leaves the super-column component of the path unset.
rm.delete(new QueryPath(STATUS_CF, null, p.getTokenFactory().toByteArray(token)), LamportClock.getVersion());
// NOTE(review): the try body is empty in this listing; the statement that applies rm
// appears to have been lost during extraction - confirm against the original source.
try {
} catch (IOException e) {
// Checked IOException is rethrown as the unchecked IOError, preserving the cause.
throw new IOError(e);
use of org.apache.cassandra.db.filter.QueryPath in project eiger by wlloyd.
the class SystemTable method isBootstrapped.
// Reads the BOOTSTRAP column from STATUS_CF for BOOTSTRAP_KEY and reports whether its
// first value byte equals 1. Returns false when the lookup yields no column family.
// NOTE(review): the initializer of table is missing from this listing, as are closing braces.
public static boolean isBootstrapped() {
Table table =;
QueryFilter filter = QueryFilter.getNamesFilter(decorate(BOOTSTRAP_KEY), new QueryPath(STATUS_CF), BOOTSTRAP);
ColumnFamily cf = table.getColumnFamilyStore(STATUS_CF).getColumnFamily(filter);
if (cf == null)
return false;
// NOTE(review): c is dereferenced without a null check; confirm that getColumn cannot
// return null for a non-null cf produced by the names filter above.
IColumn c = cf.getColumn(BOOTSTRAP);
// Reads the byte at the current position of the value buffer via an indexed get
// (assuming value() returns a java.nio.ByteBuffer, this does not advance the position).
return c.value().get(c.value().position()) == 1;
use of org.apache.cassandra.db.filter.QueryPath in project eiger by wlloyd.
the class SystemTable method getCurrentLocalNodeId.
/**
* Read the current local node id from the system table, or return null if no
* such node id is recorded.
*/
// Looks up the row for CURRENT_LOCAL_NODE_ID_KEY in NODE_ID_CF and, if it holds a live
// column, returns that column name wrapped as a NodeId; returns null otherwise.
// NOTE(review): the initializer of table and the closing braces are missing from this listing.
public static NodeId getCurrentLocalNodeId() {
ByteBuffer id = null;
Table table =;
QueryFilter filter = QueryFilter.getIdentityFilter(decorate(CURRENT_LOCAL_NODE_ID_KEY), new QueryPath(NODE_ID_CF));
ColumnFamily cf = table.getColumnFamilyStore(NODE_ID_CF).getColumnFamily(filter);
if (cf != null) {
// Even though gc_grace==0 on System table, we can have a race where we get back tombstones (see CASSANDRA-2824)
cf = ColumnFamilyStore.removeDeleted(cf, 0);
// NOTE(review): cf is dereferenced right after removeDeleted; confirm that removeDeleted
// cannot return null here.
// At most one column is expected after tombstones are removed (checked only when
// assertions are enabled).
assert cf.getColumnCount() <= 1;
if (cf.getColumnCount() > 0)
// The node id is stored as the column name, not the column value.
id = cf.iterator().next().name();
if (id != null) {
return NodeId.wrap(id);
} else {
return null;