use of org.apache.cassandra.utils.concurrent.Future in project cassandra by apache.
the class HintsService method excise.
/**
* Cleans up hints-related state after a node with id = hostId left.
*
* Dispatcher can not stop itself (isHostAlive() can not start returning false for the leaving host because this
* method is called by the same thread as gossip, which blocks gossip), so we can't simply wait for
* completion.
*
* We should also flush the buffer if there are any hints for the node there, and close the writer (if any),
* so that we don't leave any hint files lying around.
*
* Once that is done, we can simply delete all hint files and remove the host id from the catalog.
*
* The worst that can happen if we don't get everything right is a hints file (or two) remaining undeleted.
*
* @param hostId id of the node being excised
*/
public void excise(UUID hostId) {
    HintsStore store = catalog.getNullable(hostId);
    if (store == null)
        return;

    // flush the buffer and then close the writer for the excised host id, to make sure that no new files will appear
    // for this host id after we are done
    Future flushFuture = writeExecutor.flushBufferPool(bufferPool, Collections.singleton(store));
    Future closeFuture = writeExecutor.closeWriter(store);
    try {
        flushFuture.get();
        closeFuture.get();
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        throw new RuntimeException(e);
    }

    // interrupt the current dispatch session to end (if any), so that the currently dispatched file gets removed
    dispatchExecutor.interruptDispatch(store.hostId);

    // delete all the hints files and remove the HintsStore instance from the map in the catalog
    catalog.exciseStore(hostId);
}
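For contrast, the two blocking get() calls plus the explicit exception handling above could also be collapsed into a single wait with FBUtilities.waitOnFutures, the same helper CommitLogReplayer uses further down this page. This is only a sketch reusing the writeExecutor, bufferPool and store variables from excise(), and it assumes waitOnFutures' own unchecked wrapping of InterruptedException/ExecutionException is an acceptable substitute for the explicit try/catch.

// Sketch only: one combined wait on both hint futures instead of two get() calls.
// Assumes FBUtilities.waitOnFutures rethrows interrupt/execution failures as unchecked exceptions.
Future<?> flushFuture = writeExecutor.flushBufferPool(bufferPool, Collections.singleton(store));
Future<?> closeFuture = writeExecutor.closeWriter(store);
FBUtilities.waitOnFutures(Arrays.asList(flushFuture, closeFuture));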
use of org.apache.cassandra.utils.concurrent.Future in project cassandra by apache.
the class CommitLogReplayer method blockForWrites.
/**
* Flushes all keyspaces associated with this replayer in parallel, blocking until their flushes are complete.
* @return the number of mutations replayed
*/
public int blockForWrites() {
    for (Map.Entry<TableId, AtomicInteger> entry : commitLogReader.getInvalidMutations())
        logger.warn("Skipped {} mutations from unknown (probably removed) CF with id {}", entry.getValue(), entry.getKey());

    // wait for all the writes to finish on the mutation stage
    FBUtilities.waitOnFutures(futures);
    logger.trace("Finished waiting on mutations from recovery");

    // flush replayed keyspaces
    futures.clear();
    boolean flushingSystem = false;
    // note: this new local list (for flush futures) shadows the futures field awaited above
    List<Future<?>> futures = new ArrayList<Future<?>>();
    for (Keyspace keyspace : keyspacesReplayed) {
        if (keyspace.getName().equals(SchemaConstants.SYSTEM_KEYSPACE_NAME))
            flushingSystem = true;
        futures.addAll(keyspace.flush());
    }

    // also flush batchlog in case of any MV updates
    if (!flushingSystem)
        futures.add(Keyspace.open(SchemaConstants.SYSTEM_KEYSPACE_NAME).getColumnFamilyStore(SystemKeyspace.BATCHES).forceFlush());

    FBUtilities.waitOnFutures(futures);
    return replayedCount.get();
}
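The flush futures are deliberately collected into a list and only waited on at the end: submitting every flush before blocking is what lets the keyspaces flush in parallel, whereas calling get() inside the loop would serialize them. Below is a small, self-contained sketch of that start-everything-then-wait pattern using only plain JDK executors; the ParallelFlushSketch class and flushOneKeyspace method are illustrative stand-ins, not Cassandra API.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ParallelFlushSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        List<Future<?>> futures = new ArrayList<>();

        // submit every "flush" first so they can all run concurrently...
        for (int keyspace = 0; keyspace < 8; keyspace++) {
            int id = keyspace;
            futures.add(pool.submit(() -> flushOneKeyspace(id)));
        }

        // ...and only block afterwards, so the flushes overlap instead of running one at a time
        for (Future<?> f : futures)
            f.get();

        pool.shutdown();
    }

    // illustrative stand-in for Keyspace#flush
    private static void flushOneKeyspace(int id) {
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        System.out.println("flushed keyspace " + id);
    }
}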
use of org.apache.cassandra.utils.concurrent.Future in project cassandra by apache.
the class StorageService method restoreReplicaCount.
/**
* Called when an endpoint is removed from the ring. This function checks
* whether this node becomes responsible for new ranges as a
* consequence and streams data if needed.
*
* This is rather inefficient, but it does not matter so much
* since this is called very seldom
*
* @param endpoint the node that left
*/
private void restoreReplicaCount(InetAddressAndPort endpoint, final InetAddressAndPort notifyEndpoint) {
    Map<String, Multimap<InetAddressAndPort, FetchReplica>> replicasToFetch = new HashMap<>();
    InetAddressAndPort myAddress = FBUtilities.getBroadcastAddressAndPort();

    for (String keyspaceName : Schema.instance.getNonLocalStrategyKeyspaces()) {
        logger.debug("Restoring replica count for keyspace {}", keyspaceName);
        EndpointsByReplica changedReplicas = getChangedReplicasForLeaving(keyspaceName, endpoint, tokenMetadata, Keyspace.open(keyspaceName).getReplicationStrategy());
        Set<LeavingReplica> myNewReplicas = new HashSet<>();
        for (Map.Entry<Replica, Replica> entry : changedReplicas.flattenEntries()) {
            Replica replica = entry.getValue();
            if (replica.endpoint().equals(myAddress)) {
                // Maybe we don't technically need to fetch transient data from somewhere
                // but it's probably not a lot and it probably makes things a hair more resilient to people
                // not running repair when they should.
                myNewReplicas.add(new LeavingReplica(entry.getKey(), entry.getValue()));
            }
        }
        logger.debug("Changed replicas for leaving {}, myNewReplicas {}", changedReplicas, myNewReplicas);
        replicasToFetch.put(keyspaceName, getNewSourceReplicas(keyspaceName, myNewReplicas));
    }

    StreamPlan stream = new StreamPlan(StreamOperation.RESTORE_REPLICA_COUNT);
    replicasToFetch.forEach((keyspaceName, sources) -> {
        logger.debug("Requesting keyspace {} sources", keyspaceName);
        sources.asMap().forEach((sourceAddress, fetchReplicas) -> {
            logger.debug("Source and our replicas are {}", fetchReplicas);
            // Remember whether this node is providing the full or transient replicas for this range. We are going
            // to pass streaming the local instance of Replica for the range, which doesn't tell us anything about the source.
            // By encoding it as two separate sets we retain this information about the source.
            RangesAtEndpoint full = fetchReplicas.stream().filter(f -> f.remote.isFull()).map(f -> f.local).collect(RangesAtEndpoint.collector(myAddress));
            RangesAtEndpoint transientReplicas = fetchReplicas.stream().filter(f -> f.remote.isTransient()).map(f -> f.local).collect(RangesAtEndpoint.collector(myAddress));
            if (logger.isDebugEnabled())
                logger.debug("Requesting from {} full replicas {} transient replicas {}", sourceAddress, StringUtils.join(full, ", "), StringUtils.join(transientReplicas, ", "));
            stream.requestRanges(sourceAddress, keyspaceName, full, transientReplicas);
        });
    });

    StreamResultFuture future = stream.execute();
    future.addCallback(new FutureCallback<StreamState>() {
        public void onSuccess(StreamState finalState) {
            sendReplicationNotification(notifyEndpoint);
        }

        public void onFailure(Throwable t) {
            logger.warn("Streaming to restore replica count failed", t);
            // We still want to send the notification
            sendReplicationNotification(notifyEndpoint);
        }
    });
}
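Note that restoreReplicaCount does not block on the stream: it registers the callback above and returns, and sendReplicationNotification runs on both the success and failure paths. Purely to illustrate the Future API in a different shape, the same notify-regardless-of-outcome behaviour could be written in blocking style. The method below is a hypothetical member of StorageService (it reuses logger and sendReplicationNotification), not how the class actually does it.

// Hypothetical blocking variant of the callback above: wait for streaming, then notify
// whether it succeeded or not.
private void streamAndNotifyBlocking(StreamPlan stream, InetAddressAndPort notifyEndpoint) {
    try {
        stream.execute().get();
    } catch (InterruptedException e) {
        throw new UncheckedInterruptedException(e);
    } catch (ExecutionException e) {
        logger.warn("Streaming to restore replica count failed", e.getCause());
    } finally {
        // mirror onSuccess/onFailure: the notification is always sent
        sendReplicationNotification(notifyEndpoint);
    }
}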
use of org.apache.cassandra.utils.concurrent.Future in project cassandra by apache.
the class StorageService method unbootstrap.
private void unbootstrap(Runnable onFinish) throws ExecutionException, InterruptedException {
    Supplier<Future<StreamState>> startStreaming = prepareUnbootstrapStreaming();
    setMode(Mode.LEAVING, "replaying batch log and streaming data to other nodes", true);

    // Start with BatchLog replay, which may create hints but no writes since this is no longer a valid endpoint.
    Future<?> batchlogReplay = BatchlogManager.instance.startBatchlogReplay();
    Future<StreamState> streamSuccess = startStreaming.get();

    // Wait for batch log to complete before streaming hints.
    logger.debug("waiting for batch log processing.");
    batchlogReplay.get();

    setMode(Mode.LEAVING, "streaming hints to other nodes", true);
    Future hintsSuccess = streamHints();

    // wait for the transfer runnables to signal the latch.
    logger.debug("waiting for stream acks.");
    streamSuccess.get();
    hintsSuccess.get();
    logger.debug("stream acks all received.");

    leaveRing();
    onFinish.run();
}
use of org.apache.cassandra.utils.concurrent.Future in project cassandra by apache.
the class AutoSavingCache method loadSaved.
public int loadSaved() {
    int count = 0;
    long start = nanoTime();

    // modern format, allows both key and value (so key cache load can be purely sequential)
    File dataPath = getCacheDataPath(CURRENT_VERSION);
    File crcPath = getCacheCrcPath(CURRENT_VERSION);
    if (dataPath.exists() && crcPath.exists()) {
        DataInputStreamPlus in = null;
        try {
            logger.info("reading saved cache {}", dataPath);
            in = new DataInputStreamPlus(new LengthAvailableInputStream(new BufferedInputStream(streamFactory.getInputStream(dataPath, crcPath)), dataPath.length()));

            // Check the schema has not changed since CFs are looked up by name which is ambiguous
            UUID schemaVersion = new UUID(in.readLong(), in.readLong());
            if (!schemaVersion.equals(Schema.instance.getVersion()))
                throw new RuntimeException("Cache schema version " + schemaVersion + " does not match current schema version " + Schema.instance.getVersion());

            ArrayDeque<Future<Pair<K, V>>> futures = new ArrayDeque<>();
            long loadByNanos = start + TimeUnit.SECONDS.toNanos(DatabaseDescriptor.getCacheLoadTimeout());
            while (nanoTime() < loadByNanos && in.available() > 0) {
                // tableId and indexName are serialized by the serializers in CacheService
                // That is delegated there because there are serializer specific conditions
                // where a cache key is skipped and not written
                TableId tableId = TableId.deserialize(in);
                String indexName = in.readUTF();
                if (indexName.isEmpty())
                    indexName = null;

                ColumnFamilyStore cfs = Schema.instance.getColumnFamilyStoreInstance(tableId);
                if (indexName != null && cfs != null)
                    cfs = cfs.indexManager.getIndexByName(indexName).getBackingTable().orElse(null);

                Future<Pair<K, V>> entryFuture = cacheLoader.deserialize(in, cfs);
                // Key cache entry can return null if the SSTable doesn't exist.
                if (entryFuture == null)
                    continue;

                futures.offer(entryFuture);
                count++;

                /*
                 * Kind of unwise to accrue an unbounded number of pending futures,
                 * so now there is this loop to keep a bounded number pending.
                 */
                do {
                    while (futures.peek() != null && futures.peek().isDone()) {
                        Future<Pair<K, V>> future = futures.poll();
                        Pair<K, V> entry = future.get();
                        if (entry != null && entry.right != null)
                            put(entry.left, entry.right);
                    }

                    if (futures.size() > 1000)
                        Thread.yield();
                } while (futures.size() > 1000);
            }

            Future<Pair<K, V>> future = null;
            while ((future = futures.poll()) != null) {
                Pair<K, V> entry = future.get();
                if (entry != null && entry.right != null)
                    put(entry.left, entry.right);
            }
        } catch (CorruptFileException e) {
            JVMStabilityInspector.inspectThrowable(e);
            logger.warn(String.format("Non-fatal checksum error reading saved cache %s", dataPath.absolutePath()), e);
        } catch (Throwable t) {
            JVMStabilityInspector.inspectThrowable(t);
            logger.info(String.format("Harmless error reading saved cache %s", dataPath.absolutePath()), t);
        } finally {
            FileUtils.closeQuietly(in);
            cacheLoader.cleanupAfterDeserialize();
        }
    }

    if (logger.isTraceEnabled())
        logger.trace("completed reading ({} ms; {} keys) saved cache {}", TimeUnit.NANOSECONDS.toMillis(nanoTime() - start), count, dataPath);
    return count;
}
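The inner do/while above is the interesting part: it drains completed entry futures from the head of the deque and, while more than 1000 are still pending, yields until the backlog shrinks, so the read-ahead deserialization cannot queue an unbounded number of futures. Below is a stand-alone sketch of that back-pressure pattern against plain java.util.concurrent futures; the BoundedPendingFutures class, the drainCompleted method and the 1000 cap are illustrative choices, not part of AutoSavingCache.

import java.util.ArrayDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

public final class BoundedPendingFutures {
    private static final int MAX_PENDING = 1000;

    // Drain futures that are already done from the head of the queue; while more than
    // MAX_PENDING remain outstanding, yield and retry, throttling the producer.
    static <T> void drainCompleted(ArrayDeque<Future<T>> pending) throws ExecutionException, InterruptedException {
        do {
            // only the head is inspected, so results are consumed in submission order;
            // an unfinished head simply stops the drain for now
            while (pending.peek() != null && pending.peek().isDone())
                consume(pending.poll().get());

            if (pending.size() > MAX_PENDING)
                Thread.yield();
        } while (pending.size() > MAX_PENDING);
    }

    // illustrative stand-in for AutoSavingCache#put
    private static <T> void consume(T result) {
    }
}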