use of org.apache.cassandra.dht.Token in project cassandra by apache.
the class CompactionManager method createMerkleTrees.
/**
 * Builds one merkle tree per requested range, sizing each tree from the estimated number of
 * partitions the given sstables hold for that range.
 *
 * @param sstables sstables whose key estimates are used to size the trees
 * @param ranges   token ranges to create a tree for
 * @param cfs      column family store supplying the partitioner
 * @return a {@code MerkleTrees} instance containing one tree per range
 */
private static MerkleTrees createMerkleTrees(Iterable<SSTableReader> sstables, Collection<Range<Token>> ranges, ColumnFamilyStore cfs) {
    MerkleTrees tree = new MerkleTrees(cfs.getPartitioner());
    long allPartitions = 0;
    Map<Range<Token>, Long> rangePartitionCounts = Maps.newHashMapWithExpectedSize(ranges.size());
    // First pass: estimate the partition count of every range and the grand total.
    for (Range<Token> range : ranges) {
        long numPartitions = 0;
        for (SSTableReader sstable : sstables) {
            numPartitions += sstable.estimatedKeysForRanges(Collections.singleton(range));
        }
        rangePartitionCounts.put(range, numPartitions);
        allPartitions += numPartitions;
    }
    // Second pass: derive a depth for each range and register its tree.
    for (Range<Token> range : ranges) {
        long numPartitions = rangePartitionCounts.get(range);
        double rangeOwningRatio = allPartitions > 0 ? (double) numPartitions / allPartitions : 0;
        // determine max tree depth proportional to range size to avoid blowing up memory with multiple trees,
        // capping at 20 to prevent large tree (CASSANDRA-11390).
        // The budget is clamped at 0: a range owning less than 2^-20 of all partitions would otherwise get a
        // negative depth, and (int) Math.pow(2, negative) truncates to a tree size of 0.
        int maxDepth = rangeOwningRatio > 0
                       ? (int) Math.floor(Math.max(0.0, 20 - Math.log(1 / rangeOwningRatio) / Math.log(2)))
                       : 0;
        // determine tree depth from number of partitions, capping at max tree depth (CASSANDRA-5263)
        int depth = numPartitions > 0 ? (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth) : 0;
        tree.addMerkleTree((int) Math.pow(2, depth), range);
    }
    if (logger.isDebugEnabled()) {
        // MT serialize may take time
        logger.debug("Created {} merkle trees with merkle trees size {}, {} partitions, {} bytes", tree.ranges().size(), tree.size(), allPartitions, MerkleTrees.serializer.serializedSize(tree, 0));
    }
    return tree;
}
use of org.apache.cassandra.dht.Token in project cassandra by apache.
the class CompactionManager method sstablesInBounds.
/**
 * Gathers the live sstables that fall within any of the supplied token ranges.
 *
 * An interval tree is built once over the live sstables of the store's current view; each range
 * is then looked up from the min key bound of its left token to the max key bound of its right
 * token, and the results are merged into a single set.
 *
 * @param cfs                  store whose live sstables are searched
 * @param tokenRangeCollection token ranges to look up
 * @return the distinct sstables returned for at least one of the ranges
 */
private static Collection<SSTableReader> sstablesInBounds(ColumnFamilyStore cfs, Collection<Range<Token>> tokenRangeCollection) {
    Iterable<SSTableReader> live = cfs.getTracker().getView().select(SSTableSet.LIVE);
    SSTableIntervalTree intervals = SSTableIntervalTree.build(live);

    final Set<SSTableReader> matches = new HashSet<>();
    for (Range<Token> bounds : tokenRangeCollection) {
        Iterables.addAll(matches, View.sstablesInBounds(bounds.left.minKeyBound(), bounds.right.maxKeyBound(), intervals));
    }
    return matches;
}
use of org.apache.cassandra.dht.Token in project cassandra by apache.
the class LeveledManifest method overlapping.
/**
 * Returns the sstables from {@code others} that overlap the full token span covered by
 * {@code candidates}.
 *
 * Checking each candidate individually is not sufficient. Consider:
 *   candidates = [ s1(a, c), s2(m, z) ]
 *   others     = [ s3(e, g) ]
 * s3 overlaps neither s1 nor s2, yet compacting s1 with s2 yields an sstable spanning (a, z),
 * which does overlap s3. Hence the lookup uses the smallest first token and the largest last
 * token across all candidates.
 *
 * @param candidates non-empty collection of sstables defining the span
 * @param others     sstables to test against that span
 */
private static Set<SSTableReader> overlapping(Collection<SSTableReader> candidates, Iterable<SSTableReader> others) {
    assert !candidates.isEmpty();

    Iterator<SSTableReader> remaining = candidates.iterator();
    SSTableReader current = remaining.next();
    Token lowest = current.first.getToken();
    Token highest = current.last.getToken();

    while (remaining.hasNext()) {
        current = remaining.next();
        // Only replace a bound when the new sstable strictly extends it.
        if (lowest.compareTo(current.first.getToken()) > 0) {
            lowest = current.first.getToken();
        }
        if (highest.compareTo(current.last.getToken()) < 0) {
            highest = current.last.getToken();
        }
    }
    return overlapping(lowest, highest, others);
}
use of org.apache.cassandra.dht.Token in project cassandra by apache.
the class ViewBuilder method run.
/**
 * Builds the materialized view from the base table's sstables, resuming from the persisted
 * build status when one exists.
 *
 * If the view is already marked built, only the distributed status is refreshed (when it has
 * not been replicated yet) and the method returns. Otherwise keys are read from the selected
 * sstables, each key whose token lies in a local range is passed to {@code buildKey}, and the
 * build status is persisted as tokens change. Any exception causes the builder to be
 * resubmitted after five minutes.
 */
public void run() {
logger.trace("Running view builder for {}.{}", baseCfs.metadata.keyspace, view.name);
UUID localHostId = SystemKeyspace.getLocalHostId();
String ksname = baseCfs.metadata.keyspace, viewName = view.name;
// Already built locally: at most propagate the status to the distributed table, then stop.
if (SystemKeyspace.isViewBuilt(ksname, viewName)) {
if (!SystemKeyspace.isViewStatusReplicated(ksname, viewName))
updateDistributed(ksname, viewName, localHostId);
return;
}
Iterable<Range<Token>> ranges = StorageService.instance.getLocalRanges(baseCfs.metadata.keyspace);
// Pair of (sstable generation, last token) recorded by a previous build attempt, or null if none.
final Pair<Integer, Token> buildStatus = SystemKeyspace.getViewBuildStatus(ksname, viewName);
Token lastToken;
Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>> function;
if (buildStatus == null) {
// Fresh build: flush first, then record the highest generation among the canonical sstables.
baseCfs.forceBlockingFlush();
function = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL);
int generation = Integer.MIN_VALUE;
try (Refs<SSTableReader> temp = baseCfs.selectAndReference(function).refs) {
for (SSTableReader reader : temp) {
generation = Math.max(reader.descriptor.generation, generation);
}
}
SystemKeyspace.beginViewBuild(ksname, viewName, generation);
lastToken = null;
} else {
// Resumed build: restrict to canonical sstables whose generation does not exceed the recorded one.
function = new Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>>() {
@Nullable
public Iterable<SSTableReader> apply(org.apache.cassandra.db.lifecycle.View view) {
Iterable<SSTableReader> readers = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL).apply(view);
if (readers != null)
return Iterables.filter(readers, ssTableReader -> ssTableReader.descriptor.generation <= buildStatus.left);
return null;
}
};
lastToken = buildStatus.right;
}
prevToken = lastToken;
try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(function).refs;
ReducingKeyIterator iter = new ReducingKeyIterator(sstables)) {
SystemDistributedKeyspace.startViewBuild(ksname, viewName, localHostId);
while (!isStopped && iter.hasNext()) {
DecoratedKey key = iter.next();
Token token = key.getToken();
// Skip keys whose token is not strictly greater than the resume token.
if (lastToken == null || lastToken.compareTo(token) < 0) {
for (Range<Token> range : ranges) {
if (range.contains(token)) {
buildKey(key);
// Persist progress only when the token differs from the last one recorded.
if (prevToken == null || prevToken.compareTo(token) != 0) {
SystemKeyspace.updateViewBuildStatus(ksname, viewName, key.getToken());
prevToken = token;
}
}
}
// Once one key has passed the resume token, stop comparing against it.
// NOTE(review): presumably relies on the iterator yielding keys in token order - confirm.
lastToken = null;
}
}
// Mark the build finished only if the loop ended by exhausting the keys, not by a stop request.
if (!isStopped) {
SystemKeyspace.finishViewBuildStatus(ksname, viewName);
updateDistributed(ksname, viewName, localHostId);
}
} catch (Exception e) {
// Any failure: resubmit this builder after 5 minutes and log the cause.
ScheduledExecutors.nonPeriodicTasks.schedule(() -> CompactionManager.instance.submitViewBuilder(this), 5, TimeUnit.MINUTES);
logger.warn("Materialized View failed to complete, sleeping 5 minutes before restarting", e);
}
}
use of org.apache.cassandra.dht.Token in project cassandra by apache.
the class Gossiper method assassinateEndpoint.
/**
 * Forcibly marks an endpoint as having left the ring through gossip, whether or not this node
 * already holds state for it. Do not call this unless you know what you are doing.
 *
 * When state for the endpoint is known, the method first waits {@code StorageService.RING_DELAY}
 * milliseconds and aborts if the endpoint's generation or heartbeat version changed in the
 * meantime.
 *
 * @param address host name or IP address of the endpoint, resolved with {@link InetAddress#getByName(String)}
 * @throws UnknownHostException if {@code address} cannot be resolved
 */
public void assassinateEndpoint(String address) throws UnknownHostException {
    InetAddress endpoint = InetAddress.getByName(address);
    EndpointState epState = endpointStateMap.get(endpoint);
    logger.warn("Assassinating {} via gossip", endpoint);

    if (epState != null) {
        // Snapshot the heartbeat, wait, and verify that nothing moved while we slept.
        final int generationBefore = epState.getHeartBeatState().getGeneration();
        final int heartbeatBefore = epState.getHeartBeatState().getHeartBeatVersion();
        logger.info("Sleeping for {}ms to ensure {} does not change", StorageService.RING_DELAY, endpoint);
        Uninterruptibles.sleepUninterruptibly(StorageService.RING_DELAY, TimeUnit.MILLISECONDS);

        EndpointState stateAfterWait = endpointStateMap.get(endpoint);
        if (stateAfterWait == null) {
            logger.warn("Endpoint {} disappeared while trying to assassinate, continuing anyway", endpoint);
        } else {
            if (stateAfterWait.getHeartBeatState().getGeneration() != generationBefore) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " generation changed while trying to assassinate it");
            }
            if (stateAfterWait.getHeartBeatState().getHeartBeatVersion() != heartbeatBefore) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " heartbeat changed while trying to assassinate it");
            }
        }
        // make sure we don't evict it too soon
        epState.updateTimestamp();
        epState.getHeartBeatState().forceNewerGenerationUnsafe();
    } else {
        // Nothing known about the endpoint: fabricate a state with a generation 60s in the future.
        int fabricatedGeneration = (int) ((System.currentTimeMillis() + 60000) / 1000);
        epState = new EndpointState(new HeartBeatState(fabricatedGeneration, 9999));
    }

    Collection<Token> tokens;
    try {
        tokens = StorageService.instance.getTokenMetadata().getTokens(endpoint);
    } catch (Throwable th) {
        JVMStabilityInspector.inspectThrowable(th);
        // TODO this is broken
        logger.warn("Unable to calculate tokens for {}. Will use a random one", address);
        tokens = Collections.singletonList(StorageService.instance.getTokenMetadata().partitioner.getRandomToken());
    }

    // Publish a LEFT status for the endpoint and process it as a major state change.
    epState.addApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.left(tokens, computeExpireTime()));
    handleMajorStateChange(endpoint, epState);
    Uninterruptibles.sleepUninterruptibly(intervalInMillis * 4, TimeUnit.MILLISECONDS);
    logger.warn("Finished assassinating {}", endpoint);
}
Aggregations