Search in sources :

Example 41 with Token

use of org.apache.cassandra.dht.Token in project cassandra by apache.

the class CompactionManager method createMerkleTrees.

/**
 * Builds one merkle tree per requested range, sizing each tree from the estimated
 * number of partitions the given sstables hold for that range.
 *
 * @param sstables sstables whose key estimates drive the tree sizes
 * @param ranges   token ranges; one merkle tree is added for each
 * @param cfs      column family store supplying the partitioner for the trees
 * @return the populated {@code MerkleTrees} container
 */
private static MerkleTrees createMerkleTrees(Iterable<SSTableReader> sstables, Collection<Range<Token>> ranges, ColumnFamilyStore cfs) {
    MerkleTrees tree = new MerkleTrees(cfs.getPartitioner());
    long allPartitions = 0;
    Map<Range<Token>, Long> rangePartitionCounts = Maps.newHashMapWithExpectedSize(ranges.size());
    // First pass: estimate the partition count per range and the overall total.
    for (Range<Token> range : ranges) {
        long numPartitions = 0;
        for (SSTableReader sstable : sstables)
            numPartitions += sstable.estimatedKeysForRanges(Collections.singleton(range));
        rangePartitionCounts.put(range, numPartitions);
        allPartitions += numPartitions;
    }
    // Second pass: derive a depth for each range and register its tree.
    for (Range<Token> range : ranges) {
        long numPartitions = rangePartitionCounts.get(range);
        double rangeOwningRatio = allPartitions > 0 ? (double) numPartitions / allPartitions : 0;
        // determine max tree depth proportional to range size to avoid blowing up memory with multiple trees,
        // capping at 20 to prevent large tree (CASSANDRA-11390).
        // A range owning less than 2^-20 of all partitions would make the budget negative, which in turn
        // made (int) Math.pow(2, depth) truncate to 0; clamp at 0 so the requested tree size is always >= 1.
        int maxDepth = rangeOwningRatio > 0 ? (int) Math.floor(Math.max(0.0, 20 - Math.log(1 / rangeOwningRatio) / Math.log(2))) : 0;
        // determine tree depth from number of partitions, capping at max tree depth (CASSANDRA-5263)
        int depth = numPartitions > 0 ? (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth) : 0;
        tree.addMerkleTree((int) Math.pow(2, depth), range);
    }
    if (logger.isDebugEnabled()) {
        // MT serialize may take time
        logger.debug("Created {} merkle trees with merkle trees size {}, {} partitions, {} bytes", tree.ranges().size(), tree.size(), allPartitions, MerkleTrees.serializer.serializedSize(tree, 0));
    }
    return tree;
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range)

Example 42 with Token

use of org.apache.cassandra.dht.Token in project cassandra by apache.

the class CompactionManager method sstablesInBounds.

/**
 * Gathers, without duplicates, every live sstable that {@code View.sstablesInBounds}
 * reports for any of the supplied token ranges.
 *
 * @param cfs                  store whose live sstables are searched
 * @param tokenRangeCollection token ranges to look up; each is queried from the
 *                             min key bound of its left token to the max key bound
 *                             of its right token
 * @return the set of matching sstables (empty when nothing matches)
 */
private static Collection<SSTableReader> sstablesInBounds(ColumnFamilyStore cfs, Collection<Range<Token>> tokenRangeCollection) {
    // Build the interval tree once over the live set and reuse it for every range.
    SSTableIntervalTree liveIntervals = SSTableIntervalTree.build(cfs.getTracker().getView().select(SSTableSet.LIVE));
    Set<SSTableReader> matches = new HashSet<>();
    for (Range<Token> bounds : tokenRangeCollection) {
        for (SSTableReader reader : View.sstablesInBounds(bounds.left.minKeyBound(), bounds.right.maxKeyBound(), liveIntervals)) {
            matches.add(reader);
        }
    }
    return matches;
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) SSTableIntervalTree(org.apache.cassandra.db.lifecycle.SSTableIntervalTree) Token(org.apache.cassandra.dht.Token)

Example 43 with Token

use of org.apache.cassandra.dht.Token in project cassandra by apache.

the class LeveledManifest method overlapping.

/**
 * Returns the sstables from {@code others} that overlap the token span covered by
 * {@code candidates} as a whole.
 *
 * Checking each candidate individually is not sufficient. Consider:
 *   candidates = [ s1(a, c), s2(m, z) ]
 *   others     = [ s3(e, g) ]
 * s3 overlaps neither s1 nor s2, yet compacting s1 with s2 produces an sstable that
 * does overlap s3, so s3 must be returned. The span is therefore taken from the
 * smallest first token to the largest last token across all candidates.
 *
 * @param candidates non-empty collection of sstables defining the span
 * @param others     sstables to test against that span
 * @return the result of {@code overlapping(first, last, others)} for the computed span
 */
private static Set<SSTableReader> overlapping(Collection<SSTableReader> candidates, Iterable<SSTableReader> others) {
    assert !candidates.isEmpty();
    Iterator<SSTableReader> remaining = candidates.iterator();
    // Seed the span with the first candidate, then widen it with the rest.
    SSTableReader head = remaining.next();
    Token lowest = head.first.getToken();
    Token highest = head.last.getToken();
    while (remaining.hasNext()) {
        SSTableReader candidate = remaining.next();
        Token candidateFirst = candidate.first.getToken();
        Token candidateLast = candidate.last.getToken();
        if (lowest.compareTo(candidateFirst) > 0) {
            lowest = candidateFirst;
        }
        if (highest.compareTo(candidateLast) < 0) {
            highest = candidateLast;
        }
    }
    return overlapping(lowest, highest, others);
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Token(org.apache.cassandra.dht.Token)

Example 44 with Token

use of org.apache.cassandra.dht.Token in project cassandra by apache.

the class ViewBuilder method run.

/**
 * Builds the view for the base table by walking the keys of the base table's sstables
 * and calling {@code buildKey} for every key whose token falls in a local range.
 *
 * If the view is already recorded as built, only the replicated status is refreshed
 * (when needed) and the method returns. If an earlier build status exists, the build
 * resumes after the token stored in it. Any exception during the build is logged and
 * the builder is resubmitted after five minutes.
 */
public void run() {
    logger.trace("Running view builder for {}.{}", baseCfs.metadata.keyspace, view.name);
    UUID localHostId = SystemKeyspace.getLocalHostId();
    String ksname = baseCfs.metadata.keyspace, viewName = view.name;
    // Already built: just make sure the distributed status has been written, then stop.
    if (SystemKeyspace.isViewBuilt(ksname, viewName)) {
        if (!SystemKeyspace.isViewStatusReplicated(ksname, viewName))
            updateDistributed(ksname, viewName, localHostId);
        return;
    }
    Iterable<Range<Token>> ranges = StorageService.instance.getLocalRanges(baseCfs.metadata.keyspace);
    // NOTE(review): presumably left = sstable generation recorded by beginViewBuild and
    // right = last token processed; confirm against SystemKeyspace.
    final Pair<Integer, Token> buildStatus = SystemKeyspace.getViewBuildStatus(ksname, viewName);
    Token lastToken;
    Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>> function;
    if (buildStatus == null) {
        // No previous status: flush first, then record the highest generation among the
        // canonical sstables as the starting point of this build.
        baseCfs.forceBlockingFlush();
        function = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL);
        int generation = Integer.MIN_VALUE;
        try (Refs<SSTableReader> temp = baseCfs.selectAndReference(function).refs) {
            for (SSTableReader reader : temp) {
                generation = Math.max(reader.descriptor.generation, generation);
            }
        }
        SystemKeyspace.beginViewBuild(ksname, viewName, generation);
        lastToken = null;
    } else {
        // Previous status found: restrict the canonical sstables to those whose generation
        // does not exceed the one stored in the status, and resume from the stored token.
        function = new Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>>() {

            @Nullable
            public Iterable<SSTableReader> apply(org.apache.cassandra.db.lifecycle.View view) {
                Iterable<SSTableReader> readers = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL).apply(view);
                if (readers != null)
                    return Iterables.filter(readers, ssTableReader -> ssTableReader.descriptor.generation <= buildStatus.left);
                return null;
            }
        };
        lastToken = buildStatus.right;
    }
    // prevToken is not declared in this method; it tracks the last token written to the build status.
    prevToken = lastToken;
    try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(function).refs;
        ReducingKeyIterator iter = new ReducingKeyIterator(sstables)) {
        SystemDistributedKeyspace.startViewBuild(ksname, viewName, localHostId);
        while (!isStopped && iter.hasNext()) {
            DecoratedKey key = iter.next();
            Token token = key.getToken();
            // Skip keys at or before the resume token; a null lastToken disables the check.
            if (lastToken == null || lastToken.compareTo(token) < 0) {
                for (Range<Token> range : ranges) {
                    if (range.contains(token)) {
                        buildKey(key);
                        // Only persist progress when the token differs from the last one recorded.
                        if (prevToken == null || prevToken.compareTo(token) != 0) {
                            SystemKeyspace.updateViewBuildStatus(ksname, viewName, key.getToken());
                            prevToken = token;
                        }
                    }
                }
                // Once the first key past the resume token has been seen, stop comparing.
                lastToken = null;
            }
        }
        // Mark the build finished only if the loop ended without being stopped.
        if (!isStopped) {
            SystemKeyspace.finishViewBuildStatus(ksname, viewName);
            updateDistributed(ksname, viewName, localHostId);
        }
    } catch (Exception e) {
        // On any failure, resubmit this builder after a five minute delay.
        ScheduledExecutors.nonPeriodicTasks.schedule(() -> CompactionManager.instance.submitViewBuilder(this), 5, TimeUnit.MINUTES);
        logger.warn("Materialized View failed to complete, sleeping 5 minutes before restarting", e);
    }
}
Also used : Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) ReducingKeyIterator(org.apache.cassandra.io.sstable.ReducingKeyIterator) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) UUID(java.util.UUID) org.apache.cassandra.db(org.apache.cassandra.db) Nullable(javax.annotation.Nullable)

Example 45 with Token

use of org.apache.cassandra.dht.Token in project cassandra by apache.

the class Gossiper method assassinateEndpoint.

/**
 * Do not call this method unless you know what you are doing.
 * It will try extremely hard to obliterate any endpoint from the ring,
 * even if it does not know about it.
 *
 * If the endpoint is already known, its heartbeat generation and version are sampled,
 * the method sleeps for {@code StorageService.RING_DELAY}, and it aborts if either
 * value changed in the meantime. It then publishes a LEFT status for the endpoint's
 * tokens, or for a random token when they cannot be determined.
 *
 * @param address host name or IP address of the endpoint to remove
 * @throws UnknownHostException if {@code address} cannot be resolved
 * @throws RuntimeException if the endpoint's heartbeat generation or version changed
 *                          during the wait
 */
public void assassinateEndpoint(String address) throws UnknownHostException {
    InetAddress endpoint = InetAddress.getByName(address);
    EndpointState epState = endpointStateMap.get(endpoint);
    logger.warn("Assassinating {} via gossip", endpoint);
    if (epState != null) {
        int generationBefore = epState.getHeartBeatState().getGeneration();
        int heartbeatBefore = epState.getHeartBeatState().getHeartBeatVersion();
        logger.info("Sleeping for {}ms to ensure {} does not change", StorageService.RING_DELAY, endpoint);
        Uninterruptibles.sleepUninterruptibly(StorageService.RING_DELAY, TimeUnit.MILLISECONDS);
        // make sure it did not change
        EndpointState stateAfterWait = endpointStateMap.get(endpoint);
        if (stateAfterWait == null) {
            logger.warn("Endpoint {} disappeared while trying to assassinate, continuing anyway", endpoint);
        } else {
            if (stateAfterWait.getHeartBeatState().getGeneration() != generationBefore) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " generation changed while trying to assassinate it");
            }
            if (stateAfterWait.getHeartBeatState().getHeartBeatVersion() != heartbeatBefore) {
                throw new RuntimeException("Endpoint still alive: " + endpoint + " heartbeat changed while trying to assassinate it");
            }
        }
        // make sure we don't evict it too soon
        epState.updateTimestamp();
        epState.getHeartBeatState().forceNewerGenerationUnsafe();
    } else {
        // Unknown endpoint: create a state whose generation is one minute in the future, in seconds.
        epState = new EndpointState(new HeartBeatState((int) ((System.currentTimeMillis() + 60000) / 1000), 9999));
    }
    Collection<Token> tokens;
    try {
        tokens = StorageService.instance.getTokenMetadata().getTokens(endpoint);
    } catch (Throwable th) {
        JVMStabilityInspector.inspectThrowable(th);
        // TODO this is broken
        logger.warn("Unable to calculate tokens for {}.  Will use a random one", address);
        tokens = Collections.singletonList(StorageService.instance.getTokenMetadata().partitioner.getRandomToken());
    }
    // do not pass go, do not collect 200 dollars, just gtfo
    epState.addApplicationState(ApplicationState.STATUS, StorageService.instance.valueFactory.left(tokens, computeExpireTime()));
    handleMajorStateChange(endpoint, epState);
    // Wait four gossip intervals before reporting completion.
    Uninterruptibles.sleepUninterruptibly(intervalInMillis * 4, TimeUnit.MILLISECONDS);
    logger.warn("Finished assassinating {}", endpoint);
}
Also used : Token(org.apache.cassandra.dht.Token) InetAddress(java.net.InetAddress)

Aggregations

Token (org.apache.cassandra.dht.Token)173 Range (org.apache.cassandra.dht.Range)73 InetAddress (java.net.InetAddress)66 Test (org.junit.Test)65 BigIntegerToken (org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken)27 TokenMetadata (org.apache.cassandra.locator.TokenMetadata)27 IPartitioner (org.apache.cassandra.dht.IPartitioner)26 SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader)23 ArrayList (java.util.ArrayList)16 UUID (java.util.UUID)16 VersionedValue (org.apache.cassandra.gms.VersionedValue)15 StringToken (org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken)14 ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore)9 IOException (java.io.IOException)8 ByteBuffer (java.nio.ByteBuffer)8 BytesToken (org.apache.cassandra.dht.ByteOrderedPartitioner.BytesToken)8 AbstractReplicationStrategy (org.apache.cassandra.locator.AbstractReplicationStrategy)8 Set (java.util.Set)7 LongToken (org.apache.cassandra.dht.Murmur3Partitioner.LongToken)7 HashSet (java.util.HashSet)6