
Example 6 with Range

Use of org.apache.cassandra.dht.Range in the Apache Cassandra project.

From class CompactionManager, method createMerkleTrees:

private static MerkleTrees createMerkleTrees(Iterable<SSTableReader> sstables, Collection<Range<Token>> ranges, ColumnFamilyStore cfs) {
    MerkleTrees tree = new MerkleTrees(cfs.getPartitioner());
    long allPartitions = 0;
    Map<Range<Token>, Long> rangePartitionCounts = Maps.newHashMapWithExpectedSize(ranges.size());
    for (Range<Token> range : ranges) {
        long numPartitions = 0;
        for (SSTableReader sstable : sstables) numPartitions += sstable.estimatedKeysForRanges(Collections.singleton(range));
        rangePartitionCounts.put(range, numPartitions);
        allPartitions += numPartitions;
    }
    for (Range<Token> range : ranges) {
        long numPartitions = rangePartitionCounts.get(range);
        double rangeOwningRatio = allPartitions > 0 ? (double) numPartitions / allPartitions : 0;
        // determine max tree depth proportional to range size to avoid blowing up memory with multiple trees,
        // capping at 20 to prevent overly large trees (CASSANDRA-11390)
        int maxDepth = rangeOwningRatio > 0 ? (int) Math.floor(20 - Math.log(1 / rangeOwningRatio) / Math.log(2)) : 0;
        // determine tree depth from number of partitions, capping at max tree depth (CASSANDRA-5263)
        int depth = numPartitions > 0 ? (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth) : 0;
        tree.addMerkleTree((int) Math.pow(2, depth), range);
    }
    if (logger.isDebugEnabled()) {
        // MT serialize may take time
        logger.debug("Created {} merkle trees with merkle trees size {}, {} partitions, {} bytes", tree.ranges().size(), tree.size(), allPartitions, MerkleTrees.serializer.serializedSize(tree, 0));
    }
    return tree;
}
Also used: SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range)
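The depth arithmetic here deserves unpacking: a range that owns the entire dataset (ratio 1.0) keeps the full 20-level cap, each halving of the ownership ratio removes one level from that cap, and the partition count bounds the depth from the other side. Below is a minimal, hedged sketch of just that calculation; MerkleDepthDemo and merkleDepth are illustrative names, not Cassandra APIs.

public final class MerkleDepthDemo {
    // Cap from CASSANDRA-11390: a full tree of depth 20 already has ~1M leaves.
    static final int MAX_DEPTH_CAP = 20;

    static int merkleDepth(long numPartitions, double rangeOwningRatio) {
        // A range owning ratio r loses log2(1/r) levels off the cap...
        int maxDepth = rangeOwningRatio > 0
                ? (int) Math.floor(MAX_DEPTH_CAP - Math.log(1 / rangeOwningRatio) / Math.log(2))
                : 0;
        // ...and never needs more levels than log2(partition count) (CASSANDRA-5263).
        return numPartitions > 0
                ? (int) Math.min(Math.ceil(Math.log(numPartitions) / Math.log(2)), maxDepth)
                : 0;
    }

    public static void main(String[] args) {
        System.out.println(merkleDepth(1_000_000, 0.5));   // 19: the cap minus one level dominates
        System.out.println(merkleDepth(1_000_000, 0.001)); // 10: owning ~0.1% costs ~10 levels
    }
}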

Example 7 with Range

Use of org.apache.cassandra.dht.Range in the Apache Cassandra project.

From class CompactionManager, method antiCompactGroup:

private int antiCompactGroup(ColumnFamilyStore cfs, Collection<Range<Token>> ranges, LifecycleTransaction anticompactionGroup, long repairedAt, UUID pendingRepair) {
    long groupMaxDataAge = -1;
    for (SSTableReader sstable : anticompactionGroup.originals()) {
        if (groupMaxDataAge < sstable.maxDataAge)
            groupMaxDataAge = sstable.maxDataAge;
    }
    if (anticompactionGroup.originals().isEmpty()) {
        logger.info("No valid anticompactions for this group; all sstables were compacted and are no longer available");
        return 0;
    }
    logger.info("Anticompacting {}", anticompactionGroup);
    Set<SSTableReader> sstableAsSet = anticompactionGroup.originals();
    File destination = cfs.getDirectories().getWriteableLocationAsFile(cfs.getExpectedCompactedFileSize(sstableAsSet, OperationType.ANTICOMPACTION));
    long repairedKeyCount = 0;
    long unrepairedKeyCount = 0;
    int nowInSec = FBUtilities.nowInSeconds();
    CompactionStrategyManager strategy = cfs.getCompactionStrategyManager();
    try (SSTableRewriter repairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
        SSTableRewriter unRepairedSSTableWriter = SSTableRewriter.constructWithoutEarlyOpening(anticompactionGroup, false, groupMaxDataAge);
        AbstractCompactionStrategy.ScannerList scanners = strategy.getScanners(anticompactionGroup.originals());
        CompactionController controller = new CompactionController(cfs, sstableAsSet, getDefaultGcBefore(cfs, nowInSec));
        CompactionIterator ci = new CompactionIterator(OperationType.ANTICOMPACTION, scanners.scanners, controller, nowInSec, UUIDGen.getTimeUUID(), metrics)) {
        int expectedBloomFilterSize = Math.max(cfs.metadata().params.minIndexInterval, (int) (SSTableReader.getApproximateKeyCount(sstableAsSet)));
        repairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, repairedAt, pendingRepair, sstableAsSet, anticompactionGroup));
        unRepairedSSTableWriter.switchWriter(CompactionManager.createWriterForAntiCompaction(cfs, destination, expectedBloomFilterSize, ActiveRepairService.UNREPAIRED_SSTABLE, null, sstableAsSet, anticompactionGroup));
        Range.OrderedRangeContainmentChecker containmentChecker = new Range.OrderedRangeContainmentChecker(ranges);
        while (ci.hasNext()) {
            try (UnfilteredRowIterator partition = ci.next()) {
                // if the partition's token falls within the repaired ranges, save it into the new repaired sstable
                if (containmentChecker.contains(partition.partitionKey().getToken())) {
                    repairedSSTableWriter.append(partition);
                    repairedKeyCount++;
                } else {
                    // otherwise save it into the new 'non-repaired' sstable
                    unRepairedSSTableWriter.append(partition);
                    unrepairedKeyCount++;
                }
            }
        }
        List<SSTableReader> anticompactedSSTables = new ArrayList<>();
        // since both writers are operating over the same Transaction, we cannot use the convenience Transactional.finish() method,
        // as on the second finish() we would prepareToCommit() on a Transaction that has already been committed, which is forbidden by the API
        // (since it indicates misuse). We call permitRedundantTransitions so that calls that transition to a state already occupied are permitted.
        anticompactionGroup.permitRedundantTransitions();
        repairedSSTableWriter.setRepairedAt(repairedAt).prepareToCommit();
        unRepairedSSTableWriter.prepareToCommit();
        anticompactedSSTables.addAll(repairedSSTableWriter.finished());
        anticompactedSSTables.addAll(unRepairedSSTableWriter.finished());
        repairedSSTableWriter.commit();
        unRepairedSSTableWriter.commit();
        logger.trace("Repaired {} keys out of {} for {}/{} in {}", repairedKeyCount, repairedKeyCount + unrepairedKeyCount, cfs.keyspace.getName(), cfs.getTableName(), anticompactionGroup);
        return anticompactedSSTables.size();
    } catch (Throwable e) {
        JVMStabilityInspector.inspectThrowable(e);
        logger.error("Error anticompacting " + anticompactionGroup, e);
    }
    return 0;
}
Also used: UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) SSTableRewriter(org.apache.cassandra.io.sstable.SSTableRewriter) Range(org.apache.cassandra.dht.Range) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) File(java.io.File)
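The heart of the loop above is routing each partition by token containment: partitions whose token falls in one of the repaired ranges go to the repaired writer, the rest to the unrepaired one. Cassandra's OrderedRangeContainmentChecker additionally exploits the fact that the compaction iterator emits partitions in token order, so it can walk the sorted ranges once instead of scanning them per partition. Here is a simplified, hedged sketch of the routing idea over plain long tokens; RangeRouter and LongRange are our names, and the linear scan ignores that ordering optimization:

import java.util.List;

final class RangeRouter {
    // (left, right] half-open intervals, matching Cassandra's Range semantics;
    // assumed sorted and non-wrapping. Requires Java 16+ for records.
    record LongRange(long left, long right) {
        boolean contains(long token) { return token > left && token <= right; }
    }

    private final List<LongRange> repairedRanges;

    RangeRouter(List<LongRange> repairedRanges) { this.repairedRanges = repairedRanges; }

    // Decides which writer a partition with this token should go to.
    boolean isRepaired(long token) {
        for (LongRange r : repairedRanges)
            if (r.contains(token)) return true;
        return false;
    }
}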

Example 8 with Range

Use of org.apache.cassandra.dht.Range in the Apache Cassandra project.

From class ViewBuilder, method run:

public void run() {
    logger.trace("Running view builder for {}.{}", baseCfs.metadata.keyspace, view.name);
    UUID localHostId = SystemKeyspace.getLocalHostId();
    String ksname = baseCfs.metadata.keyspace, viewName = view.name;
    if (SystemKeyspace.isViewBuilt(ksname, viewName)) {
        if (!SystemKeyspace.isViewStatusReplicated(ksname, viewName))
            updateDistributed(ksname, viewName, localHostId);
        return;
    }
    Iterable<Range<Token>> ranges = StorageService.instance.getLocalRanges(baseCfs.metadata.keyspace);
    final Pair<Integer, Token> buildStatus = SystemKeyspace.getViewBuildStatus(ksname, viewName);
    Token lastToken;
    Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>> function;
    if (buildStatus == null) {
        baseCfs.forceBlockingFlush();
        function = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL);
        int generation = Integer.MIN_VALUE;
        try (Refs<SSTableReader> temp = baseCfs.selectAndReference(function).refs) {
            for (SSTableReader reader : temp) {
                generation = Math.max(reader.descriptor.generation, generation);
            }
        }
        SystemKeyspace.beginViewBuild(ksname, viewName, generation);
        lastToken = null;
    } else {
        function = new Function<org.apache.cassandra.db.lifecycle.View, Iterable<SSTableReader>>() {

            @Nullable
            public Iterable<SSTableReader> apply(org.apache.cassandra.db.lifecycle.View view) {
                Iterable<SSTableReader> readers = org.apache.cassandra.db.lifecycle.View.selectFunction(SSTableSet.CANONICAL).apply(view);
                if (readers != null)
                    return Iterables.filter(readers, ssTableReader -> ssTableReader.descriptor.generation <= buildStatus.left);
                return null;
            }
        };
        lastToken = buildStatus.right;
    }
    prevToken = lastToken;
    try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(function).refs;
        ReducingKeyIterator iter = new ReducingKeyIterator(sstables)) {
        SystemDistributedKeyspace.startViewBuild(ksname, viewName, localHostId);
        while (!isStopped && iter.hasNext()) {
            DecoratedKey key = iter.next();
            Token token = key.getToken();
            if (lastToken == null || lastToken.compareTo(token) < 0) {
                for (Range<Token> range : ranges) {
                    if (range.contains(token)) {
                        buildKey(key);
                        if (prevToken == null || prevToken.compareTo(token) != 0) {
                            SystemKeyspace.updateViewBuildStatus(ksname, viewName, key.getToken());
                            prevToken = token;
                        }
                    }
                }
                lastToken = null;
            }
        }
        if (!isStopped) {
            SystemKeyspace.finishViewBuildStatus(ksname, viewName);
            updateDistributed(ksname, viewName, localHostId);
        }
    } catch (Exception e) {
        ScheduledExecutors.nonPeriodicTasks.schedule(() -> CompactionManager.instance.submitViewBuilder(this), 5, TimeUnit.MINUTES);
        logger.warn("Materialized View failed to complete, sleeping 5 minutes before restarting", e);
    }
}
Also used: Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) ReducingKeyIterator(org.apache.cassandra.io.sstable.ReducingKeyIterator) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) UUID(java.util.UUID) org.apache.cassandra.db(org.apache.cassandra.db) Nullable(javax.annotation.Nullable)
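The buildStatus branching above is a checkpoint/resume protocol: a fresh build records the highest sstable generation and starts from the first token, while a restarted build filters out sstables newer than that generation and skips every key at or before the saved token, persisting progress as it goes. A minimal sketch of the pattern, with longs standing in for Tokens; ResumableBuilder, Checkpoint, and process are hypothetical names, not Cassandra APIs.

import java.util.Iterator;

final class ResumableBuilder {
    interface Checkpoint {
        Long load();            // null means a fresh build
        void save(long token);  // persist progress, like SystemKeyspace.updateViewBuildStatus
    }

    void build(Iterator<Long> tokensInOrder, Checkpoint checkpoint) {
        Long lastToken = checkpoint.load();
        while (tokensInOrder.hasNext()) {
            long token = tokensInOrder.next();
            // Skip everything at or before the checkpoint, then process and
            // record progress; once past the checkpoint, stop comparing.
            if (lastToken == null || lastToken.compareTo(token) < 0) {
                process(token);
                checkpoint.save(token);
                lastToken = null;
            }
        }
    }

    private void process(long token) { /* buildKey(...) in the real code */ }
}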

Example 9 with Range

Use of org.apache.cassandra.dht.Range in the Apache Cassandra project.

From class RepairRunnable, method runMayThrow:

protected void runMayThrow() throws Exception {
    final TraceState traceState;
    final UUID parentSession = UUIDGen.getTimeUUID();
    final String tag = "repair:" + cmd;
    final AtomicInteger progress = new AtomicInteger();
    // four fixed steps (get valid column families, calculate neighbors, validate, prepare for repair) plus one step per range to repair
    final int totalProgress = 4 + options.getRanges().size();
    String[] columnFamilies = options.getColumnFamilies().toArray(new String[options.getColumnFamilies().size()]);
    Iterable<ColumnFamilyStore> validColumnFamilies;
    try {
        validColumnFamilies = storageService.getValidColumnFamilies(false, false, keyspace, columnFamilies);
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        logger.error("Repair failed:", e);
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    final long startTime = System.currentTimeMillis();
    String message = String.format("Starting repair command #%d (%s), repairing keyspace %s with %s", cmd, parentSession, keyspace, options);
    logger.info(message);
    if (options.isTraced()) {
        StringBuilder cfsb = new StringBuilder();
        for (ColumnFamilyStore cfs : validColumnFamilies) cfsb.append(", ").append(cfs.keyspace.getName()).append(".").append(cfs.name);
        UUID sessionId = Tracing.instance.newSession(Tracing.TraceType.REPAIR);
        traceState = Tracing.instance.begin("repair", ImmutableMap.of("keyspace", keyspace, "columnFamilies", cfsb.substring(2)));
        message = message + " tracing with " + sessionId;
        fireProgressEvent(tag, new ProgressEvent(ProgressEventType.START, 0, 100, message));
        Tracing.traceRepair(message);
        traceState.enableActivityNotification(tag);
        for (ProgressListener listener : listeners) traceState.addProgressListener(listener);
        Thread queryThread = createQueryThread(cmd, sessionId);
        queryThread.setName("RepairTracePolling");
        queryThread.start();
    } else {
        fireProgressEvent(tag, new ProgressEvent(ProgressEventType.START, 0, 100, message));
        traceState = null;
    }
    final Set<InetAddress> allNeighbors = new HashSet<>();
    List<Pair<Set<InetAddress>, ? extends Collection<Range<Token>>>> commonRanges = new ArrayList<>();
    //pre-calculate output of getLocalRanges and pass it to getNeighbors to increase performance and prevent
    //calculation multiple times
    Collection<Range<Token>> keyspaceLocalRanges = storageService.getLocalRanges(keyspace);
    try {
        for (Range<Token> range : options.getRanges()) {
            Set<InetAddress> neighbors = ActiveRepairService.getNeighbors(keyspace, keyspaceLocalRanges, range, options.getDataCenters(), options.getHosts());
            addRangeToNeighbors(commonRanges, range, neighbors);
            allNeighbors.addAll(neighbors);
        }
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        logger.error("Repair failed:", e);
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    // Validate columnfamilies
    List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>();
    try {
        Iterables.addAll(columnFamilyStores, validColumnFamilies);
        progress.incrementAndGet();
    } catch (IllegalArgumentException e) {
        fireErrorAndComplete(tag, progress.get(), totalProgress, e.getMessage());
        return;
    }
    String[] cfnames = new String[columnFamilyStores.size()];
    for (int i = 0; i < columnFamilyStores.size(); i++) {
        cfnames[i] = columnFamilyStores.get(i).name;
    }
    SystemDistributedKeyspace.startParentRepair(parentSession, keyspace, cfnames, options);
    long repairedAt;
    try {
        ActiveRepairService.instance.prepareForRepair(parentSession, FBUtilities.getBroadcastAddress(), allNeighbors, options, columnFamilyStores);
        repairedAt = ActiveRepairService.instance.getParentRepairSession(parentSession).getRepairedAt();
        progress.incrementAndGet();
    } catch (Throwable t) {
        SystemDistributedKeyspace.failParentRepair(parentSession, t);
        fireErrorAndComplete(tag, progress.get(), totalProgress, t.getMessage());
        return;
    }
    if (options.isIncremental()) {
        consistentRepair(parentSession, repairedAt, startTime, traceState, allNeighbors, commonRanges, cfnames);
    } else {
        normalRepair(parentSession, startTime, traceState, allNeighbors, commonRanges, cfnames);
    }
}
Also used: Token(org.apache.cassandra.dht.Token) ProgressEvent(org.apache.cassandra.utils.progress.ProgressEvent) Pair(org.apache.cassandra.utils.Pair) TraceState(org.apache.cassandra.tracing.TraceState) Range(org.apache.cassandra.dht.Range) ProgressListener(org.apache.cassandra.utils.progress.ProgressListener) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) InetAddress(java.net.InetAddress)
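A step worth calling out is addRangeToNeighbors, invoked in the loop above: ranges that resolve to an identical neighbor set are grouped into one commonRanges entry, so they can later share a repair session instead of spawning one per range. A hedged sketch of that grouping, using String hostnames in place of InetAddress and java.util.AbstractMap.SimpleEntry in place of Cassandra's Pair; CommonRangeGrouping is our name:

import java.util.*;

final class CommonRangeGrouping {
    // Groups ranges by equality of their replica (neighbor) sets.
    static <R> List<Map.Entry<Set<String>, List<R>>> group(Map<R, Set<String>> neighborsByRange) {
        List<Map.Entry<Set<String>, List<R>>> commonRanges = new ArrayList<>();
        for (Map.Entry<R, Set<String>> e : neighborsByRange.entrySet()) {
            boolean merged = false;
            for (Map.Entry<Set<String>, List<R>> entry : commonRanges) {
                if (entry.getKey().equals(e.getValue())) {
                    entry.getValue().add(e.getKey()); // same neighbors: reuse the group
                    merged = true;
                    break;
                }
            }
            if (!merged) {
                List<R> ranges = new ArrayList<>();
                ranges.add(e.getKey());
                commonRanges.add(new AbstractMap.SimpleEntry<>(e.getValue(), ranges));
            }
        }
        return commonRanges;
    }
}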

Example 10 with Range

Use of org.apache.cassandra.dht.Range in the Apache Cassandra project.

From class PendingRangeMaps, method pendingEndpointsFor:

public Collection<InetAddress> pendingEndpointsFor(Token token) {
    Set<InetAddress> endpoints = new HashSet<>();
    Range<Token> searchRange = new Range<>(token, token);
    // search for non-wrap-around maps
    NavigableMap<Range<Token>, List<InetAddress>> ascendingTailMap = ascendingMap.tailMap(searchRange, true);
    NavigableMap<Range<Token>, List<InetAddress>> descendingTailMap = descendingMap.tailMap(searchRange, false);
    // add intersections of two maps
    if (ascendingTailMap.size() < descendingTailMap.size()) {
        addIntersections(endpoints, ascendingTailMap, descendingTailMap);
    } else {
        addIntersections(endpoints, descendingTailMap, ascendingTailMap);
    }
    // search for wrap-around sets
    ascendingTailMap = ascendingMapForWrapAround.tailMap(searchRange, true);
    descendingTailMap = descendingMapForWrapAround.tailMap(searchRange, false);
    // every wrap-around range matched by either tail map contains the token, so add them all.
    for (Map.Entry<Range<Token>, List<InetAddress>> entry : ascendingTailMap.entrySet()) {
        endpoints.addAll(entry.getValue());
    }
    for (Map.Entry<Range<Token>, List<InetAddress>> entry : descendingTailMap.entrySet()) {
        endpoints.addAll(entry.getValue());
    }
    return endpoints;
}
Also used: Range(org.apache.cassandra.dht.Range) InetAddress(java.net.InetAddress)
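The two-map layout is what makes this lookup cheap. ascendingMap sorts ranges by right bound ascending and descendingMap by left bound descending, so for non-wrapping ranges the first tailMap yields ranges with right >= token, the second yields ranges with left < token, and their intersection is exactly the ranges containing the token; wrap-around ranges, handled separately above, contain the token whenever they match either view. A hedged sketch of the non-wrapping case over plain long tokens; TwoMapRangeIndex is our name, and for brevity it assumes each range has distinct bounds:

import java.util.*;

final class TwoMapRangeIndex {
    record LongRange(long left, long right) {} // (left, right], non-wrapping

    // Keyed by right bound ascending: tailMap(token, true) = ranges with right >= token.
    private final NavigableMap<Long, LongRange> byRightAsc = new TreeMap<>();
    // Keyed by left bound descending: tailMap(token, false) = ranges with left < token.
    private final NavigableMap<Long, LongRange> byLeftDesc = new TreeMap<>(Comparator.reverseOrder());

    void add(LongRange r) {
        byRightAsc.put(r.right(), r);
        byLeftDesc.put(r.left(), r);
    }

    Set<LongRange> containing(long token) {
        // left < token <= right holds exactly for ranges present in both tail views.
        Set<LongRange> result = new HashSet<>(byRightAsc.tailMap(token, true).values());
        result.retainAll(byLeftDesc.tailMap(token, false).values());
        return result;
    }
}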

Aggregations

Range (org.apache.cassandra.dht.Range): 120
Token (org.apache.cassandra.dht.Token): 74
Test (org.junit.Test): 63
InetAddress (java.net.InetAddress): 35
SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader): 28
TokenMetadata (org.apache.cassandra.locator.TokenMetadata): 17
TreeRange (org.apache.cassandra.utils.MerkleTree.TreeRange): 17
IPartitioner (org.apache.cassandra.dht.IPartitioner): 14
UUID (java.util.UUID): 12
ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore): 12
StringToken (org.apache.cassandra.dht.OrderPreservingPartitioner.StringToken): 9
IOException (java.io.IOException): 8
ArrayList (java.util.ArrayList): 8
LifecycleTransaction (org.apache.cassandra.db.lifecycle.LifecycleTransaction): 8
BigIntegerToken (org.apache.cassandra.dht.RandomPartitioner.BigIntegerToken): 8
KeyspaceMetadata (org.apache.cassandra.schema.KeyspaceMetadata): 8
LongToken (org.apache.cassandra.dht.Murmur3Partitioner.LongToken): 7
BigInteger (java.math.BigInteger): 6
HashSet (java.util.HashSet): 6
BytesToken (org.apache.cassandra.dht.ByteOrderedPartitioner.BytesToken): 6