Search in sources :

Example 91 with SSTableReader

use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.

the class StreamTransferTaskTest method testScheduleTimeout.

/**
 * Checks how {@code StreamTransferTask.scheduleTimeout} behaves for a task that transfers two sstables:
 * a timeout scheduled with no delay can be waited on, a timeout whose file is completed first is
 * cancelled, and no timeout is scheduled any more once all transfers are done.
 *
 * @throws Exception if waiting on a scheduled timeout fails unexpectedly
 */
@Test
public void testScheduleTimeout() throws Exception {
    InetAddress peer = FBUtilities.getBroadcastAddress();
    StreamSession session = new StreamSession(peer, peer, null, 0, true, false, null);
    ColumnFamilyStore cfs = Keyspace.open(KEYSPACE1).getColumnFamilyStore(CF_STANDARD);
    // create two sstables
    for (int i = 0; i < 2; i++) {
        SchemaLoader.insertData(KEYSPACE1, CF_STANDARD, i, 1);
        cfs.forceBlockingFlush();
    }
    // create streaming task that streams those two sstables
    StreamTransferTask task = new StreamTransferTask(session, cfs.metadata.id);
    for (SSTableReader sstable : cfs.getLiveSSTables()) {
        List<Range<Token>> ranges = new ArrayList<>();
        ranges.add(new Range<>(sstable.first.getToken(), sstable.last.getToken()));
        task.addTransferFile(sstable.selfRef(), 1, sstable.getPositionsForRanges(ranges), 0);
    }
    assertEquals(2, task.getTotalNumberOfFiles());
    // schedule the timeout of the first file with no delay and wait for it to finish;
    // get() must return normally here
    Future<?> f = task.scheduleTimeout(0, 0, TimeUnit.NANOSECONDS);
    f.get();
    // schedule the timeout of the second file, then complete that file before the timeout fires:
    // the pending timeout is expected to be cancelled, so get() must throw
    f = task.scheduleTimeout(1, 10, TimeUnit.MILLISECONDS);
    task.complete(1);
    try {
        f.get();
        Assert.fail("Expected the timeout of an already completed file to be cancelled");
    } catch (CancellationException expected) {
        // expected: completing the file cancels its timeout
    }
    assertEquals(StreamSession.State.WAIT_COMPLETE, session.state());
    // when all streaming are done, time out task should not be scheduled.
    assertNull(task.scheduleTimeout(1, 1, TimeUnit.SECONDS));
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) CancellationException(java.util.concurrent.CancellationException) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) Range(org.apache.cassandra.dht.Range) InetAddress(java.net.InetAddress) Test(org.junit.Test)

Example 92 with SSTableReader

use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.

the class CompactionStress method initCf.

/**
 * Creates the offline table described by the stress profile and, on request, registers the sstables
 * already present in its data directories.
 *
 * @param stressProfile profile supplying the token seed and the CREATE TABLE statement
 * @param loadSSTables  when {@code true}, every listed sstable that has a data component is opened
 *                      without validation and added to the returned store; sstables that fail to open
 *                      are reported on stderr and skipped
 * @return the offline column family store
 */
ColumnFamilyStore initCf(StressProfile stressProfile, boolean loadSSTables) {
    generateTokens(stressProfile.seedStr, StorageService.instance.getTokenMetadata(), numTokens);
    CreateTableStatement.RawStatement tableDefinition = stressProfile.getCreateStatement();
    List<File> dataDirs = getDataDirectories();
    ColumnFamilyStore cfs = StressCQLSSTableWriter.Builder.createOfflineTable(tableDefinition, Collections.EMPTY_LIST, dataDirs);
    if (!loadSSTables)
        return cfs;

    Directories.SSTableLister lister = cfs.getDirectories().sstableLister(Directories.OnTxnErr.IGNORE).skipTemporary(true);
    List<SSTableReader> opened = new ArrayList<>();
    // Open each listed sstable offline; entries without a data component are ignored
    for (Map.Entry<Descriptor, Set<Component>> listed : lister.list().entrySet()) {
        Set<Component> parts = listed.getValue();
        if (parts.contains(Component.DATA)) {
            try {
                opened.add(SSTableReader.openNoValidation(listed.getKey(), parts, cfs));
            } catch (Exception e) {
                // best effort: report the broken sstable and keep loading the others
                JVMStabilityInspector.inspectThrowable(e);
                System.err.println(String.format("Error Loading %s: %s", listed.getKey(), e.getMessage()));
            }
        }
    }
    cfs.disableAutoCompaction();
    // Hand the opened sstables over to the store
    cfs.addSSTables(opened);
    return cfs;
}
Also used : CreateTableStatement(org.apache.cassandra.cql3.statements.CreateTableStatement) Directories(org.apache.cassandra.db.Directories) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Descriptor(org.apache.cassandra.io.sstable.Descriptor) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) Component(org.apache.cassandra.io.sstable.Component) File(java.io.File)

Example 93 with SSTableReader

use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.

the class AbstractCompactionStrategy method groupSSTablesForAntiCompaction.

/**
 * Partitions the given sstables into the groups that anti-compaction should process together.
 * The sstables are ordered with {@code SSTableReader.sstableComparator} and then cut into consecutive
 * groups of two; an odd sstable left at the end forms a group of its own.
 * A compaction strategy that creates sstables which cannot be merged due to some constraint
 * must override this method.
 *
 * @param sstablesToGroup the sstables to split up; the collection itself is not modified
 * @return the groups, in comparator order
 */
public Collection<Collection<SSTableReader>> groupSSTablesForAntiCompaction(Collection<SSTableReader> sstablesToGroup) {
    final int groupSize = 2;
    List<SSTableReader> ordered = new ArrayList<>(sstablesToGroup);
    Collections.sort(ordered, SSTableReader.sstableComparator);
    Collection<Collection<SSTableReader>> groups = new ArrayList<>();
    for (int start = 0; start < ordered.size(); start += groupSize) {
        int end = Math.min(start + groupSize, ordered.size());
        // copy the slice so every group is an independent list rather than a view of 'ordered'
        Collection<SSTableReader> group = new ArrayList<>(ordered.subList(start, end));
        groups.add(group);
    }
    return groups;
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader)

Example 94 with SSTableReader

use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.

the class CompactionController method getPurgeEvaluator.

/**
 * Builds the predicate that decides whether a tombstone of the given partition may be purged.
 * <p>
 * A tombstone is purgeable when its deletion timestamp is lower than the minimum timestamp of every
 * overlapping sstable (as reported by the overlap iterator, i.e. not part of this compaction) and
 * every memtable that may contain the partition. This means there isn't any data in those sources
 * that might still need to be suppressed by a tombstone at this timestamp.
 *
 * @param key the partition whose tombstones are being evaluated
 * @return a predicate over deletion timestamps: always {@code false} when purging is disabled or the
 *         compaction is not on repaired data, always {@code true} when no other source may contain
 *         the partition, otherwise {@code time < minTimestamp} of the sources found
 */
public Predicate<Long> getPurgeEvaluator(DecoratedKey key) {
    if (NEVER_PURGE_TOMBSTONES || !compactingRepaired())
        return time -> false;
    overlapIterator.update(key);
    Set<SSTableReader> filteredSSTables = overlapIterator.overlaps();
    Iterable<Memtable> memtables = cfs.getTracker().getView().getAllMemtables();
    long minTimestampSeen = Long.MAX_VALUE;
    boolean hasTimestamp = false;
    for (SSTableReader sstable : filteredSSTables) {
        // If we don't have a real bloom filter (AlwaysPresentFilter), we check the index file instead.
        // The index answer must then be used on its own: an always-present filter reports every key as
        // present, so OR-ing it with isPresent() would throw the index lookup result away and make
        // every overlapping sstable without a bloom filter block the purge.
        boolean mayContainKey = sstable.getBloomFilter() instanceof AlwaysPresentFilter
                ? sstable.getPosition(key, SSTableReader.Operator.EQ, false) != null
                : sstable.getBloomFilter().isPresent(key);
        if (mayContainKey) {
            minTimestampSeen = Math.min(minTimestampSeen, sstable.getMinTimestamp());
            hasTimestamp = true;
        }
    }
    for (Memtable memtable : memtables) {
        Partition partition = memtable.getPartition(key);
        if (partition != null) {
            minTimestampSeen = Math.min(minTimestampSeen, partition.stats().minTimestamp);
            hasTimestamp = true;
        }
    }
    // no other source may hold the partition: nothing can be shadowed, so everything is purgeable
    if (!hasTimestamp)
        return time -> true;
    // lambdas can only capture effectively final locals
    final long finalTimestamp = minTimestampSeen;
    return time -> time < finalTimestamp;
}
Also used : java.util(java.util) Iterables(com.google.common.collect.Iterables) Logger(org.slf4j.Logger) OverlapIterator(org.apache.cassandra.utils.OverlapIterator) Predicate(java.util.function.Predicate) TombstoneOption(org.apache.cassandra.schema.CompactionParams.TombstoneOption) LoggerFactory(org.slf4j.LoggerFactory) org.apache.cassandra.db(org.apache.cassandra.db) RateLimiter(com.google.common.util.concurrent.RateLimiter) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Partition(org.apache.cassandra.db.partitions.Partition) FileDataInput(org.apache.cassandra.io.util.FileDataInput) UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) FileUtils(org.apache.cassandra.io.util.FileUtils) SSTableIntervalTree.buildIntervals(org.apache.cassandra.db.lifecycle.SSTableIntervalTree.buildIntervals) Memtable(org.apache.cassandra.db.Memtable) Predicates(com.google.common.base.Predicates) Refs(org.apache.cassandra.utils.concurrent.Refs) AlwaysPresentFilter(org.apache.cassandra.utils.AlwaysPresentFilter) Partition(org.apache.cassandra.db.partitions.Partition) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) AlwaysPresentFilter(org.apache.cassandra.utils.AlwaysPresentFilter) Memtable(org.apache.cassandra.db.Memtable)

Example 95 with SSTableReader

use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.

the class SinglePartitionReadCommand method queryMemtableAndDiskInternal.

/**
 * Builds the row iterator for this command's partition from the memtables and live sstables of the
 * given store.
 * <p>
 * Name queries that do not touch multi-cell types are delegated to
 * {@code queryMemtableAndSSTablesInTimestampOrder}. Otherwise one iterator is collected per memtable
 * containing the partition and per sstable that is not skipped, and the collected iterators are
 * handed to {@code withSSTablesIterated}. As a side effect {@code oldestUnrepairedTombstone} is
 * lowered to the smallest local deletion time seen in memtables and unrepaired sstables.
 *
 * @param cfs the store to read from
 * @return the combined iterator, or an empty iterator when no source contributed
 * @throws RuntimeException rethrown after closing the iterators collected so far; a failure while
 *         closing is attached as a suppressed exception (the same applies to {@code Error})
 */
private UnfilteredRowIterator queryMemtableAndDiskInternal(ColumnFamilyStore cfs) {
    /*
     * We have 2 main strategies:
     *   1) We query memtables and sstables simultaneously. This is our most generic strategy and the one we use
     *      unless we have a names filter that we know we can optimize further.
     *   2) If we have a name filter (so we query specific rows), we can make a bet: that all column for all queried row
     *      will have data in the most recent sstable(s), thus saving us from reading older ones. This does imply we
     *      have a way to guarantee we have all the data for what is queried, which is only possible for name queries
     *      and if we have neither non-frozen collections/UDTs nor counters (indeed, for a non-frozen collection or UDT,
     *      we can't guarantee an older sstable won't have some elements that weren't in the most recent sstables,
     *      and counters are intrinsically a collection of shards and so have the same problem).
     */
    if (clusteringIndexFilter() instanceof ClusteringIndexNamesFilter && !queriesMulticellType())
        return queryMemtableAndSSTablesInTimestampOrder(cfs, (ClusteringIndexNamesFilter) clusteringIndexFilter());
    Tracing.trace("Acquiring sstable references");
    ColumnFamilyStore.ViewFragment view = cfs.select(View.select(SSTableSet.LIVE, partitionKey()));
    // sized for the worst case: one iterator per memtable and per sstable of the view
    List<UnfilteredRowIterator> iterators = new ArrayList<>(Iterables.size(view.memtables) + view.sstables.size());
    ClusteringIndexFilter filter = clusteringIndexFilter();
    // smallest min timestamp among the sources read so far; used below to decide which skipped
    // sstables with tombstones still have to be read
    long minTimestamp = Long.MAX_VALUE;
    try {
        for (Memtable memtable : view.memtables) {
            Partition partition = memtable.getPartition(partitionKey());
            if (partition == null)
                continue;
            minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp());
            // 'iter' is added to iterators which is closed on exception, or through the closing of the final merged iterator
            @SuppressWarnings("resource") UnfilteredRowIterator iter = filter.getUnfilteredRowIterator(columnFilter(), partition);
            oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, partition.stats().minLocalDeletionTime);
            iterators.add(iter);
        }
        /*
         * We can't eliminate full sstables based on the timestamp of what we've already read like
         * in collectTimeOrderedData, but we still want to eliminate sstable whose maxTimestamp < mostRecentTombstone
         * we've read. We still rely on the sstable ordering by maxTimestamp since if
         *   maxTimestamp_s1 > maxTimestamp_s0,
         * we're guaranteed that s1 cannot have a row tombstone such that
         *   timestamp(tombstone) > maxTimestamp_s0
         * since we necessarily have
         *   timestamp(tombstone) <= maxTimestamp_s1
         * In other words, iterating in maxTimestamp order allow to do our mostRecentPartitionTombstone elimination
         * in one pass, and minimize the number of sstables for which we read a partition tombstone.
         */
        // NOTE(review): the early 'break' below only works if this comparator orders newest
        // maxTimestamp first - confirm against SSTableReader.maxTimestampComparator
        Collections.sort(view.sstables, SSTableReader.maxTimestampComparator);
        long mostRecentPartitionTombstone = Long.MIN_VALUE;
        int nonIntersectingSSTables = 0;
        // lazily created: sstables excluded by shouldInclude() that nevertheless carry tombstones
        List<SSTableReader> skippedSSTablesWithTombstones = null;
        for (SSTableReader sstable : view.sstables) {
            // if we've already seen a partition tombstone with a timestamp greater
            // than the most recent update to this sstable, we can skip it
            // (and, given the sort order, every sstable after it - hence 'break')
            if (sstable.getMaxTimestamp() < mostRecentPartitionTombstone)
                break;
            if (!shouldInclude(sstable)) {
                nonIntersectingSSTables++;
                if (sstable.hasTombstones()) {
                    // if sstable has tombstones we need to check after one pass if it can be safely skipped
                    if (skippedSSTablesWithTombstones == null)
                        skippedSSTablesWithTombstones = new ArrayList<>();
                    skippedSSTablesWithTombstones.add(sstable);
                }
                continue;
            }
            minTimestamp = Math.min(minTimestamp, sstable.getMinTimestamp());
            // 'iter' is added to iterators which is closed on exception,
            // or through the closing of the final merged iterator
            @SuppressWarnings("resource") UnfilteredRowIteratorWithLowerBound
            iter = makeIterator(cfs, sstable);
            if (!sstable.isRepaired())
                oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, sstable.getMinLocalDeletionTime());
            iterators.add(iter);
            mostRecentPartitionTombstone = Math.max(mostRecentPartitionTombstone, iter.partitionLevelDeletion().markedForDeleteAt());
        }
        int includedDueToTombstones = 0;
        // Check for sstables with tombstones that are not expired
        if (skippedSSTablesWithTombstones != null) {
            for (SSTableReader sstable : skippedSSTablesWithTombstones) {
                // nothing in this sstable is newer than the oldest data read, so it stays skipped
                if (sstable.getMaxTimestamp() <= minTimestamp)
                    continue;
                // 'iter' is added to iterators which is closed on exception,
                // or through the closing of the final merged iterator
                @SuppressWarnings("resource") UnfilteredRowIteratorWithLowerBound
                iter = makeIterator(cfs, sstable);
                if (!sstable.isRepaired())
                    oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, sstable.getMinLocalDeletionTime());
                iterators.add(iter);
                includedDueToTombstones++;
            }
        }
        if (Tracing.isTracing())
            Tracing.trace("Skipped {}/{} non-slice-intersecting sstables, included {} due to tombstones", nonIntersectingSSTables, view.sstables.size(), includedDueToTombstones);
        if (iterators.isEmpty())
            return EmptyIterators.unfilteredRow(cfs.metadata(), partitionKey(), filter.isReversed());
        StorageHook.instance.reportRead(cfs.metadata().id, partitionKey());
        return withSSTablesIterated(iterators, cfs.metric);
    } catch (RuntimeException | Error e) {
        // release every iterator opened so far; a failure while closing must not hide the original error
        try {
            FBUtilities.closeAll(iterators);
        } catch (Exception suppressed) {
            e.addSuppressed(suppressed);
        }
        throw e;
    }
}
Also used : RequestExecutionException(org.apache.cassandra.exceptions.RequestExecutionException) IOException(java.io.IOException) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader)

Aggregations

SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader)289 Test (org.junit.Test)159 ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore)91 LifecycleTransaction (org.apache.cassandra.db.lifecycle.LifecycleTransaction)55 Keyspace (org.apache.cassandra.db.Keyspace)49 File (java.io.File)45 UUID (java.util.UUID)28 Range (org.apache.cassandra.dht.Range)28 Directories (org.apache.cassandra.db.Directories)27 Token (org.apache.cassandra.dht.Token)24 RandomAccessFile (java.io.RandomAccessFile)22 AbstractTransactionalTest (org.apache.cassandra.utils.concurrent.AbstractTransactionalTest)20 ArrayList (java.util.ArrayList)18 ByteBuffer (java.nio.ByteBuffer)17 HashSet (java.util.HashSet)16 SchemaLoader.createKeyspace (org.apache.cassandra.SchemaLoader.createKeyspace)16 DecoratedKey (org.apache.cassandra.db.DecoratedKey)16 RowUpdateBuilder (org.apache.cassandra.db.RowUpdateBuilder)16 CompactionController (org.apache.cassandra.db.compaction.CompactionController)14 CompactionIterator (org.apache.cassandra.db.compaction.CompactionIterator)13