Search in sources :

Example 1 with Memtable

use of org.apache.cassandra.db.Memtable in project cassandra by apache.

From class TrackerTest, method testMemtableReplacement:

@Test
public void testMemtableReplacement() {
    // Incremental backups would create hard-links on flush; disable them for the
    // duration of the test and restore the original setting at the end.
    boolean backups = DatabaseDescriptor.isIncrementalBackupsEnabled();
    DatabaseDescriptor.setIncrementalBackupsEnabled(false);
    ColumnFamilyStore cfs = MockSchema.newCFS(metadata -> metadata.caching(CachingParams.CACHE_KEYS));
    MockListener listener = new MockListener(false);
    Tracker tracker = cfs.getTracker();
    tracker.subscribe(listener);
    // Retire two memtables in a row: the first switch is a "renewal" (truncate-style),
    // the second a plain switch. Each retired memtable is put into discarding state
    // behind its own write-order barrier.
    Memtable prev1 = tracker.switchMemtable(true, new Memtable(new AtomicReference<>(CommitLog.instance.getCurrentPosition()), cfs));
    OpOrder.Group write1 = cfs.keyspace.writeOrder.getCurrent();
    OpOrder.Barrier barrier1 = cfs.keyspace.writeOrder.newBarrier();
    prev1.setDiscarding(barrier1, new AtomicReference<>(CommitLog.instance.getCurrentPosition()));
    barrier1.issue();
    Memtable prev2 = tracker.switchMemtable(false, new Memtable(new AtomicReference<>(CommitLog.instance.getCurrentPosition()), cfs));
    OpOrder.Group write2 = cfs.keyspace.writeOrder.getCurrent();
    OpOrder.Barrier barrier2 = cfs.keyspace.writeOrder.newBarrier();
    prev2.setDiscarding(barrier2, new AtomicReference<>(CommitLog.instance.getCurrentPosition()));
    barrier2.issue();
    Memtable cur = tracker.getView().getCurrentMemtable();
    OpOrder.Group writecur = cfs.keyspace.writeOrder.getCurrent();
    // Writes started before a barrier must still be routed to the memtable that
    // was current when they began, not to the newest one.
    Assert.assertEquals(prev1, tracker.getMemtableFor(write1, CommitLogPosition.NONE));
    Assert.assertEquals(prev2, tracker.getMemtableFor(write2, CommitLogPosition.NONE));
    Assert.assertEquals(cur, tracker.getMemtableFor(writecur, CommitLogPosition.NONE));
    // The renewal switch and the plain switch each fire exactly one notification,
    // in that order.
    Assert.assertEquals(2, listener.received.size());
    Assert.assertTrue(listener.received.get(0) instanceof MemtableRenewedNotification);
    Assert.assertTrue(listener.received.get(1) instanceof MemtableSwitchedNotification);
    listener.received.clear();
    // Mark both retired memtables as flushing; the view tracks them until replaced.
    tracker.markFlushing(prev2);
    Assert.assertEquals(1, tracker.getView().flushingMemtables.size());
    Assert.assertTrue(tracker.getView().flushingMemtables.contains(prev2));
    tracker.markFlushing(prev1);
    Assert.assertTrue(tracker.getView().flushingMemtables.contains(prev1));
    Assert.assertEquals(2, tracker.getView().flushingMemtables.size());
    // Replacing with an empty sstable list simply drops the memtable from the view.
    tracker.replaceFlushed(prev1, Collections.emptyList());
    Assert.assertEquals(1, tracker.getView().flushingMemtables.size());
    Assert.assertTrue(tracker.getView().flushingMemtables.contains(prev2));
    // Replacing with a real sstable registers it and fires discard + added notifications.
    SSTableReader reader = MockSchema.sstable(0, 10, false, cfs);
    tracker.replaceFlushed(prev2, singleton(reader));
    Assert.assertEquals(1, tracker.getView().sstables.size());
    Assert.assertEquals(2, listener.received.size());
    Assert.assertEquals(prev2, ((MemtableDiscardedNotification) listener.received.get(0)).memtable);
    Assert.assertEquals(singleton(reader), ((SSTableAddedNotification) listener.received.get(1)).added);
    Assert.assertEquals(Optional.of(prev2), ((SSTableAddedNotification) listener.received.get(1)).memtable());
    listener.received.clear();
    // CFS was created with CACHE_KEYS, so the new reader must have key caching on,
    // and the flushed sstable's size (10) must be accounted as live disk space.
    Assert.assertTrue(reader.isKeyCacheEnabled());
    Assert.assertEquals(10, cfs.metric.liveDiskSpaceUsed.getCount());
    // test invalidated CFS
    cfs = MockSchema.newCFS();
    tracker = cfs.getTracker();
    listener = new MockListener(false);
    tracker.subscribe(listener);
    prev1 = tracker.switchMemtable(false, new Memtable(new AtomicReference<>(CommitLog.instance.getCurrentPosition()), cfs));
    tracker.markFlushing(prev1);
    reader = MockSchema.sstable(0, 10, true, cfs);
    cfs.invalidate(false);
    // Flushing into an invalidated CFS must not retain the sstable or count its
    // size, and the reader is immediately scheduled for deletion.
    tracker.replaceFlushed(prev1, singleton(reader));
    Assert.assertEquals(0, tracker.getView().sstables.size());
    Assert.assertEquals(0, tracker.getView().flushingMemtables.size());
    Assert.assertEquals(0, cfs.metric.liveDiskSpaceUsed.getCount());
    // Expected notification sequence: switched, discarded, added, deleting, list-changed.
    Assert.assertEquals(5, listener.received.size());
    Assert.assertEquals(prev1, ((MemtableSwitchedNotification) listener.received.get(0)).memtable);
    Assert.assertEquals(prev1, ((MemtableDiscardedNotification) listener.received.get(1)).memtable);
    Assert.assertEquals(singleton(reader), ((SSTableAddedNotification) listener.received.get(2)).added);
    Assert.assertEquals(Optional.of(prev1), ((SSTableAddedNotification) listener.received.get(2)).memtable());
    Assert.assertTrue(listener.received.get(3) instanceof SSTableDeletingNotification);
    Assert.assertEquals(1, ((SSTableListChangedNotification) listener.received.get(4)).removed.size());
    DatabaseDescriptor.setIncrementalBackupsEnabled(backups);
}
Also used : AtomicReference(java.util.concurrent.atomic.AtomicReference) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) OpOrder(org.apache.cassandra.utils.concurrent.OpOrder) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Memtable(org.apache.cassandra.db.Memtable) Test(org.junit.Test)

Example 2 with Memtable

use of org.apache.cassandra.db.Memtable in project cassandra by apache.

From class ReadTest, method setup:

@Setup(Level.Trial)
public void setup() throws Throwable {
    // Fixed seed so every trial writes an identical data set.
    rand = new Random(1);
    CQLTester.setUpClass();
    CQLTester.prepareServer();
    System.err.println("setupClass done.");
    // durable_writes = false keeps the commit log out of the measurement.
    keyspace = createKeyspace("CREATE KEYSPACE %s with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 } and durable_writes = false");
    table = createTable(keyspace, "CREATE TABLE %s ( userid bigint, picid bigint, commentid bigint, PRIMARY KEY(userid, picid)) with compression = {'enabled': false}");
    execute("use " + keyspace + ";");
    // Intentional fall-through: both networked modes need the native transport.
    switch(async) {
        case SERIAL_NET:
        case PARALLEL_NET:
            CQLTester.requireNetwork();
            executeNet(getDefaultVersion(), "use " + keyspace + ";");
    }
    String writeStatement = "INSERT INTO " + table + "(userid,picid,commentid)VALUES(?,?,?)";
    System.err.println("Prepared, batch " + BATCH + " flush " + flush);
    System.err.println("Disk access mode " + DatabaseDescriptor.getDiskAccessMode() + " index " + DatabaseDescriptor.getIndexAccessMode());
    cfs = Keyspace.open(keyspace).getColumnFamilyStore(table);
    // Disable compaction and flush first so the benchmark starts from an empty,
    // stable memtable.
    cfs.disableAutoCompaction();
    cfs.forceBlockingFlush();
    // Warm up
    System.err.println("Writing " + count);
    long i;
    // Write in full batches, then one partial batch for the remainder.
    for (i = 0; i <= count - BATCH; i += BATCH) performWrite(writeStatement, i, BATCH);
    if (i < count)
        performWrite(writeStatement, i, count - i);
    Memtable memtable = cfs.getTracker().getView().getCurrentMemtable();
    System.err.format("Memtable in %s mode: %d ops, %s serialized bytes, %s (%.0f%%) on heap, %s (%.0f%%) off-heap\n", DatabaseDescriptor.getMemtableAllocationType(), memtable.getOperations(), FBUtilities.prettyPrintMemory(memtable.getLiveDataSize()), FBUtilities.prettyPrintMemory(memtable.getAllocator().onHeap().owns()), 100 * memtable.getAllocator().onHeap().ownershipRatio(), FBUtilities.prettyPrintMemory(memtable.getAllocator().offHeap().owns()), 100 * memtable.getAllocator().offHeap().ownershipRatio());
    // YES: read from sstables; INMEM: assert nothing flushed (falls through to the
    // empty default on purpose); other modes: leave data wherever it landed.
    switch(flush) {
        case YES:
            cfs.forceBlockingFlush();
            break;
        case INMEM:
            if (!cfs.getLiveSSTables().isEmpty())
                throw new AssertionError("SSTables created for INMEM test.");
        default:
    }
    // Needed to stabilize sstable count for off-cache sized tests (e.g. count = 100_000_000)
    while (cfs.getLiveSSTables().size() >= 15) {
        cfs.enableAutoCompaction(true);
        cfs.disableAutoCompaction();
    }
}
Also used : Random(java.util.Random) Memtable(org.apache.cassandra.db.Memtable)

Example 3 with Memtable

use of org.apache.cassandra.db.Memtable in project cassandra by apache.

From class Ballots, method latestBallotsFromPaxosMemtable:

/**
 * Reads the paxos ballot timestamps for the given key/table from every live
 * memtable of the system.paxos table.
 *
 * @param key      the partition being inspected
 * @param metadata the table whose paxos row is looked up
 * @return a 3-element array: [0] = promise, [1] = proposal, [2] = commit
 *         timestamp; a slot stays 0 when no live cell is found for it
 */
private static long[] latestBallotsFromPaxosMemtable(DecoratedKey key, TableMetadata metadata) {
    ColumnFamilyStore paxos = Keyspace.open("system").getColumnFamilyStore("paxos");
    long[] timestamps = new long[3];
    // Snapshot the memtable list so iteration is stable while we read.
    for (Memtable current : ImmutableList.copyOf(paxos.getTracker().getView().getAllMemtables())) {
        Partition partition = current.getPartition(key);
        if (partition == null)
            continue;
        Row row = partition.getRow(paxos.metadata.get().comparator.make(metadata.id));
        if (row == null)
            continue;
        // Later memtables overwrite earlier results, mirroring iteration order.
        recordLiveTimestamp(row.getCell(PROMISE), timestamps, 0);
        recordLiveTimestamp(row.getCell(PROPOSAL), timestamps, 1);
        recordLiveTimestamp(row.getCell(COMMIT), timestamps, 2);
    }
    return timestamps;
}

/** Stores the cell's timestamp at {@code slot} when the cell exists and carries a value. */
private static void recordLiveTimestamp(Cell cell, long[] out, int slot) {
    if (cell != null && cell.value() != null)
        out[slot] = cell.timestamp();
}
Also used : AbstractBTreePartition(org.apache.cassandra.db.partitions.AbstractBTreePartition) Partition(org.apache.cassandra.db.partitions.Partition) ImmutableBTreePartition(org.apache.cassandra.db.partitions.ImmutableBTreePartition) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Memtable(org.apache.cassandra.db.Memtable) Row(org.apache.cassandra.db.rows.Row) Cell(org.apache.cassandra.db.rows.Cell)

Example 4 with Memtable

use of org.apache.cassandra.db.Memtable in project cassandra by apache.

From class MemtableSizeTest, method testSize:

/**
 * Writes a known mix of partitions, partition deletions, and row deletions,
 * then checks that the memtable allocator's reported on-heap ownership is
 * within MAX_DIFFERENCE of the actual deep size measured via ObjectSizes.
 */
private void testSize() {
    try {
        // durable_writes = false keeps commit-log allocations out of the measurement.
        keyspace = createKeyspace("CREATE KEYSPACE %s with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 } and durable_writes = false");
        table = createTable(keyspace, "CREATE TABLE %s ( userid bigint, picid bigint, commentid bigint, PRIMARY KEY(userid, picid)) with compression = {'enabled': false}");
        execute("use " + keyspace + ';');
        String writeStatement = "INSERT INTO " + table + "(userid,picid,commentid)VALUES(?,?,?)";
        cfs = Keyspace.open(keyspace).getColumnFamilyStore(table);
        cfs.disableAutoCompaction();
        // Flush first so we baseline against a fresh, empty memtable.
        cfs.forceBlockingFlush();
        long deepSizeBefore = ObjectSizes.measureDeep(cfs.getTracker().getView().getCurrentMemtable());
        // NOTE(review): "\n%n" emits a blank line after the message — confirm intended.
        System.out.printf("Memtable deep size before %s\n%n", FBUtilities.prettyPrintMemory(deepSizeBefore));
        long i;
        long limit = partitions;
        System.out.println("Writing " + partitions + " partitions of " + rowsPerPartition + " rows");
        for (i = 0; i < limit; ++i) {
            for (long j = 0; j < rowsPerPartition; ++j) execute(writeStatement, i, j, i + j);
        }
        System.out.println("Deleting " + deletedPartitions + " partitions");
        limit += deletedPartitions;
        for (; i < limit; ++i) {
            // no partition exists, but we will create a tombstone
            execute("DELETE FROM " + table + " WHERE userid = ?", i);
        }
        System.out.println("Deleting " + deletedRows + " rows");
        limit += deletedRows;
        for (; i < limit; ++i) {
            // no row exists, but we will create a tombstone (and partition)
            execute("DELETE FROM " + table + " WHERE userid = ? AND picid = ?", i, 0L);
        }
        // A flush mid-test would move data out of the memtable and skew the result.
        if (!cfs.getLiveSSTables().isEmpty())
            System.out.println("Warning: " + cfs.getLiveSSTables().size() + " sstables created.");
        Memtable memtable = cfs.getTracker().getView().getCurrentMemtable();
        long actualHeap = memtable.getAllocator().onHeap().owns();
        System.out.printf("Memtable in %s mode: %d ops, %s serialized bytes, %s (%.0f%%) on heap, %s (%.0f%%) off-heap%n", DatabaseDescriptor.getMemtableAllocationType(), memtable.getOperations(), FBUtilities.prettyPrintMemory(memtable.getLiveDataSize()), FBUtilities.prettyPrintMemory(actualHeap), 100 * memtable.getAllocator().onHeap().ownershipRatio(), FBUtilities.prettyPrintMemory(memtable.getAllocator().offHeap().owns()), 100 * memtable.getAllocator().offHeap().ownershipRatio());
        long deepSizeAfter = ObjectSizes.measureDeep(memtable);
        System.out.printf("Memtable deep size %s\n%n", FBUtilities.prettyPrintMemory(deepSizeAfter));
        // The delta between the two deep measurements is what the data should occupy.
        long expectedHeap = deepSizeAfter - deepSizeBefore;
        String message = String.format("Expected heap usage close to %s, got %s.\n", FBUtilities.prettyPrintMemory(expectedHeap), FBUtilities.prettyPrintMemory(actualHeap));
        System.out.println(message);
        Assert.assertTrue(message, Math.abs(actualHeap - expectedHeap) <= MAX_DIFFERENCE);
    } catch (Throwable throwable) {
        // `throw` makes the rethrow explicit to the compiler and reader; bare
        // `Throwables.propagate(t);` as a statement looks like a swallowed error.
        throw Throwables.propagate(throwable);
    }
}
Also used : Memtable(org.apache.cassandra.db.Memtable)

Example 5 with Memtable

use of org.apache.cassandra.db.Memtable in project cassandra by apache.

From class CompactionController, method getFullyExpiredSSTables:

/**
 * Finds sstables whose contents are fully expired and can be dropped outright.
 *
 * The procedure:
 * 1. compute the global minimum timestamp across overlapping sstables with live
 *    data, compacting sstables with live data, and all memtables;
 * 2. collect compacting sstables with no live data as drop candidates;
 * 3. keep only candidates whose {@code maxTimestamp} is below that global
 *    minimum — dropping anything newer could resurrect shadowed data.
 *
 * @param cfStore the table being compacted
 * @param compacting drop candidates are taken from this set, usually the sstables in the compaction
 * @param overlapping the sstables that overlap the ones in compacting
 * @param gcBefore local deletion times before this are eligible for purge
 * @return the subset of {@code compacting} that is safe to drop entirely
 */
public static Set<SSTableReader> getFullyExpiredSSTables(ColumnFamilyStore cfStore, Iterable<SSTableReader> compacting, Iterable<SSTableReader> overlapping, int gcBefore) {
    logger.trace("Checking droppable sstables in {}", cfStore);
    if (NEVER_PURGE_TOMBSTONES || compacting == null)
        return Collections.<SSTableReader>emptySet();
    if (cfStore.getCompactionStrategyManager().onlyPurgeRepairedTombstones() && !Iterables.all(compacting, SSTableReader::isRepaired))
        return Collections.emptySet();
    long globalMinTimestamp = Long.MAX_VALUE;
    // Only overlapping sstables that still hold live (non-expired) data constrain the minimum.
    for (SSTableReader overlap : overlapping) {
        if (overlap.getSSTableMetadata().maxLocalDeletionTime >= gcBefore)
            globalMinTimestamp = Math.min(globalMinTimestamp, overlap.getMinTimestamp());
    }
    // Fully-expired compacting sstables become candidates; the rest constrain the minimum.
    List<SSTableReader> expiredCandidates = new ArrayList<>();
    for (SSTableReader sstable : compacting) {
        if (sstable.getSSTableMetadata().maxLocalDeletionTime < gcBefore)
            expiredCandidates.add(sstable);
        else
            globalMinTimestamp = Math.min(globalMinTimestamp, sstable.getMinTimestamp());
    }
    // Unflushed data counts too.
    for (Memtable memtable : cfStore.getTracker().getView().getAllMemtables())
        globalMinTimestamp = Math.min(globalMinTimestamp, memtable.getMinTimestamp());
    // globalMinTimestamp is now the lowest timestamp of any relevant sstable or
    // memtable holding a constructive value; a candidate entirely older than that
    // serves no purpose anymore and may be dropped.
    Set<SSTableReader> droppable = new HashSet<>();
    for (SSTableReader candidate : expiredCandidates) {
        if (candidate.getMaxTimestamp() < globalMinTimestamp) {
            logger.trace("Dropping expired SSTable {} (maxLocalDeletionTime={}, gcBefore={})", candidate, candidate.getSSTableMetadata().maxLocalDeletionTime, gcBefore);
            droppable.add(candidate);
        }
    }
    return droppable;
}
Also used : SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Memtable(org.apache.cassandra.db.Memtable)

Aggregations

Memtable (org.apache.cassandra.db.Memtable)8 ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore)5 SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader)4 Test (org.junit.Test)3 AbstractBTreePartition (org.apache.cassandra.db.partitions.AbstractBTreePartition)2 ImmutableBTreePartition (org.apache.cassandra.db.partitions.ImmutableBTreePartition)2 Partition (org.apache.cassandra.db.partitions.Partition)2 Random (java.util.Random)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 Cell (org.apache.cassandra.db.rows.Cell)1 Row (org.apache.cassandra.db.rows.Row)1 OpOrder (org.apache.cassandra.utils.concurrent.OpOrder)1