Search in sources :

Example 1 with MerkleTrees

use of org.apache.cassandra.utils.MerkleTrees in project cassandra by apache.

the class LocalSyncTaskTest method createInitialTree.

/**
 * Builds the starting {@link MerkleTrees} for the ranges of the given repair job.
 *
 * The trees are created with a maximum size of 2^15 per range, initialised, and then
 * {@code ensureHashInitialised()} is called on every range reported by {@code invalids()}.
 *
 * @param desc repair job description whose {@code ranges} the trees are added for
 * @return the prepared trees
 */
private MerkleTrees createInitialTree(RepairJobDesc desc) {
    // 2^15, the same value the expression (int) Math.pow(2, 15) yields
    final int maxTreeSize = 1 << 15;

    MerkleTrees trees = new MerkleTrees(partirioner);
    trees.addMerkleTrees(maxTreeSize, desc.ranges);
    trees.init();

    for (MerkleTree.TreeRange invalidRange : trees.invalids()) {
        invalidRange.ensureHashInitialised();
    }
    return trees;
}
Also used : MerkleTrees(org.apache.cassandra.utils.MerkleTrees) MerkleTree(org.apache.cassandra.utils.MerkleTree)

Example 2 with MerkleTrees

use of org.apache.cassandra.utils.MerkleTrees in project cassandra by apache.

the class ValidationManager method doValidation.

/**
 * Performs a readonly "compaction" of all sstables in order to validate complete rows,
 * but without writing the merge result.
 *
 * Every partition produced by the validation iterator is fed to the validator; the table's
 * {@code bytesValidated} and {@code partitionsValidated} metrics are updated afterwards,
 * whether or not the validation completed normally.
 *
 * @param cfs       the table being validated; nothing is done when {@code cfs.isValid()} is false
 * @param validator receives the Merkle trees via {@code prepare}, then each partition, then {@code complete}
 */
@SuppressWarnings("resource")
private void doValidation(ColumnFamilyStore cfs, Validator validator) throws IOException, NoSuchRepairSessionException {
    // Skip the work entirely when the table is no longer valid.
    // NOTE(review): the remainder of the original explanation is missing from this excerpt; it
    // presumably concerns validation running concurrently with other compactions -- confirm upstream.
    if (!cfs.isValid())
        return;
    // Create Merkle trees suitable to hold estimated partitions for the given ranges.
    // We blindly assume that a partition is evenly distributed on all sstables for now.
    long start = nanoTime();
    long partitionCount = 0;
    long estimatedTotalBytes = 0;
    try (ValidationPartitionIterator vi = getValidationIterator(cfs.getRepairManager(), validator)) {
        MerkleTrees trees = createMerkleTrees(vi, validator.desc.ranges, cfs);
        try {
            // validate the CF as we iterate over it
            validator.prepare(cfs, trees);
            while (vi.hasNext()) {
                // each partition iterator is closed as soon as the validator has consumed it
                try (UnfilteredRowIterator partition = vi.next()) {
                    validator.add(partition);
                    partitionCount++;
                }
            }
            validator.complete();
        } finally {
            // Capture the iterator's figures while it is still open (it is closed by the outer try).
            // NOTE(review): this replaces the count accumulated in the loop above with the
            // iterator's estimate -- confirm that reporting the estimate is intended.
            estimatedTotalBytes = vi.getEstimatedBytes();
            partitionCount = vi.estimatedPartitions();
        }
    } finally {
        // metrics are recorded even if validation threw
        cfs.metric.bytesValidated.update(estimatedTotalBytes);
        cfs.metric.partitionsValidated.update(partitionCount);
    }
    if (logger.isDebugEnabled()) {
        long duration = TimeUnit.NANOSECONDS.toMillis(nanoTime() - start);
        logger.debug("Validation of {} partitions (~{}) finished in {} msec, for {}", partitionCount, FBUtilities.prettyPrintMemory(estimatedTotalBytes), duration, validator.desc);
    }
}
Also used : UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) MerkleTrees(org.apache.cassandra.utils.MerkleTrees)

Example 3 with MerkleTrees

use of org.apache.cassandra.utils.MerkleTrees in project cassandra by apache.

the class ValidatorTest method testValidatorComplete.

/**
 * Drives a {@link Validator} through prepare / add / complete for a single range and checks
 * that a successful {@code VALIDATION_RSP} carrying trees is sent out.
 */
@Test
public void testValidatorComplete() throws Throwable {
    // a single range from the partitioner's minimum token to a random token
    Range<Token> repairedRange = new Range<>(partitioner.getMinimumToken(), partitioner.getRandomToken());
    final RepairJobDesc desc = new RepairJobDesc(UUID.randomUUID(), UUID.randomUUID(), keyspace, columnFamily, Arrays.asList(repairedRange));
    final CompletableFuture<Message> sink = registerOutgoingMessageSink();
    InetAddressAndPort peer = InetAddressAndPort.getByName("127.0.0.2");
    ColumnFamilyStore store = Keyspace.open(keyspace).getColumnFamilyStore(columnFamily);
    Validator validator = new Validator(desc, peer, 0, PreviewKind.NONE);

    // 2^15, the same value the expression (int) Math.pow(2, 15) yields
    final int maxTreeSize = 1 << 15;
    MerkleTrees trees = new MerkleTrees(partitioner);
    trees.addMerkleTrees(maxTreeSize, validator.desc.ranges);
    validator.prepare(store, trees);

    // after prepare the trees must have been split
    assertTrue(trees.size() > 1);

    // feed one (empty) partition keyed at the midpoint of the range
    Token midpoint = partitioner.midpoint(repairedRange.left, repairedRange.right);
    validator.add(EmptyIterators.unfilteredRow(store.metadata(), new BufferDecoratedKey(midpoint, ByteBufferUtil.bytes("inconceivable!")), false));
    validator.complete();

    // the trees must now yield a hash for the (min, min) range
    Token minToken = trees.partitioner().getMinimumToken();
    assertNotNull(trees.hash(new Range<>(minToken, minToken)));

    // and the outgoing message must be a successful validation response for this job
    Message response = sink.get(TEST_TIMEOUT, TimeUnit.SECONDS);
    assertEquals(Verb.VALIDATION_RSP, response.verb());
    ValidationResponse payload = (ValidationResponse) response.payload;
    assertEquals(desc, payload.desc);
    assertTrue(payload.success());
    assertNotNull(payload.trees);
}
Also used : MerkleTrees(org.apache.cassandra.utils.MerkleTrees) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Message(org.apache.cassandra.net.Message) ValidationResponse(org.apache.cassandra.repair.messages.ValidationResponse) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Token(org.apache.cassandra.dht.Token) BufferDecoratedKey(org.apache.cassandra.db.BufferDecoratedKey) Range(org.apache.cassandra.dht.Range) CompactionsTest(org.apache.cassandra.db.compaction.CompactionsTest) Test(org.junit.Test)

Example 4 with MerkleTrees

use of org.apache.cassandra.utils.MerkleTrees in project cassandra by apache.

the class ValidatorTest method testSizeLimiting.

/*
 * Test for CASSANDRA-14096 size limiting. We:
 * 1. Limit the size of a repair session
 * 2. Submit a validation
 * 3. Check that the resulting tree is of limited depth
 */
@Test
public void testSizeLimiting() throws Exception {
    Keyspace ks = Keyspace.open(keyspace);
    ColumnFamilyStore cfs = ks.getColumnFamilyStore(columnFamily);
    cfs.clearUnsafe();
    // NOTE(review): this global setting is not restored at the end of the test; confirm that
    // the surrounding fixture resets it.
    DatabaseDescriptor.setRepairSessionSpaceInMiB(1);
    // disable compaction while flushing
    cfs.disableAutoCompaction();
    // 2 ** 14 rows would normally use 2^14 leaves, but with only 1 meg we should only use 2^12
    CompactionsTest.populate(keyspace, columnFamily, 0, 1 << 14, 0);
    cfs.forceBlockingFlush();
    assertEquals(1, cfs.getLiveSSTables().size());
    // wait enough to force single compaction
    TimeUnit.SECONDS.sleep(5);
    SSTableReader sstable = cfs.getLiveSSTables().iterator().next();
    UUID repairSessionId = UUIDGen.getTimeUUID();
    // validate one range spanning the first to the last token of the single sstable
    final RepairJobDesc desc = new RepairJobDesc(repairSessionId, UUIDGen.getTimeUUID(), cfs.keyspace.getName(), cfs.getTableName(), Collections.singletonList(new Range<>(sstable.first.getToken(), sstable.last.getToken())));
    InetAddressAndPort host = InetAddressAndPort.getByName("127.0.0.2");
    ActiveRepairService.instance.registerParentRepairSession(repairSessionId, host, Collections.singletonList(cfs), desc.ranges, false, ActiveRepairService.UNREPAIRED_SSTABLE, false, PreviewKind.NONE);
    final CompletableFuture<Message> outgoingMessageSink = registerOutgoingMessageSink();
    Validator validator = new Validator(desc, host, 0, true, false, PreviewKind.NONE);
    ValidationManager.instance.submitValidation(cfs, validator);
    Message message = outgoingMessageSink.get(TEST_TIMEOUT, TimeUnit.SECONDS);
    MerkleTrees trees = ((ValidationResponse) message.payload).trees;
    // every tree in the response must have been capped at 2^12
    Iterator<Map.Entry<Range<Token>, MerkleTree>> iterator = trees.iterator();
    int numTrees = 0;
    while (iterator.hasNext()) {
        assertEquals(1 << 12, iterator.next().getValue().size(), 0.0);
        numTrees++;
    }
    assertEquals(1, numTrees);
    // expected value first, so that a failure message reports expected/actual the right way round
    assertEquals(1 << 14, trees.rowCount());
}
Also used : MerkleTrees(org.apache.cassandra.utils.MerkleTrees) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Message(org.apache.cassandra.net.Message) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ValidationResponse(org.apache.cassandra.repair.messages.ValidationResponse) Keyspace(org.apache.cassandra.db.Keyspace) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) UUID(java.util.UUID) CompactionsTest(org.apache.cassandra.db.compaction.CompactionsTest) Test(org.junit.Test)

Example 5 with MerkleTrees

use of org.apache.cassandra.utils.MerkleTrees in project cassandra by apache.

the class ValidatorTest method testRangeSplittingTreeSizeLimit.

/*
 * Test for CASSANDRA-11390. When there are multiple subranges the trees should
 * automatically size down to make each subrange fit in the provided memory
 * 1. Limit the size of all the trees
 * 2. Submit a validation against more than one range
 * 3. Check that we have the right number and sizes of trees
 */
@Test
public void testRangeSplittingTreeSizeLimit() throws Exception {
    Keyspace ks = Keyspace.open(keyspace);
    ColumnFamilyStore cfs = ks.getColumnFamilyStore(columnFamily);
    cfs.clearUnsafe();
    // NOTE(review): this global setting is not restored at the end of the test; confirm that
    // the surrounding fixture resets it.
    DatabaseDescriptor.setRepairSessionSpaceInMiB(1);
    // disable compaction while flushing
    cfs.disableAutoCompaction();
    // 2 ** 14 rows would normally use 2^14 leaves, but with only 1 meg we should only use 2^12
    CompactionsTest.populate(keyspace, columnFamily, 0, 1 << 14, 0);
    cfs.forceBlockingFlush();
    assertEquals(1, cfs.getLiveSSTables().size());
    // wait enough to force single compaction
    TimeUnit.SECONDS.sleep(5);
    SSTableReader sstable = cfs.getLiveSSTables().iterator().next();
    UUID repairSessionId = UUIDGen.getTimeUUID();
    // split the sstable's full token span into subranges so that several trees are built
    List<Range<Token>> ranges = splitHelper(new Range<>(sstable.first.getToken(), sstable.last.getToken()), 2);
    final RepairJobDesc desc = new RepairJobDesc(repairSessionId, UUIDGen.getTimeUUID(), cfs.keyspace.getName(), cfs.getTableName(), ranges);
    InetAddressAndPort host = InetAddressAndPort.getByName("127.0.0.2");
    ActiveRepairService.instance.registerParentRepairSession(repairSessionId, host, Collections.singletonList(cfs), desc.ranges, false, ActiveRepairService.UNREPAIRED_SSTABLE, false, PreviewKind.NONE);
    final CompletableFuture<Message> outgoingMessageSink = registerOutgoingMessageSink();
    Validator validator = new Validator(desc, host, 0, true, false, PreviewKind.NONE);
    ValidationManager.instance.submitValidation(cfs, validator);
    Message message = outgoingMessageSink.get(TEST_TIMEOUT, TimeUnit.SECONDS);
    MerkleTrees trees = ((ValidationResponse) message.payload).trees;
    // Should have 4 trees each with a depth of on average 10 (since each range should have gotten 0.25 mebibytes)
    Iterator<Map.Entry<Range<Token>, MerkleTree>> iterator = trees.iterator();
    int numTrees = 0;
    double totalResolution = 0;
    while (iterator.hasNext()) {
        long size = iterator.next().getValue().size();
        // So it turns out that sstable range estimates are pretty variable, depending on the sampling we can
        // get a wide range of values here. So we just make sure that we're smaller than in the single range
        // case and have the right total size.
        assertTrue(size <= (1 << 11));
        assertTrue(size >= (1 << 9));
        totalResolution += size;
        numTrees += 1;
    }
    // expected value first, so that a failure message reports expected/actual the right way round
    assertEquals(1 << 14, trees.rowCount());
    assertEquals(4, numTrees);
    // With a single tree and a mebibyte we should have had a total resolution of 2^12 leaves; with
    // multiple ranges we should get similar overall resolution, but not more.
    assertTrue(totalResolution > (1 << 11) && totalResolution < (1 << 13));
}
Also used : MerkleTrees(org.apache.cassandra.utils.MerkleTrees) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Message(org.apache.cassandra.net.Message) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ValidationResponse(org.apache.cassandra.repair.messages.ValidationResponse) Keyspace(org.apache.cassandra.db.Keyspace) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) UUID(java.util.UUID) CompactionsTest(org.apache.cassandra.db.compaction.CompactionsTest) Test(org.junit.Test)

Aggregations

MerkleTrees (org.apache.cassandra.utils.MerkleTrees) 18, Test (org.junit.Test) 10, Token (org.apache.cassandra.dht.Token) 9, Range (org.apache.cassandra.dht.Range) 8, InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort) 6, ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore) 4, ValidationResponse (org.apache.cassandra.repair.messages.ValidationResponse) 4, MerkleTree (org.apache.cassandra.utils.MerkleTree) 4, UUID (java.util.UUID) 3, Keyspace (org.apache.cassandra.db.Keyspace) 3, CompactionsTest (org.apache.cassandra.db.compaction.CompactionsTest) 3, IPartitioner (org.apache.cassandra.dht.IPartitioner) 3, Message (org.apache.cassandra.net.Message) 3, SSTableReader (org.apache.cassandra.io.sstable.format.SSTableReader) 2, TreeResponse (org.apache.cassandra.repair.TreeResponse) 2, MerkleTreesTest (org.apache.cassandra.utils.MerkleTreesTest) 2, HashSet (java.util.HashSet) 1, BufferDecoratedKey (org.apache.cassandra.db.BufferDecoratedKey) 1, UnfilteredRowIterator (org.apache.cassandra.db.rows.UnfilteredRowIterator) 1, LongToken (org.apache.cassandra.dht.Murmur3Partitioner.LongToken) 1