
Example 1 with RepairResult

Use of org.apache.cassandra.distributed.shared.RepairResult in the apache/cassandra project.

From the class PreviewRepairTest, method repair.

/**
 * Returns a callable that runs a repair with the given options on the instance and
 * yields a RepairResult pair of [repair success, was inconsistent].
 */
private static IIsolatedExecutor.SerializableCallable<RepairResult> repair(Map<String, String> options) {
    return () -> {
        Condition await = newOneTimeCondition();
        AtomicBoolean success = new AtomicBoolean(true);
        AtomicBoolean wasInconsistent = new AtomicBoolean(false);
        instance.repair(KEYSPACE, options, of((tag, event) -> {
            if (event.getType() == ERROR) {
                // the repair failed; record it and stop waiting
                success.set(false);
                await.signalAll();
            } else if (event.getType() == NOTIFICATION && event.getMessage().contains("Repaired data is inconsistent")) {
                // a preview repair found a mismatch between the nodes' repaired datasets
                wasInconsistent.set(true);
            } else if (event.getType() == COMPLETE)
                await.signalAll();
        }));
        try {
            // wait for the repair to complete (or fail), but at most one minute
            await.await(1, MINUTES);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return new RepairResult(success.get(), wasInconsistent.get());
    };
}
Also used: Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition), Condition (org.apache.cassandra.utils.concurrent.Condition), AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean), RepairResult (org.apache.cassandra.distributed.shared.RepairResult)
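
For reference, RepairResult itself is not shown anywhere on this page even though every example constructs or reads it (rs.success, rs.wasInconsistent). The following is a minimal sketch of what org.apache.cassandra.distributed.shared.RepairResult looks like from the callers' point of view, inferred only from the usage above; the real class may carry more than these two flags.

import java.io.Serializable;

// Minimal sketch, inferred from usage in PreviewRepairTest; not the actual source.
public class RepairResult implements Serializable
{
    public final boolean success;          // the repair finished without an ERROR progress event
    public final boolean wasInconsistent;  // a preview repair reported "Repaired data is inconsistent"

    public RepairResult(boolean success, boolean wasInconsistent)
    {
        this.success = success;
        this.wasInconsistent = wasInconsistent;
    }
}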

Example 2 with RepairResult

Use of org.apache.cassandra.distributed.shared.RepairResult in the apache/cassandra project.

From the class PreviewRepairTest, method testFinishingIncRepairDuringPreview.

/**
 * Another case where the repaired datasets could mismatch is when an incremental repair finishes just as the
 * preview repair is starting up.
 *
 * This tests the following scenario:
 * 1. we start a preview repair
 * 2. pause the validation requests from node1 -> node2
 * 3. node1 starts its validation
 * 4. run an incremental repair which completes fine
 * 5. node2 resumes its validation
 *
 * Now we will include sstables from the second incremental repair on node2 but not on node1.
 * This should fail, since we fail any preview repair that is ongoing when an incremental repair finishes (step 4 above).
 */
@Test
public void testFinishingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        DelayFirstRepairTypeMessageFilter filter = validationRequest(previewRepairStarted, continuePreviewRepair);
        // this pauses the validation request sent from node1 to node2 until we have run a full inc repair below
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(filter).drop();
        Future<RepairResult> rsFuture = es.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false))));
        previewRepairStarted.await();
        // this needs to finish before the preview repair is unpaused on node2
        cluster.get(1).callOnInstance(repair(options(false, false)));
        continuePreviewRepair.signalAll();
        RepairResult rs = rsFuture.get();
        // preview repair should have failed
        assertFalse(rs.success);
        // and no mismatches should have been reported
        assertFalse(rs.wasInconsistent);
    } finally {
        es.shutdown();
    }
}
Also used: Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition), Condition (org.apache.cassandra.utils.concurrent.Condition), ExecutorService (java.util.concurrent.ExecutorService), Cluster (org.apache.cassandra.distributed.Cluster), RepairResult (org.apache.cassandra.distributed.shared.RepairResult), Test (org.junit.Test)
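
The DelayFirstRepairTypeMessageFilter built by validationRequest(previewRepairStarted, continuePreviewRepair) is defined elsewhere in PreviewRepairTest and is not shown on this page. As a rough sketch of the shape such a matcher can take: it never drops a message, but on the first message it sees it signals the "started" condition and then waits until the test allows it to continue. The helper name below and the simplification of not inspecting the repair message type are assumptions; the real filter also deserializes the message to check which repair message it carries.

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.cassandra.distributed.api.IMessageFilters;
import org.apache.cassandra.utils.concurrent.Condition;
import static java.util.concurrent.TimeUnit.MINUTES;

// Hypothetical sketch of a pausing matcher; returning false means the message is NOT dropped.
static IMessageFilters.Matcher pauseFirstMessage(Condition started, Condition unpause)
{
    AtomicBoolean seen = new AtomicBoolean(false);
    return (from, to, message) -> {
        if (seen.compareAndSet(false, true))
        {
            started.signalAll();
            try
            {
                unpause.await(1, MINUTES); // hold this message until the test signals unpause
            }
            catch (InterruptedException e)
            {
                throw new RuntimeException(e);
            }
        }
        return false;
    };
}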

Example 3 with RepairResult

Use of org.apache.cassandra.distributed.shared.RepairResult in the apache/cassandra project.

From the class PreviewRepairTest, method testConcurrentIncRepairDuringPreview.

/**
 * Tests the case where an incremental repair (IR) is running, but has not completed, before the validation compaction starts.
 */
@Test
public void testConcurrentIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        // this pauses the validation request sent from node1 to node2 until the inc repair below has run
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(validationRequest(previewRepairStarted, continuePreviewRepair)).drop();
        Condition irRepairStarted = newOneTimeCondition();
        Condition continueIrRepair = newOneTimeCondition();
        // this blocks the IR from committing, so we can resume the paused preview repair while the IR is still pending
        cluster.filters().outbound().verbs(FINALIZE_PROPOSE_MSG.id).from(1).to(2).messagesMatching(finalizePropose(irRepairStarted, continueIrRepair)).drop();
        Future<RepairResult> previewResult = cluster.get(1).asyncCallsOnInstance(repair(options(true, false))).call();
        previewRepairStarted.await();
        // trigger the IR and wait until it is ready to commit
        Future<RepairResult> irResult = cluster.get(1).asyncCallsOnInstance(repair(options(false, false))).call();
        irRepairStarted.await();
        // unblock preview repair and wait for it to complete
        continuePreviewRepair.signalAll();
        RepairResult rs = previewResult.get();
        // preview repair should have failed
        assertFalse(rs.success);
        // and no mismatches should have been reported
        assertFalse(rs.wasInconsistent);
        continueIrRepair.signalAll();
        RepairResult ir = irResult.get();
        assertTrue(ir.success);
        // not preview, so we don't care about preview notification
        assertFalse(ir.wasInconsistent);
    }
}
Also used: Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition), Condition (org.apache.cassandra.utils.concurrent.Condition), Cluster (org.apache.cassandra.distributed.Cluster), RepairResult (org.apache.cassandra.distributed.shared.RepairResult), Test (org.junit.Test)
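
The options(..) helper handed to repair(..) in every example above is likewise defined elsewhere in the test and not shown here. A hypothetical sketch of a two-flag helper of that shape follows; the literal keys and values are illustrative assumptions (the real test builds the map from org.apache.cassandra.repair.messages.RepairOption constants), so read it as an outline rather than the exact option map.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch: build the option map passed to instance.repair(..).
// `preview` requests a preview repair of already-repaired data; `full` requests a
// full (non-incremental) repair. Key and value strings here are assumptions.
private static Map<String, String> options(boolean preview, boolean full)
{
    Map<String, String> opts = new HashMap<>();
    opts.put("incremental", Boolean.toString(!preview && !full));
    if (preview)
        opts.put("previewKind", "REPAIRED");
    return opts;
}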

Example 4 with RepairResult

Use of org.apache.cassandra.distributed.shared.RepairResult in the apache/cassandra project.

From the class PreviewRepairTest, method testFinishingNonIntersectingIncRepairDuringPreview.

/**
 * Same as testFinishingIncRepairDuringPreview, but the previewed range does not intersect the incremental repair,
 * so both the preview and the incremental repair should finish fine (without any mismatches).
 */
@Test
public void testFinishingNonIntersectingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false))).success);
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        // pause preview repair validation messages on node2 until node1 has finished
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        DelayFirstRepairTypeMessageFilter filter = validationRequest(previewRepairStarted, continuePreviewRepair);
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(filter).drop();
        // get the local ranges so that we can repair two separate ranges:
        List<String> localRanges = cluster.get(1).callOnInstance(() -> {
            List<String> res = new ArrayList<>();
            for (Range<Token> r : instance.getLocalReplicas(KEYSPACE).ranges())
                res.add(r.left.getTokenValue() + ":" + r.right.getTokenValue());
            return res;
        });
        assertEquals(2, localRanges.size());
        Future<RepairResult> repairStatusFuture = es.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false, localRanges.get(0)))));
        // wait for node1 to start validation compaction
        previewRepairStarted.await();
        // this needs to finish before the preview repair is unpaused on node2
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false, localRanges.get(1)))).success);
        continuePreviewRepair.signalAll();
        RepairResult rs = repairStatusFuture.get();
        // repair should succeed
        assertTrue(rs.success);
        // and no mismatches
        assertFalse(rs.wasInconsistent);
    } finally {
        es.shutdown();
    }
}
Also used: Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition), Condition (org.apache.cassandra.utils.concurrent.Condition), ExecutorService (java.util.concurrent.ExecutorService), ArrayList (java.util.ArrayList), Cluster (org.apache.cassandra.distributed.Cluster), Token (org.apache.cassandra.dht.Token), RepairResult (org.apache.cassandra.distributed.shared.RepairResult), Test (org.junit.Test)
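
Example 4 additionally restricts each repair to one of the two local ranges, formatted as "startToken:endToken" by the loop above and passed through a three-argument options(..) overload. A hypothetical sketch of that overload, building on the two-argument sketch shown earlier; the "ranges" key name is an assumption.

// Hypothetical sketch: same options as before, restricted to a single
// "startToken:endToken" range string as produced in the localRanges loop above.
private static Map<String, String> options(boolean preview, boolean full, String range)
{
    Map<String, String> opts = options(preview, full);
    opts.put("ranges", range); // key name is an assumption
    return opts;
}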

Example 5 with RepairResult

Use of org.apache.cassandra.distributed.shared.RepairResult in the apache/cassandra project.

From the class PreviewRepairTest, method testWithMismatchingPending.

/**
 * Makes sure that the repaired sstables do not match on the two nodes, by disabling
 * autocompaction on node2 and then running an incremental repair.
 */
@Test
public void testWithMismatchingPending() throws Throwable {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        // make sure that all sstables have moved to repaired by triggering a compaction
        // also disables autocompaction on the nodes
        cluster.forEach((node) -> node.runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
            cfs.disableAutoCompaction();
        }));
        long[] marks = logMark(cluster);
        cluster.get(1).callOnInstance(repair(options(false, false)));
        // now re-enable autocompaction on node1; this moves the sstables from the new repair to repaired
        cluster.get(1).runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            cfs.enableAutoCompaction();
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
        });
        waitLogsRepairFullyFinished(cluster, marks);
        RepairResult rs = cluster.get(1).callOnInstance(repair(options(true, false)));
        // preview repair should succeed
        assertTrue(rs.success);
        // and we should see no mismatches
        assertFalse(rs.wasInconsistent);
    }
}
Also used: ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore), Cluster (org.apache.cassandra.distributed.Cluster), RepairResult (org.apache.cassandra.distributed.shared.RepairResult), Test (org.junit.Test)
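
logMark(cluster) and waitLogsRepairFullyFinished(cluster, marks) are shared test helpers that are not shown on this page. Conceptually, they record each node's current log position and later wait until the incremental repair's finalization has been logged past that mark on every node, so the preview that follows cannot race with the finishing repair. A very rough sketch under those assumptions; both the LogAction calls and the watched log line are assumptions rather than the helpers' actual code.

import java.util.concurrent.TimeoutException;
import org.apache.cassandra.distributed.Cluster;

// Hypothetical sketch: remember where each node's log currently ends...
static long[] logMark(Cluster cluster)
{
    long[] marks = new long[cluster.size()];
    for (int i = 1; i <= cluster.size(); i++)
        marks[i - 1] = cluster.get(i).logs().mark();
    return marks;
}

// ...then wait until a repair-finalization line shows up after that mark on every node.
static void waitLogsRepairFullyFinished(Cluster cluster, long[] marks) throws TimeoutException
{
    for (int i = 1; i <= cluster.size(); i++)
        cluster.get(i).logs().watchFor(marks[i - 1], "Finished local repair session"); // assumed log line
}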

Aggregations

RepairResult (org.apache.cassandra.distributed.shared.RepairResult): 5 usages
Cluster (org.apache.cassandra.distributed.Cluster): 4 usages
Condition (org.apache.cassandra.utils.concurrent.Condition): 4 usages
Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition): 4 usages
Test (org.junit.Test): 4 usages
ExecutorService (java.util.concurrent.ExecutorService): 2 usages
ArrayList (java.util.ArrayList): 1 usage
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 1 usage
ColumnFamilyStore (org.apache.cassandra.db.ColumnFamilyStore): 1 usage
Token (org.apache.cassandra.dht.Token): 1 usage