Search in sources :

Example 1 with Condition

use of org.apache.cassandra.utils.concurrent.Condition in project cassandra by apache.

the class PreviewRepairTest method testStartNonIntersectingPreviewRepair.

/**
 * Verifies that a preview repair of one range can be started, and reports the data as in sync, while a repair of a
 * different, non-intersecting range is paused and has left pending sstables on disk.
 */
@Test
public void testStartNonIntersectingPreviewRepair() throws IOException, InterruptedException, ExecutionException {
    ExecutorService executor = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");

        // first batch: write, flush everywhere and repair it
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));
        NodeToolResult initialRepair = cluster.get(1).nodetoolResult("repair", KEYSPACE, "tbl");
        initialRepair.asserts().success();

        // second batch: write and flush only, so it is still unrepaired
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));

        // hold back the inc repair validation request going from node1 to node2 until the preview has finished
        Condition incRepairStarted = newOneTimeCondition();
        Condition continueIncRepair = newOneTimeCondition();
        cluster.filters()
               .outbound()
               .verbs(Verb.VALIDATION_REQ.id)
               .from(1)
               .to(2)
               .messagesMatching(DelayFirstRepairTypeMessageFilter.validationRequest(incRepairStarted, continueIncRepair))
               .drop();

        // collect node1's local ranges as "left:right" strings so two separate ranges can be repaired
        List<String> localRanges = cluster.get(1).callOnInstance(() -> {
            List<String> encoded = new ArrayList<>();
            for (Range<Token> range : StorageService.instance.getLocalReplicas(KEYSPACE).ranges()) {
                encoded.add(range.left.getTokenValue() + ":" + range.right.getTokenValue());
            }
            return encoded;
        });
        assertEquals(2, localRanges.size());
        String[] previewBounds = localRanges.get(0).split(":");
        String[] repairBounds = localRanges.get(1).split(":");

        // run the repair of the second range in the background; it stalls on the delayed validation request
        Future<NodeToolResult> pendingRepair = executor.submit(() -> cluster.get(1).nodetoolResult("repair", "-st", repairBounds[0], "-et", repairBounds[1], KEYSPACE, "tbl"));
        // block until node1 has started its validation compaction
        incRepairStarted.await();

        // there are now pending sstables in the repaired range; previewing the other range must still work
        NodeToolResult preview = cluster.get(1).nodetoolResult("repair", "-vd", "-st", previewBounds[0], "-et", previewBounds[1], KEYSPACE, "tbl");
        preview.asserts().success().notificationContains("Repaired data is in sync");

        // release the paused repair and make sure it completes successfully too
        continueIncRepair.signalAll();
        pendingRepair.get().asserts().success();
    } finally {
        executor.shutdown();
    }
}
Also used : Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) Condition(org.apache.cassandra.utils.concurrent.Condition) ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) Cluster(org.apache.cassandra.distributed.Cluster) Token(org.apache.cassandra.dht.Token) NodeToolResult(org.apache.cassandra.distributed.api.NodeToolResult) Test(org.junit.Test)

Example 2 with Condition

use of org.apache.cassandra.utils.concurrent.Condition in project cassandra by apache.

the class PreviewRepairTest method repair.

/**
 * Builds a callable that starts a repair of {@code KEYSPACE} with the given options on the instance it runs on,
 * waits up to one minute for the repair to signal an ERROR or COMPLETE event, and reports the outcome.
 *
 * @param options repair options handed straight to {@code instance.repair}
 * @return a callable producing a {@link RepairResult} holding [repair success, was inconsistent]:
 *         success is false once an ERROR event was seen, and wasInconsistent is true once a NOTIFICATION
 *         containing "Repaired data is inconsistent" was seen
 * @throws IllegalStateException (from the callable) if neither ERROR nor COMPLETE arrives within one minute
 * @throws RuntimeException (from the callable) if the waiting thread is interrupted
 */
private static IIsolatedExecutor.SerializableCallable<RepairResult> repair(Map<String, String> options) {
    return () -> {
        Condition await = newOneTimeCondition();
        AtomicBoolean success = new AtomicBoolean(true);
        AtomicBoolean wasInconsistent = new AtomicBoolean(false);
        instance.repair(KEYSPACE, options, of((tag, event) -> {
            if (event.getType() == ERROR) {
                success.set(false);
                await.signalAll();
            } else if (event.getType() == NOTIFICATION && event.getMessage().contains("Repaired data is inconsistent")) {
                wasInconsistent.set(true);
            } else if (event.getType() == COMPLETE) {
                await.signalAll();
            }
        }));
        try {
            // A timed-out wait must not be reported as a result: success still holds its initial 'true',
            // so a hung repair would otherwise look like a successful one.
            if (!await.await(1, MINUTES)) {
                throw new IllegalStateException("Repair did not complete within 1 minute");
            }
        } catch (InterruptedException e) {
            // keep the interrupt visible to whoever owns this thread before translating the exception
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
        return new RepairResult(success.get(), wasInconsistent.get());
    };
}
Also used : Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) Condition(org.apache.cassandra.utils.concurrent.Condition) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RepairResult(org.apache.cassandra.distributed.shared.RepairResult)

Example 3 with Condition

use of org.apache.cassandra.utils.concurrent.Condition in project cassandra by apache.

the class PreviewRepairTest method testFinishingIncRepairDuringPreview.

/**
 * Covers the race where an incremental repair finishes just as a preview repair is starting up, which is another
 * way the repaired datasets could appear to mismatch.
 *
 * Sequence exercised:
 * 1. a preview repair is started
 * 2. the validation request from node1 to node2 is held back
 * 3. node1 begins its validation
 * 4. an incremental repair runs and completes fine
 * 5. node2 is allowed to continue with its validation
 *
 * At that point node2 would include sstables from the second incremental repair while node1 would not. The preview
 * repair is expected to fail, because any preview repair that is ongoing when an incremental repair finishes
 * (step 4) gets failed.
 */
@Test
public void testFinishingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService executor = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");

        // first batch: write, flush everywhere, then run a non-preview repair over it
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));

        // second batch: write and flush only
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));

        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        // hold the validation request sent from node1 to node2 until the full inc repair below has run
        cluster.filters()
               .outbound()
               .verbs(VALIDATION_REQ.id)
               .from(1)
               .to(2)
               .messagesMatching(validationRequest(previewRepairStarted, continuePreviewRepair))
               .drop();

        // kick off the preview repair in the background and wait until its validation request was intercepted
        Future<RepairResult> pendingPreview = executor.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false))));
        previewRepairStarted.await();

        // the repair below has to finish before the preview repair is unpaused on node2
        cluster.get(1).callOnInstance(repair(options(false, false)));
        continuePreviewRepair.signalAll();

        RepairResult previewOutcome = pendingPreview.get();
        // the preview repair should have failed ...
        assertFalse(previewOutcome.success);
        // ... without reporting any mismatch
        assertFalse(previewOutcome.wasInconsistent);
    } finally {
        executor.shutdown();
    }
}
Also used : Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) Condition(org.apache.cassandra.utils.concurrent.Condition) ExecutorService(java.util.concurrent.ExecutorService) Cluster(org.apache.cassandra.distributed.Cluster) RepairResult(org.apache.cassandra.distributed.shared.RepairResult) Test(org.junit.Test)

Example 4 with Condition

use of org.apache.cassandra.utils.concurrent.Condition in project cassandra by apache.

the class GossipShutdownTest method shutdownStayDownTest.

/**
 * Makes sure that a node that has shutdown doesn't come back as live (without being restarted).
 *
 * The test shuts node2 down at a controlled point: while the first GOSSIP_DIGEST_ACK from node2 to node1 is being
 * held inside a message filter. A background task performs the shutdown and only then releases that message.
 * NOTE(review): the actual liveness checks are presumably made by the EPChanges subscriber registered on node1;
 * that class is not visible here - confirm.
 */
@Test
public void shutdownStayDownTest() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(builder().withNodes(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, v int)");
        for (int i = 0; i < 10; i++) {
            cluster.coordinator(1).execute("insert into " + KEYSPACE + ".tbl (id, v) values (?,?)", ALL, i, i);
        }
        // timeToShutdown: raised by the ACK filter below to start the shutdown task
        // waitForShutdown: raised by the shutdown task once node2 has been stopped
        Condition timeToShutdown = newOneTimeCondition();
        Condition waitForShutdown = newOneTimeCondition();
        // ensures only the first intercepted ACK triggers the shutdown
        AtomicBoolean signalled = new AtomicBoolean(false);
        // Future<?> rather than the raw type: the task yields no value, f is only used to wait for it and to
        // surface any exception it threw
        Future<?> f = es.submit(() -> {
            await(timeToShutdown);
            // register the endpoint-change subscriber on node1 before node2 goes down
            cluster.get(1).runOnInstance(() -> {
                instance.register(new EPChanges());
            });
            cluster.get(2).runOnInstance(() -> {
                StorageService.instance.setIsShutdownUnsafeForTests(true);
                instance.stop();
            });
            waitForShutdown.signalAll();
        });
        // every GOSSIP_DIGEST_SYN from node2 to node1 matches and is dropped
        cluster.filters().outbound().from(2).to(1).verbs(GOSSIP_DIGEST_SYN.id).messagesMatching((from, to, message) -> true).drop();
        cluster.filters().outbound().from(2).to(1).verbs(GOSSIP_DIGEST_ACK.id).messagesMatching((from, to, message) -> {
            if (signalled.compareAndSet(false, true)) {
                // first ACK: trigger the shutdown and hold this message until node2 has been stopped;
                // returning false means it does not match the drop filter
                timeToShutdown.signalAll();
                await(waitForShutdown);
                return false;
            }
            // every later ACK matches and is dropped
            return true;
        }).drop();
        // wait for gossip to exchange a few messages
        sleep(10000);
        f.get();
    } finally {
        es.shutdown();
    }
}
Also used : Condition(org.apache.cassandra.utils.concurrent.Condition) Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) EndpointState(org.apache.cassandra.gms.EndpointState) VersionedValue(org.apache.cassandra.gms.VersionedValue) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) ApplicationState(org.apache.cassandra.gms.ApplicationState) IEndpointStateChangeSubscriber(org.apache.cassandra.gms.IEndpointStateChangeSubscriber) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StorageService(org.apache.cassandra.service.StorageService) IOException(java.io.IOException) Condition(org.apache.cassandra.utils.concurrent.Condition) Test(org.junit.Test) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) ExecutionException(java.util.concurrent.ExecutionException) Future(java.util.concurrent.Future) GOSSIP_DIGEST_SYN(org.apache.cassandra.net.Verb.GOSSIP_DIGEST_SYN) GOSSIP_DIGEST_ACK(org.apache.cassandra.net.Verb.GOSSIP_DIGEST_ACK) Gossiper.instance(org.apache.cassandra.gms.Gossiper.instance) Thread.sleep(java.lang.Thread.sleep) Cluster(org.apache.cassandra.distributed.Cluster) Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) ALL(org.apache.cassandra.distributed.api.ConsistencyLevel.ALL) ExecutorService(java.util.concurrent.ExecutorService) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ExecutorService(java.util.concurrent.ExecutorService) Cluster(org.apache.cassandra.distributed.Cluster) Future(java.util.concurrent.Future) Test(org.junit.Test)

Example 5 with Condition

use of org.apache.cassandra.utils.concurrent.Condition in project cassandra by apache.

the class PreviewRepairTest method testConcurrentIncRepairDuringPreview.

/**
 * Tests the case where an IR is running, but has not completed, before validation compaction starts: the preview
 * repair is expected to fail without reporting a mismatch, while the IR itself still succeeds.
 */
@Test
public void testConcurrentIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");

        // first batch: write, flush everywhere, then run a non-preview repair over it
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));

        // second batch: write and flush only
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach(node -> node.flush(KEYSPACE));

        // one started/continue pair per repair that gets paused
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        Condition irRepairStarted = newOneTimeCondition();
        Condition continueIrRepair = newOneTimeCondition();

        // hold the validation request sent from node1 to node2 until the inc repair below has run
        cluster.filters()
               .outbound()
               .verbs(VALIDATION_REQ.id)
               .from(1)
               .to(2)
               .messagesMatching(validationRequest(previewRepairStarted, continuePreviewRepair))
               .drop();
        // keep the IR from committing, so the preview can be re-enabled while the IR is still in flight
        cluster.filters()
               .outbound()
               .verbs(FINALIZE_PROPOSE_MSG.id)
               .from(1)
               .to(2)
               .messagesMatching(finalizePropose(irRepairStarted, continueIrRepair))
               .drop();

        // start the preview repair and wait until its validation request was intercepted
        Future<RepairResult> pendingPreview = cluster.get(1).asyncCallsOnInstance(repair(options(true, false))).call();
        previewRepairStarted.await();

        // trigger the IR and wait until it is ready to commit
        Future<RepairResult> pendingIr = cluster.get(1).asyncCallsOnInstance(repair(options(false, false))).call();
        irRepairStarted.await();

        // unblock the preview repair and wait for it to complete
        continuePreviewRepair.signalAll();
        RepairResult previewOutcome = pendingPreview.get();
        // the preview repair should have failed ...
        assertFalse(previewOutcome.success);
        // ... without reporting any mismatch
        assertFalse(previewOutcome.wasInconsistent);

        // now let the IR commit; it should succeed
        continueIrRepair.signalAll();
        RepairResult irOutcome = pendingIr.get();
        assertTrue(irOutcome.success);
        // not a preview, so the preview notification is irrelevant here
        assertFalse(irOutcome.wasInconsistent);
    }
}
Also used : Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) Condition(org.apache.cassandra.utils.concurrent.Condition) Cluster(org.apache.cassandra.distributed.Cluster) RepairResult(org.apache.cassandra.distributed.shared.RepairResult) Test(org.junit.Test)

Aggregations

Condition (org.apache.cassandra.utils.concurrent.Condition)10 Condition.newOneTimeCondition (org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition)10 Test (org.junit.Test)9 ExecutorService (java.util.concurrent.ExecutorService)7 Cluster (org.apache.cassandra.distributed.Cluster)7 Token (org.apache.cassandra.dht.Token)3 RepairResult (org.apache.cassandra.distributed.shared.RepairResult)3 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 Map (java.util.Map)2 CountDownLatch (java.util.concurrent.CountDownLatch)2 Executors (java.util.concurrent.Executors)2 Future (java.util.concurrent.Future)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2 Murmur3Partitioner (org.apache.cassandra.dht.Murmur3Partitioner)2 ConsistencyLevel (org.apache.cassandra.distributed.api.ConsistencyLevel)2 ALL (org.apache.cassandra.distributed.api.ConsistencyLevel.ALL)2 GOSSIP (org.apache.cassandra.distributed.api.Feature.GOSSIP)2 AssertUtils.assertRows (org.apache.cassandra.distributed.shared.AssertUtils.assertRows)2 ImmutableList.of (com.google.common.collect.ImmutableList.of)1