
Example 91 with Cluster

Use of org.apache.cassandra.distributed.Cluster in project cassandra by apache.

The class RepairErrorsTest, method testRemoteValidationFailure.

@Test
public void testRemoteValidationFailure() throws IOException {
    Cluster.Builder builder = Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).withInstanceInitializer(ByteBuddyHelper::install);
    try (Cluster cluster = builder.createWithoutStarting()) {
        cluster.setUncaughtExceptionsFilter((i, throwable) -> {
            if (i == 2)
                return throwable.getMessage() != null && throwable.getMessage().contains("IGNORE");
            return false;
        });
        cluster.startup();
        init(cluster);
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, x int)");
        for (int i = 0; i < 10; i++) cluster.coordinator(1).execute("insert into " + KEYSPACE + ".tbl (id, x) VALUES (?,?)", ConsistencyLevel.ALL, i, i);
        cluster.forEach(i -> i.flush(KEYSPACE));
        long mark = cluster.get(1).logs().mark();
        cluster.forEach(i -> i.nodetoolResult("repair", "--full").asserts().failure());
        Assertions.assertThat(cluster.get(1).logs().grep(mark, "^ERROR").getResult()).isEmpty();
    }
}
Also used : CompactionManager(org.apache.cassandra.db.compaction.CompactionManager) MethodDelegation(net.bytebuddy.implementation.MethodDelegation) ElementMatchers.named(net.bytebuddy.matcher.ElementMatchers.named) Collection(java.util.Collection) ByteBuddy(net.bytebuddy.ByteBuddy) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Test(org.junit.Test) UUID(java.util.UUID) ConsistencyLevel(org.apache.cassandra.distributed.api.ConsistencyLevel) ClassLoadingStrategy(net.bytebuddy.dynamic.loading.ClassLoadingStrategy) CompactionInterruptedException(org.apache.cassandra.db.compaction.CompactionInterruptedException) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) UnfilteredRowIterator(org.apache.cassandra.db.rows.UnfilteredRowIterator) CompactionIterator(org.apache.cassandra.db.compaction.CompactionIterator) Assertions(org.assertj.core.api.Assertions) Cluster(org.apache.cassandra.distributed.Cluster) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP) Cluster(org.apache.cassandra.distributed.Cluster) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) Test(org.junit.Test)
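
The test wires a ByteBuddy interceptor into each instance via withInstanceInitializer(ByteBuddyHelper::install), but the helper class itself is not reproduced on this page. Below is a minimal sketch of how such an initializer is usually written in these dtests, assuming it rebases CompactionIterator.next() on node 2 so that validation fails with an "IGNORE"-tagged error that the uncaught-exceptions filter above tolerates; the interception point and the exact exception are assumptions, not the project's actual helper.

import net.bytebuddy.ByteBuddy;
import net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
import net.bytebuddy.implementation.MethodDelegation;
import org.apache.cassandra.db.compaction.CompactionIterator;
import org.apache.cassandra.db.rows.UnfilteredRowIterator;
import static net.bytebuddy.matcher.ElementMatchers.named;

// Hypothetical sketch of the instance initializer; the real ByteBuddyHelper may intercept
// different methods.
public class ByteBuddyHelper
{
    // invoked once per instance class loader: (class loader, node number)
    public static void install(ClassLoader cl, int nodeNumber)
    {
        if (nodeNumber == 2) // only node 2 is supposed to fail validation
        {
            new ByteBuddy().rebase(CompactionIterator.class)
                           .method(named("next"))
                           .intercept(MethodDelegation.to(ByteBuddyHelper.class))
                           .make()
                           .load(cl, ClassLoadingStrategy.Default.INJECTION);
        }
    }

    // replaces CompactionIterator.next(): the message contains "IGNORE", so the
    // uncaught-exceptions filter in the test accepts it on node 2
    public static UnfilteredRowIterator next()
    {
        throw new RuntimeException("IGNORE: simulated validation failure");
    }
}

The per-node check matters because the exception filter in the test only tolerates the injected failure on node 2; the log assertion then verifies node 1 reports the remote failure without logging an ERROR of its own.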

Example 92 with Cluster

Use of org.apache.cassandra.distributed.Cluster in project cassandra by apache.

The class PreviewRepairTest, method snapshotTest.

@Test
public void snapshotTest() throws IOException, InterruptedException {
    try (Cluster cluster = init(Cluster.build(3).withConfig(config -> config.set("snapshot_on_repaired_data_mismatch", true).with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        cluster.schemaChange("create table " + KEYSPACE + ".tbl2 (id int primary key, t int)");
        // populate 2 tables
        insert(cluster.coordinator(1), 0, 100, "tbl");
        insert(cluster.coordinator(1), 0, 100, "tbl2");
        cluster.forEach((n) -> n.flush(KEYSPACE));
        // make sure everything is marked repaired
        cluster.get(1).callOnInstance(repair(options(false, false)));
        waitMarkedRepaired(cluster);
        // make node2 mismatch
        unmarkRepaired(cluster.get(2), "tbl");
        verifySnapshots(cluster, "tbl", true);
        verifySnapshots(cluster, "tbl2", true);
        AtomicInteger snapshotMessageCounter = new AtomicInteger();
        cluster.filters().verbs(Verb.SNAPSHOT_REQ.id).messagesMatching((from, to, message) -> {
            snapshotMessageCounter.incrementAndGet();
            return false;
        }).drop();
        cluster.get(1).callOnInstance(repair(options(true, true)));
        verifySnapshots(cluster, "tbl", false);
        // tbl2 should not have a mismatch, so the snapshots should be empty here
        verifySnapshots(cluster, "tbl2", true);
        assertEquals(3, snapshotMessageCounter.get());
        // and make sure that we don't try to snapshot again
        snapshotMessageCounter.set(0);
        cluster.get(3).callOnInstance(repair(options(true, true)));
        assertEquals(0, snapshotMessageCounter.get());
    }
}
Also used : Arrays(java.util.Arrays) TimeoutException(java.util.concurrent.TimeoutException) IMessage(org.apache.cassandra.distributed.api.IMessage) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) Future(java.util.concurrent.Future) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) Keyspace(org.apache.cassandra.db.Keyspace) NodeToolResult(org.apache.cassandra.distributed.api.NodeToolResult) ImmutableList.of(com.google.common.collect.ImmutableList.of) FBUtilities(org.apache.cassandra.utils.FBUtilities) RepairResult(org.apache.cassandra.distributed.shared.RepairResult) ConsistencyLevel(org.apache.cassandra.distributed.api.ConsistencyLevel) Verb(org.apache.cassandra.net.Verb) Executors(java.util.concurrent.Executors) FinalizePropose(org.apache.cassandra.repair.messages.FinalizePropose) List(java.util.List) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Assert.assertFalse(org.junit.Assert.assertFalse) Instance.deserializeMessage(org.apache.cassandra.distributed.impl.Instance.deserializeMessage) CompactionManager(org.apache.cassandra.db.compaction.CompactionManager) RepairOption(org.apache.cassandra.repair.messages.RepairOption) Matcher(org.apache.cassandra.distributed.api.IMessageFilters.Matcher) BeforeClass(org.junit.BeforeClass) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Range(org.apache.cassandra.dht.Range) HashMap(java.util.HashMap) Message(org.apache.cassandra.net.Message) MINUTES(java.util.concurrent.TimeUnit.MINUTES) RepairParallelism(org.apache.cassandra.repair.RepairParallelism) ArrayList(java.util.ArrayList) Token(org.apache.cassandra.dht.Token) ActiveRepairService(org.apache.cassandra.service.ActiveRepairService) DelayFirstRepairTypeMessageFilter.finalizePropose(org.apache.cassandra.distributed.test.PreviewRepairTest.DelayFirstRepairTypeMessageFilter.finalizePropose) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) IIsolatedExecutor(org.apache.cassandra.distributed.api.IIsolatedExecutor) StorageService.instance(org.apache.cassandra.service.StorageService.instance) ProgressEventType(org.apache.cassandra.utils.progress.ProgressEventType) ExecutorService(java.util.concurrent.ExecutorService) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) Uninterruptibles(com.google.common.util.concurrent.Uninterruptibles) DelayFirstRepairTypeMessageFilter.validationRequest(org.apache.cassandra.distributed.test.PreviewRepairTest.DelayFirstRepairTypeMessageFilter.validationRequest) VALIDATION_REQ(org.apache.cassandra.net.Verb.VALIDATION_REQ) StorageService(org.apache.cassandra.service.StorageService) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Condition(org.apache.cassandra.utils.concurrent.Condition) Test(org.junit.Test) FINALIZE_PROPOSE_MSG(org.apache.cassandra.net.Verb.FINALIZE_PROPOSE_MSG) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) RepairMessage(org.apache.cassandra.repair.messages.RepairMessage) PreviewKind(org.apache.cassandra.streaming.PreviewKind) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) ValidationRequest(org.apache.cassandra.repair.messages.ValidationRequest) Cluster(org.apache.cassandra.distributed.Cluster) Assert.assertEquals(org.junit.Assert.assertEquals) 
GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Cluster(org.apache.cassandra.distributed.Cluster) Test(org.junit.Test)
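
The helpers waitMarkedRepaired, unmarkRepaired and verifySnapshots come from PreviewRepairTest and are not shown on this page. A rough sketch of the waiting half, assuming it simply spins on each node until every live sstable of both tables reports itself repaired (the real helper may poll differently):

import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.Keyspace;
import org.apache.cassandra.distributed.Cluster;
import org.apache.cassandra.io.sstable.format.SSTableReader;

// Hypothetical sketch of waitMarkedRepaired, written as a static helper inside the test class.
static void waitMarkedRepaired(Cluster cluster)
{
    cluster.forEach(node -> node.runOnInstance(() -> {
        for (String table : Arrays.asList("tbl", "tbl2"))
        {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(table);
            // spin until every live sstable of this table carries a repairedAt timestamp
            while (!cfs.getLiveSSTables().stream().allMatch(SSTableReader::isRepaired))
                Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
        }
    }));
}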

Example 93 with Cluster

Use of org.apache.cassandra.distributed.Cluster in project cassandra by apache.

The class PreviewRepairTest, method testFinishingNonIntersectingIncRepairDuringPreview.

/**
 * Same as testFinishingIncRepairDuringPreview, but the previewed range does not intersect the incremental repair,
 * so both preview and incremental repair should finish fine (without any mismatches).
 */
@Test
public void testFinishingNonIntersectingIncRepairDuringPreview() throws IOException, InterruptedException, ExecutionException {
    ExecutorService es = Executors.newSingleThreadExecutor();
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false))).success);
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        // pause preview repair validation messages on node2 until node1 has finished
        Condition previewRepairStarted = newOneTimeCondition();
        Condition continuePreviewRepair = newOneTimeCondition();
        DelayFirstRepairTypeMessageFilter filter = validationRequest(previewRepairStarted, continuePreviewRepair);
        cluster.filters().outbound().verbs(VALIDATION_REQ.id).from(1).to(2).messagesMatching(filter).drop();
        // get local ranges to repair two separate ranges:
        List<String> localRanges = cluster.get(1).callOnInstance(() -> {
            List<String> res = new ArrayList<>();
            for (Range<Token> r : instance.getLocalReplicas(KEYSPACE).ranges()) res.add(r.left.getTokenValue() + ":" + r.right.getTokenValue());
            return res;
        });
        assertEquals(2, localRanges.size());
        Future<RepairResult> repairStatusFuture = es.submit(() -> cluster.get(1).callOnInstance(repair(options(true, false, localRanges.get(0)))));
        // wait for node1 to start validation compaction
        previewRepairStarted.await();
        // this needs to finish before the preview repair is unpaused on node2
        assertTrue(cluster.get(1).callOnInstance(repair(options(false, false, localRanges.get(1)))).success);
        continuePreviewRepair.signalAll();
        RepairResult rs = repairStatusFuture.get();
        // repair should succeed
        assertTrue(rs.success);
        // and no mismatches
        assertFalse(rs.wasInconsistent);
    } finally {
        es.shutdown();
    }
}
Also used : Condition.newOneTimeCondition(org.apache.cassandra.utils.concurrent.Condition.newOneTimeCondition) Condition(org.apache.cassandra.utils.concurrent.Condition) ExecutorService(java.util.concurrent.ExecutorService) ArrayList(java.util.ArrayList) Cluster(org.apache.cassandra.distributed.Cluster) Token(org.apache.cassandra.dht.Token) RepairResult(org.apache.cassandra.distributed.shared.RepairResult) Test(org.junit.Test)
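
The DelayFirstRepairTypeMessageFilter used above is a nested helper of PreviewRepairTest and is only referenced through its static factories. A sketch of the idea, assuming the filter deserializes each intercepted message, signals the "started" condition on the first payload of the watched repair-message type, parks the delivery thread until the "continue" condition is signalled, and always returns false so the message is delayed rather than dropped; the exact payload inspection is an assumption:

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.cassandra.distributed.api.IMessage;
import org.apache.cassandra.distributed.api.IMessageFilters;
import org.apache.cassandra.distributed.impl.Instance;
import org.apache.cassandra.net.Message;
import org.apache.cassandra.repair.messages.RepairMessage;
import org.apache.cassandra.repair.messages.ValidationRequest;
import org.apache.cassandra.utils.concurrent.Condition;

// Hypothetical sketch; the real class is nested inside PreviewRepairTest.
class DelayFirstRepairTypeMessageFilter implements IMessageFilters.Matcher
{
    private final Condition started;
    private final Condition continueRepair;
    private final Class<? extends RepairMessage> type;
    private final AtomicBoolean waitHasHappened = new AtomicBoolean();

    DelayFirstRepairTypeMessageFilter(Condition started, Condition continueRepair,
                                      Class<? extends RepairMessage> type)
    {
        this.started = started;
        this.continueRepair = continueRepair;
        this.type = type;
    }

    static DelayFirstRepairTypeMessageFilter validationRequest(Condition started, Condition continueRepair)
    {
        return new DelayFirstRepairTypeMessageFilter(started, continueRepair, ValidationRequest.class);
    }

    public boolean matches(int from, int to, IMessage message)
    {
        Message<?> decoded = Instance.deserializeMessage(message);
        if (type.isInstance(decoded.payload) && waitHasHappened.compareAndSet(false, true))
        {
            started.signalAll();                     // tell the test the repair reached this point
            continueRepair.awaitUninterruptibly();   // hold the message until the test says go
        }
        return false; // never drop; the message is only delayed, then delivered
    }
}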

Example 94 with Cluster

Use of org.apache.cassandra.distributed.Cluster in project cassandra by apache.

The class PreviewRepairTest, method testWithMismatchingPending.

/**
 * makes sure that the repaired sstables are not matching on the two
 * nodes by disabling autocompaction on node2 and then running an
 * incremental repair
 */
@Test
public void testWithMismatchingPending() throws Throwable {
    try (Cluster cluster = init(Cluster.build(2).withConfig(config -> config.with(GOSSIP).with(NETWORK)).start())) {
        cluster.schemaChange("create table " + KEYSPACE + ".tbl (id int primary key, t int)");
        insert(cluster.coordinator(1), 0, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        cluster.get(1).callOnInstance(repair(options(false, false)));
        insert(cluster.coordinator(1), 100, 100);
        cluster.forEach((node) -> node.flush(KEYSPACE));
        // make sure that all sstables have moved to repaired by triggering a compaction
        // also disables autocompaction on the nodes
        cluster.forEach((node) -> node.runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
            cfs.disableAutoCompaction();
        }));
        long[] marks = logMark(cluster);
        cluster.get(1).callOnInstance(repair(options(false, false)));
        // now re-enable autocompaction on node1; this moves the sstables for the new repair to repaired
        cluster.get(1).runOnInstance(() -> {
            ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore("tbl");
            cfs.enableAutoCompaction();
            FBUtilities.waitOnFutures(CompactionManager.instance.submitBackground(cfs));
        });
        waitLogsRepairFullyFinished(cluster, marks);
        RepairResult rs = cluster.get(1).callOnInstance(repair(options(true, false)));
        // preview repair should succeed
        assertTrue(rs.success);
        // and we should see no mismatches
        assertFalse(rs.wasInconsistent);
    }
}
Also used : ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) Cluster(org.apache.cassandra.distributed.Cluster) RepairResult(org.apache.cassandra.distributed.shared.RepairResult) Test(org.junit.Test)
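
logMark and waitLogsRepairFullyFinished are small log helpers from the same test class that are not shown here. A plausible sketch, reusing the logs().mark() and logs().grep() calls that already appear in Example 91; the log line the second helper polls for is a placeholder assumption:

import java.util.concurrent.TimeUnit;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.cassandra.distributed.Cluster;

// Hypothetical sketch of logMark: remember where each node's log currently ends, so later
// greps only look at output produced after this point.
static long[] logMark(Cluster cluster)
{
    long[] marks = new long[cluster.size()];
    for (int i = 1; i <= cluster.size(); i++)
        marks[i - 1] = cluster.get(i).logs().mark();
    return marks;
}

// Hypothetical sketch of waitLogsRepairFullyFinished: spin until every node has logged, after
// its recorded mark, that the repair finished. "Finished repair" is a placeholder pattern,
// not necessarily the line the real helper waits for.
static void waitLogsRepairFullyFinished(Cluster cluster, long[] marks)
{
    for (int i = 1; i <= cluster.size(); i++)
        while (cluster.get(i).logs().grep(marks[i - 1], "Finished repair").getResult().isEmpty())
            Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
}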

Example 95 with Cluster

Use of org.apache.cassandra.distributed.Cluster in project cassandra by apache.

The class HintsServiceMetricsTest, method testHintsServiceMetrics.

@Test
public void testHintsServiceMetrics() throws Exception {
    // set up a 3-node cluster with a ByteBuddy injection that makes the writing of some hints fail
    try (Cluster cluster = builder().withNodes(3).withConfig(config -> config.with(NETWORK, GOSSIP, NATIVE_PROTOCOL)).withInstanceInitializer(FailHints::install).start()) {
        // set up a message filter to drop some of the hint request messages from node1
        AtomicInteger hintsNode2 = new AtomicInteger();
        AtomicInteger hintsNode3 = new AtomicInteger();
        cluster.filters().verbs(Verb.HINT_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && hintsNode2.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE) || (to == 3 && hintsNode3.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE)).drop();
        // set up a message filter to drop mutation requests from node1, so that node1 creates hints for those mutations
        AtomicBoolean dropWritesForNode2 = new AtomicBoolean(false);
        AtomicBoolean dropWritesForNode3 = new AtomicBoolean(false);
        cluster.filters().verbs(Verb.MUTATION_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && dropWritesForNode2.get()) || (to == 3 && dropWritesForNode3.get())).drop();
        // fix under-replicated keyspaces so they don't produce hint requests while we are dropping mutations
        fixDistributedSchemas(cluster);
        cluster.schemaChange(withKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}"));
        cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int PRIMARY KEY, v int)"));
        ICoordinator coordinator = cluster.coordinator(1);
        IInvokableInstance node1 = cluster.get(1);
        IInvokableInstance node2 = cluster.get(2);
        IInvokableInstance node3 = cluster.get(3);
        // write the first half of the rows with the second node dropping mutation requests,
        // so some hints will be created for that node
        dropWritesForNode2.set(true);
        for (int i = 0; i < NUM_ROWS / 2; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
        dropWritesForNode2.set(false);
        // write the second half of the rows with the third node dropping mutation requests,
        // so some hints will be created for that node
        dropWritesForNode3.set(true);
        for (int i = NUM_ROWS / 2; i < NUM_ROWS; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
        dropWritesForNode3.set(false);
        // wait until all the hints have been successfully applied to the nodes that have been dropping mutations
        waitUntilAsserted(() -> assertThat(countRows(node2)).isEqualTo(countRows(node3)).isEqualTo(NUM_ROWS));
        // Verify the metrics for the coordinator node, which is the only one actually sending hints.
        // The hint delivery errors that we have injected should have made the service try to send them again.
        // These retries are done periodically and in pages, so they may resend some of the hints that
        // were already successfully sent. As a result, there can be more succeeded hints than actual hints/rows.
        waitUntilAsserted(() -> assertThat(countHintsSucceeded(node1)).isGreaterThanOrEqualTo(NUM_ROWS));
        waitUntilAsserted(() -> assertThat(countHintsFailed(node1)).isEqualTo(NUM_FAILURES_PER_NODE * 2));
        waitUntilAsserted(() -> assertThat(countHintsTimedOut(node1)).isEqualTo(NUM_TIMEOUTS_PER_NODE * 2));
        // verify delay metrics
        long numGlobalDelays = countGlobalDelays(node1);
        assertThat(numGlobalDelays).isGreaterThanOrEqualTo(NUM_ROWS);
        assertThat(countEndpointDelays(node1, node1)).isEqualTo(0);
        assertThat(countEndpointDelays(node1, node2)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
        assertThat(countEndpointDelays(node1, node3)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
        assertThat(countEndpointDelays(node1, node2) + countEndpointDelays(node1, node3)).isGreaterThanOrEqualTo(numGlobalDelays);
        // verify that the metrics for the non-coordinator nodes are zero
        for (IInvokableInstance node : Arrays.asList(node2, node3)) {
            assertThat(countHintsSucceeded(node)).isEqualTo(0);
            assertThat(countHintsFailed(node)).isEqualTo(0);
            assertThat(countHintsTimedOut(node)).isEqualTo(0);
            assertThat(countGlobalDelays(node)).isEqualTo(0);
            cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0));
        }
    }
}
Also used : Arrays(java.util.Arrays) MethodDelegation(net.bytebuddy.implementation.MethodDelegation) ByteBuddy(net.bytebuddy.ByteBuddy) ElementMatchers.takesArguments(net.bytebuddy.matcher.ElementMatchers.takesArguments) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Callable(java.util.concurrent.Callable) MINUTES(java.util.concurrent.TimeUnit.MINUTES) NATIVE_PROTOCOL(org.apache.cassandra.distributed.api.Feature.NATIVE_PROTOCOL) ThrowingRunnable(org.awaitility.core.ThrowingRunnable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) Metrics(org.apache.cassandra.distributed.shared.Metrics) TestBaseImpl(org.apache.cassandra.distributed.test.TestBaseImpl) AssertionsForClassTypes.assertThat(org.assertj.core.api.AssertionsForClassTypes.assertThat) QUORUM(org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) Awaitility.await(org.awaitility.Awaitility.await) ElementMatchers.named(net.bytebuddy.matcher.ElementMatchers.named) HintsServiceMetrics(org.apache.cassandra.metrics.HintsServiceMetrics) Test(org.junit.Test) Hint(org.apache.cassandra.hints.Hint) Verb(org.apache.cassandra.net.Verb) ClassLoadingStrategy(net.bytebuddy.dynamic.loading.ClassLoadingStrategy) SuperCall(net.bytebuddy.implementation.bind.annotation.SuperCall) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) Future(org.apache.cassandra.utils.concurrent.Future) Cluster(org.apache.cassandra.distributed.Cluster) SECONDS(java.util.concurrent.TimeUnit.SECONDS) GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Cluster(org.apache.cassandra.distributed.Cluster) Hint(org.apache.cassandra.hints.Hint) Test(org.junit.Test)
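
FailHints::install is another ByteBuddy instance initializer that is only referenced here, not shown. A sketch under the assumption that it rebases Hint.applyFuture() on the replica nodes and fails the first NUM_FAILURES_PER_NODE applications per node, which is what would drive the failed-hint counters asserted above; the intercepted method, the node filter, and the thrown exception are all assumptions. In the real code FailHints is nested inside the test class, which is also why it can see NUM_FAILURES_PER_NODE.

import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import net.bytebuddy.ByteBuddy;
import net.bytebuddy.dynamic.loading.ClassLoadingStrategy;
import net.bytebuddy.implementation.MethodDelegation;
import net.bytebuddy.implementation.bind.annotation.SuperCall;
import org.apache.cassandra.hints.Hint;
import org.apache.cassandra.utils.concurrent.Future;
import static net.bytebuddy.matcher.ElementMatchers.named;
import static net.bytebuddy.matcher.ElementMatchers.takesArguments;

// Hypothetical sketch of the FailHints initializer.
public static class FailHints
{
    // static per instance class loader, i.e. a separate counter per node
    private static final AtomicInteger failed = new AtomicInteger();

    public static void install(ClassLoader cl, int nodeNumber)
    {
        // assumption: only the replicas (nodes 2 and 3) need the failing behaviour
        if (nodeNumber == 1)
            return;
        new ByteBuddy().rebase(Hint.class)
                       .method(named("applyFuture").and(takesArguments(0)))
                       .intercept(MethodDelegation.to(FailHints.class))
                       .make()
                       .load(cl, ClassLoadingStrategy.Default.INJECTION);
    }

    // fail the first NUM_FAILURES_PER_NODE hint applications, then fall through to the real method
    public static Future<?> applyFuture(@SuperCall Callable<Future<?>> zuper) throws Exception
    {
        if (failed.incrementAndGet() <= NUM_FAILURES_PER_NODE)
            throw new RuntimeException("simulated hint apply failure");
        return zuper.call();
    }
}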

Aggregations

Cluster (org.apache.cassandra.distributed.Cluster): 161
Test (org.junit.Test): 151
IInvokableInstance (org.apache.cassandra.distributed.api.IInvokableInstance): 37
Assert (org.junit.Assert): 37
IOException (java.io.IOException): 36
Feature (org.apache.cassandra.distributed.api.Feature): 34
GOSSIP (org.apache.cassandra.distributed.api.Feature.GOSSIP): 30
NETWORK (org.apache.cassandra.distributed.api.Feature.NETWORK): 30
ConsistencyLevel (org.apache.cassandra.distributed.api.ConsistencyLevel): 29
List (java.util.List): 22
ImmutableMap (com.google.common.collect.ImmutableMap): 21
InetAddress (java.net.InetAddress): 20
TokenSupplier (org.apache.cassandra.distributed.api.TokenSupplier): 20
StorageService (org.apache.cassandra.service.StorageService): 18
Arrays (java.util.Arrays): 17
Collections (java.util.Collections): 17
Assertions (org.assertj.core.api.Assertions): 17
Map (java.util.Map): 16
TestBaseImpl (org.apache.cassandra.distributed.test.TestBaseImpl): 15
ICoordinator (org.apache.cassandra.distributed.api.ICoordinator): 14