Search in sources :

Example 1 with Metrics

use of org.apache.cassandra.distributed.shared.Metrics in project cassandra by apache.

the class HintsServiceMetricsTest method testHintsServiceMetrics.

@Test
public void testHintsServiceMetrics() throws Exception {
    // setup a 3-node cluster with a bytebuddy injection that makes the writting of some hints to fail
    try (Cluster cluster = builder().withNodes(3).withConfig(config -> config.with(NETWORK, GOSSIP, NATIVE_PROTOCOL)).withInstanceInitializer(FailHints::install).start()) {
        // setup a message filter to drop some of the hint request messages from node1
        AtomicInteger hintsNode2 = new AtomicInteger();
        AtomicInteger hintsNode3 = new AtomicInteger();
        cluster.filters().verbs(Verb.HINT_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && hintsNode2.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE) || (to == 3 && hintsNode3.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE)).drop();
        // setup a message filter to drop mutations requests from node1, so it creates hints for those mutations
        AtomicBoolean dropWritesForNode2 = new AtomicBoolean(false);
        AtomicBoolean dropWritesForNode3 = new AtomicBoolean(false);
        cluster.filters().verbs(Verb.MUTATION_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && dropWritesForNode2.get()) || (to == 3 && dropWritesForNode3.get())).drop();
        // fix under replicated keyspaces so they don't produce hint requests while we are dropping mutations
        fixDistributedSchemas(cluster);
        cluster.schemaChange(withKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}"));
        cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int PRIMARY KEY, v int)"));
        ICoordinator coordinator = cluster.coordinator(1);
        IInvokableInstance node1 = cluster.get(1);
        IInvokableInstance node2 = cluster.get(2);
        IInvokableInstance node3 = cluster.get(3);
        // write the first half of the rows with the second node dropping mutation requests,
        // so some hints will be created for that node
        dropWritesForNode2.set(true);
        for (int i = 0; i < NUM_ROWS / 2; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
        dropWritesForNode2.set(false);
        // write the second half of the rows with the third node dropping mutations requests,
        // so some hints will be created for that node
        dropWritesForNode3.set(true);
        for (int i = NUM_ROWS / 2; i < NUM_ROWS; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
        dropWritesForNode3.set(false);
        // wait until all the hints have been successfully applied to the nodes that have been dropping mutations
        waitUntilAsserted(() -> assertThat(countRows(node2)).isEqualTo(countRows(node3)).isEqualTo(NUM_ROWS));
        // Verify the metrics for the coordinator node, which is the only one actually sending hints.
        // The hint delivery errors that we have injected should have made the service try to send them again.
        // These retries are done periodically and in pages, so the retries may send again some of the hints that
        // were already successfully sent. This way, there may be more succeeded hints than actual hints/rows.
        waitUntilAsserted(() -> assertThat(countHintsSucceeded(node1)).isGreaterThanOrEqualTo(NUM_ROWS));
        waitUntilAsserted(() -> assertThat(countHintsFailed(node1)).isEqualTo(NUM_FAILURES_PER_NODE * 2));
        waitUntilAsserted(() -> assertThat(countHintsTimedOut(node1)).isEqualTo(NUM_TIMEOUTS_PER_NODE * 2));
        // verify delay metrics
        long numGlobalDelays = countGlobalDelays(node1);
        assertThat(numGlobalDelays).isGreaterThanOrEqualTo(NUM_ROWS);
        assertThat(countEndpointDelays(node1, node1)).isEqualTo(0);
        assertThat(countEndpointDelays(node1, node2)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
        assertThat(countEndpointDelays(node1, node3)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
        assertThat(countEndpointDelays(node1, node2) + countEndpointDelays(node1, node3)).isGreaterThanOrEqualTo(numGlobalDelays);
        // verify that the metrics for the not-coordinator nodes are zero
        for (IInvokableInstance node : Arrays.asList(node2, node3)) {
            assertThat(countHintsSucceeded(node)).isEqualTo(0);
            assertThat(countHintsFailed(node)).isEqualTo(0);
            assertThat(countHintsTimedOut(node)).isEqualTo(0);
            assertThat(countGlobalDelays(node)).isEqualTo(0);
            cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0));
        }
    }
}
Also used : Arrays(java.util.Arrays) MethodDelegation(net.bytebuddy.implementation.MethodDelegation) ByteBuddy(net.bytebuddy.ByteBuddy) ElementMatchers.takesArguments(net.bytebuddy.matcher.ElementMatchers.takesArguments) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Callable(java.util.concurrent.Callable) MINUTES(java.util.concurrent.TimeUnit.MINUTES) NATIVE_PROTOCOL(org.apache.cassandra.distributed.api.Feature.NATIVE_PROTOCOL) ThrowingRunnable(org.awaitility.core.ThrowingRunnable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) Metrics(org.apache.cassandra.distributed.shared.Metrics) TestBaseImpl(org.apache.cassandra.distributed.test.TestBaseImpl) AssertionsForClassTypes.assertThat(org.assertj.core.api.AssertionsForClassTypes.assertThat) QUORUM(org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) Awaitility.await(org.awaitility.Awaitility.await) ElementMatchers.named(net.bytebuddy.matcher.ElementMatchers.named) HintsServiceMetrics(org.apache.cassandra.metrics.HintsServiceMetrics) Test(org.junit.Test) Hint(org.apache.cassandra.hints.Hint) Verb(org.apache.cassandra.net.Verb) ClassLoadingStrategy(net.bytebuddy.dynamic.loading.ClassLoadingStrategy) SuperCall(net.bytebuddy.implementation.bind.annotation.SuperCall) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) Future(org.apache.cassandra.utils.concurrent.Future) Cluster(org.apache.cassandra.distributed.Cluster) SECONDS(java.util.concurrent.TimeUnit.SECONDS) GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Cluster(org.apache.cassandra.distributed.Cluster) Hint(org.apache.cassandra.hints.Hint) Test(org.junit.Test)

Aggregations

Arrays (java.util.Arrays)1 Callable (java.util.concurrent.Callable)1 MINUTES (java.util.concurrent.TimeUnit.MINUTES)1 SECONDS (java.util.concurrent.TimeUnit.SECONDS)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ByteBuddy (net.bytebuddy.ByteBuddy)1 ClassLoadingStrategy (net.bytebuddy.dynamic.loading.ClassLoadingStrategy)1 MethodDelegation (net.bytebuddy.implementation.MethodDelegation)1 SuperCall (net.bytebuddy.implementation.bind.annotation.SuperCall)1 ElementMatchers.named (net.bytebuddy.matcher.ElementMatchers.named)1 ElementMatchers.takesArguments (net.bytebuddy.matcher.ElementMatchers.takesArguments)1 Cluster (org.apache.cassandra.distributed.Cluster)1 QUORUM (org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM)1 GOSSIP (org.apache.cassandra.distributed.api.Feature.GOSSIP)1 NATIVE_PROTOCOL (org.apache.cassandra.distributed.api.Feature.NATIVE_PROTOCOL)1 NETWORK (org.apache.cassandra.distributed.api.Feature.NETWORK)1 ICoordinator (org.apache.cassandra.distributed.api.ICoordinator)1 IInvokableInstance (org.apache.cassandra.distributed.api.IInvokableInstance)1 Metrics (org.apache.cassandra.distributed.shared.Metrics)1