use of org.apache.cassandra.distributed.shared.Metrics in project cassandra by apache.
the class HintsServiceMetricsTest method testHintsServiceMetrics.
@Test
public void testHintsServiceMetrics() throws Exception {
// setup a 3-node cluster with a bytebuddy injection that makes the writting of some hints to fail
try (Cluster cluster = builder().withNodes(3).withConfig(config -> config.with(NETWORK, GOSSIP, NATIVE_PROTOCOL)).withInstanceInitializer(FailHints::install).start()) {
// setup a message filter to drop some of the hint request messages from node1
AtomicInteger hintsNode2 = new AtomicInteger();
AtomicInteger hintsNode3 = new AtomicInteger();
cluster.filters().verbs(Verb.HINT_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && hintsNode2.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE) || (to == 3 && hintsNode3.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE)).drop();
// setup a message filter to drop mutations requests from node1, so it creates hints for those mutations
AtomicBoolean dropWritesForNode2 = new AtomicBoolean(false);
AtomicBoolean dropWritesForNode3 = new AtomicBoolean(false);
cluster.filters().verbs(Verb.MUTATION_REQ.id).from(1).messagesMatching((from, to, message) -> (to == 2 && dropWritesForNode2.get()) || (to == 3 && dropWritesForNode3.get())).drop();
// fix under replicated keyspaces so they don't produce hint requests while we are dropping mutations
fixDistributedSchemas(cluster);
cluster.schemaChange(withKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}"));
cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int PRIMARY KEY, v int)"));
ICoordinator coordinator = cluster.coordinator(1);
IInvokableInstance node1 = cluster.get(1);
IInvokableInstance node2 = cluster.get(2);
IInvokableInstance node3 = cluster.get(3);
// write the first half of the rows with the second node dropping mutation requests,
// so some hints will be created for that node
dropWritesForNode2.set(true);
for (int i = 0; i < NUM_ROWS / 2; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
dropWritesForNode2.set(false);
// write the second half of the rows with the third node dropping mutations requests,
// so some hints will be created for that node
dropWritesForNode3.set(true);
for (int i = NUM_ROWS / 2; i < NUM_ROWS; i++) coordinator.execute(withKeyspace("INSERT INTO %s.t (k, v) VALUES (?, ?)"), QUORUM, i, i);
dropWritesForNode3.set(false);
// wait until all the hints have been successfully applied to the nodes that have been dropping mutations
waitUntilAsserted(() -> assertThat(countRows(node2)).isEqualTo(countRows(node3)).isEqualTo(NUM_ROWS));
// Verify the metrics for the coordinator node, which is the only one actually sending hints.
// The hint delivery errors that we have injected should have made the service try to send them again.
// These retries are done periodically and in pages, so the retries may send again some of the hints that
// were already successfully sent. This way, there may be more succeeded hints than actual hints/rows.
waitUntilAsserted(() -> assertThat(countHintsSucceeded(node1)).isGreaterThanOrEqualTo(NUM_ROWS));
waitUntilAsserted(() -> assertThat(countHintsFailed(node1)).isEqualTo(NUM_FAILURES_PER_NODE * 2));
waitUntilAsserted(() -> assertThat(countHintsTimedOut(node1)).isEqualTo(NUM_TIMEOUTS_PER_NODE * 2));
// verify delay metrics
long numGlobalDelays = countGlobalDelays(node1);
assertThat(numGlobalDelays).isGreaterThanOrEqualTo(NUM_ROWS);
assertThat(countEndpointDelays(node1, node1)).isEqualTo(0);
assertThat(countEndpointDelays(node1, node2)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
assertThat(countEndpointDelays(node1, node3)).isGreaterThan(0).isLessThanOrEqualTo(numGlobalDelays);
assertThat(countEndpointDelays(node1, node2) + countEndpointDelays(node1, node3)).isGreaterThanOrEqualTo(numGlobalDelays);
// verify that the metrics for the not-coordinator nodes are zero
for (IInvokableInstance node : Arrays.asList(node2, node3)) {
assertThat(countHintsSucceeded(node)).isEqualTo(0);
assertThat(countHintsFailed(node)).isEqualTo(0);
assertThat(countHintsTimedOut(node)).isEqualTo(0);
assertThat(countGlobalDelays(node)).isEqualTo(0);
cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0));
}
}
}
Aggregations