use of org.apache.ratis.server.metrics.RaftServerMetricsImpl in project incubator-ratis by apache.
the class RaftSnapshotBaseTest method verifyInstallSnapshotMetric.
/**
 * Asserts that the counter registered under {@code RATIS_SERVER_INSTALL_SNAPSHOT_COUNT}
 * exists in the given division's server metrics and has a count of at least one.
 *
 * @param leader the division whose server metrics are inspected
 */
protected void verifyInstallSnapshotMetric(RaftServer.Division leader) {
  final RaftServerMetricsImpl serverMetrics = (RaftServerMetricsImpl) leader.getRaftServerMetrics();
  final Counter snapshotInstalls = serverMetrics.getCounter(RATIS_SERVER_INSTALL_SNAPSHOT_COUNT);
  Assert.assertNotNull(snapshotInstalls);
  Assert.assertTrue(1 <= snapshotInstalls.getCount());
}
use of org.apache.ratis.server.metrics.RaftServerMetricsImpl in project incubator-ratis by apache.
the class TestRaftServerSlownessDetection method testSlownessDetection.
/**
 * Kills one follower and checks that the leader, and only the leader, is notified of its slowness.
 *
 * <p>The test reads the leader-side "lastHeartbeatElapsedTime" gauge for the chosen follower
 * before and after killing it, expects the value to grow, and then inspects the slowness
 * information recorded by each server's {@code SimpleStateMachine4Testing}.
 *
 * @throws Exception if waiting for the leader, killing the server or sleeping fails
 */
@Test
public void testSlownessDetection() throws Exception {
  final RaftServer.Division leaderServer = RaftTestUtil.waitForLeader(cluster);
  final long slownessTimeout = RaftServerConfigKeys.Rpc.slownessTimeout(cluster.getProperties())
      .toIntExact(TimeUnit.MILLISECONDS);
  final RaftServer.Division failedFollower = cluster.getFollowers().get(0);

  final RatisMetricRegistry ratisMetricRegistry =
      ((RaftServerMetricsImpl) leaderServer.getRaftServerMetrics()).getRegistry();
  final SortedMap<String, Gauge> heartbeatElapsedTimeGauges =
      ratisMetricRegistry.getGauges((name, gauge) -> name.contains("lastHeartbeatElapsedTime"));

  // Look up the gauge for the follower that is about to be killed. A sequential stream is
  // sufficient for this small map, and a missing gauge fails the test with a clear message
  // instead of a bare NoSuchElementException.
  final String followerId = failedFollower.getId().toString();
  final Gauge metric = heartbeatElapsedTimeGauges.entrySet().stream()
      .filter(e -> e.getKey().contains(followerId))
      .findFirst()
      .orElseThrow(() -> new AssertionError(
          "No lastHeartbeatElapsedTime gauge found on the leader for follower " + followerId))
      .getValue();

  final long followerHeartBeatElapsedMetric = (long) metric.getValue();

  // Fail the node and wait for the callback to be triggered.
  cluster.killServer(failedFollower.getId());
  Thread.sleep(slownessTimeout * 2);

  final long followerHeartBeatElapsedMetricNew = (long) metric.getValue();
  Assert.assertTrue(followerHeartBeatElapsedMetricNew > followerHeartBeatElapsedMetric);

  // Followers should not get any slowness notification.
  for (RaftServer.Division followerServer : cluster.getFollowers()) {
    Assert.assertNull(SimpleStateMachine4Testing.get(followerServer).getSlownessInfo());
  }

  // The leader should get a notification that the follower has failed now.
  final RaftProtos.RoleInfoProto roleInfoProto =
      SimpleStateMachine4Testing.get(cluster.getLeader()).getSlownessInfo();
  Assert.assertNotNull(roleInfoProto);

  // Assert that the killed node is reported as lagging behind by more than the timeout.
  final List<RaftProtos.ServerRpcProto> followers = roleInfoProto.getLeaderInfo().getFollowerInfoList();
  for (RaftProtos.ServerRpcProto serverProto : followers) {
    if (RaftPeerId.valueOf(serverProto.getId().getId()).equals(failedFollower.getId())) {
      Assert.assertTrue(serverProto.getLastRpcElapsedTimeMs() > slownessTimeout);
    }
  }
}
use of org.apache.ratis.server.metrics.RaftServerMetricsImpl in project incubator-ratis by apache.
the class TestRetryCacheMetrics method setUp.
/**
 * Initializes the static {@code retryCache} and {@code ratisMetricRegistry} fields once
 * before the tests run, using a random group id and the fixed peer id "TestId".
 */
@BeforeClass
public static void setUp() {
  final RaftPeerId peerId = RaftPeerId.valueOf("TestId");
  final RaftGroupId groupId = RaftGroupId.randomId();
  final RaftGroupMemberId memberId = RaftGroupMemberId.valueOf(peerId, groupId);

  retryCache = new RetryCacheImpl(RaftServerConfigKeys.RetryCache.EXPIRY_TIME_DEFAULT, null);

  // The server metrics are created (or reused) for this member id; the registry is taken from them.
  ratisMetricRegistry = RaftServerMetricsImpl
      .computeIfAbsentRaftServerMetrics(memberId, () -> null, retryCache::getStatistics)
      .getRegistry();
}
use of org.apache.ratis.server.metrics.RaftServerMetricsImpl in project incubator-ratis by apache.
the class LogAppenderTests method testFollowerHeartbeatMetric.
/**
 * Starts a 3-node cluster, sends ten messages to the leader and then checks the heartbeat metrics:
 * the leader must expose one "lastHeartbeatElapsedTime" gauge per follower with a positive value,
 * the followers must expose none, and each follower's append-entry timers (heartbeat and
 * non-heartbeat) must have recorded at least one event.
 *
 * @throws IOException if sending a message or closing the client fails
 * @throws InterruptedException declared by the original signature; kept for compatibility
 */
@Test
public void testFollowerHeartbeatMetric() throws IOException, InterruptedException {
  // Start a 3 node Ratis ring.
  // NOTE(review): the cluster started here is not shut down in this method — confirm that
  // teardown happens elsewhere.
  final MiniRaftCluster cluster = newCluster(3);
  cluster.start();
  final RaftServer.Division leaderServer = waitForLeader(cluster);

  // Write 10 messages to leader. IOExceptions propagate to the caller unchanged.
  try (RaftClient client = cluster.createClient(leaderServer.getId())) {
    for (int i = 1; i <= 10; i++) {
      client.io().send(new RaftTestUtil.SimpleMessage("Msg to make leader ready " + i));
    }
  }

  final RatisMetricRegistry ratisMetricRegistry =
      ((RaftServerMetricsImpl) leaderServer.getRaftServerMetrics()).getRegistry();

  // Get all last_heartbeat_elapsed_time metric gauges. Should be equal to number of followers.
  final SortedMap<String, Gauge> heartbeatElapsedTimeGauges =
      ratisMetricRegistry.getGauges((name, gauge) -> name.contains("lastHeartbeatElapsedTime"));
  assertTrue(heartbeatElapsedTimeGauges.size() == 2);

  for (RaftServer.Division followerServer : cluster.getFollowers()) {
    final String followerId = followerServer.getId().toString();

    // A sequential stream is sufficient for this small map, and a missing gauge fails the
    // test with a clear message instead of a bare NoSuchElementException.
    final Gauge metric = heartbeatElapsedTimeGauges.entrySet().stream()
        .filter(e -> e.getKey().contains(followerId))
        .findFirst()
        .orElseThrow(() -> new AssertionError(
            "No lastHeartbeatElapsedTime gauge found on the leader for follower " + followerId))
        .getValue();

    // Metric for this follower exists.
    assertTrue(metric != null);
    // Metric in nanos > 0.
    assertTrue((long) metric.getValue() > 0);

    // Try to get Heartbeat metrics for follower.
    final RaftServerMetricsImpl followerMetrics =
        (RaftServerMetricsImpl) followerServer.getRaftServerMetrics();
    // Metric should not exist. It only exists in leader.
    assertTrue(followerMetrics.getRegistry()
        .getGauges((s, m) -> s.contains("lastHeartbeatElapsedTime")).isEmpty());

    // Both the heartbeat and the non-heartbeat append-entry timers must have recorded events.
    for (boolean heartbeat : new boolean[] { true, false }) {
      assertTrue(followerMetrics.getFollowerAppendEntryTimer(heartbeat).getMeanRate() > 0.0d);
      assertTrue(followerMetrics.getFollowerAppendEntryTimer(heartbeat).getCount() > 0L);
    }
  }
}
use of org.apache.ratis.server.metrics.RaftServerMetricsImpl in project incubator-ratis by apache.
the class TestRatisServerMetricsBase method runTestClientFailedRequest.
/**
 * Submits a stale-read request that is expected to fail and checks that the leader's
 * {@code RATIS_SERVER_FAILED_CLIENT_STALE_READ_COUNT} counter equals one afterwards.
 *
 * @param cluster the running cluster to test against
 * @throws InterruptedException if interrupted while waiting for the reply
 * @throws IOException if submitting the request fails
 * @throws ExecutionException if the asynchronous request completes exceptionally
 */
void runTestClientFailedRequest(CLUSTER cluster) throws InterruptedException, IOException, ExecutionException {
  final RaftServer.Division leader = RaftTestUtil.waitForLeader(cluster);
  final ClientId clientId = ClientId.randomId();

  // StaleRead with Long.MAX_VALUE minIndex will fail.
  final RaftClientRequest staleRead = RaftClientRequest.newBuilder()
      .setClientId(clientId)
      .setLeaderId(leader.getId())
      .setGroupId(cluster.getGroupId())
      .setCallId(0)
      .setMessage(Message.EMPTY)
      .setType(RaftClientRequest.staleReadRequestType(Long.MAX_VALUE))
      .build();

  final RaftClientReply reply = leader.getRaftServer().submitClientRequestAsync(staleRead).get();
  Assert.assertFalse(reply.isSuccess());

  final RaftServerMetricsImpl leaderMetrics = (RaftServerMetricsImpl) leader.getRaftServerMetrics();
  assertEquals(1L, leaderMetrics.getCounter(RATIS_SERVER_FAILED_CLIENT_STALE_READ_COUNT).getCount());
}
Aggregations