Use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.
From the class NodeJoinTests, method testJoinFollowerWithHigherTerm.
/**
 * Verifies that the local node ends up elected master when it receives a join in a term
 * higher than the one in which it previously handled a start-join and a follower check
 * from another node.
 *
 * @throws Exception declared because the helpers used here may throw
 */
public void testJoinFollowerWithHigherTerm() throws Exception {
    final DiscoveryNode firstNode = newNode(0, true);
    final DiscoveryNode secondNode = newNode(1, true);
    final long startTerm = randomLongBetween(1, 10);
    final long startVersion = randomLongBetween(1, 10);

    // The initial voting configuration contains only the first node; the health supplier always reports HEALTHY.
    setupFakeMasterServiceAndCoordinator(
        startTerm,
        initialState(firstNode, startTerm, startVersion, VotingConfiguration.of(firstNode)),
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );

    // Handle a start-join and then a follower check from the second node, both in a strictly higher term.
    final long followerTerm = startTerm + randomLongBetween(1, 10);
    handleStartJoinFrom(secondNode, followerTerm);
    handleFollowerCheckFrom(secondNode, followerTerm);

    // Deliver a join from the second node, targeting the first node, in a term higher still.
    final long electionTerm = followerTerm + randomLongBetween(1, 10);
    final Join joinVote = new Join(secondNode, firstNode, electionTerm, startTerm, startVersion);
    joinNodeAndRun(new JoinRequest(secondNode, electionTerm, Optional.of(joinVote)));

    assertTrue(isLocalNodeElectedMaster());
}
Use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.
From the class NodeJoinTests, method testJoinWithHigherTermElectsLeader.
/**
 * Verifies that a join request carrying a higher term switches the coordinator into
 * {@code LEADER} mode immediately, and that the master node id only appears in the state
 * exposed to the master service once the queued tasks have been run.
 */
public void testJoinWithHigherTermElectsLeader() {
    final DiscoveryNode firstNode = newNode(0, true);
    final DiscoveryNode secondNode = newNode(1, true);
    final long startTerm = randomLongBetween(1, 10);
    final long startVersion = randomLongBetween(1, 10);

    // The single voting node is picked at random from the two nodes.
    setupFakeMasterServiceAndCoordinator(
        startTerm,
        initialState(firstNode, startTerm, startVersion, VotingConfiguration.of(randomFrom(firstNode, secondNode))),
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );

    // Before any join arrives: not master, and no master id is published.
    assertFalse(isLocalNodeElectedMaster());
    assertNull(coordinator.getStateForMasterService().nodes().getMasterNodeId());

    final long electionTerm = startTerm + randomLongBetween(1, 10);
    final Join joinVote = new Join(secondNode, firstNode, electionTerm, startTerm, startVersion);
    final JoinRequest joinRequest = new JoinRequest(secondNode, electionTerm, Optional.of(joinVote));
    final SimpleFuture joinFuture = joinNodeAsync(joinRequest);

    // The mode flips right away, but the master id is still absent until the queued tasks run.
    assertEquals(Coordinator.Mode.LEADER, coordinator.getMode());
    assertNull(coordinator.getStateForMasterService().nodes().getMasterNodeId());

    deterministicTaskQueue.runAllRunnableTasks();

    assertTrue(joinFuture.isDone());
    assertTrue(isLocalNodeElectedMaster());
    assertTrue(coordinator.getStateForMasterService().nodes().isLocalNodeElectedMaster());
}
Use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.
From the class FollowersCheckerTests, method testFailureCounterResetsOnSuccess.
/**
 * Runs {@code testBehaviourOfFailingNode} with a response supplier that succeeds on every
 * {@code retryCount}-th check for the first {@code maxRecoveries} such checks and throws
 * on every other check, expecting the "followers check retry count exceeded" outcome.
 */
public void testFailureCounterResetsOnSuccess() {
    final Settings settings = randomSettings();
    final int retryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings);
    final int maxRecoveries = randomIntBetween(3, 10);

    // Expected elapsed time: (retryCount * (maxRecoveries + 1) - 1) check intervals.
    final long expectedFailureTime = (retryCount * (maxRecoveries + 1) - 1)
        * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis();

    // passes just enough checks to keep it alive, up to maxRecoveries, and then fails completely
    final Supplier<Empty> responder = new Supplier<Empty>() {
        private int attempts;
        private int successes;

        @Override
        public Empty get() {
            attempts++;
            final boolean offCycle = attempts % retryCount != 0;
            if (offCycle || successes >= maxRecoveries) {
                throw new OpenSearchException("simulated exception");
            }
            successes++;
            return Empty.INSTANCE;
        }
    };

    testBehaviourOfFailingNode(
        settings,
        responder,
        "followers check retry count exceeded",
        expectedFailureTime,
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );
}
Use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.
From the class LeaderCheckerTests, method testFollowerFailsImmediatelyOnHealthCheckFailure.
/**
 * Verifies that the {@code LeaderChecker} failure callback fires as soon as a leader check is
 * answered with a remote {@code NodeHealthCheckFailureException}, after a period in which
 * every check succeeded and the callback stayed silent.
 */
public void testFollowerFailsImmediatelyOnHealthCheckFailure() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode leader = new DiscoveryNode("leader", buildNewFakeTransportAddress(), Version.CURRENT);

    // One-element array so the test body can change the simulated response later on.
    final Response[] nextResponse = new Response[] { Response.SUCCESS };

    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());

    final MockTransport mockTransport = new MockTransport() {
        @Override
        protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
            // Handshakes are answered inline; everything else must be a leader check aimed at the leader.
            if (action.equals(HANDSHAKE_ACTION_NAME)) {
                handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
                return;
            }
            assertThat(action, equalTo(LEADER_CHECK_ACTION_NAME));
            assertEquals(node, leader);

            // Capture the response kind at send time; the reply itself is delivered as a scheduled task.
            final Response response = nextResponse[0];
            deterministicTaskQueue.scheduleNow(new Runnable() {
                @Override
                public void run() {
                    if (response == Response.SUCCESS) {
                        handleResponse(requestId, Empty.INSTANCE);
                    } else if (response == Response.REMOTE_ERROR) {
                        handleRemoteError(requestId, new NodeHealthCheckFailureException("simulated error"));
                    }
                }

                @Override
                public String toString() {
                    return response + " response to request " + requestId;
                }
            });
        }
    };

    final TransportService transportService = mockTransport.createTransportService(
        settings,
        deterministicTaskQueue.getThreadPool(),
        NOOP_TRANSPORT_INTERCEPTOR,
        boundTransportAddress -> localNode,
        null,
        emptySet()
    );
    transportService.start();
    transportService.acceptIncomingRequests();

    final AtomicBoolean leaderFailed = new AtomicBoolean();
    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> {
        assertThat(e.getMessage(), endsWith("failed health checks"));
        // The callback must fire at most once.
        assertTrue(leaderFailed.compareAndSet(false, true));
    }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));

    leaderChecker.updateLeader(leader);

    // Run through ten check intervals of successful responses.
    final long successPeriodMillis = 10 * LEADER_CHECK_INTERVAL_SETTING.get(Settings.EMPTY).millis();
    while (deterministicTaskQueue.getCurrentTimeMillis() < successPeriodMillis) {
        deterministicTaskQueue.runAllRunnableTasks();
        deterministicTaskQueue.advanceTime();
    }
    deterministicTaskQueue.runAllRunnableTasks();
    assertFalse(leaderFailed.get());

    // Switch to the health-check failure; advancing time once and draining the queue must report the failure.
    nextResponse[0] = Response.REMOTE_ERROR;
    deterministicTaskQueue.advanceTime();
    deterministicTaskQueue.runAllRunnableTasks();
    assertTrue(leaderFailed.get());
}
Use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.
From the class LeaderCheckerTests, method testLeaderBehaviour.
/**
 * Exercises the leader-check request handler registered by {@code LeaderChecker} by sending
 * leader checks to the local node on behalf of another node in four situations:
 * local node unhealthy, sender not in the cluster, sender in the cluster, and local node no
 * longer the master. Each case asserts on the success flag and the reported failure.
 */
public void testLeaderBehaviour() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
    final CapturingTransport capturingTransport = new CapturingTransport();

    // Health status handed to the LeaderChecker; starts out unhealthy and is switched to healthy below.
    final AtomicReference<StatusInfo> healthStatus = new AtomicReference<>(new StatusInfo(UNHEALTHY, "unhealthy-info"));

    final TransportService transportService = capturingTransport.createTransportService(
        settings,
        deterministicTaskQueue.getThreadPool(),
        NOOP_TRANSPORT_INTERCEPTOR,
        boundTransportAddress -> localNode,
        null,
        emptySet()
    );
    transportService.start();
    transportService.acceptIncomingRequests();

    final LeaderChecker leaderChecker = new LeaderChecker(
        settings,
        transportService,
        e -> fail("shouldn't be checking anything"),
        healthStatus::get
    );

    // Cluster view containing only the local node, which is also the master.
    final DiscoveryNodes localOnlyNodes = DiscoveryNodes.builder()
        .add(localNode)
        .localNodeId(localNode.getId())
        .masterNodeId(localNode.getId())
        .build();

    // Case 1: the local node reports unhealthy, so the check is rejected with a health-check failure.
    {
        leaderChecker.setCurrentNodes(localOnlyNodes);
        final CapturingTransportResponseHandler responseHandler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), responseHandler);
        deterministicTaskQueue.runAllTasks();

        assertFalse(responseHandler.successfulResponseReceived);
        assertThat(responseHandler.transportException.getRootCause(), instanceOf(NodeHealthCheckFailureException.class));
        final NodeHealthCheckFailureException rootCause =
            (NodeHealthCheckFailureException) responseHandler.transportException.getRootCause();
        assertThat(
            rootCause.getMessage(),
            equalTo("rejecting leader check from [" + otherNode + "] since node is unhealthy [unhealthy-info]")
        );
    }

    healthStatus.set(new StatusInfo(HEALTHY, "healthy-info"));

    // Case 2: healthy, but the sender is not part of the current nodes.
    {
        leaderChecker.setCurrentNodes(localOnlyNodes);
        final CapturingTransportResponseHandler responseHandler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), responseHandler);
        deterministicTaskQueue.runAllTasks();

        assertFalse(responseHandler.successfulResponseReceived);
        assertThat(responseHandler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        final CoordinationStateRejectedException rootCause =
            (CoordinationStateRejectedException) responseHandler.transportException.getRootCause();
        assertThat(
            rootCause.getMessage(),
            equalTo("rejecting leader check since [" + otherNode + "] has been removed from the cluster")
        );
    }

    // Case 3: the sender is in the current nodes and the local node is master, so the check succeeds.
    {
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(localOnlyNodes).add(otherNode).build());
        final CapturingTransportResponseHandler responseHandler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), responseHandler);
        deterministicTaskQueue.runAllTasks();

        assertTrue(responseHandler.successfulResponseReceived);
        assertThat(responseHandler.transportException, nullValue());
    }

    // Case 4: the sender is in the current nodes, but no master id is set.
    {
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(localOnlyNodes).add(otherNode).masterNodeId(null).build());
        final CapturingTransportResponseHandler responseHandler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), responseHandler);
        deterministicTaskQueue.runAllTasks();

        assertFalse(responseHandler.successfulResponseReceived);
        assertThat(responseHandler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        final CoordinationStateRejectedException rootCause =
            (CoordinationStateRejectedException) responseHandler.transportException.getRootCause();
        assertThat(
            rootCause.getMessage(),
            equalTo("rejecting leader check from [" + otherNode + "] sent to a node that is no longer the master")
        );
    }
}
Aggregations