Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.
From class LeaderCheckerTests, method testFollowerFailsImmediatelyOnDisconnection.
/**
 * Verifies that the leader-failure callback is invoked after a single failed leader check caused by
 * a {@link ConnectTransportException}, and after an explicit disconnection from the leader.
 *
 * <p>The mock transport answers handshakes directly and answers leader checks according to the value
 * currently stored in {@code responseHolder}. The test then runs three phases:
 * <ol>
 *   <li>successful checks, then a switch to {@code REMOTE_ERROR}; one time advance plus one run of
 *       the runnable tasks must report the failure;</li>
 *   <li>the same with {@code DIRECT_ERROR};</li>
 *   <li>a connect followed by a disconnect from the leader, which must report the failure after one
 *       run of the runnable tasks.</li>
 * </ol>
 */
public void testFollowerFailsImmediatelyOnDisconnection() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode leader = new DiscoveryNode("leader", buildNewFakeTransportAddress(), Version.CURRENT);

    // Single-element array so the anonymous transport below can observe updates made by the test body.
    final Response[] responseHolder = new Response[] { Response.SUCCESS };

    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());

    final MockTransport mockTransport = new MockTransport() {
        @Override
        protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
            if (action.equals(HANDSHAKE_ACTION_NAME)) {
                handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
                return;
            }
            assertThat(action, equalTo(LEADER_CHECK_ACTION_NAME));
            // Expected value first, so that a failure message reports the two nodes the right way round.
            assertEquals(leader, node);

            // Capture the response kind at send time; the reply itself is delivered later via the task queue.
            final Response response = responseHolder[0];

            deterministicTaskQueue.scheduleNow(new Runnable() {
                @Override
                public void run() {
                    switch (response) {
                        case SUCCESS:
                            handleResponse(requestId, Empty.INSTANCE);
                            break;
                        case REMOTE_ERROR:
                            handleRemoteError(requestId, new ConnectTransportException(leader, "simulated error"));
                            break;
                        case DIRECT_ERROR:
                            handleError(requestId, new ConnectTransportException(leader, "simulated error"));
                            break;
                        default:
                            // Fail loudly rather than silently leaving the request unanswered.
                            throw new AssertionError("unexpected response type: " + response);
                    }
                }

                @Override
                public String toString() {
                    return response + " response to request " + requestId;
                }
            });
        }
    };

    final TransportService transportService = mockTransport.createTransportService(
        settings,
        deterministicTaskQueue.getThreadPool(),
        NOOP_TRANSPORT_INTERCEPTOR,
        boundTransportAddress -> localNode,
        null,
        emptySet()
    );
    transportService.start();
    transportService.acceptIncomingRequests();

    final AtomicBoolean leaderFailed = new AtomicBoolean();

    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> {
        // Every reported failure must carry a disconnection message, and the callback must not fire
        // twice without the flag having been reset in between.
        assertThat(e.getMessage(), anyOf(endsWith("disconnected"), endsWith("disconnected during check")));
        assertTrue(leaderFailed.compareAndSet(false, true));
    }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));

    // Clock value (absolute, in millis) used as the warm-up bound in phases 1 and 2.
    final long warmUpEndMillis = 10 * LEADER_CHECK_INTERVAL_SETTING.get(Settings.EMPTY).millis();

    // Phase 1: successful checks, then a remotely-reported connect failure.
    leaderChecker.updateLeader(leader);
    {
        while (deterministicTaskQueue.getCurrentTimeMillis() < warmUpEndMillis) {
            deterministicTaskQueue.runAllRunnableTasks();
            deterministicTaskQueue.advanceTime();
        }

        deterministicTaskQueue.runAllRunnableTasks();
        assertFalse(leaderFailed.get());

        responseHolder[0] = Response.REMOTE_ERROR;
        deterministicTaskQueue.advanceTime();
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }

    // Reset between phases: clear the leader, drain all tasks, and re-arm the flag and response.
    leaderChecker.updateLeader(null);
    deterministicTaskQueue.runAllTasks();
    leaderFailed.set(false);
    responseHolder[0] = Response.SUCCESS;

    // Phase 2: successful checks, then a directly-raised connect failure.
    leaderChecker.updateLeader(leader);
    {
        // NOTE(review): the bound is an absolute clock value that phase 1 already reached, so this loop
        // presumably executes zero iterations (assuming the task queue clock never moves backwards).
        // Confirm whether a bound relative to the current time was intended.
        while (deterministicTaskQueue.getCurrentTimeMillis() < warmUpEndMillis) {
            deterministicTaskQueue.runAllRunnableTasks();
            deterministicTaskQueue.advanceTime();
        }

        deterministicTaskQueue.runAllRunnableTasks();
        assertFalse(leaderFailed.get());

        responseHolder[0] = Response.DIRECT_ERROR;
        deterministicTaskQueue.advanceTime();
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }

    deterministicTaskQueue.runAllTasks();
    leaderFailed.set(false);
    responseHolder[0] = Response.SUCCESS;

    // Phase 3: an explicit disconnection from the leader.
    leaderChecker.updateLeader(leader);
    {
        // need to connect first for disconnect to have any effect
        transportService.connectToNode(leader);

        transportService.disconnectFromNode(leader);
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }
}
Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.
From class PreVoteCollectorTests, method createObjects.
/**
 * Builds the fixture used by each test: a deterministic task queue, a mock transport that replies to
 * pre-vote requests from the {@code responsesByNode} map, randomised term/version values, a started
 * transport service bound to {@code localNode}, and a {@link PreVoteCollector} whose health supplier
 * reads the current {@code healthStatus} field.
 */
@Before
public void createObjects() {
    final Settings nodeSettings = Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build();
    deterministicTaskQueue = new DeterministicTaskQueue(nodeSettings, random());

    final MockTransport transport = new MockTransport() {
        @Override
        protected void onSendRequest(
            final long requestId,
            final String action,
            final TransportRequest request,
            final DiscoveryNode node
        ) {
            super.onSendRequest(requestId, action, request, node);

            // Only pre-vote requests are expected: sent to a node other than the local one, and
            // naming the local node as their source.
            assertThat(action, is(REQUEST_PRE_VOTE_ACTION_NAME));
            assertThat(request, instanceOf(PreVoteRequest.class));
            assertThat(node, not(equalTo(localNode)));
            final DiscoveryNode sourceNode = ((PreVoteRequest) request).getSourceNode();
            assertThat(sourceNode, equalTo(localNode));

            // The reply is deferred to the task queue instead of being delivered inline.
            deterministicTaskQueue.scheduleNow(new Runnable() {
                @Override
                public void run() {
                    final PreVoteResponse cannedResponse = responsesByNode.get(node);
                    if (cannedResponse != null) {
                        handleResponse(requestId, cannedResponse);
                        return;
                    }
                    // No response registered for this node: report a connect failure instead.
                    handleRemoteError(requestId, new ConnectTransportException(node, "no response"));
                }

                @Override
                public String toString() {
                    return "response to " + request + " from " + node;
                }
            });
        }

        @Override
        public void handleRemoteError(long requestId, Throwable t) {
            // Overridden so that remote errors are only logged.
            logger.warn("Remote error", t);
        }
    };

    // Random draws stay in this order so that seeded runs remain reproducible.
    lastAcceptedTerm = randomNonNegativeLong();
    currentTerm = randomLongBetween(lastAcceptedTerm, Long.MAX_VALUE);
    lastAcceptedVersion = randomNonNegativeLong();

    localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    responsesByNode.put(localNode, new PreVoteResponse(currentTerm, lastAcceptedTerm, lastAcceptedVersion));
    healthStatus = new StatusInfo(HEALTHY, "healthy-info");

    transportService = transport.createTransportService(
        nodeSettings,
        deterministicTaskQueue.getThreadPool(),
        TransportService.NOOP_TRANSPORT_INTERCEPTOR,
        boundAddress -> localNode,
        null,
        emptySet()
    );
    transportService.start();
    transportService.acceptIncomingRequests();

    preVoteCollector = new PreVoteCollector(
        transportService,
        () -> {
            // The callback must run at most once per fixture.
            assert electionOccurred == false;
            electionOccurred = true;
        },
        ignored -> {},
        ElectionStrategy.DEFAULT_INSTANCE,
        () -> healthStatus
    );
    preVoteCollector.update(getLocalPreVoteResponse(), null);
}
Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.
From class PreVoteCollectorTests, method testUnhealthyNodeDoesNotOfferPreVote.
/**
 * Verifies that handling a pre-vote request while {@code healthStatus} is UNHEALTHY fails with a
 * {@link RemoteTransportException} whose cause is a {@link NodeHealthCheckFailureException}.
 */
public void testUnhealthyNodeDoesNotOfferPreVote() {
    final long requestTerm = randomNonNegativeLong();
    healthStatus = new StatusInfo(UNHEALTHY, "unhealthy-info");
    final DiscoveryNode requester = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);

    final RemoteTransportException failure = expectThrows(
        RemoteTransportException.class,
        () -> handlePreVoteRequestViaTransportService(new PreVoteRequest(requester, requestTerm))
    );

    final Throwable cause = failure.getCause();
    assertThat(cause, instanceOf(NodeHealthCheckFailureException.class));
}
Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.
From class PreVoteCollectorTests, method testNoElectionStartIfLocalNodeIsOnlyNodeAndUnhealthy.
/**
 * Verifies that running the collector with only the local node, while {@code healthStatus} is
 * UNHEALTHY, leaves {@code electionOccurred} unset.
 */
public void testNoElectionStartIfLocalNodeIsOnlyNodeAndUnhealthy() {
    // Flip the health status before refreshing the collector's local pre-vote response.
    final StatusInfo unhealthy = new StatusInfo(UNHEALTHY, "unhealthy-info");
    healthStatus = unhealthy;

    preVoteCollector.update(getLocalPreVoteResponse(), null);
    startAndRunCollector(localNode);

    assertFalse(electionOccurred);
}
Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.
From class CoordinatorTests, method testUnhealthyNodesGetsRemoved.
/**
 * Verifies that nodes whose health supplier starts returning UNHEALTHY are no longer part of the
 * last committed voting configuration once the cluster has stabilised.
 *
 * <p>Two extra nodes sharing one switchable health status are added to a three-node cluster. After
 * they have joined the voting configuration and auto-shrink has been enabled, their status is
 * switched to UNHEALTHY and the configuration is expected to contain three node ids, neither of
 * which belongs to the added nodes.
 */
public void testUnhealthyNodesGetsRemoved() {
    // Shared by both added nodes so that a single update changes the health of both.
    final AtomicReference<StatusInfo> nodeHealth = new AtomicReference<>(new StatusInfo(HEALTHY, "healthy-info"));

    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode leader = cluster.getAnyLeader();
        logger.info("--> adding two new healthy nodes");
        final ClusterNode firstAdded = cluster.new ClusterNode(
            nextNodeIndex.getAndIncrement(),
            true,
            leader.nodeSettings,
            nodeHealth::get
        );
        final ClusterNode secondAdded = cluster.new ClusterNode(
            nextNodeIndex.getAndIncrement(),
            true,
            leader.nodeSettings,
            nodeHealth::get
        );
        cluster.clusterNodes.add(firstAdded);
        cluster.clusterNodes.add(secondAdded);

        // The first pinging discovers the master
        final long discoveryDelay = defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING);
        // One message delay to send a join
        final long joinDelay = DEFAULT_DELAY_VARIABILITY;
        // followup reconfiguration
        final long reconfigurationDelay = 2 * 2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.stabilise(discoveryDelay + joinDelay + reconfigurationDelay);

        assertThat(leader.coordinator.getMode(), is(Mode.LEADER));
        final VotingConfiguration configWithAllNodes = leader.getLastAppliedClusterState().getLastCommittedConfiguration();
        assertThat(
            configWithAllNodes + " should be all nodes",
            configWithAllNodes.getNodeIds(),
            equalTo(cluster.clusterNodes.stream().map(ClusterNode::getId).collect(Collectors.toSet()))
        );

        logger.info("setting auto-shrink reconfiguration to true");
        leader.submitSetAutoShrinkVotingConfiguration(true);
        cluster.stabilise(DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
        assertTrue(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(leader.getLastAppliedClusterState().metadata().settings()));

        logger.info("--> changing health of newly added nodes to unhealthy");
        nodeHealth.set(new StatusInfo(UNHEALTHY, "unhealthy-info"));

        // Each follower may have just sent a leader check, which receives no response,
        // then wait for the follower to check the leader,
        // then wait for the exception response
        final long leaderCheckDelay = defaultMillis(LEADER_CHECK_TIMEOUT_SETTING)
            + defaultMillis(LEADER_CHECK_INTERVAL_SETTING)
            + DEFAULT_DELAY_VARIABILITY;
        // ALSO the leader may have just sent a follower check, which receives no response,
        // wait for the leader to check its followers,
        // then wait for the exception response
        final long followerCheckDelay = defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING)
            + defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING)
            + DEFAULT_DELAY_VARIABILITY;
        // wait for the removal to be committed, then wait for the followup reconfiguration
        final long commitDelay = DEFAULT_CLUSTER_STATE_UPDATE_DELAY + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.stabilise(Math.max(leaderCheckDelay, followerCheckDelay) + commitDelay);

        final ClusterNode leaderAfterRemoval = cluster.getAnyLeader();
        final VotingConfiguration configAfterRemoval = leaderAfterRemoval.getLastAppliedClusterState().getLastCommittedConfiguration();
        assertThat(configAfterRemoval + " should be 3 nodes", configAfterRemoval.getNodeIds().size(), equalTo(3));
        assertFalse(configAfterRemoval.getNodeIds().contains(firstAdded.getId()));
        assertFalse(configAfterRemoval.getNodeIds().contains(secondAdded.getId()));
    }
}
Aggregations