Search in sources :

Example 6 with StatusInfo

Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.

In the class NodeJoinTests, method testJoinFollowerWithHigherTerm:

/**
 * Sets up a coordinator whose voting configuration contains only the first node, processes a
 * start-join and a follower check from the second node at a higher term, and then submits a join
 * carrying an even higher term. The test passes when the local node reports itself as elected master.
 */
public void testJoinFollowerWithHigherTerm() throws Exception {
    final DiscoveryNode firstNode = newNode(0, true);
    final DiscoveryNode secondNode = newNode(1, true);
    final long startTerm = randomLongBetween(1, 10);
    final long startVersion = randomLongBetween(1, 10);
    setupFakeMasterServiceAndCoordinator(
        startTerm,
        initialState(firstNode, startTerm, startVersion, VotingConfiguration.of(firstNode)),
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );

    // Start-join and follower check from the second node, both at a term above the starting one.
    final long higherTerm = startTerm + randomLongBetween(1, 10);
    handleStartJoinFrom(secondNode, higherTerm);
    handleFollowerCheckFrom(secondNode, higherTerm);

    // The join itself is sent for a term that is higher again.
    final long highestTerm = higherTerm + randomLongBetween(1, 10);
    final Join join = new Join(secondNode, firstNode, highestTerm, startTerm, startVersion);
    joinNodeAndRun(new JoinRequest(secondNode, highestTerm, Optional.of(join)));

    assertTrue(isLocalNodeElectedMaster());
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo)

Example 7 with StatusInfo

Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.

In the class NodeJoinTests, method testJoinWithHigherTermElectsLeader:

/**
 * Submits a single join at a term above the initial one and checks the observable state at each step:
 * before the join the local node is not master; right after the join the coordinator mode is
 * {@code LEADER} while the master-service state still has no master node id; once the queued tasks
 * have run, the join future is done and the local node is the elected master.
 */
public void testJoinWithHigherTermElectsLeader() {
    final DiscoveryNode firstNode = newNode(0, true);
    final DiscoveryNode secondNode = newNode(1, true);
    final long startTerm = randomLongBetween(1, 10);
    final long startVersion = randomLongBetween(1, 10);
    // The voting configuration holds exactly one of the two nodes, chosen at random.
    final VotingConfiguration votingConfiguration = VotingConfiguration.of(randomFrom(firstNode, secondNode));
    setupFakeMasterServiceAndCoordinator(
        startTerm,
        initialState(firstNode, startTerm, startVersion, votingConfiguration),
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );
    assertFalse(isLocalNodeElectedMaster());
    assertNull(coordinator.getStateForMasterService().nodes().getMasterNodeId());

    final long higherTerm = startTerm + randomLongBetween(1, 10);
    final Join join = new Join(secondNode, firstNode, higherTerm, startTerm, startVersion);
    final SimpleFuture joinFuture = joinNodeAsync(new JoinRequest(secondNode, higherTerm, Optional.of(join)));

    // Mode flips immediately, but the master id is only visible after the queued tasks have run.
    assertEquals(Coordinator.Mode.LEADER, coordinator.getMode());
    assertNull(coordinator.getStateForMasterService().nodes().getMasterNodeId());

    deterministicTaskQueue.runAllRunnableTasks();

    assertTrue(joinFuture.isDone());
    assertTrue(isLocalNodeElectedMaster());
    assertTrue(coordinator.getStateForMasterService().nodes().isLocalNodeElectedMaster());
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo)

Example 8 with StatusInfo

Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.

In the class FollowersCheckerTests, method testFailureCounterResetsOnSuccess:

/**
 * Drives {@code testBehaviourOfFailingNode} with a response supplier that succeeds on every
 * {@code retryCount}-th call, at most {@code maxRecoveries} times, and throws on every other call.
 * The expected failure reason is "followers check retry count exceeded" and the expected time is
 * {@code (retryCount * (maxRecoveries + 1) - 1)} check intervals.
 */
public void testFailureCounterResetsOnSuccess() {
    final Settings settings = randomSettings();
    final int retryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings);
    final int maxRecoveries = randomIntBetween(3, 10);
    final long checkIntervalMillis = FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis();
    final long expectedFailureTime = (retryCount * (maxRecoveries + 1) - 1) * checkIntervalMillis;

    // passes just enough checks to keep it alive, up to maxRecoveries, and then fails completely
    final Supplier<Empty> responder = new Supplier<Empty>() {

        private int checkIndex;

        private int recoveries;

        @Override
        public Empty get() {
            checkIndex++;
            // Fail unless this is a retryCount-th call and there are recoveries left to spend.
            if (checkIndex % retryCount != 0 || recoveries >= maxRecoveries) {
                throw new OpenSearchException("simulated exception");
            }
            recoveries++;
            return Empty.INSTANCE;
        }
    };

    testBehaviourOfFailingNode(
        settings,
        responder,
        "followers check retry count exceeded",
        expectedFailureTime,
        () -> new StatusInfo(HEALTHY, "healthy-info")
    );
}
Also used : Empty(org.opensearch.transport.TransportResponse.Empty) StatusInfo(org.opensearch.monitor.StatusInfo) OpenSearchException(org.opensearch.OpenSearchException) Settings(org.opensearch.common.settings.Settings)

Example 9 with StatusInfo

Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.

In the class LeaderCheckerTests, method testFollowerFailsImmediatelyOnHealthCheckFailure:

/**
 * Checks that a {@link LeaderChecker} reports leader failure as soon as a leader check is answered
 * with a {@link NodeHealthCheckFailureException}.
 *
 * The mock transport answers leader checks according to {@code responseHolder[0]}: first with
 * successes for a stretch of simulated time (no failure expected), then with a remote
 * health-check error, after which a single further time step must trigger the failure callback.
 */
public void testFollowerFailsImmediatelyOnHealthCheckFailure() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode leader = new DiscoveryNode("leader", buildNewFakeTransportAddress(), Version.CURRENT);
    // Single-element array so the test body can change the response used by the anonymous transport below.
    final Response[] responseHolder = new Response[] { Response.SUCCESS };
    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
    final MockTransport mockTransport = new MockTransport() {

        @Override
        protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
            // Handshakes are answered straight away; every other request must be a leader check sent to the leader.
            if (action.equals(HANDSHAKE_ACTION_NAME)) {
                handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
                return;
            }
            assertThat(action, equalTo(LEADER_CHECK_ACTION_NAME));
            assertEquals(node, leader);
            // The response kind is captured when the request is sent, not when the scheduled reply runs.
            final Response response = responseHolder[0];
            deterministicTaskQueue.scheduleNow(new Runnable() {

                @Override
                public void run() {
                    // NOTE(review): only SUCCESS and REMOTE_ERROR are handled; any other Response constant would produce no reply.
                    switch(response) {
                        case SUCCESS:
                            handleResponse(requestId, Empty.INSTANCE);
                            break;
                        case REMOTE_ERROR:
                            handleRemoteError(requestId, new NodeHealthCheckFailureException("simulated error"));
                            break;
                    }
                }

                @Override
                public String toString() {
                    return response + " response to request " + requestId;
                }
            });
        }
    };
    final TransportService transportService = mockTransport.createTransportService(settings, deterministicTaskQueue.getThreadPool(), NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet());
    transportService.start();
    transportService.acceptIncomingRequests();
    final AtomicBoolean leaderFailed = new AtomicBoolean();
    // The failure callback checks the message suffix and that it is invoked at most once.
    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> {
        assertThat(e.getMessage(), endsWith("failed health checks"));
        assertTrue(leaderFailed.compareAndSet(false, true));
    }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));
    leaderChecker.updateLeader(leader);
    {
        // Run with successful responses until simulated time reaches 10x the check interval read from Settings.EMPTY.
        while (deterministicTaskQueue.getCurrentTimeMillis() < 10 * LEADER_CHECK_INTERVAL_SETTING.get(Settings.EMPTY).millis()) {
            deterministicTaskQueue.runAllRunnableTasks();
            deterministicTaskQueue.advanceTime();
        }
        deterministicTaskQueue.runAllRunnableTasks();
        assertFalse(leaderFailed.get());
        // Switch to the health-check error; one time step plus draining runnable tasks must be enough to fail the leader.
        responseHolder[0] = Response.REMOTE_ERROR;
        deterministicTaskQueue.advanceTime();
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) TransportRequest(org.opensearch.transport.TransportRequest) TransportResponse(org.opensearch.transport.TransportResponse) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransportService(org.opensearch.transport.TransportService) StatusInfo(org.opensearch.monitor.StatusInfo) MockTransport(org.opensearch.test.transport.MockTransport) Settings(org.opensearch.common.settings.Settings)

Example 10 with StatusInfo

Use of org.opensearch.monitor.StatusInfo in the OpenSearch project by opensearch-project.

In the class LeaderCheckerTests, method testLeaderBehaviour:

/**
 * Exercises how a {@link LeaderChecker} answers incoming leader-check requests. Each request is sent
 * to the local node itself with a {@code LeaderCheckRequest} naming {@code otherNode}, and the
 * outcome is asserted for four successive situations:
 * <ol>
 *   <li>health supplier reports UNHEALTHY: rejected with {@link NodeHealthCheckFailureException};</li>
 *   <li>healthy, but {@code otherNode} is not in the current nodes: rejected with
 *       {@link CoordinationStateRejectedException} ("has been removed from the cluster");</li>
 *   <li>healthy and {@code otherNode} is in the current nodes: successful response;</li>
 *   <li>current nodes have a null master node id: rejected with
 *       {@link CoordinationStateRejectedException} ("no longer the master").</li>
 * </ol>
 */
public void testLeaderBehaviour() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
    final CapturingTransport capturingTransport = new CapturingTransport();
    // Health status is held in a reference so it can be switched between scenarios; it starts out unhealthy.
    AtomicReference<StatusInfo> nodeHealthServiceStatus = new AtomicReference<>(new StatusInfo(UNHEALTHY, "unhealthy-info"));
    final TransportService transportService = capturingTransport.createTransportService(settings, deterministicTaskQueue.getThreadPool(), NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet());
    transportService.start();
    transportService.acceptIncomingRequests();
    // The failure callback fails the test outright if it is ever invoked.
    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> fail("shouldn't be checking anything"), () -> nodeHealthServiceStatus.get());
    // Node set containing only the local node, which is also marked as the master.
    final DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).masterNodeId(localNode.getId()).build();
    {
        // Scenario 1: health supplier still returns UNHEALTHY, so the check is rejected with the health info in the message.
        leaderChecker.setCurrentNodes(discoveryNodes);
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertFalse(handler.successfulResponseReceived);
        assertThat(handler.transportException.getRootCause(), instanceOf(NodeHealthCheckFailureException.class));
        NodeHealthCheckFailureException cause = (NodeHealthCheckFailureException) handler.transportException.getRootCause();
        assertThat(cause.getMessage(), equalTo("rejecting leader check from [" + otherNode + "] since node is unhealthy [unhealthy-info]"));
    }
    // All remaining scenarios run with a healthy status.
    nodeHealthServiceStatus.getAndSet(new StatusInfo(HEALTHY, "healthy-info"));
    {
        // Scenario 2: otherNode is not part of the current nodes, so its check is rejected as coming from a removed node.
        leaderChecker.setCurrentNodes(discoveryNodes);
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertFalse(handler.successfulResponseReceived);
        assertThat(handler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        CoordinationStateRejectedException cause = (CoordinationStateRejectedException) handler.transportException.getRootCause();
        assertThat(cause.getMessage(), equalTo("rejecting leader check since [" + otherNode + "] has been removed from the cluster"));
    }
    {
        // Scenario 3: otherNode has been added to the current nodes, so the check succeeds.
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(discoveryNodes).add(otherNode).build());
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertTrue(handler.successfulResponseReceived);
        assertThat(handler.transportException, nullValue());
    }
    {
        // Scenario 4: the master node id is cleared, so the check is rejected because this node is no longer the master.
        leaderChecker.setCurrentNodes(DiscoveryNodes.builder(discoveryNodes).add(otherNode).masterNodeId(null).build());
        final CapturingTransportResponseHandler handler = new CapturingTransportResponseHandler();
        transportService.sendRequest(localNode, LEADER_CHECK_ACTION_NAME, new LeaderCheckRequest(otherNode), handler);
        deterministicTaskQueue.runAllTasks();
        assertFalse(handler.successfulResponseReceived);
        assertThat(handler.transportException.getRootCause(), instanceOf(CoordinationStateRejectedException.class));
        CoordinationStateRejectedException cause = (CoordinationStateRejectedException) handler.transportException.getRootCause();
        assertThat(cause.getMessage(), equalTo("rejecting leader check from [" + otherNode + "] sent to a node that is no longer the master"));
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) CapturingTransport(org.opensearch.test.transport.CapturingTransport) AtomicReference(java.util.concurrent.atomic.AtomicReference) LeaderCheckRequest(org.opensearch.cluster.coordination.LeaderChecker.LeaderCheckRequest) StatusInfo(org.opensearch.monitor.StatusInfo) TransportService(org.opensearch.transport.TransportService) Settings(org.opensearch.common.settings.Settings) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes)

Aggregations

StatusInfo (org.opensearch.monitor.StatusInfo): 45, DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode): 33, Settings (org.opensearch.common.settings.Settings): 18, TransportService (org.opensearch.transport.TransportService): 14, ClusterState (org.opensearch.cluster.ClusterState): 11, AtomicReference (java.util.concurrent.atomic.AtomicReference): 10, TransportRequest (org.opensearch.transport.TransportRequest): 10, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 9, MockTransport (org.opensearch.test.transport.MockTransport): 9, ClusterFormationState (org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState): 8, DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes): 7, ConnectTransportException (org.opensearch.transport.ConnectTransportException): 6, TransportException (org.opensearch.transport.TransportException): 6, TransportResponse (org.opensearch.transport.TransportResponse): 6, Empty (org.opensearch.transport.TransportResponse.Empty): 6, HashSet (java.util.HashSet): 5, VotingConfiguration (org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration): 5, TransportAddress (org.opensearch.common.transport.TransportAddress): 5, CapturingTransport (org.opensearch.test.transport.CapturingTransport): 5, Set (java.util.Set): 4