Search in sources:

Example 11 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class LeaderCheckerTests method testFollowerFailsImmediatelyOnDisconnection.

/**
 * Exercises three ways a follower can lose its leader and checks that the failure
 * callback of {@link LeaderChecker} fires for each of them: a leader check answered
 * with a remote {@link ConnectTransportException}, a leader check failing directly
 * with a {@link ConnectTransportException}, and an explicit disconnect from the leader.
 * In every phase the failure must be observed after a single further step of the
 * deterministic task queue.
 */
public void testFollowerFailsImmediatelyOnDisconnection() {
    final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final DiscoveryNode leader = new DiscoveryNode("leader", buildNewFakeTransportAddress(), Version.CURRENT);
    // Single-element array so the anonymous transport below can read a value the test body keeps changing.
    final Response[] responseHolder = new Response[] { Response.SUCCESS };
    final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
    final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
    final MockTransport mockTransport = new MockTransport() {

        @Override
        protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
            // Handshakes are answered right away and are not subject to the simulated failures.
            if (action.equals(HANDSHAKE_ACTION_NAME)) {
                handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
                return;
            }
            // Anything else must be a leader check addressed to the leader.
            assertThat(action, equalTo(LEADER_CHECK_ACTION_NAME));
            assertEquals(node, leader);
            // The outcome is fixed at send time; later changes to responseHolder only affect later requests.
            final Response response = responseHolder[0];
            deterministicTaskQueue.scheduleNow(new Runnable() {

                @Override
                public void run() {
                    switch(response) {
                        case SUCCESS:
                            handleResponse(requestId, Empty.INSTANCE);
                            break;
                        case REMOTE_ERROR:
                            handleRemoteError(requestId, new ConnectTransportException(leader, "simulated error"));
                            break;
                        case DIRECT_ERROR:
                            // Last case of the switch, so no break is needed.
                            handleError(requestId, new ConnectTransportException(leader, "simulated error"));
                    }
                }

                // Descriptive name for the scheduled task.
                @Override
                public String toString() {
                    return response + " response to request " + requestId;
                }
            });
        }
    };
    final TransportService transportService = mockTransport.createTransportService(settings, deterministicTaskQueue.getThreadPool(), NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet());
    transportService.start();
    transportService.acceptIncomingRequests();
    final AtomicBoolean leaderFailed = new AtomicBoolean();
    // The failure callback checks the message suffix and, via compareAndSet, that it is invoked
    // at most once between resets of leaderFailed. The health supplier always reports HEALTHY.
    final LeaderChecker leaderChecker = new LeaderChecker(settings, transportService, e -> {
        assertThat(e.getMessage(), anyOf(endsWith("disconnected"), endsWith("disconnected during check")));
        assertTrue(leaderFailed.compareAndSet(false, true));
    }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"));
    // Phase 1: successful checks for a while, then a remote connect error.
    leaderChecker.updateLeader(leader);
    {
        // Run until the simulated clock reaches ten default check intervals; no failure is expected meanwhile.
        while (deterministicTaskQueue.getCurrentTimeMillis() < 10 * LEADER_CHECK_INTERVAL_SETTING.get(Settings.EMPTY).millis()) {
            deterministicTaskQueue.runAllRunnableTasks();
            deterministicTaskQueue.advanceTime();
        }
        deterministicTaskQueue.runAllRunnableTasks();
        assertFalse(leaderFailed.get());
        // From now on leader checks are answered with a remote connect error.
        responseHolder[0] = Response.REMOTE_ERROR;
        deterministicTaskQueue.advanceTime();
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }
    // Reset: drop the leader, drain the queue and restore the success response.
    leaderChecker.updateLeader(null);
    deterministicTaskQueue.runAllTasks();
    leaderFailed.set(false);
    responseHolder[0] = Response.SUCCESS;
    // Phase 2: same scenario, but the connect error is delivered directly rather than as a remote error.
    leaderChecker.updateLeader(leader);
    {
        // NOTE(review): the bound is an absolute time and the clock is not reset after phase 1, so this
        // loop condition looks already false on entry and the loop presumably never iterates - confirm.
        while (deterministicTaskQueue.getCurrentTimeMillis() < 10 * LEADER_CHECK_INTERVAL_SETTING.get(Settings.EMPTY).millis()) {
            deterministicTaskQueue.runAllRunnableTasks();
            deterministicTaskQueue.advanceTime();
        }
        deterministicTaskQueue.runAllRunnableTasks();
        assertFalse(leaderFailed.get());
        responseHolder[0] = Response.DIRECT_ERROR;
        deterministicTaskQueue.advanceTime();
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }
    // Reset again; unlike after phase 1, updateLeader(null) is not called here.
    deterministicTaskQueue.runAllTasks();
    leaderFailed.set(false);
    responseHolder[0] = Response.SUCCESS;
    // Phase 3: an explicit disconnect must be reported without advancing the clock.
    leaderChecker.updateLeader(leader);
    {
        // need to connect first for disconnect to have any effect
        transportService.connectToNode(leader);
        transportService.disconnectFromNode(leader);
        deterministicTaskQueue.runAllRunnableTasks();
        assertTrue(leaderFailed.get());
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) TransportRequest(org.opensearch.transport.TransportRequest) TransportResponse(org.opensearch.transport.TransportResponse) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TransportService(org.opensearch.transport.TransportService) ConnectTransportException(org.opensearch.transport.ConnectTransportException) StatusInfo(org.opensearch.monitor.StatusInfo) MockTransport(org.opensearch.test.transport.MockTransport) Settings(org.opensearch.common.settings.Settings)

Example 12 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class PreVoteCollectorTests method createObjects.

/**
 * Rebuilds the test fixture before every test.
 *
 * <p>A {@link MockTransport} is installed that validates each outgoing pre-vote request and
 * answers it, via the deterministic task queue, with the response registered for the target
 * node in {@code responsesByNode}; targets without a registered response get a
 * {@link ConnectTransportException} routed through {@code handleRemoteError}, which this
 * transport overrides to log only. The local node starts out healthy, with randomly chosen
 * term and version values, and the collector is primed with the local pre-vote response.
 */
@Before
public void createObjects() {
    final Settings nodeSettings = Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build();
    deterministicTaskQueue = new DeterministicTaskQueue(nodeSettings, random());

    final MockTransport transport = new MockTransport() {

        @Override
        protected void onSendRequest(final long requestId, final String action, final TransportRequest request, final DiscoveryNode node) {
            super.onSendRequest(requestId, action, request, node);

            // Only pre-vote requests from the local node to some other node are expected.
            assertThat(action, is(REQUEST_PRE_VOTE_ACTION_NAME));
            assertThat(request, instanceOf(PreVoteRequest.class));
            assertThat(node, not(equalTo(localNode)));
            final PreVoteRequest typedRequest = (PreVoteRequest) request;
            assertThat(typedRequest.getSourceNode(), equalTo(localNode));

            deterministicTaskQueue.scheduleNow(new Runnable() {

                @Override
                public void run() {
                    // The response is looked up when the task runs, not when the request is sent.
                    final PreVoteResponse registered = responsesByNode.get(node);
                    if (registered != null) {
                        handleResponse(requestId, registered);
                        return;
                    }
                    handleRemoteError(requestId, new ConnectTransportException(node, "no response"));
                }

                @Override
                public String toString() {
                    return "response to " + request + " from " + node;
                }
            });
        }

        @Override
        public void handleRemoteError(long requestId, Throwable cause) {
            // Remote errors are only logged by this transport.
            logger.warn("Remote error", cause);
        }
    };

    // The order of these random draws is kept as-is: currentTerm is bounded below by lastAcceptedTerm.
    lastAcceptedTerm = randomNonNegativeLong();
    currentTerm = randomLongBetween(lastAcceptedTerm, Long.MAX_VALUE);
    lastAcceptedVersion = randomNonNegativeLong();

    localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
    responsesByNode.put(localNode, new PreVoteResponse(currentTerm, lastAcceptedTerm, lastAcceptedVersion));
    healthStatus = new StatusInfo(HEALTHY, "healthy-info");

    transportService = transport.createTransportService(
        nodeSettings,
        deterministicTaskQueue.getThreadPool(),
        TransportService.NOOP_TRANSPORT_INTERCEPTOR,
        ignoredBoundAddress -> localNode,
        null,
        emptySet()
    );
    transportService.start();
    transportService.acceptIncomingRequests();

    preVoteCollector = new PreVoteCollector(
        transportService,
        () -> {
            // An election may be started at most once per test.
            assert electionOccurred == false;
            electionOccurred = true;
        },
        ignoredValue -> {},
        ElectionStrategy.DEFAULT_INSTANCE,
        // Reads the field on every call, so tests can swap healthStatus later.
        () -> healthStatus
    );
    preVoteCollector.update(getLocalPreVoteResponse(), null);
}
Also used : Matchers.not(org.hamcrest.Matchers.not) Version(org.opensearch.Version) HashMap(java.util.HashMap) Releasable(org.opensearch.common.lease.Releasable) AtomicReference(java.util.concurrent.atomic.AtomicReference) HashSet(java.util.HashSet) ClusterState(org.opensearch.cluster.ClusterState) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) MockTransport(org.opensearch.test.transport.MockTransport) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) Before(org.junit.Before) UNHEALTHY(org.opensearch.monitor.StatusInfo.Status.UNHEALTHY) StreamInput(org.opensearch.common.io.stream.StreamInput) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration) TransportRequest(org.opensearch.transport.TransportRequest) Collections.emptySet(java.util.Collections.emptySet) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) RemoteTransportException(org.opensearch.transport.RemoteTransportException) TransportResponseHandler(org.opensearch.transport.TransportResponseHandler) Set(java.util.Set) HEALTHY(org.opensearch.monitor.StatusInfo.Status.HEALTHY) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) TransportService(org.opensearch.transport.TransportService) Objects(java.util.Objects) Matchers.instanceOf(org.hamcrest.Matchers.instanceOf) SAME(org.opensearch.threadpool.ThreadPool.Names.SAME) NODE_NAME_SETTING(org.opensearch.node.Node.NODE_NAME_SETTING) Matchers.equalTo(org.hamcrest.Matchers.equalTo) REQUEST_PRE_VOTE_ACTION_NAME(org.opensearch.cluster.coordination.PreVoteCollector.REQUEST_PRE_VOTE_ACTION_NAME) StatusInfo(org.opensearch.monitor.StatusInfo) ConnectTransportException(org.opensearch.transport.ConnectTransportException) Matchers.is(org.hamcrest.Matchers.is) TransportException(org.opensearch.transport.TransportException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) TransportRequest(org.opensearch.transport.TransportRequest) 
ConnectTransportException(org.opensearch.transport.ConnectTransportException) StatusInfo(org.opensearch.monitor.StatusInfo) MockTransport(org.opensearch.test.transport.MockTransport) Settings(org.opensearch.common.settings.Settings) Before(org.junit.Before)

Example 13 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class PreVoteCollectorTests method testUnhealthyNodeDoesNotOfferPreVote.

/**
 * With the local node reporting {@code UNHEALTHY}, a pre-vote request from another node
 * must be rejected with a {@link RemoteTransportException} whose cause is a
 * {@link NodeHealthCheckFailureException}.
 */
public void testUnhealthyNodeDoesNotOfferPreVote() {
    final long requestTerm = randomNonNegativeLong();
    healthStatus = new StatusInfo(UNHEALTHY, "unhealthy-info");

    final DiscoveryNode requester = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT);
    final PreVoteRequest preVoteRequest = new PreVoteRequest(requester, requestTerm);

    final RemoteTransportException rejection = expectThrows(
        RemoteTransportException.class,
        () -> handlePreVoteRequestViaTransportService(preVoteRequest)
    );

    final Throwable rejectionCause = rejection.getCause();
    assertThat(rejectionCause, instanceOf(NodeHealthCheckFailureException.class));
}
Also used : RemoteTransportException(org.opensearch.transport.RemoteTransportException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo)

Example 14 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class PreVoteCollectorTests method testNoElectionStartIfLocalNodeIsOnlyNodeAndUnhealthy.

/**
 * Marks the local node {@code UNHEALTHY}, runs the collector with the local node as the
 * only participant, and checks that no election was started.
 */
public void testNoElectionStartIfLocalNodeIsOnlyNodeAndUnhealthy() {
    // Flip the health that the collector's supplier reports.
    healthStatus = new StatusInfo(UNHEALTHY, "unhealthy-info");

    preVoteCollector.update(getLocalPreVoteResponse(), null);
    startAndRunCollector(localNode);

    assertThat(electionOccurred, is(false));
}
Also used : StatusInfo(org.opensearch.monitor.StatusInfo)

Example 15 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class CoordinatorTests method testUnhealthyNodesGetsRemoved.

/**
 * Grows a stable three-node cluster by two healthy nodes, enables auto-shrinking of the
 * voting configuration, then flips the two new nodes to {@code UNHEALTHY} and checks that
 * the committed voting configuration ends up with three nodes, neither of them a new node.
 */
public void testUnhealthyNodesGetsRemoved() {
    // Both joining nodes read their health from this reference, so one write flips them together.
    final AtomicReference<StatusInfo> sharedHealth = new AtomicReference<>(new StatusInfo(HEALTHY, "healthy-info"));

    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode leader = cluster.getAnyLeader();

        logger.info("--> adding two new healthy nodes");
        final ClusterNode firstJoiner = cluster.new ClusterNode(
            nextNodeIndex.getAndIncrement(),
            true,
            leader.nodeSettings,
            sharedHealth::get
        );
        final ClusterNode secondJoiner = cluster.new ClusterNode(
            nextNodeIndex.getAndIncrement(),
            true,
            leader.nodeSettings,
            sharedHealth::get
        );
        cluster.clusterNodes.add(firstJoiner);
        cluster.clusterNodes.add(secondJoiner);

        final long joinDuration =
            // the first pinging discovers the master
            defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING)
            // one message delay to send a join
            + DEFAULT_DELAY_VARIABILITY
            // followup reconfiguration
            + 2 * 2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.stabilise(joinDuration);

        // After the join, the original leader still leads and every node is in the voting configuration.
        assertThat(leader.coordinator.getMode(), is(Mode.LEADER));
        final VotingConfiguration configAfterJoin = leader.getLastAppliedClusterState().getLastCommittedConfiguration();
        assertThat(
            configAfterJoin + " should be all nodes",
            configAfterJoin.getNodeIds(),
            equalTo(cluster.clusterNodes.stream().map(ClusterNode::getId).collect(Collectors.toSet()))
        );

        logger.info("setting auto-shrink reconfiguration to true");
        leader.submitSetAutoShrinkVotingConfiguration(true);
        cluster.stabilise(DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
        assertTrue(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(leader.getLastAppliedClusterState().metadata().settings()));

        logger.info("--> changing health of newly added nodes to unhealthy");
        sharedHealth.set(new StatusInfo(UNHEALTHY, "unhealthy-info"));

        final long followerSideDetection =
            // each follower may have just sent a leader check, which receives no response
            defaultMillis(LEADER_CHECK_TIMEOUT_SETTING)
            // then wait for the follower to check the leader
            + defaultMillis(LEADER_CHECK_INTERVAL_SETTING)
            // then wait for the exception response
            + DEFAULT_DELAY_VARIABILITY;
        final long leaderSideDetection =
            // ALSO the leader may have just sent a follower check, which receives no response
            defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING)
            // wait for the leader to check its followers
            + defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING)
            // then wait for the exception response
            + DEFAULT_DELAY_VARIABILITY;
        final long removalDuration = Math.max(followerSideDetection, leaderSideDetection)
            // wait for the removal to be committed
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY
            // then wait for the followup reconfiguration
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.stabilise(removalDuration);

        // Whoever leads now must have a three-node voting configuration without the new nodes.
        final ClusterNode newLeader = cluster.getAnyLeader();
        final VotingConfiguration configAfterRemoval = newLeader.getLastAppliedClusterState().getLastCommittedConfiguration();
        assertThat(configAfterRemoval + " should be 3 nodes", configAfterRemoval.getNodeIds().size(), equalTo(3));
        assertFalse(configAfterRemoval.getNodeIds().contains(firstJoiner.getId()));
        assertFalse(configAfterRemoval.getNodeIds().contains(secondJoiner.getId()));
    }
}
Also used : ClusterNode(org.opensearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode) StatusInfo(org.opensearch.monitor.StatusInfo) AtomicReference(java.util.concurrent.atomic.AtomicReference) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration)

Aggregations

StatusInfo (org.opensearch.monitor.StatusInfo): 45
DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode): 33
Settings (org.opensearch.common.settings.Settings): 18
TransportService (org.opensearch.transport.TransportService): 14
ClusterState (org.opensearch.cluster.ClusterState): 11
AtomicReference (java.util.concurrent.atomic.AtomicReference): 10
TransportRequest (org.opensearch.transport.TransportRequest): 10
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 9
MockTransport (org.opensearch.test.transport.MockTransport): 9
ClusterFormationState (org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState): 8
DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes): 7
ConnectTransportException (org.opensearch.transport.ConnectTransportException): 6
TransportException (org.opensearch.transport.TransportException): 6
TransportResponse (org.opensearch.transport.TransportResponse): 6
Empty (org.opensearch.transport.TransportResponse.Empty): 6
HashSet (java.util.HashSet): 5
VotingConfiguration (org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration): 5
TransportAddress (org.opensearch.common.transport.TransportAddress): 5
CapturingTransport (org.opensearch.test.transport.CapturingTransport): 5
Set (java.util.Set): 4