Search in sources :

Example 26 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class Coordinator method startElectionScheduler.

/**
 * Creates the election scheduler for this node, provided the node is master-eligible.
 *
 * <p>The scheduled task runs under {@code mutex}. While the node is a candidate it starts a
 * new pre-voting round, unless the local node may not win an election against the last
 * accepted state or the node health service reports it as unhealthy. Any pre-voting round
 * already in progress is closed before the new one is started.
 */
private void startElectionScheduler() {
    assert electionScheduler == null : electionScheduler;
    if (getLocalNode().isMasterNode()) {
        // No grace period: the scheduler is started with TimeValue.ZERO.
        electionScheduler = electionSchedulerFactory.startElectionScheduler(TimeValue.ZERO, new Runnable() {

            @Override
            public void run() {
                synchronized (mutex) {
                    if (mode != Mode.CANDIDATE) {
                        // Only candidates start pre-voting rounds.
                        return;
                    }

                    final ClusterState acceptedState = coordinationState.get().getLastAcceptedState();
                    if (localNodeMayWinElection(acceptedState) == false) {
                        logger.trace("skip prevoting as local node may not win election: {}", acceptedState.coordinationMetadata());
                        return;
                    }

                    final StatusInfo health = nodeHealthService.getHealth();
                    if (health.getStatus() == UNHEALTHY) {
                        logger.debug("skip prevoting as local node is unhealthy: [{}]", health.getInfo());
                        return;
                    }

                    // Replace any round that is still open with a fresh one.
                    if (prevotingRound != null) {
                        prevotingRound.close();
                    }

                    // Zen1 nodes are left out of the set of nodes asked for a pre-vote.
                    final List<DiscoveryNode> preVoteTargets = getDiscoveredNodes().stream()
                        .filter(node -> isZen1Node(node) == false)
                        .collect(Collectors.toList());
                    prevotingRound = preVoteCollector.start(acceptedState, preVoteTargets);
                }
            }

            @Override
            public String toString() {
                return "scheduling of new prevoting round";
            }
        });
    }
}
Also used : VotingConfigExclusion(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion) Metadata(org.opensearch.cluster.metadata.Metadata) STATE_NOT_RECOVERED_BLOCK(org.opensearch.gateway.GatewayService.STATE_NOT_RECOVERED_BLOCK) AllocationService(org.opensearch.cluster.routing.allocation.AllocationService) Level(org.apache.logging.log4j.Level) Random(java.util.Random) Strings(org.opensearch.common.Strings) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ActionListener(org.opensearch.action.ActionListener) UNHEALTHY(org.opensearch.monitor.StatusInfo.Status.UNHEALTHY) TimeValue(org.opensearch.common.unit.TimeValue) ClusterApplier(org.opensearch.cluster.service.ClusterApplier) MasterService(org.opensearch.cluster.service.MasterService) Collection(java.util.Collection) NodeHealthService(org.opensearch.monitor.NodeHealthService) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) HandshakingTransportAddressConnector(org.opensearch.discovery.HandshakingTransportAddressConnector) TransportService(org.opensearch.transport.TransportService) Collectors(java.util.stream.Collectors) Nullable(org.opensearch.common.Nullable) TransportAddress(org.opensearch.common.transport.TransportAddress) AbstractLifecycleComponent(org.opensearch.common.component.AbstractLifecycleComponent) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Stream(java.util.stream.Stream) ClusterStateUpdateTask(org.opensearch.cluster.ClusterStateUpdateTask) StatusInfo(org.opensearch.monitor.StatusInfo) Optional(java.util.Optional) ListenableFuture(org.opensearch.common.util.concurrent.ListenableFuture) Scheduler(org.opensearch.threadpool.Scheduler) Names(org.opensearch.threadpool.ThreadPool.Names) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Priority(org.opensearch.common.Priority) Releasable(org.opensearch.common.lease.Releasable) 
ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) OpenSearchExecutors(org.opensearch.common.util.concurrent.OpenSearchExecutors) Supplier(java.util.function.Supplier) SeedHostsResolver(org.opensearch.discovery.SeedHostsResolver) NamedWriteableRegistry(org.opensearch.common.io.stream.NamedWriteableRegistry) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ClusterState(org.opensearch.cluster.ClusterState) PeerFinder(org.opensearch.discovery.PeerFinder) LegacyESVersion(org.opensearch.LegacyESVersion) ClusterApplyListener(org.opensearch.cluster.service.ClusterApplier.ClusterApplyListener) DiscoveryStats(org.opensearch.discovery.DiscoveryStats) Booleans(org.opensearch.common.Booleans) RerouteService(org.opensearch.cluster.routing.RerouteService) BiConsumer(java.util.function.BiConsumer) ClusterStateTaskConfig(org.opensearch.cluster.ClusterStateTaskConfig) StreamSupport(java.util.stream.StreamSupport) VoteCollection(org.opensearch.cluster.coordination.CoordinationState.VoteCollection) ClusterSettings(org.opensearch.common.settings.ClusterSettings) ClusterBlocks(org.opensearch.cluster.block.ClusterBlocks) SeedHostsProvider(org.opensearch.discovery.SeedHostsProvider) Empty(org.opensearch.transport.TransportResponse.Empty) Setting(org.opensearch.common.settings.Setting) SetOnce(org.apache.lucene.util.SetOnce) ClusterFormationState(org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration) InitialJoinAccumulator(org.opensearch.cluster.coordination.JoinHelper.InitialJoinAccumulator) Discovery(org.opensearch.discovery.Discovery) IOException(java.io.IOException) NO_MASTER_BLOCK_ID(org.opensearch.cluster.coordination.NoMasterBlockService.NO_MASTER_BLOCK_ID) LocalClusterUpdateTask(org.opensearch.cluster.LocalClusterUpdateTask) XContentHelper(org.opensearch.common.xcontent.XContentHelper) 
FollowerCheckRequest(org.opensearch.cluster.coordination.FollowersChecker.FollowerCheckRequest) ClusterStateUpdaters.hideStateIfNotRecovered(org.opensearch.gateway.ClusterStateUpdaters.hideStateIfNotRecovered) JsonXContent(org.opensearch.common.xcontent.json.JsonXContent) ClusterName(org.opensearch.cluster.ClusterName) LogManager(org.apache.logging.log4j.LogManager) Collections(java.util.Collections) DiscoveryModule(org.opensearch.discovery.DiscoveryModule) ClusterChangedEvent(org.opensearch.cluster.ClusterChangedEvent) ClusterState(org.opensearch.cluster.ClusterState) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo) TimeValue(org.opensearch.common.unit.TimeValue)

Example 27 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class LeaderChecker method handleLeaderCheck.

/**
 * Validates an incoming leader check and rejects it by throwing when this node cannot
 * answer it positively. A request that passes every check is only trace-logged.
 *
 * @param request the leader check received from another node
 * @throws NodeHealthCheckFailureException if the node health service reports this node as unhealthy
 * @throws CoordinationStateRejectedException if this node is not the elected master, or if the
 *         sender is not present in the current set of discovery nodes
 */
private void handleLeaderCheck(LeaderCheckRequest request) {
    // Read the field once so that all checks below see the same snapshot.
    final DiscoveryNodes knownNodes = this.discoveryNodes;
    assert knownNodes != null;

    final StatusInfo health = nodeHealthService.getHealth();
    if (health.getStatus() == UNHEALTHY) {
        final String rejection = "rejecting leader check from [" + request.getSender() + "] "
            + "since node is unhealthy [" + health.getInfo() + "]";
        logger.debug(rejection);
        throw new NodeHealthCheckFailureException(rejection);
    }

    if (knownNodes.isLocalNodeElectedMaster() == false) {
        logger.debug("rejecting leader check on non-master {}", request);
        throw new CoordinationStateRejectedException(
            "rejecting leader check from [" + request.getSender() + "] sent to a node that is no longer the master"
        );
    }

    if (knownNodes.nodeExists(request.getSender()) == false) {
        logger.debug("rejecting leader check from removed node: {}", request);
        throw new CoordinationStateRejectedException(
            "rejecting leader check since [" + request.getSender() + "] has been removed from the cluster"
        );
    }

    logger.trace("handling {}", request);
}
Also used : StatusInfo(org.opensearch.monitor.StatusInfo) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes)

Example 28 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class FollowersChecker method handleFollowerCheck.

/**
 * Handles a follower check received over the transport layer.
 *
 * <p>The check is rejected immediately when the node health service reports this node as
 * unhealthy. Otherwise, if this node is already a follower in the request's term, it responds
 * straight away ("fast path"). Requests from an older term are rejected. Everything else is
 * handed to {@code handleRequestAndUpdateState} on the generic thread pool ("slow path"), and
 * the outcome (success or the thrown exception) is sent back on the channel.
 *
 * @param request          the follower check to handle
 * @param transportChannel the channel on which the response (or failure) is sent
 * @throws IOException declared because sending a response on the channel may fail
 * @throws NodeHealthCheckFailureException if this node is unhealthy
 * @throws CoordinationStateRejectedException if the request's term is older than the local term
 */
private void handleFollowerCheck(FollowerCheckRequest request, TransportChannel transportChannel) throws IOException {
    final StatusInfo statusInfo = nodeHealthService.getHealth();
    if (statusInfo.getStatus() == UNHEALTHY) {
        // Name the rejected request in the message; previously the health info was appended twice.
        final String message = "handleFollowerCheck: node is unhealthy [" + statusInfo.getInfo() + "], rejecting " + request;
        logger.debug(message);
        throw new NodeHealthCheckFailureException(message);
    }
    // Read the field once so mode and term come from the same snapshot.
    final FastResponseState responder = this.fastResponseState;
    if (responder.mode == Mode.FOLLOWER && responder.term == request.term) {
        logger.trace("responding to {} on fast path", request);
        transportChannel.sendResponse(Empty.INSTANCE);
        return;
    }
    if (request.term < responder.term) {
        throw new CoordinationStateRejectedException("rejecting " + request + " since local state is " + this);
    }
    transportService.getThreadPool().generic().execute(new AbstractRunnable() {

        @Override
        protected void doRun() throws IOException {
            logger.trace("responding to {} on slow path", request);
            try {
                handleRequestAndUpdateState.accept(request);
            } catch (Exception e) {
                // Report the failure to the sender instead of letting it escape to onFailure.
                transportChannel.sendResponse(e);
                return;
            }
            transportChannel.sendResponse(Empty.INSTANCE);
        }

        @Override
        public void onFailure(Exception e) {
            logger.debug(new ParameterizedMessage("exception while responding to {}", request), e);
        }

        @Override
        public String toString() {
            return "slow path response to " + request;
        }
    });
}
Also used : AbstractRunnable(org.opensearch.common.util.concurrent.AbstractRunnable) StatusInfo(org.opensearch.monitor.StatusInfo) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) IOException(java.io.IOException) IOException(java.io.IOException) ConnectTransportException(org.opensearch.transport.ConnectTransportException) TransportException(org.opensearch.transport.TransportException)

Example 29 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class PreVoteCollector method handlePreVoteRequest.

/**
 * Answers a pre-vote request using the leader/response pair currently held in {@code state}.
 *
 * <p>The request's term is first passed to {@code updateMaxTermSeen}. The stored response is
 * returned when no leader is known, or when the request comes from the node recorded as
 * leader; a request from any other node is rejected while a leader is known.
 *
 * @param request the incoming pre-vote request
 * @return the pre-vote response stored alongside the current leader
 * @throws NodeHealthCheckFailureException if the node health service reports this node as unhealthy
 * @throws CoordinationStateRejectedException if a leader is known and it is not the requesting node
 */
private PreVoteResponse handlePreVoteRequest(final PreVoteRequest request) {
    updateMaxTermSeen.accept(request.getCurrentTerm());

    // Read the field once so leader and response come from the same tuple.
    Tuple<DiscoveryNode, PreVoteResponse> snapshot = this.state;
    assert snapshot != null : "received pre-vote request before fully initialised";
    final DiscoveryNode knownLeader = snapshot.v1();
    final PreVoteResponse storedResponse = snapshot.v2();

    final StatusInfo health = nodeHealthService.getHealth();
    if (health.getStatus() == UNHEALTHY) {
        String rejection = "rejecting " + request + " on unhealthy node: [" + health.getInfo() + "]";
        logger.debug(rejection);
        throw new NodeHealthCheckFailureException(rejection);
    }

    // Only a request from a node other than the known leader is refused; with no leader,
    // or when the leader itself is asking, the stored response is handed back.
    if (knownLeader != null && knownLeader.equals(request.getSourceNode()) == false) {
        throw new CoordinationStateRejectedException("rejecting " + request + " as there is already a leader");
    }
    return storedResponse;
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo)

Example 30 with StatusInfo

use of org.opensearch.monitor.StatusInfo in project OpenSearch by opensearch-project.

the class ClusterFormationFailureHelperTests method testDescriptionBeforeBootstrapping.

/**
 * Checks the description produced by {@code ClusterFormationState} for a healthy node whose
 * cluster state carries term 4 / version 7, across four variations: nothing discovered, one
 * address from the hosts providers, one discovered node, and a non-empty
 * {@code cluster.initial_master_nodes} setting.
 */
public void testDescriptionBeforeBootstrapping() {
    final DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), Version.CURRENT);
    final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT)
        .version(7L)
        .metadata(Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(4L).build()))
        .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()))
        .build();
    final StatusInfo healthy = new StatusInfo(HEALTHY, "healthy-info");

    // Fragments shared by every expected description below.
    final String preamble = "master not discovered yet, this node has not previously joined a bootstrapped cluster, and ";
    final String emptyInitialMasterNodes = "[cluster.initial_master_nodes] is empty on this node: ";
    final String lastKnownState = " from hosts providers and [" + localNode + "] from last-known cluster state; ";
    final String acceptedVersion = ", last-accepted version 7 in term 4";

    // Nothing resolved from hosts providers and no peers discovered.
    final String nothingDiscovered = new ClusterFormationState(
        Settings.EMPTY, clusterState, emptyList(), emptyList(), 1L, electionStrategy, healthy
    ).getDescription();
    assertThat(
        nothingDiscovered,
        is(preamble + emptyInitialMasterNodes + "have discovered []; discovery will continue using []"
            + lastKnownState + "node term 1" + acceptedVersion)
    );

    // One address resolved from the hosts providers.
    final TransportAddress otherAddress = buildNewFakeTransportAddress();
    final String withResolvedAddress = new ClusterFormationState(
        Settings.EMPTY, clusterState, singletonList(otherAddress), emptyList(), 2L, electionStrategy, healthy
    ).getDescription();
    assertThat(
        withResolvedAddress,
        is(preamble + emptyInitialMasterNodes + "have discovered []; discovery will continue using [" + otherAddress + "]"
            + lastKnownState + "node term 2" + acceptedVersion)
    );

    // One peer discovered.
    final DiscoveryNode otherNode = new DiscoveryNode("other", buildNewFakeTransportAddress(), Version.CURRENT);
    final String withDiscoveredNode = new ClusterFormationState(
        Settings.EMPTY, clusterState, emptyList(), singletonList(otherNode), 3L, electionStrategy, healthy
    ).getDescription();
    assertThat(
        withDiscoveredNode,
        is(preamble + emptyInitialMasterNodes + "have discovered [" + otherNode + "]; discovery will continue using []"
            + lastKnownState + "node term 3" + acceptedVersion)
    );

    // Initial master nodes configured, but none of them discovered yet.
    final Settings initialMasterSettings = Settings.builder().putList(INITIAL_MASTER_NODES_SETTING.getKey(), "other").build();
    final String withInitialMasterNodes = new ClusterFormationState(
        initialMasterSettings, clusterState, emptyList(), emptyList(), 4L, electionStrategy, healthy
    ).getDescription();
    assertThat(
        withInitialMasterNodes,
        is(preamble
            + "this node must discover master-eligible nodes [other] to bootstrap a cluster: have discovered []; "
            + "discovery will continue using []"
            + lastKnownState + "node term 4" + acceptedVersion)
    );
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) StatusInfo(org.opensearch.monitor.StatusInfo) TransportAddress(org.opensearch.common.transport.TransportAddress) ClusterFormationState(org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState)

Aggregations

StatusInfo (org.opensearch.monitor.StatusInfo)45 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)33 Settings (org.opensearch.common.settings.Settings)18 TransportService (org.opensearch.transport.TransportService)14 ClusterState (org.opensearch.cluster.ClusterState)11 AtomicReference (java.util.concurrent.atomic.AtomicReference)10 TransportRequest (org.opensearch.transport.TransportRequest)10 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)9 MockTransport (org.opensearch.test.transport.MockTransport)9 ClusterFormationState (org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState)8 DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes)7 ConnectTransportException (org.opensearch.transport.ConnectTransportException)6 TransportException (org.opensearch.transport.TransportException)6 TransportResponse (org.opensearch.transport.TransportResponse)6 Empty (org.opensearch.transport.TransportResponse.Empty)6 HashSet (java.util.HashSet)5 VotingConfiguration (org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration)5 TransportAddress (org.opensearch.common.transport.TransportAddress)5 CapturingTransport (org.opensearch.test.transport.CapturingTransport)5 Set (java.util.Set)4