
Example 1 with VotingConfiguration

Use of org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration in project OpenSearch by opensearch-project.

From the class Coordinator, method doStart:

@Override
protected void doStart() {
    synchronized (mutex) {
        CoordinationState.PersistedState persistedState = persistedStateSupplier.get();
        coordinationState.set(new CoordinationState(getLocalNode(), persistedState, electionStrategy));
        peerFinder.setCurrentTerm(getCurrentTerm());
        configuredHostsResolver.start();
        final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState();
        if (lastAcceptedState.metadata().clusterUUIDCommitted()) {
            logger.info("cluster UUID [{}]", lastAcceptedState.metadata().clusterUUID());
        }
        final VotingConfiguration votingConfiguration = lastAcceptedState.getLastCommittedConfiguration();
        if (singleNodeDiscovery
            && votingConfiguration.isEmpty() == false
            && votingConfiguration.hasQuorum(Collections.singleton(getLocalNode().getId())) == false) {
            throw new IllegalStateException(
                "cannot start with [" + DiscoveryModule.DISCOVERY_TYPE_SETTING.getKey() + "] set to ["
                    + DiscoveryModule.SINGLE_NODE_DISCOVERY_TYPE + "] when local node " + getLocalNode()
                    + " does not have quorum in voting configuration " + votingConfiguration
            );
        }
        ClusterState initialState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings))
            .blocks(
                ClusterBlocks.builder()
                    .addGlobalBlock(STATE_NOT_RECOVERED_BLOCK)
                    .addGlobalBlock(noMasterBlockService.getNoMasterBlock())
            )
            .nodes(DiscoveryNodes.builder().add(getLocalNode()).localNodeId(getLocalNode().getId()))
            .build();
        applierState = initialState;
        clusterApplier.setInitialState(initialState);
    }
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration)
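The guard in doStart needs only two VotingConfiguration calls, isEmpty() and hasQuorum(). A minimal sketch of that quorum check in isolation, assuming the Set-of-node-ids constructor and the hasQuorum(Collection) signature used above; the node IDs are hypothetical:

// Hypothetical node IDs, only to illustrate the quorum semantics doStart relies on.
VotingConfiguration config = new VotingConfiguration(Set.of("node-1", "node-2", "node-3"));
// Two of the three configured voters form a majority quorum...
assert config.hasQuorum(Set.of("node-1", "node-2"));
// ...a single voter does not, which is exactly the situation that makes single-node
// discovery refuse to start in the example above.
assert config.hasQuorum(Collections.singleton("node-1")) == false;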

Example 2 with VotingConfiguration

Use of org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration in project OpenSearch by opensearch-project.

From the class Reconfigurator, method reconfigure:

/**
 * Compute an optimal configuration for the cluster.
 *
 * @param liveNodes      The live nodes in the cluster. The optimal configuration prefers live nodes over non-live nodes as far as
 *                       possible.
 * @param retiredNodeIds Nodes that are leaving the cluster and which should not appear in the configuration if possible. Nodes that are
 *                       retired and not in the current configuration will never appear in the resulting configuration; this is useful
 *                       for shifting the vote in a 2-node cluster so one of the nodes can be restarted without harming availability.
 * @param currentMaster  The current master. Unless retired, we prefer to keep the current master in the config.
 * @param currentConfig  The current configuration. As far as possible, we prefer to keep the current config as-is.
 * @return An optimal configuration, or leave the current configuration unchanged if the optimal configuration has no live quorum.
 */
public VotingConfiguration reconfigure(Set<DiscoveryNode> liveNodes, Set<String> retiredNodeIds, DiscoveryNode currentMaster, VotingConfiguration currentConfig) {
    assert liveNodes.contains(currentMaster) : "liveNodes = " + liveNodes + " master = " + currentMaster;
    logger.trace("{} reconfiguring {} based on liveNodes={}, retiredNodeIds={}, currentMaster={}", this, currentConfig, liveNodes, retiredNodeIds, currentMaster);
    final Set<String> liveNodeIds = liveNodes.stream().filter(DiscoveryNode::isMasterNode).map(DiscoveryNode::getId).collect(Collectors.toSet());
    final Set<String> currentConfigNodeIds = currentConfig.getNodeIds();
    final Set<VotingConfigNode> orderedCandidateNodes = new TreeSet<>();
    liveNodes.stream()
        .filter(DiscoveryNode::isMasterNode)
        .filter(n -> retiredNodeIds.contains(n.getId()) == false)
        .forEach(
            n -> orderedCandidateNodes.add(
                new VotingConfigNode(
                    n.getId(),
                    true,
                    n.getId().equals(currentMaster.getId()),
                    currentConfigNodeIds.contains(n.getId())
                )
            )
        );
    currentConfigNodeIds.stream()
        .filter(nid -> liveNodeIds.contains(nid) == false)
        .filter(nid -> retiredNodeIds.contains(nid) == false)
        .forEach(nid -> orderedCandidateNodes.add(new VotingConfigNode(nid, false, false, true)));
    // Now we work out how many nodes should be in the configuration:
    final int nonRetiredConfigSize = Math.toIntExact(orderedCandidateNodes.stream().filter(n -> n.inCurrentConfig).count());
    final int minimumConfigEnforcedSize = autoShrinkVotingConfiguration ? (nonRetiredConfigSize < 3 ? 1 : 3) : nonRetiredConfigSize;
    final int nonRetiredLiveNodeCount = Math.toIntExact(orderedCandidateNodes.stream().filter(n -> n.live).count());
    final int targetSize = Math.max(roundDownToOdd(nonRetiredLiveNodeCount), minimumConfigEnforcedSize);
    final VotingConfiguration newConfig = new VotingConfiguration(
        orderedCandidateNodes.stream().limit(targetSize).map(n -> n.id).collect(Collectors.toSet())
    );
    // new configuration should have a quorum
    if (newConfig.hasQuorum(liveNodeIds)) {
        return newConfig;
    } else {
        // If there are not enough live nodes to form a quorum in the newly-proposed configuration, it's better to do nothing.
        return currentConfig;
    }
}
Also used : DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) Logger(org.apache.logging.log4j.Logger) Setting(org.opensearch.common.settings.Setting) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration) Property(org.opensearch.common.settings.Setting.Property) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) ClusterSettings(org.opensearch.common.settings.ClusterSettings) LogManager(org.apache.logging.log4j.LogManager) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet)
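The sizing logic above reduces to roundDownToOdd(liveCount), bounded below by the enforced minimum. The helper's body is not shown in the example, so the sketch below is an assumption that merely matches the behaviour the surrounding code implies, followed by the five-to-three shrink exercised in Example 4 further down:

// Assumed helper: round an even count down to the next lower odd number.
static int roundDownToOdd(int size) {
    return size - (size % 2 == 0 ? 1 : 0);
}

// Worked case: a 5-node voting configuration, auto-shrink enabled, 2 nodes disconnected.
// nonRetiredConfigSize = 5    -> minimumConfigEnforcedSize = 3
// nonRetiredLiveNodeCount = 3 -> roundDownToOdd(3) = 3
// targetSize = max(3, 3) = 3  -> the configuration shrinks to the three live nodes.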

Example 3 with VotingConfiguration

Use of org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration in project OpenSearch by opensearch-project.

From the class NodeJoinTests, method testConcurrentJoining:

public void testConcurrentJoining() {
    List<DiscoveryNode> masterNodes = IntStream.rangeClosed(1, randomIntBetween(2, 5))
        .mapToObj(nodeId -> newNode(nodeId, true))
        .collect(Collectors.toList());
    List<DiscoveryNode> otherNodes = IntStream.rangeClosed(masterNodes.size() + 1, masterNodes.size() + 1 + randomIntBetween(0, 5))
        .mapToObj(nodeId -> newNode(nodeId, false))
        .collect(Collectors.toList());
    List<DiscoveryNode> allNodes = Stream.concat(masterNodes.stream(), otherNodes.stream()).collect(Collectors.toList());
    DiscoveryNode localNode = masterNodes.get(0);
    VotingConfiguration votingConfiguration = new VotingConfiguration(
        randomValueOtherThan(singletonList(localNode), () -> randomSubsetOf(randomIntBetween(1, masterNodes.size()), masterNodes))
            .stream()
            .map(DiscoveryNode::getId)
            .collect(Collectors.toSet())
    );
    logger.info("Voting configuration: {}", votingConfiguration);
    long initialTerm = randomLongBetween(1, 10);
    long initialVersion = randomLongBetween(1, 10);
    setupRealMasterServiceAndCoordinator(initialTerm, initialState(localNode, initialTerm, initialVersion, votingConfiguration));
    long newTerm = initialTerm + randomLongBetween(1, 10);
    // we need at least a quorum of voting nodes with a correct term and worse state
    List<DiscoveryNode> successfulNodes;
    do {
        successfulNodes = randomSubsetOf(allNodes);
    } while (votingConfiguration.hasQuorum(successfulNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toList())) == false);
    logger.info("Successful voting nodes: {}", successfulNodes);
    List<JoinRequest> correctJoinRequests = successfulNodes.stream().map(node -> new JoinRequest(node, newTerm, Optional.of(new Join(node, localNode, newTerm, initialTerm, initialVersion)))).collect(Collectors.toList());
    List<DiscoveryNode> possiblyUnsuccessfulNodes = new ArrayList<>(allNodes);
    possiblyUnsuccessfulNodes.removeAll(successfulNodes);
    logger.info("Possibly unsuccessful voting nodes: {}", possiblyUnsuccessfulNodes);
    List<JoinRequest> possiblyFailingJoinRequests = possiblyUnsuccessfulNodes.stream().map(node -> {
        if (randomBoolean()) {
            // a correct request
            return new JoinRequest(node, newTerm, Optional.of(new Join(node, localNode, newTerm, initialTerm, initialVersion)));
        } else if (randomBoolean()) {
            // term too low
            return new JoinRequest(node, newTerm, Optional.of(new Join(node, localNode, randomLongBetween(0, initialTerm), initialTerm, initialVersion)));
        } else {
            // better state
            return new JoinRequest(node, newTerm, Optional.of(new Join(node, localNode, newTerm, initialTerm, initialVersion + randomLongBetween(1, 10))));
        }
    }).collect(Collectors.toList());
    // duplicate some requests, which will be unsuccessful
    possiblyFailingJoinRequests.addAll(randomSubsetOf(possiblyFailingJoinRequests));
    CyclicBarrier barrier = new CyclicBarrier(correctJoinRequests.size() + possiblyFailingJoinRequests.size() + 1);
    final Runnable awaitBarrier = () -> {
        try {
            barrier.await();
        } catch (InterruptedException | BrokenBarrierException e) {
            throw new RuntimeException(e);
        }
    };
    final AtomicBoolean stopAsserting = new AtomicBoolean();
    final Thread assertionThread = new Thread(() -> {
        awaitBarrier.run();
        while (stopAsserting.get() == false) {
            coordinator.invariant();
        }
    }, "assert invariants");
    final List<Thread> joinThreads = Stream.concat(correctJoinRequests.stream().map(joinRequest -> new Thread(() -> {
        awaitBarrier.run();
        joinNode(joinRequest);
    }, "process " + joinRequest)), possiblyFailingJoinRequests.stream().map(joinRequest -> new Thread(() -> {
        awaitBarrier.run();
        try {
            joinNode(joinRequest);
        } catch (CoordinationStateRejectedException e) {
        // ignore - these requests are expected to fail
        }
    }, "process " + joinRequest))).collect(Collectors.toList());
    assertionThread.start();
    joinThreads.forEach(Thread::start);
    joinThreads.forEach(t -> {
        try {
            t.join();
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    });
    stopAsserting.set(true);
    try {
        assertionThread.join();
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
    assertTrue(MasterServiceTests.discoveryState(masterService).nodes().isLocalNodeElectedMaster());
    for (DiscoveryNode successfulNode : successfulNodes) {
        assertTrue(successfulNode + " joined cluster", clusterStateHasNode(successfulNode));
        assertFalse(successfulNode + " voted for master", coordinator.missingJoinVoteFrom(successfulNode));
    }
}
Also used : Metadata(org.opensearch.cluster.metadata.Metadata) TestThreadPool(org.opensearch.threadpool.TestThreadPool) HANDSHAKE_ACTION_NAME(org.opensearch.transport.TransportService.HANDSHAKE_ACTION_NAME) Version(org.opensearch.Version) Random(java.util.Random) FutureUtils(org.opensearch.common.util.concurrent.FutureUtils) TestTransportChannel(org.opensearch.transport.TestTransportChannel) Collections.singletonList(java.util.Collections.singletonList) Transport(org.opensearch.transport.Transport) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) Collections.singleton(java.util.Collections.singleton) OpenSearchAllocationTestCase(org.opensearch.cluster.OpenSearchAllocationTestCase) After(org.junit.After) ActionListener(org.opensearch.action.ActionListener) AfterClass(org.junit.AfterClass) CyclicBarrier(java.util.concurrent.CyclicBarrier) MasterService(org.opensearch.cluster.service.MasterService) Collections.emptyList(java.util.Collections.emptyList) OpenSearchTestCase(org.opensearch.test.OpenSearchTestCase) NodeHealthService(org.opensearch.monitor.NodeHealthService) Set(java.util.Set) HEALTHY(org.opensearch.monitor.StatusInfo.Status.HEALTHY) Settings(org.opensearch.common.settings.Settings) TransportResponse(org.opensearch.transport.TransportResponse) DiscoveryNodeRole(org.opensearch.cluster.node.DiscoveryNodeRole) TransportService(org.opensearch.transport.TransportService) Collectors(java.util.stream.Collectors) FakeThreadPoolMasterService(org.opensearch.cluster.service.FakeThreadPoolMasterService) List(java.util.List) Stream(java.util.stream.Stream) Randomness(org.opensearch.common.Randomness) Matchers.equalTo(org.hamcrest.Matchers.equalTo) StatusInfo(org.opensearch.monitor.StatusInfo) Optional(java.util.Optional) ClusterServiceUtils(org.opensearch.test.ClusterServiceUtils) Matchers.containsString(org.hamcrest.Matchers.containsString) IntStream(java.util.stream.IntStream) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) BeforeClass(org.junit.BeforeClass) ThreadPool(org.opensearch.threadpool.ThreadPool) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Node(org.opensearch.node.Node) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) MasterServiceTests(org.opensearch.cluster.service.MasterServiceTests) ClusterState(org.opensearch.cluster.ClusterState) RequestHandlerRegistry(org.opensearch.transport.RequestHandlerRegistry) ClusterSettings(org.opensearch.common.settings.ClusterSettings) ClusterBlocks(org.opensearch.cluster.block.ClusterBlocks) Collections.emptyMap(java.util.Collections.emptyMap) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration) TransportRequest(org.opensearch.transport.TransportRequest) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) BaseFuture(org.opensearch.common.util.concurrent.BaseFuture) TimeUnit(java.util.concurrent.TimeUnit) ClusterName(org.opensearch.cluster.ClusterName) CapturingTransport(org.opensearch.test.transport.CapturingTransport) Collections(java.util.Collections) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ArrayList(java.util.ArrayList) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration) CyclicBarrier(java.util.concurrent.CyclicBarrier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean)

Example 4 with VotingConfiguration

Use of org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration in project OpenSearch by opensearch-project.

From the class CoordinatorTests, method testCanShrinkFromFiveNodesToThree:

public void testCanShrinkFromFiveNodesToThree() {
    try (Cluster cluster = new Cluster(5)) {
        cluster.runRandomly();
        cluster.stabilise();
        {
            final ClusterNode leader = cluster.getAnyLeader();
            logger.info("setting auto-shrink reconfiguration to false");
            leader.submitSetAutoShrinkVotingConfiguration(false);
            cluster.stabilise(DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
            assertFalse(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(leader.getLastAppliedClusterState().metadata().settings()));
        }
        final ClusterNode disconnect1 = cluster.getAnyNode();
        final ClusterNode disconnect2 = cluster.getAnyNodeExcept(disconnect1);
        logger.info("--> disconnecting {} and {}", disconnect1, disconnect2);
        disconnect1.disconnect();
        disconnect2.disconnect();
        cluster.stabilise();
        final ClusterNode leader = cluster.getAnyLeader();
        {
            final VotingConfiguration lastCommittedConfiguration = leader.getLastAppliedClusterState().getLastCommittedConfiguration();
            assertThat(lastCommittedConfiguration + " should be all nodes", lastCommittedConfiguration.getNodeIds(), equalTo(cluster.clusterNodes.stream().map(ClusterNode::getId).collect(Collectors.toSet())));
        }
        logger.info("setting auto-shrink reconfiguration to true");
        leader.submitSetAutoShrinkVotingConfiguration(true);
        // allow for a reconfiguration
        cluster.stabilise(DEFAULT_CLUSTER_STATE_UPDATE_DELAY * 2);
        assertTrue(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(leader.getLastAppliedClusterState().metadata().settings()));
        {
            final VotingConfiguration lastCommittedConfiguration = leader.getLastAppliedClusterState().getLastCommittedConfiguration();
            assertThat(lastCommittedConfiguration + " should be 3 nodes", lastCommittedConfiguration.getNodeIds().size(), equalTo(3));
            assertFalse(lastCommittedConfiguration.getNodeIds().contains(disconnect1.getId()));
            assertFalse(lastCommittedConfiguration.getNodeIds().contains(disconnect2.getId()));
        }
    }
}
Also used : ClusterNode(org.opensearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration)
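submitSetAutoShrinkVotingConfiguration is a test-harness helper. Outside the harness the same toggle appears to be the dynamic setting behind CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION from the Reconfigurator example; the sketch below assumes the registered key is "cluster.auto_shrink_voting_configuration" and omits the cluster-settings API plumbing:

// Assumed setting key; only the Setting constant itself appears in the examples above.
Settings autoShrinkDisabled = Settings.builder()
    .put("cluster.auto_shrink_voting_configuration", false)
    .build();
// Read back the same way the test asserts it against the applied cluster state settings.
boolean autoShrink = CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(autoShrinkDisabled); // false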

Example 5 with VotingConfiguration

Use of org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration in project OpenSearch by opensearch-project.

From the class CoordinatorTests, method testUnhealthyNodesGetsRemoved:

public void testUnhealthyNodesGetsRemoved() {
    AtomicReference<StatusInfo> healthStatusInfo = new AtomicReference<>(new StatusInfo(HEALTHY, "healthy-info"));
    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();
        final ClusterNode leader = cluster.getAnyLeader();
        logger.info("--> adding two new healthy nodes");
        ClusterNode newNode1 = cluster.new ClusterNode(nextNodeIndex.getAndIncrement(), true, leader.nodeSettings, () -> healthStatusInfo.get());
        ClusterNode newNode2 = cluster.new ClusterNode(nextNodeIndex.getAndIncrement(), true, leader.nodeSettings, () -> healthStatusInfo.get());
        cluster.clusterNodes.add(newNode1);
        cluster.clusterNodes.add(newNode2);
        cluster.stabilise(
            // The first pinging discovers the master
            defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING)
                // One message delay to send a join
                + DEFAULT_DELAY_VARIABILITY
                // followup reconfiguration
                + 2 * 2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY
        );
        {
            assertThat(leader.coordinator.getMode(), is(Mode.LEADER));
            final VotingConfiguration lastCommittedConfiguration = leader.getLastAppliedClusterState().getLastCommittedConfiguration();
            assertThat(lastCommittedConfiguration + " should be all nodes", lastCommittedConfiguration.getNodeIds(), equalTo(cluster.clusterNodes.stream().map(ClusterNode::getId).collect(Collectors.toSet())));
        }
        logger.info("setting auto-shrink reconfiguration to true");
        leader.submitSetAutoShrinkVotingConfiguration(true);
        cluster.stabilise(DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
        assertTrue(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(leader.getLastAppliedClusterState().metadata().settings()));
        logger.info("--> changing health of newly added nodes to unhealthy");
        healthStatusInfo.getAndSet(new StatusInfo(UNHEALTHY, "unhealthy-info"));
        cluster.stabilise(
            Math.max(
                // Each follower may have just sent a leader check, which receives no response
                defaultMillis(LEADER_CHECK_TIMEOUT_SETTING)
                    // then wait for the follower to check the leader
                    + defaultMillis(LEADER_CHECK_INTERVAL_SETTING)
                    // then wait for the exception response
                    + DEFAULT_DELAY_VARIABILITY,
                // ALSO the leader may have just sent a follower check, which receives no response
                defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING)
                    // wait for the leader to check its followers
                    + defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING)
                    // then wait for the exception response
                    + DEFAULT_DELAY_VARIABILITY
            )
                // wait for the removal to be committed
                + DEFAULT_CLUSTER_STATE_UPDATE_DELAY
                // then wait for the followup reconfiguration
                + DEFAULT_CLUSTER_STATE_UPDATE_DELAY
        );
        {
            final ClusterNode newLeader = cluster.getAnyLeader();
            final VotingConfiguration lastCommittedConfiguration = newLeader.getLastAppliedClusterState().getLastCommittedConfiguration();
            assertThat(lastCommittedConfiguration + " should be 3 nodes", lastCommittedConfiguration.getNodeIds().size(), equalTo(3));
            assertFalse(lastCommittedConfiguration.getNodeIds().contains(newNode1.getId()));
            assertFalse(lastCommittedConfiguration.getNodeIds().contains(newNode2.getId()));
        }
    }
}
Also used : ClusterNode(org.opensearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode) StatusInfo(org.opensearch.monitor.StatusInfo) AtomicReference(java.util.concurrent.atomic.AtomicReference) VotingConfiguration(org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration)
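The health flips in this test work because every new ClusterNode shares the same status supplier. A small sketch of that pattern on its own, assuming (as the test suggests) that the lambda satisfies NodeHealthService, whose single method returns a StatusInfo:

// One shared, mutable health status...
AtomicReference<StatusInfo> status = new AtomicReference<>(new StatusInfo(HEALTHY, "healthy-info"));
// ...exposed through one supplier handed to every node under test.
NodeHealthService health = () -> status.get();
// Flipping the shared reference makes each node report UNHEALTHY on its next health check,
// which is what triggers the removal and reconfiguration asserted above.
status.set(new StatusInfo(UNHEALTHY, "unhealthy-info"));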

Aggregations

VotingConfiguration (org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration): 61
ClusterState (org.opensearch.cluster.ClusterState): 42
DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode): 12
DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes): 9
Set (java.util.Set): 7
HashSet (java.util.HashSet): 6
ClusterSettings (org.opensearch.common.settings.ClusterSettings): 6
Settings (org.opensearch.common.settings.Settings): 6
Collections (java.util.Collections): 5
AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 5
Collectors (java.util.stream.Collectors): 5
OpenSearchTestCase (org.opensearch.test.OpenSearchTestCase): 5
IOException (java.io.IOException): 4
List (java.util.List): 4
Optional (java.util.Optional): 4
TimeUnit (java.util.concurrent.TimeUnit): 4
Stream (java.util.stream.Stream): 4
Matchers.containsString (org.hamcrest.Matchers.containsString): 4
Matchers.equalTo (org.hamcrest.Matchers.equalTo): 4
Version (org.opensearch.Version): 4