Search in sources :

Example 1 with ClusterNode

use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in project crate by crate.

the class CoordinatorTests method testLogsWarningPeriodicallyIfClusterNotFormed.

/**
 * Checks that, once every node of a three-node cluster has been disconnected and has
 * become a candidate, each node keeps logging the "master not discovered or elected yet"
 * warning once per configured warning interval.
 *
 * <p>The warning interval is either the default of
 * {@code DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING} or a random value between
 * 1 and 100000 milliseconds that is set explicitly in the cluster settings.
 *
 * @throws IllegalAccessException declared by the signature; presumably raised by the
 *         logger appender manipulation - TODO confirm against {@code Loggers}
 */
public void testLogsWarningPeriodicallyIfClusterNotFormed() throws IllegalAccessException {
    final long warningDelayMillis;
    final Settings settings;
    // Randomly exercise either the default warning timeout or an explicitly configured one.
    if (randomBoolean()) {
        settings = Settings.EMPTY;
        warningDelayMillis = ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.get(settings).millis();
    } else {
        warningDelayMillis = randomLongBetween(1, 100000);
        settings = Settings.builder().put(ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.getKey(), warningDelayMillis + "ms").build();
    }
    logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);
    try (Cluster cluster = new Cluster(3, true, settings)) {
        cluster.runRandomly();
        cluster.stabilise();
        logger.info("--> disconnecting all nodes");
        for (final ClusterNode clusterNode : cluster.clusterNodes) {
            clusterNode.disconnect();
        }
        // NOTE: in the expression below each trailing comment describes the term on the
        // FOLLOWING line (the comments were shifted up by one term by a formatter).
        cluster.runFor(// to wait for any in-flight check to time out
        defaultMillis(LEADER_CHECK_TIMEOUT_SETTING) + // to wait for the next check to be sent
        defaultMillis(LEADER_CHECK_INTERVAL_SETTING) + // to send the failing check and receive the disconnection response
        2 * DEFAULT_DELAY_VARIABILITY, "waiting for leader failure");
        // With every node disconnected, no node may still be a leader or a follower.
        for (final ClusterNode clusterNode : cluster.clusterNodes) {
            assertThat(clusterNode.getId() + " is CANDIDATE", clusterNode.coordinator.getMode(), is(CANDIDATE));
        }
        // Observe several consecutive warning intervals. The bound is inclusive of 0, so
        // the body runs (drawn value + 1) times.
        for (int i = scaledRandomIntBetween(1, 10); i >= 0; i--) {
            // A fresh appender per round, so that each round's expectation starts empty.
            final MockLogAppender mockLogAppender = new MockLogAppender();
            try {
                mockLogAppender.start();
                Loggers.addAppender(LogManager.getLogger(ClusterFormationFailureHelper.class), mockLogAppender);
                mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {

                    // Local nodes of the cluster nodes that have emitted the warning in this round.
                    final Set<DiscoveryNode> nodesLogged = new HashSet<>();

                    @Override
                    public void match(LogEvent event) {
                        final String message = event.getMessage().getFormattedMessage();
                        // Every event passed to this expectation must be the cluster-formation warning.
                        assertThat(message, startsWith("master not discovered or elected yet, an election requires at least 2 nodes with ids from ["));
                        // Attribute the event to a cluster node by comparing the node id stored in
                        // the event's context data with each node's log-context id.
                        final List<ClusterNode> matchingNodes = cluster.clusterNodes.stream().filter(n -> event.getContextData().<String>getValue(NODE_ID_LOG_CONTEXT_KEY).equals(getNodeIdForLogContext(n.getLocalNode()))).collect(Collectors.toList());
                        assertThat(matchingNodes, hasSize(1));
                        // The message must contain the emitting node's own string form between
                        // "have discovered" and "discovery will continue".
                        assertTrue(Regex.simpleMatch("*have discovered *" + matchingNodes.get(0).toString() + "*discovery will continue*", message));
                        nodesLogged.add(matchingNodes.get(0).getLocalNode());
                    }

                    @Override
                    public void assertMatched() {
                        // Every node in the cluster must have logged the warning during this round.
                        assertThat(nodesLogged + " vs " + cluster.clusterNodes, nodesLogged, equalTo(cluster.clusterNodes.stream().map(ClusterNode::getLocalNode).collect(Collectors.toSet())));
                    }
                });
                // Advance by one warning interval plus the delay variability.
                cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
                mockLogAppender.assertAllExpectationsMatched();
            } finally {
                // Always detach and stop the appender, even if an assertion above failed.
                Loggers.removeAppender(LogManager.getLogger(ClusterFormationFailureHelper.class), mockLogAppender);
                mockLogAppender.stop();
            }
        }
    }
}
Also used : ClusterNode(org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) MockLogAppender(org.elasticsearch.test.MockLogAppender) LogEvent(org.apache.logging.log4j.core.LogEvent) Matchers.containsString(org.hamcrest.Matchers.containsString) List(java.util.List) Settings(org.elasticsearch.common.settings.Settings) HashSet(java.util.HashSet)

Example 2 with ClusterNode

use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in project crate by crate.

the class CoordinatorTests method testAppliesNoMasterBlock.

/**
 * Sets the no-master-block persistent setting on a stabilised three-node cluster,
 * disconnects the leader, and asserts that the leader's last applied cluster state
 * carries the expected global block.
 *
 * @param noMasterBlockSetting value to store under {@code NO_MASTER_BLOCK_SETTING}
 * @param expectedBlock        global block expected on the disconnected leader
 */
private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) {
    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode master = cluster.getAnyLeader();
        master.submitUpdateTask("update NO_MASTER_BLOCK_SETTING", currentState -> {
            // Keep the existing persistent settings and override only the no-master-block entry.
            final Settings updatedPersistentSettings = Settings.builder()
                .put(currentState.metadata().persistentSettings())
                .put(NO_MASTER_BLOCK_SETTING.getKey(), noMasterBlockSetting)
                .build();
            return ClusterState.builder(currentState)
                .metadata(Metadata.builder(currentState.metadata()).persistentSettings(updatedPersistentSettings))
                .build();
        }, (source, e) -> {
        });
        cluster.runFor(DEFAULT_CLUSTER_STATE_UPDATE_DELAY, "committing setting update");

        master.disconnect();
        // Follower check timeout + interval, plus one cluster state update delay.
        final long detectionWindowMillis = defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING)
            + defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING)
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.runFor(detectionWindowMillis, "detecting disconnection");

        assertThat(master.getLastAppliedClusterState().blocks().global(), hasItem(expectedBlock));
        // TODO reboot the leader and verify that the same block is applied when it restarts
    }
}
Also used : ClusterNode(org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode) XContentBuilder(org.elasticsearch.common.xcontent.XContentBuilder) Builder(org.elasticsearch.common.settings.Settings.Builder)

Example 3 with ClusterNode

use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in project crate by crate.

the class CoordinatorTests method testDoesNotPerformElectionWhenRestartingFollower.

/**
 * Restarts every non-leader node of a stabilised cluster, one at a time, and asserts
 * after each restart that the current term is still the term observed before any restart.
 */
public void testDoesNotPerformElectionWhenRestartingFollower() {
    try (Cluster cluster = new Cluster(randomIntBetween(2, 5), false, Settings.EMPTY)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode leader = cluster.getAnyLeader();
        final long expectedTerm = leader.coordinator.getCurrentTerm();

        final long masterEligibleCount = cluster.clusterNodes.stream()
            .filter(node -> node.getLocalNode().isMasterEligibleNode())
            .count();
        if (masterEligibleCount == 2) {
            // With exactly two master-eligible nodes the voting configuration has to be
            // auto-shrunk down to just the leader; otherwise restarting the other
            // master-eligible node triggers an election.
            leader.submitSetAutoShrinkVotingConfiguration(true);
            // One delay for the setting update, a second one for the reconfiguration.
            cluster.stabilise(2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
        }

        for (final ClusterNode follower : cluster.getAllNodesExcept(leader)) {
            logger.info("--> restarting {}", follower);
            follower.close();
            // Swap the closed node for its restarted replacement, leaving all others in place.
            cluster.clusterNodes.replaceAll(existing -> {
                if (existing != follower) {
                    return existing;
                }
                return existing.restartedNode(Function.identity(), Function.identity(), Settings.EMPTY);
            });
            cluster.stabilise();
            assertThat("term should not change", cluster.getAnyNode().coordinator.getCurrentTerm(), is(expectedTerm));
        }
    }
}
Also used : ClusterNode(org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode)

Example 4 with ClusterNode

use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in project crate by crate.

the class CoordinatorTests method testUnresponsiveLeaderDetectedEventually.

/**
 * Blackholes the leader of a stabilised cluster, waits for the cluster to stabilise
 * again within a computed time bound, and asserts that the leader found afterwards has
 * a different id from the original one.
 */
public void testUnresponsiveLeaderDetectedEventually() {
    try (Cluster cluster = new Cluster(randomIntBetween(3, 5))) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode blackholedLeader = cluster.getAnyLeader();
        logger.info("--> blackholing leader {}", blackholedLeader);
        blackholedLeader.blackhole();

        // Time for all the followers to notice the leader has gone.
        final long leaderFailureDetection =
            (defaultMillis(LEADER_CHECK_INTERVAL_SETTING) + defaultMillis(LEADER_CHECK_TIMEOUT_SETTING))
                * defaultInt(LEADER_CHECK_RETRY_COUNT_SETTING);
        // Time for a leader to notice that a node it is checking is unresponsive.
        final long followerFailureDetection =
            (defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING) + defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING))
                * defaultInt(FOLLOWER_CHECK_RETRY_COUNT_SETTING);
        final long publishTimeout = defaultMillis(PUBLISH_TIMEOUT_SETTING);

        // Path 1: the followers replace the leader.
        final long followersReplaceLeader =
            // first wait for all the followers to notice the leader has gone
            leaderFailureDetection
                // then wait for a follower to be promoted to leader
                + DEFAULT_ELECTION_DELAY
                // and the first publication times out because of the unresponsive node
                + publishTimeout
                // there might be a term bump causing another election
                + DEFAULT_ELECTION_DELAY
                // then wait for both of: (1) the term bumping publication to time out, and
                // (2) the new leader to notice that the old leader is unresponsive
                + Math.max(publishTimeout, followerFailureDetection)
                // then wait for the new leader to commit a state without the old leader
                + DEFAULT_CLUSTER_STATE_UPDATE_DELAY
                // then wait for the followup reconfiguration
                + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;

        // Path 2: ALSO wait for the old leader to notice that its followers are unresponsive,
        // then to try and commit a state removing them, causing it to stand down.
        final long oldLeaderStandsDown = followerFailureDetection + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;

        // This stabilisation time bound is undesirably long. TODO try and reduce it.
        cluster.stabilise(Math.max(followersReplaceLeader, oldLeaderStandsDown));

        assertThat(cluster.getAnyLeader().getId(), not(equalTo(blackholedLeader.getId())));
    }
}
Also used : ClusterNode(org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode)

Example 5 with ClusterNode

use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in project crate by crate.

the class CoordinatorTests method testDoesNotShrinkConfigurationBelowThreeNodes.

/**
 * Disconnects two of the three nodes in turn, asserts that every node ends up as a
 * candidate, then heals the first disconnected node and requires the cluster to
 * stabilise again.
 */
public void testDoesNotShrinkConfigurationBelowThreeNodes() {
    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode firstIsolated = cluster.getAnyNode();
        logger.info("--> disconnecting {}", firstIsolated);
        firstIsolated.disconnect();
        cluster.stabilise();

        final ClusterNode secondIsolated = cluster.getAnyNodeExcept(firstIsolated);
        logger.info("--> disconnecting {}", secondIsolated);
        secondIsolated.disconnect();
        cluster.runFor(DEFAULT_STABILISATION_TIME, "allowing time for fault detection");

        cluster.clusterNodes.forEach(node -> {
            final String reason = node.getId() + " should be a candidate";
            assertThat(reason, node.coordinator.getMode(), equalTo(Mode.CANDIDATE));
        });

        firstIsolated.heal();
        // Stabilising here would not work if the first disconnected node had been
        // removed from the configuration.
        cluster.stabilise();
    }
}
Also used : ClusterNode(org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode)

Aggregations

ClusterNode (org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode): 35, Matchers.containsString (org.hamcrest.Matchers.containsString): 8, VotingConfiguration (org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfiguration): 5, MockLogAppender (org.elasticsearch.test.MockLogAppender): 5, HashSet (java.util.HashSet): 4, ClusterState (org.elasticsearch.cluster.ClusterState): 4, List (java.util.List): 3, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 3, Logger (org.apache.logging.log4j.Logger): 3, LogEvent (org.apache.logging.log4j.core.LogEvent): 3, ElasticsearchException (org.elasticsearch.ElasticsearchException): 3, IOException (java.io.IOException): 2, Arrays (java.util.Arrays): 2, Collections (java.util.Collections): 2, Map (java.util.Map): 2, Set (java.util.Set): 2, Function (java.util.function.Function): 2, Collectors (java.util.stream.Collectors): 2, Level (org.apache.logging.log4j.Level): 2, LogManager (org.apache.logging.log4j.LogManager): 2