Example use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in the crate project (by crate).
From class CoordinatorTests, method testLogsWarningPeriodicallyIfClusterNotFormed.
/**
 * Checks that a cluster which cannot elect a master keeps emitting the
 * "master not discovered or elected yet" warning from every node.
 *
 * <p>The warning interval is picked at random: either the default of
 * {@code DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING} or an explicit value between 1ms and 100000ms.
 * A three-node cluster is stabilised, every node is disconnected, and once all nodes are in
 * {@code CANDIDATE} mode the test repeatedly runs the cluster for one warning interval (plus
 * {@code DEFAULT_DELAY_VARIABILITY}) and asserts that each node logged the warning in that window.
 *
 * @throws IllegalAccessException declared by the original signature; presumably raised by the log appender
 *                                plumbing used below (the throwing call is not visible from this method alone)
 */
public void testLogsWarningPeriodicallyIfClusterNotFormed() throws IllegalAccessException {
    final long warningDelayMillis;
    final Settings settings;
    if (randomBoolean() == false) {
        // use an explicit, randomly chosen warning interval
        warningDelayMillis = randomLongBetween(1, 100000);
        settings = Settings.builder()
            .put(ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.getKey(), warningDelayMillis + "ms")
            .build();
    } else {
        // rely on the setting's default interval
        settings = Settings.EMPTY;
        warningDelayMillis = ClusterFormationFailureHelper.DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.get(settings).millis();
    }
    logger.info("--> emitting warnings every [{}ms]", warningDelayMillis);

    try (Cluster cluster = new Cluster(3, true, settings)) {
        cluster.runRandomly();
        cluster.stabilise();

        logger.info("--> disconnecting all nodes");
        cluster.clusterNodes.forEach(node -> node.disconnect());

        final long leaderFailureDetectionMillis =
            // to wait for any in-flight check to time out
            defaultMillis(LEADER_CHECK_TIMEOUT_SETTING)
            // to wait for the next check to be sent
            + defaultMillis(LEADER_CHECK_INTERVAL_SETTING)
            // to send the failing check and receive the disconnection response
            + 2 * DEFAULT_DELAY_VARIABILITY;
        cluster.runFor(leaderFailureDetectionMillis, "waiting for leader failure");

        cluster.clusterNodes.forEach(
            node -> assertThat(node.getId() + " is CANDIDATE", node.coordinator.getMode(), is(CANDIDATE)));

        // the warning must be repeated, so check several consecutive warning windows
        final int rounds = scaledRandomIntBetween(1, 10) + 1;
        for (int round = 0; round < rounds; round++) {
            final MockLogAppender mockLogAppender = new MockLogAppender();
            try {
                mockLogAppender.start();
                Loggers.addAppender(LogManager.getLogger(ClusterFormationFailureHelper.class), mockLogAppender);
                mockLogAppender.addExpectation(new MockLogAppender.LoggingExpectation() {
                    // local nodes that have emitted the warning during the current window
                    final Set<DiscoveryNode> warnedNodes = new HashSet<>();

                    @Override
                    public void match(LogEvent event) {
                        final String message = event.getMessage().getFormattedMessage();
                        assertThat(message, startsWith("master not discovered or elected yet, an election requires at least 2 nodes with ids from ["));

                        // attribute the log event to exactly one cluster node via the node id in the log context
                        final String loggedNodeId = event.getContextData().<String>getValue(NODE_ID_LOG_CONTEXT_KEY);
                        final List<ClusterNode> matchingNodes = cluster.clusterNodes.stream()
                            .filter(candidate -> loggedNodeId.equals(getNodeIdForLogContext(candidate.getLocalNode())))
                            .collect(Collectors.toList());
                        assertThat(matchingNodes, hasSize(1));

                        final ClusterNode loggingNode = matchingNodes.get(0);
                        assertTrue(Regex.simpleMatch("*have discovered *" + loggingNode.toString() + "*discovery will continue*", message));
                        warnedNodes.add(loggingNode.getLocalNode());
                    }

                    @Override
                    public void assertMatched() {
                        // every node of the cluster must have logged the warning
                        final Set<DiscoveryNode> allLocalNodes = cluster.clusterNodes.stream()
                            .map(ClusterNode::getLocalNode)
                            .collect(Collectors.toSet());
                        assertThat(warnedNodes + " vs " + cluster.clusterNodes, warnedNodes, equalTo(allLocalNodes));
                    }
                });
                cluster.runFor(warningDelayMillis + DEFAULT_DELAY_VARIABILITY, "waiting for warning to be emitted");
                mockLogAppender.assertAllExpectationsMatched();
            } finally {
                // always detach the appender so later rounds and other tests are unaffected
                Loggers.removeAppender(LogManager.getLogger(ClusterFormationFailureHelper.class), mockLogAppender);
                mockLogAppender.stop();
            }
        }
    }
}
Example use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in the crate project (by crate).
From class CoordinatorTests, method testAppliesNoMasterBlock.
/**
 * Shared body for the no-master-block tests: stores {@code noMasterBlockSetting} under
 * {@code NO_MASTER_BLOCK_SETTING} in the persistent cluster settings, disconnects the leader and
 * asserts that the leader's last applied cluster state then carries {@code expectedBlock} as a global block.
 *
 * @param noMasterBlockSetting value to store for {@code NO_MASTER_BLOCK_SETTING}
 * @param expectedBlock        global block expected on the disconnected leader
 */
private void testAppliesNoMasterBlock(String noMasterBlockSetting, ClusterBlock expectedBlock) {
    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode leader = cluster.getAnyLeader();
        leader.submitUpdateTask("update NO_MASTER_BLOCK_SETTING", cs -> {
            // existing persistent settings plus the no-master-block override
            final Settings updatedPersistentSettings = Settings.builder()
                .put(cs.metadata().persistentSettings())
                .put(NO_MASTER_BLOCK_SETTING.getKey(), noMasterBlockSetting)
                .build();
            final Metadata.Builder metadataBuilder = Metadata.builder(cs.metadata()).persistentSettings(updatedPersistentSettings);
            return ClusterState.builder(cs).metadata(metadataBuilder).build();
        }, (source, e) -> {
            // the failure callback is intentionally a no-op, as in the original test
        });
        cluster.runFor(DEFAULT_CLUSTER_STATE_UPDATE_DELAY, "committing setting update");

        leader.disconnect();
        final long disconnectionDetectionMillis = defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING)
            + defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING)
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
        cluster.runFor(disconnectionDetectionMillis, "detecting disconnection");

        assertThat(leader.getLastAppliedClusterState().blocks().global(), hasItem(expectedBlock));
        // TODO reboot the leader and verify that the same block is applied when it restarts
    }
}
Example use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in the crate project (by crate).
From class CoordinatorTests, method testDoesNotPerformElectionWhenRestartingFollower.
/**
 * Restarts every non-leader node of a stabilised cluster (2 to 5 nodes), one at a time, and asserts after
 * each restart that the term reported by a node of the cluster is still the term the leader had before
 * any restart, i.e. that restarting a follower did not cause an election.
 */
public void testDoesNotPerformElectionWhenRestartingFollower() {
    final int nodeCount = randomIntBetween(2, 5);
    try (Cluster cluster = new Cluster(nodeCount, false, Settings.EMPTY)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode leader = cluster.getAnyLeader();
        final long expectedTerm = leader.coordinator.getCurrentTerm();

        final long masterEligibleNodes = cluster.clusterNodes.stream()
            .filter(node -> node.getLocalNode().isMasterEligibleNode())
            .count();
        if (masterEligibleNodes == 2) {
            // in the 2-node case, auto-shrinking the voting configuration is required to reduce the voting configuration down to
            // just the leader, otherwise restarting the other master-eligible node triggers an election
            leader.submitSetAutoShrinkVotingConfiguration(true);
            // 1st delay for the setting update, 2nd for the reconfiguration
            cluster.stabilise(2 * DEFAULT_CLUSTER_STATE_UPDATE_DELAY);
        }

        for (final ClusterNode follower : cluster.getAllNodesExcept(leader)) {
            logger.info("--> restarting {}", follower);
            follower.close();
            // swap the closed node for its restarted incarnation, leaving all other nodes in place
            cluster.clusterNodes.replaceAll(existing -> {
                if (existing == follower) {
                    return existing.restartedNode(Function.identity(), Function.identity(), Settings.EMPTY);
                }
                return existing;
            });
            cluster.stabilise();

            final long termAfterRestart = cluster.getAnyNode().coordinator.getCurrentTerm();
            assertThat("term should not change", termAfterRestart, is(expectedTerm));
        }
    }
}
Example use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in the crate project (by crate).
From class CoordinatorTests, method testUnresponsiveLeaderDetectedEventually.
/**
 * Blackholes the leader of a stabilised cluster (3 to 5 nodes) and asserts that, within a computed time
 * bound, the cluster stabilises again with a leader whose id differs from the original leader's id.
 *
 * <p>The bound is the larger of two scenarios that run concurrently: the followers replacing the leader,
 * and the old leader noticing that its followers are unresponsive and standing down.
 */
public void testUnresponsiveLeaderDetectedEventually() {
    try (Cluster cluster = new Cluster(randomIntBetween(3, 5))) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode originalLeader = cluster.getAnyLeader();
        logger.info("--> blackholing leader {}", originalLeader);
        originalLeader.blackhole();

        // first wait for all the followers to notice the leader has gone
        final long followersDetectLeaderFailure =
            (defaultMillis(LEADER_CHECK_INTERVAL_SETTING) + defaultMillis(LEADER_CHECK_TIMEOUT_SETTING))
                * defaultInt(LEADER_CHECK_RETRY_COUNT_SETTING);
        // the first publication times out because of the unresponsive node
        final long firstPublicationTimeout = defaultMillis(PUBLISH_TIMEOUT_SETTING);
        // 1. the term bumping publication to time out
        final long termBumpPublicationTimeout = defaultMillis(PUBLISH_TIMEOUT_SETTING);
        // 2. the new leader to notice that the old leader is unresponsive
        final long newLeaderDetectsOldLeader =
            (defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING) + defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING))
                * defaultInt(FOLLOWER_CHECK_RETRY_COUNT_SETTING);

        final long newLeaderTakesOver = followersDetectLeaderFailure
            // then wait for a follower to be promoted to leader
            + DEFAULT_ELECTION_DELAY
            + firstPublicationTimeout
            // there might be a term bump causing another election
            + DEFAULT_ELECTION_DELAY
            // then wait for both of the numbered conditions above
            + Math.max(termBumpPublicationTimeout, newLeaderDetectsOldLeader)
            // then wait for the new leader to commit a state without the old leader
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY
            // then wait for the followup reconfiguration
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;

        // ALSO wait for the leader to notice that its followers are unresponsive
        final long oldLeaderStandsDown =
            (defaultMillis(FOLLOWER_CHECK_INTERVAL_SETTING) + defaultMillis(FOLLOWER_CHECK_TIMEOUT_SETTING))
                * defaultInt(FOLLOWER_CHECK_RETRY_COUNT_SETTING)
            // then wait for the leader to try and commit a state removing them, causing it to stand down
            + DEFAULT_CLUSTER_STATE_UPDATE_DELAY;

        // This stabilisation time bound is undesirably long. TODO try and reduce it.
        cluster.stabilise(Math.max(newLeaderTakesOver, oldLeaderStandsDown));

        assertThat(cluster.getAnyLeader().getId(), not(equalTo(originalLeader.getId())));
    }
}
Example use of org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode in the crate project (by crate).
From class CoordinatorTests, method testDoesNotShrinkConfigurationBelowThreeNodes.
/**
 * Disconnects two nodes of a three-node cluster in turn, asserts that every node ends up in
 * {@code CANDIDATE} mode, then heals the first disconnected node and requires the cluster to stabilise
 * again. As the inline comment notes, that final stabilisation would not succeed had the first
 * disconnected node been removed from the configuration.
 */
public void testDoesNotShrinkConfigurationBelowThreeNodes() {
    try (Cluster cluster = new Cluster(3)) {
        cluster.runRandomly();
        cluster.stabilise();

        final ClusterNode firstDisconnected = cluster.getAnyNode();
        logger.info("--> disconnecting {}", firstDisconnected);
        firstDisconnected.disconnect();
        cluster.stabilise();

        final ClusterNode secondDisconnected = cluster.getAnyNodeExcept(firstDisconnected);
        logger.info("--> disconnecting {}", secondDisconnected);
        secondDisconnected.disconnect();
        cluster.runFor(DEFAULT_STABILISATION_TIME, "allowing time for fault detection");

        cluster.clusterNodes.forEach(
            node -> assertThat(node.getId() + " should be a candidate", node.coordinator.getMode(), equalTo(Mode.CANDIDATE)));

        firstDisconnected.heal();
        // would not work if the first disconnected node were removed from the configuration
        cluster.stabilise();
    }
}
Aggregations