use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.
the class NetworkDisruptionIT method testNetworkPartitionRemovalRestoresConnections.
/**
 * Splits the non-master nodes into two partitions, disconnects the partitions from each
 * other, issues a nodes-info request through one node of each side, then clears the
 * disruption scheme and asserts that every cross-partition pair of nodes reports being
 * connected again in both directions.
 *
 * @throws IOException declared by the test signature
 */
public void testNetworkPartitionRemovalRestoresConnections() throws IOException {
    final Set<String> candidates = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
    candidates.remove(internalCluster().getMasterName());
    if (candidates.size() <= 2) {
        // too few non-master nodes to split: ask the cluster for more, then re-read the node names
        internalCluster().ensureAtLeastNumDataNodes(3 - candidates.size());
        candidates.addAll(Arrays.asList(internalCluster().getNodeNames()));
        candidates.remove(internalCluster().getMasterName());
    }

    // the first side takes at most size - 1 nodes, so the second side always keeps at least one
    final int firstSideSize = randomIntBetween(1, candidates.size() - 1);
    final Set<String> firstSide = new HashSet<>(randomSubsetOf(firstSideSize, candidates));
    final Set<String> secondSide = new HashSet<>(candidates);
    secondSide.removeAll(firstSide);
    assertThat(secondSide.size(), greaterThanOrEqualTo(1));

    final TwoPartitions partitions = new TwoPartitions(firstSide, secondSide);
    final NetworkDisruption disruption = new NetworkDisruption(partitions, new NetworkDisruption.NetworkDisconnect());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    // send a request through one node of each partition while the disruption is active
    client(randomFrom(firstSide)).admin().cluster().prepareNodesInfo().get();
    client(randomFrom(secondSide)).admin().cluster().prepareNodesInfo().get();

    internalCluster().clearDisruptionScheme();

    // check that all cross-partition connections are restored
    for (String left : firstSide) {
        for (String right : secondSide) {
            final TransportService leftTransport = internalCluster().getInstance(TransportService.class, left);
            final TransportService rightTransport = internalCluster().getInstance(TransportService.class, right);
            assertTrue(left + " is not connected to " + right, leftTransport.nodeConnected(rightTransport.getLocalNode()));
            assertTrue(right + " is not connected to " + left, rightTransport.nodeConnected(leftTransport.getLocalNode()));
        }
    }
}
use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testNodesFDAfterMasterReelection.
/**
 * Verifies that node fault detection works after a master (re-)election: the current
 * master is stopped, the cluster is left to stabilise on three nodes, and then a
 * non-master node is isolated. The test passes once the cluster, as seen from the
 * master, is stable with two nodes.
 */
public void testNodesFDAfterMasterReelection() throws Exception {
    startCluster(4);

    logger.info("--> stopping current master");
    internalCluster().stopCurrentMasterNode();
    ensureStableCluster(3);

    logger.info("--> reducing min master nodes to 2");
    final Settings.Builder minMasterNodes = Settings.builder()
        .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2);
    assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(minMasterNodes).get());

    final String electedMaster = internalCluster().getMasterName();
    String isolationTarget = null;
    for (String candidate : internalCluster().getNodeNames()) {
        if (candidate.equals(electedMaster)) {
            continue;
        }
        // no early exit: the last non-master name in iteration order is the one that gets isolated
        isolationTarget = candidate;
    }

    logger.info("--> isolating [{}]", isolationTarget);
    final TwoPartitions isolation = isolateNode(isolationTarget);
    final NetworkDisruption disruption = addRandomDisruptionType(isolation);
    disruption.startDisrupting();

    logger.info("--> waiting for master to remove it");
    ensureStableCluster(2, electedMaster);
}
use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testUnicastSinglePingResponseContainsMaster.
/**
 * A 4 node cluster with m_m_n set to 3 where each node has one unicast endpoint. One node is
 * partitioned from the master node while the temporal unicast responses are empty. When the
 * partition is healed, the single ping response contains a master node, and the rejoining
 * node should take this master node and connect.
 */
public void testUnicastSinglePingResponseContainsMaster() throws Exception {
    List<String> nodes = startCluster(4, -1, new int[] { 0 });
    // Figure out which node is the elected master
    final String masterNode = internalCluster().getMasterName();
    logger.info("---> legit elected master node={}", masterNode);
    List<String> otherNodes = new ArrayList<>(nodes);
    otherNodes.remove(masterNode);
    // Don't isolate the node that is the unicast endpoint for all the other nodes.
    otherNodes.remove(nodes.get(0));
    final String isolatedNode = otherNodes.get(0);
    // Forcefully clean the temporal response lists on all nodes. Otherwise the node in the unicast host list
    // includes all the other nodes that have pinged it and the issue doesn't manifest.
    // This has to visit every node's Discovery instance: fetching a single instance would clean
    // only one node and leave the temporal responses on the others in place.
    for (Discovery discovery : internalCluster().getInstances(Discovery.class)) {
        ZenPing zenPing = ((TestZenDiscovery) discovery).getZenPing();
        if (zenPing instanceof UnicastZenPing) {
            ((UnicastZenPing) zenPing).clearTemporalResponses();
        }
    }
    // Simulate a network issue between the unlucky node and the elected master node in both directions.
    NetworkDisruption networkDisconnect = new NetworkDisruption(new TwoPartitions(masterNode, isolatedNode), new NetworkDisconnect());
    setDisruptionScheme(networkDisconnect);
    networkDisconnect.startDisrupting();
    // Wait until the elected master has removed the unlucky node...
    ensureStableCluster(3, masterNode);
    // The isolated node must report no master, so it starts pinging
    assertNoMaster(isolatedNode);
    networkDisconnect.stopDisrupting();
    // Wait until the master node sees all 4 nodes again.
    ensureStableCluster(4);
    // The elected master shouldn't have changed: the isolated node could never have elected itself
    // as master, because m_m_n of 3 could never be satisfied.
    assertMaster(masterNode, nodes);
}
use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testFailWithMinimumMasterNodesConfigured.
/**
 * Checks that a partial network partition does not lead to a split brain.
 * See https://github.com/elastic/elasticsearch/issues/2488
 */
public void testFailWithMinimumMasterNodesConfigured() throws Exception {
    final List<String> clusterNodes = startCluster(3);

    // Determine the elected master node
    final String electedMaster = internalCluster().getMasterName();
    logger.info("---> legit elected master node={}", electedMaster);

    // Randomly choose one of the nodes that is not the elected master.
    final Set<String> candidates = new HashSet<>(clusterNodes);
    candidates.remove(electedMaster);
    final String[] candidateNames = candidates.toArray(Strings.EMPTY_ARRAY);
    final String cutOffNode = randomFrom(candidateNames);

    // Simulate a network issue, in both directions, between the chosen node and the elected master.
    final TwoPartitions masterVersusCutOff = new TwoPartitions(electedMaster, cutOffNode);
    final NetworkDisruption disconnect = new NetworkDisruption(masterVersusCutOff, new NetworkDisconnect());
    setDisruptionScheme(disconnect);
    disconnect.startDisrupting();

    // Wait until the elected master has removed the cut-off node...
    ensureStableCluster(2, electedMaster);

    // The cut-off node must report *no* master node, since it can't connect to the master; it should
    // keep pinging until the network failure has been resolved. However, it may take a bit
    // before the node detects that it has been cut off from the elected master.
    assertNoMaster(cutOffNode);

    disconnect.stopDisrupting();

    // Wait until the master node sees all 3 nodes again.
    ensureStableCluster(3);

    // The elected master shouldn't have changed: the cut-off node could never have elected itself
    // as master, because m_m_n of 2 could never be satisfied.
    assertMaster(electedMaster, clusterNodes);
}
use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testRejoinDocumentExistsInAllShardCopies.
/**
 * Checks that a document indexed on the majority side of a partition can be read from
 * every node, including the formerly isolated one, once the partition has been healed.
 */
public void testRejoinDocumentExistsInAllShardCopies() throws Exception {
    final List<String> startedNodes = startCluster(3);

    // one primary plus two replicas
    final Settings.Builder indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2);
    assertAcked(prepareCreate("test").setSettings(indexSettings).get());
    ensureGreen("test");

    // pick the isolated and a non-isolated node at random by shuffling a private copy
    final List<String> shuffled = new ArrayList<>(startedNodes);
    Collections.shuffle(shuffled, random());
    final String isolatedNode = shuffled.get(0);
    final String connectedNode = shuffled.get(1);

    final TwoPartitions isolation = isolateNode(isolatedNode);
    final NetworkDisruption disruption = addRandomDisruptionType(isolation);
    disruption.startDisrupting();
    ensureStableCluster(2, connectedNode);
    assertFalse(
        client(connectedNode).admin().cluster().prepareHealth("test").setWaitForYellowStatus().get().isTimedOut()
    );

    // index through the majority side while the partition is in place
    final IndexResponse indexed = internalCluster().client(connectedNode)
        .prepareIndex("test", "type")
        .setSource("field", "value")
        .get();
    assertThat(indexed.getVersion(), equalTo(1L));
    final String docId = indexed.getId();

    logger.info("Verifying if document exists via node[{}]", connectedNode);
    final GetResponse duringPartition = internalCluster().client(connectedNode)
        .prepareGet("test", "type", docId)
        .setPreference("_local")
        .get();
    assertThat(duringPartition.isExists(), is(true));
    assertThat(duringPartition.getVersion(), equalTo(1L));
    assertThat(duringPartition.getId(), equalTo(docId));

    // heal the partition and wait for the full cluster and all shard copies to come back
    disruption.stopDisrupting();
    ensureStableCluster(3);
    ensureGreen("test");

    for (String reader : shuffled) {
        logger.info("Verifying if document exists after isolating node[{}] via node[{}]", isolatedNode, reader);
        final GetResponse afterHeal = internalCluster().client(reader)
            .prepareGet("test", "type", docId)
            .setPreference("_local")
            .get();
        assertThat(afterHeal.isExists(), is(true));
        assertThat(afterHeal.getVersion(), equalTo(1L));
        assertThat(afterHeal.getId(), equalTo(docId));
    }
}
Aggregations