use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testSendingShardFailure.
// simulate handling of sending shard failure during an isolation
public void testSendingShardFailure() throws Exception {
List<String> nodes = startCluster(3, 2);
String masterNode = internalCluster().getMasterName();
List<String> nonMasterNodes = nodes.stream().filter(node -> !node.equals(masterNode)).collect(Collectors.toList());
String nonMasterNode = randomFrom(nonMasterNodes);
assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)));
ensureGreen();
String nonMasterNodeId = internalCluster().clusterService(nonMasterNode).localNode().getId();
// fail a random shard
ShardRouting failedShard = randomFrom(clusterService().state().getRoutingNodes().node(nonMasterNodeId).shardsWithState(ShardRoutingState.STARTED));
ShardStateAction service = internalCluster().getInstance(ShardStateAction.class, nonMasterNode);
CountDownLatch latch = new CountDownLatch(1);
AtomicBoolean success = new AtomicBoolean();
String isolatedNode = randomBoolean() ? masterNode : nonMasterNode;
TwoPartitions partitions = isolateNode(isolatedNode);
// we cannot use the NetworkUnresponsive disruption type here as it will swallow the "shard failed" request, calling neither
// onSuccess nor onFailure on the provided listener.
NetworkLinkDisruptionType disruptionType = new NetworkDisconnect();
NetworkDisruption networkDisruption = new NetworkDisruption(partitions, disruptionType);
setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
service.localShardFailed(failedShard, "simulated", new CorruptIndexException("simulated", (String) null), new ShardStateAction.Listener() {
@Override
public void onSuccess() {
success.set(true);
latch.countDown();
}
@Override
public void onFailure(Exception e) {
success.set(false);
latch.countDown();
assert false;
}
});
if (isolatedNode.equals(nonMasterNode)) {
assertNoMaster(nonMasterNode);
} else {
ensureStableCluster(2, nonMasterNode);
}
// heal the partition
networkDisruption.removeAndEnsureHealthy(internalCluster());
// the cluster should stabilize
ensureStableCluster(3);
latch.await();
// the listener should be notified
assertTrue(success.get());
// the failed shard should be gone
List<ShardRouting> shards = clusterService().state().getRoutingTable().allShards("test");
for (ShardRouting shard : shards) {
assertThat(shard.allocationId(), not(equalTo(failedShard.allocationId())));
}
}
use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method addRandomDisruptionScheme.
private ServiceDisruptionScheme addRandomDisruptionScheme() {
// TODO: add partial partitions
final DisruptedLinks disruptedLinks;
if (randomBoolean()) {
disruptedLinks = TwoPartitions.random(random(), internalCluster().getNodeNames());
} else {
disruptedLinks = Bridge.random(random(), internalCluster().getNodeNames());
}
final NetworkLinkDisruptionType disruptionType;
switch(randomInt(2)) {
case 0:
disruptionType = new NetworkUnresponsive();
break;
case 1:
disruptionType = new NetworkDisconnect();
break;
case 2:
disruptionType = NetworkDelay.random(random());
break;
default:
throw new IllegalArgumentException();
}
final ServiceDisruptionScheme scheme;
if (rarely()) {
scheme = new SlowClusterStateProcessing(random());
} else {
scheme = new NetworkDisruption(disruptedLinks, disruptionType);
}
setDisruptionScheme(scheme);
return scheme;
}
use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method testIndicesDeleted.
/**
* Tests that indices are properly deleted even if there is a master transition in between.
* Test for https://github.com/elastic/elasticsearch/issues/11665
*/
public void testIndicesDeleted() throws Exception {
final Settings settings = Settings.builder().put(DEFAULT_SETTINGS).put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), // don't wait on isolated data node
"0s").put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), // wait till cluster state is committed
"30s").build();
final String idxName = "test";
configureCluster(settings, 3, null, 2);
final List<String> allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2);
final String dataNode = internalCluster().startDataOnlyNode();
ensureStableCluster(3);
assertAcked(prepareCreate("test"));
final String masterNode1 = internalCluster().getMasterName();
NetworkDisruption networkDisruption = new NetworkDisruption(new TwoPartitions(masterNode1, dataNode), new NetworkUnresponsive());
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
// We know this will time out due to the partition, we check manually below to not proceed until
// the delete has been applied to the master node and the master eligible node.
internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get();
// Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node.
assertBusy(() -> {
for (String masterNode : allMasterEligibleNodes) {
final ClusterState masterState = internalCluster().clusterService(masterNode).state();
assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false);
}
});
internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK);
ensureYellow();
assertFalse(client().admin().indices().prepareExists(idxName).get().isExists());
}
use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive in project elasticsearch by elastic.
the class DiscoveryWithServiceDisruptionsIT method addRandomDisruptionType.
protected NetworkDisruption addRandomDisruptionType(TwoPartitions partitions) {
final NetworkLinkDisruptionType disruptionType;
if (randomBoolean()) {
disruptionType = new NetworkUnresponsive();
} else {
disruptionType = new NetworkDisconnect();
}
NetworkDisruption partition = new NetworkDisruption(partitions, disruptionType);
setDisruptionScheme(partition);
return partition;
}
use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive in project elasticsearch by elastic.
the class NetworkDisruptionIT method testNetworkPartitionWithNodeShutdown.
public void testNetworkPartitionWithNodeShutdown() throws IOException {
internalCluster().ensureAtLeastNumDataNodes(2);
String[] nodeNames = internalCluster().getNodeNames();
NetworkDisruption networkDisruption = new NetworkDisruption(new TwoPartitions(nodeNames[0], nodeNames[1]), new NetworkUnresponsive());
internalCluster().setDisruptionScheme(networkDisruption);
networkDisruption.startDisrupting();
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeNames[0]));
internalCluster().clearDisruptionScheme();
}
Aggregations