Use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.
From the class MasterDisruptionIT, method testMappingTimeout.
/**
 * Sends a bulk request while a {@code BlockMasterServiceOnMaster} disruption is active and the
 * {@code indices.mapping.dynamic_timeout} setting is 1ms, and expects the bulk response to report failures.
 * Once the disruption is stopped, waits until every shard of "test" reports a global checkpoint equal to its
 * local checkpoint.
 *
 * @throws Exception if any of the cluster operations or the busy-wait assertion fails
 */
public void testMappingTimeout() throws Exception {
    startCluster(3);

    // One shard, one replica, and the node that is currently master is excluded by name from allocation.
    final Settings.Builder testIndexSettings = Settings.builder();
    testIndexSettings.put("index.number_of_shards", 1);
    testIndexSettings.put("index.number_of_replicas", 1);
    testIndexSettings.put("index.routing.allocation.exclude._name", internalCluster().getMasterName());
    createIndex("test", testIndexSettings.build());

    // create one field ("f") before the disruption starts
    index("test", "doc", "1", "{ \"f\": 1 }");
    ensureGreen();

    final Settings.Builder shortMappingTimeout = Settings.builder().put("indices.mapping.dynamic_timeout", "1ms");
    assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(shortMappingTimeout));

    final ServiceDisruptionScheme blockedMaster = new BlockMasterServiceOnMaster(random());
    setDisruptionScheme(blockedMaster);
    blockedMaster.startDisrupting();

    // { id, source } pairs; document "3" is the only one using a field ("g") that was not indexed above.
    final String[][] bulkDocs = {
        { "2", "{ \"f\": 1 }" },
        { "3", "{ \"g\": 1 }" },
        { "4", "{ \"f\": 1 }" },
    };
    final BulkRequestBuilder bulkRequest = client().prepareBulk();
    for (String[] doc : bulkDocs) {
        bulkRequest.add(client().prepareIndex("test").setId(doc[0]).setSource(doc[1], XContentType.JSON));
    }
    final BulkResponse bulkResult = bulkRequest.get();
    assertTrue(bulkResult.hasFailures());

    blockedMaster.stopDisrupting();

    // After healing, each shard's global checkpoint must match its local checkpoint.
    assertBusy(() -> {
        final IndicesStatsResponse indexStats = client().admin().indices().prepareStats("test").clear().get();
        for (ShardStats shard : indexStats.getShards()) {
            assertThat(
                shard.getShardRouting().toString(),
                shard.getSeqNoStats().getGlobalCheckpoint(),
                equalTo(shard.getSeqNoStats().getLocalCheckpoint())
            );
        }
    });
}
Use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.
From the class DedicatedClusterSnapshotRestoreIT, method testDataNodeRestartWithBusyMasterDuringSnapshot.
/**
 * Starts a snapshot of "test-idx" with one data node blocked, activates a {@code BusyMasterServiceDisruption}
 * at {@code Priority.HIGH}, restarts the blocked data node, and then verifies that at least one shard snapshot
 * is reported as failed and that the snapshot eventually reaches a completed state once the node is unblocked
 * and the disruption is stopped.
 *
 * @throws Exception if cluster setup, the node restart, or one of the busy-wait assertions fails
 */
public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
    logger.info("--> starting a master node and two data nodes");
    internalCluster().startMasterOnlyNode();
    internalCluster().startDataOnlyNodes(2);

    final Path repoLocation = randomRepoPath();
    createRepository("test-repo", "mock", repoLocation);
    maybeInitWithOldSnapshotVersion("test-repo", repoLocation);

    assertAcked(prepareCreate("test-idx", 0, indexSettingsNoReplicas(5)));
    ensureGreen();
    indexRandomDocs("test-idx", randomIntBetween(50, 100));

    final String blockedDataNode = blockNodeWithIndex("test-repo", "test-idx");

    logger.info("--> snapshot");
    final ServiceDisruptionScheme busyMaster = new BusyMasterServiceDisruption(random(), Priority.HIGH);
    setDisruptionScheme(busyMaster);

    // The snapshot is started (without waiting for completion) before the disruption begins.
    final String masterName = internalCluster().getMasterName();
    client(masterName).admin()
        .cluster()
        .prepareCreateSnapshot("test-repo", "test-snap")
        .setWaitForCompletion(false)
        .setIndices("test-idx")
        .get();
    busyMaster.startDisrupting();

    logger.info("--> restarting data node, which should cause primary shards to be failed");
    internalCluster().restartNode(blockedDataNode, InternalTestCluster.EMPTY_CALLBACK);

    logger.info("--> wait for shard snapshots to show as failed");
    assertBusy(() -> {
        final int failedShardCount = clusterAdmin().prepareSnapshotStatus("test-repo")
            .setSnapshots("test-snap")
            .get()
            .getSnapshots()
            .get(0)
            .getShardsStats()
            .getFailedShards();
        assertThat(failedShardCount, greaterThanOrEqualTo(1));
    }, 60L, TimeUnit.SECONDS);

    unblockNode("test-repo", blockedDataNode);
    busyMaster.stopDisrupting();

    // check that snapshot completes
    assertBusy(() -> {
        final GetSnapshotsResponse listing = clusterAdmin().prepareGetSnapshots("test-repo")
            .setSnapshots("test-snap")
            .setIgnoreUnavailable(true)
            .get();
        assertEquals(1, listing.getSnapshots().size());
        final SnapshotInfo onlySnapshot = listing.getSnapshots().get(0);
        assertTrue(onlySnapshot.state().toString(), onlySnapshot.state().completed());
    }, 60L, TimeUnit.SECONDS);
}
Use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.
From the class ConcurrentSeqNoVersioningIT, method testSeqNoCASLinearizability.
/**
 * Test info: disrupt network for up to 8s in a number of rounds and check that we only get true positive CAS
 * results when running multiple threads doing CAS updates.
 * <p>
 * Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round
 * before initiating next round.
 */
public void testSeqNoCASLinearizability() {
    final int maxDisruptionSeconds = scaledRandomIntBetween(1, 8);

    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(indexSettings())
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomInt(3))
        )
    );
    ensureGreen();

    final int keyCount = randomIntBetween(1, 10);
    logger.info("--> Indexing initial doc for {} keys", keyCount);
    // One partition per key, seeded with the version (primary term + seq no) of the initial document.
    final List<Partition> partitions = IntStream.range(0, keyCount)
        .mapToObj(key -> client().prepareIndex("test").setId("ID:" + key).setSource("value", -1).get())
        .map(indexed -> new Partition(indexed.getId(), new Version(indexed.getPrimaryTerm(), indexed.getSeqNo())))
        .collect(Collectors.toList());

    final int workerCount = randomIntBetween(3, 20);
    // +1 for main thread.
    final CyclicBarrier roundBarrier = new CyclicBarrier(workerCount + 1);
    final List<CASUpdateThread> workers = IntStream.range(0, workerCount)
        .mapToObj(id -> new CASUpdateThread(id, roundBarrier, partitions, maxDisruptionSeconds + 1))
        .collect(Collectors.toList());

    logger.info("--> Starting {} threads", workerCount);
    for (CASUpdateThread worker : workers) {
        worker.start();
    }

    try {
        final int roundCount = randomIntBetween(2, 5);
        logger.info("--> Running {} rounds", roundCount);
        for (int round = 0; round < roundCount; ++round) {
            final ServiceDisruptionScheme roundDisruption = addRandomDisruptionScheme();
            // Rendezvous with the workers before the disruption of this round is switched on.
            roundBarrier.await(1, TimeUnit.MINUTES);
            roundDisruption.startDisrupting();
            logger.info("--> round {}", round);
            try {
                roundBarrier.await(maxDisruptionSeconds, TimeUnit.SECONDS);
            } catch (TimeoutException roundTimedOut) {
                // Not everyone arrived within the disruption window: reset the barrier and carry on.
                roundBarrier.reset();
            }
            internalCluster().clearDisruptionScheme(false);
            // heal cluster faster to reduce test time.
            ensureFullyConnectedCluster();
        }
    } catch (InterruptedException | BrokenBarrierException | TimeoutException failure) {
        logger.error("Timed out, dumping stack traces of all threads:");
        for (CASUpdateThread worker : workers) {
            logger.info(worker.toString() + ":\n" + ExceptionsHelper.formatStackTrace(worker.getStackTrace()));
        }
        throw new RuntimeException(failure);
    } finally {
        logger.info("--> terminating test");
        // Signal every worker first, then wait for each of them, then verify none is left running.
        for (CASUpdateThread worker : workers) {
            worker.terminate();
        }
        for (CASUpdateThread worker : workers) {
            worker.await();
        }
        for (CASUpdateThread worker : workers) {
            if (worker.isAlive()) {
                fail("Thread still alive: " + worker);
            }
        }
    }

    for (Partition partition : partitions) {
        partition.assertLinearizable();
    }
}
Use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.
From the class AbstractDisruptionTestCase, method addRandomDisruptionScheme.
/**
 * Picks a random disruption scheme, registers it through {@code setDisruptionScheme} and returns it.
 * <p>
 * The scheme is a {@code SlowClusterStateProcessing} when {@code rarely()} returns true, and otherwise a
 * {@code NetworkDisruption} over randomly chosen links ({@code TwoPartitions} or {@code Bridge}) with a randomly
 * chosen link disruption type (unresponsive, disconnect, or a random network delay). The links and the
 * disruption type are always drawn, even when the slow-cluster-state scheme ends up being selected.
 * <p>
 * The returned scheme has only been registered; this method does not call {@code startDisrupting()}.
 *
 * @return the scheme that was registered
 */
public ServiceDisruptionScheme addRandomDisruptionScheme() {
    // TODO: add partial partitions
    final DisruptedLinks links = randomBoolean()
        ? TwoPartitions.random(random(), internalCluster().getNodeNames())
        : Bridge.random(random(), internalCluster().getNodeNames());

    final int typeChoice = randomInt(2);
    final NetworkLinkDisruptionType linkDisruption;
    if (typeChoice == 0) {
        linkDisruption = NetworkDisruption.UNRESPONSIVE;
    } else if (typeChoice == 1) {
        linkDisruption = NetworkDisruption.DISCONNECT;
    } else if (typeChoice == 2) {
        linkDisruption = NetworkDisruption.NetworkDelay.random(random());
    } else {
        // randomInt(2) is expected to yield 0..2, so this guards against an unexpected value.
        throw new IllegalArgumentException();
    }

    final ServiceDisruptionScheme chosen = rarely()
        ? new SlowClusterStateProcessing(random())
        : new NetworkDisruption(links, linkDisruption);
    setDisruptionScheme(chosen);
    return chosen;
}
Use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.
From the class DiscoveryDisruptionIT, method testElectMasterWithLatestVersion.
/**
 * Checks that index "test", created while the preferred master is isolated, is still present in the cluster
 * state after a subsequent full isolation of all nodes has been lifted.
 * <p>
 * Sequence: start three nodes; isolate all of them and assert no node has a master; heal and record the elected
 * ("preferred") master; isolate that master and create index "test" through one of the other nodes; isolate all
 * nodes again and assert no node has a master; stop the disruption and fail if the index is missing from the
 * cluster state.
 *
 * @throws Exception if a cluster operation or one of the assertions fails
 */
public void testElectMasterWithLatestVersion() throws Exception {
    final Set<String> nodes = new HashSet<>(internalCluster().startNodes(3));
    ensureStableCluster(3);

    final ServiceDisruptionScheme fullIsolation = new NetworkDisruption(
        new NetworkDisruption.IsolateAllNodes(nodes),
        NetworkDisruption.DISCONNECT
    );
    internalCluster().setDisruptionScheme(fullIsolation);

    logger.info("--> forcing a complete election to make sure \"preferred\" master is elected");
    fullIsolation.startDisrupting();
    for (String isolatedNode : nodes) {
        assertNoMaster(isolatedNode);
    }
    internalCluster().clearDisruptionScheme();
    ensureStableCluster(3);

    final String preferredMasterName = internalCluster().getMasterName();
    final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredMasterName).localNode();
    logger.info("--> preferred master is {}", preferredMaster);

    final Set<String> otherNodes = new HashSet<>(nodes);
    otherNodes.remove(preferredMasterName);

    final ServiceDisruptionScheme masterIsolation = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
    internalCluster().setDisruptionScheme(masterIsolation);
    masterIsolation.startDisrupting();

    // Create the index through a node other than the (now isolated) preferred master.
    final String coordinatingNode = randomFrom(otherNodes);
    final Settings.Builder singleShardNoReplicas = Settings.builder()
        .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
        .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0);
    client(coordinatingNode).admin().indices().prepareCreate("test").setSettings(singleShardNoReplicas).get();

    internalCluster().clearDisruptionScheme(false);
    internalCluster().setDisruptionScheme(fullIsolation);

    logger.info("--> forcing a complete election again");
    fullIsolation.startDisrupting();
    for (String isolatedNode : nodes) {
        assertNoMaster(isolatedNode);
    }
    fullIsolation.stopDisrupting();

    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    final boolean indexSurvived = state.metadata().hasIndex("test");
    if (indexSurvived == false) {
        fail("index 'test' was lost. current cluster state: " + state);
    }
}
Aggregations