Search in sources :

Example 1 with ServiceDisruptionScheme

use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.

The class MasterDisruptionIT, method testMappingTimeout.

/**
 * Sends a bulk request while a {@link BlockMasterServiceOnMaster} disruption is active and
 * {@code indices.mapping.dynamic_timeout} is set to {@code 1ms}, and expects the bulk response
 * to report failures. After the disruption is stopped, waits until every shard of {@code test}
 * reports a global checkpoint equal to its local checkpoint.
 */
public void testMappingTimeout() throws Exception {
    startCluster(3);

    // Single shard with one replica; the current master's name goes into the
    // allocation exclude-by-name setting of the index.
    final Settings testIndexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 1)
        .put("index.routing.allocation.exclude._name", internalCluster().getMasterName())
        .build();
    createIndex("test", testIndexSettings);

    // Index a first document so that field "f" already exists before the disruption starts.
    index("test", "doc", "1", "{ \"f\": 1 }");
    ensureGreen();

    assertAcked(
        client().admin()
            .cluster()
            .prepareUpdateSettings()
            .setTransientSettings(Settings.builder().put("indices.mapping.dynamic_timeout", "1ms"))
    );

    ServiceDisruptionScheme masterBlock = new BlockMasterServiceOnMaster(random());
    setDisruptionScheme(masterBlock);
    masterBlock.startDisrupting();

    // Documents 2 and 4 reuse field "f"; document 3 brings the so-far-unseen field "g".
    BulkRequestBuilder bulkRequest = client().prepareBulk();
    bulkRequest.add(client().prepareIndex("test").setId("2").setSource("{ \"f\": 1 }", XContentType.JSON));
    bulkRequest.add(client().prepareIndex("test").setId("3").setSource("{ \"g\": 1 }", XContentType.JSON));
    bulkRequest.add(client().prepareIndex("test").setId("4").setSource("{ \"f\": 1 }", XContentType.JSON));
    BulkResponse response = bulkRequest.get();
    assertTrue(response.hasFailures());

    masterBlock.stopDisrupting();

    // Retry until, for every shard, global checkpoint == local checkpoint.
    assertBusy(() -> {
        IndicesStatsResponse indexStats = client().admin().indices().prepareStats("test").clear().get();
        for (ShardStats copy : indexStats.getShards()) {
            assertThat(
                copy.getShardRouting().toString(),
                copy.getSeqNoStats().getGlobalCheckpoint(),
                equalTo(copy.getSeqNoStats().getLocalCheckpoint())
            );
        }
    });
}
Also used : ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) BulkResponse(org.opensearch.action.bulk.BulkResponse) BulkRequestBuilder(org.opensearch.action.bulk.BulkRequestBuilder) BlockMasterServiceOnMaster(org.opensearch.test.disruption.BlockMasterServiceOnMaster)

Example 2 with ServiceDisruptionScheme

use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.

The class DedicatedClusterSnapshotRestoreIT, method testDataNodeRestartWithBusyMasterDuringSnapshot.

/**
 * Starts a snapshot of {@code test-idx} without waiting for completion, activates a
 * {@link BusyMasterServiceDisruption}, restarts the data node returned by
 * {@code blockNodeWithIndex}, and then verifies two things: the snapshot status reports at
 * least one failed shard, and, once the node is unblocked and the disruption stopped, the
 * snapshot {@code test-snap} reaches a completed state.
 */
public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
    logger.info("-->  starting a master node and two data nodes");
    internalCluster().startMasterOnlyNode();
    internalCluster().startDataOnlyNodes(2);
    final Path repoPath = randomRepoPath();
    // Repository of type "mock" rooted at the random path.
    createRepository("test-repo", "mock", repoPath);
    maybeInitWithOldSnapshotVersion("test-repo", repoPath);
    assertAcked(prepareCreate("test-idx", 0, indexSettingsNoReplicas(5)));
    ensureGreen();
    indexRandomDocs("test-idx", randomIntBetween(50, 100));
    // Presumably blocks the repository on a data node that holds test-idx and returns that
    // node's name -- helper is defined elsewhere, confirm there.
    final String dataNode = blockNodeWithIndex("test-repo", "test-idx");
    logger.info("-->  snapshot");
    ServiceDisruptionScheme disruption = new BusyMasterServiceDisruption(random(), Priority.HIGH);
    setDisruptionScheme(disruption);
    // The create-snapshot request goes through a client for the master node and does not wait for
    // completion; the disruption is only started after this call has returned.
    client(internalCluster().getMasterName()).admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
    disruption.startDisrupting();
    logger.info("-->  restarting data node, which should cause primary shards to be failed");
    internalCluster().restartNode(dataNode, InternalTestCluster.EMPTY_CALLBACK);
    logger.info("-->  wait for shard snapshots to show as failed");
    // Poll the snapshot status for up to 60 seconds until at least one shard is counted as failed.
    assertBusy(() -> assertThat(clusterAdmin().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots().get(0).getShardsStats().getFailedShards(), greaterThanOrEqualTo(1)), 60L, TimeUnit.SECONDS);
    // Release the blocked node first, then stop the master disruption.
    unblockNode("test-repo", dataNode);
    disruption.stopDisrupting();
    // check that snapshot completes
    assertBusy(() -> {
        GetSnapshotsResponse snapshotsStatusResponse = clusterAdmin().prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
        assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
        SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
        // The state's string form is used as the assertion failure message.
        assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
    }, 60L, TimeUnit.SECONDS);
}
Also used : Path(java.nio.file.Path) BusyMasterServiceDisruption(org.opensearch.test.disruption.BusyMasterServiceDisruption) GetSnapshotsResponse(org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse) ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) Matchers.containsString(org.hamcrest.Matchers.containsString)

Example 3 with ServiceDisruptionScheme

use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.

The class ConcurrentSeqNoVersioningIT, method testSeqNoCASLinearizability.

// Test info: disrupt network for up to 8s in a number of rounds and check that we only get true positive CAS results when running
// multiple threads doing CAS updates.
// Wait up to 1 minute (+10s in thread to ensure it does not time out) for threads to complete previous round before initiating next
// round.
//
// Flow: index one initial document per key, start the CAS update threads, run 2-5 disruption rounds that are synchronised with the
// threads through a shared CyclicBarrier, always terminate and join the threads, and finally assert linearizability per partition.
public void testSeqNoCASLinearizability() {
    // Upper bound, in seconds, on how long the main thread waits at the barrier while a disruption is active.
    final int disruptTimeSeconds = scaledRandomIntBetween(1, 8);
    // Index "test" gets 1-3 shards and 0-3 replicas.
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2)).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomInt(3))));
    ensureGreen();
    int numberOfKeys = randomIntBetween(1, 10);
    logger.info("--> Indexing initial doc for {} keys", numberOfKeys);
    // One Partition per key "ID:<i>", seeded with the primary term and seq no returned by the initial index operation.
    List<Partition> partitions = IntStream.range(0, numberOfKeys).mapToObj(i -> client().prepareIndex("test").setId("ID:" + i).setSource("value", -1).get()).map(response -> new Partition(response.getId(), new Version(response.getPrimaryTerm(), response.getSeqNo()))).collect(Collectors.toList());
    int threadCount = randomIntBetween(3, 20);
    // +1 for main thread.
    CyclicBarrier roundBarrier = new CyclicBarrier(threadCount + 1);
    // Every worker shares the barrier and the partition list; the last constructor argument is disruptTimeSeconds + 1
    // (presumably a per-thread timeout in seconds -- confirm against CASUpdateThread).
    List<CASUpdateThread> threads = IntStream.range(0, threadCount).mapToObj(i -> new CASUpdateThread(i, roundBarrier, partitions, disruptTimeSeconds + 1)).collect(Collectors.toList());
    logger.info("--> Starting {} threads", threadCount);
    threads.forEach(Thread::start);
    try {
        int rounds = randomIntBetween(2, 5);
        logger.info("--> Running {} rounds", rounds);
        for (int i = 0; i < rounds; ++i) {
            ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
            // Rendezvous with the workers (up to 1 minute) before the disruption starts.
            roundBarrier.await(1, TimeUnit.MINUTES);
            disruptionScheme.startDisrupting();
            logger.info("--> round {}", i);
            try {
                // Second rendezvous, bounded by the disruption time.
                roundBarrier.await(disruptTimeSeconds, TimeUnit.SECONDS);
            } catch (TimeoutException e) {
                // A timeout here is expected and does not fail the test; resetting the barrier makes it usable for the next round.
                roundBarrier.reset();
            }
            internalCluster().clearDisruptionScheme(false);
            // heal cluster faster to reduce test time.
            ensureFullyConnectedCluster();
        }
    } catch (InterruptedException | BrokenBarrierException | TimeoutException e) {
        // Reached from the 1-minute await, or from an interrupt or broken barrier on either await.
        // NOTE(review): the interrupt flag is not restored for InterruptedException; the finally block below still has to wait on
        // the worker threads, so confirm the effect on CASUpdateThread.await before changing this.
        logger.error("Timed out, dumping stack traces of all threads:");
        threads.forEach(thread -> logger.info(thread.toString() + ":\n" + ExceptionsHelper.formatStackTrace(thread.getStackTrace())));
        throw new RuntimeException(e);
    } finally {
        logger.info("--> terminating test");
        // Signal all workers first, then wait for each of them, then fail if any thread is still alive.
        threads.forEach(CASUpdateThread::terminate);
        threads.forEach(CASUpdateThread::await);
        threads.stream().filter(Thread::isAlive).forEach(t -> fail("Thread still alive: " + t));
    }
    // Only reached when no exception escaped the rounds above.
    partitions.forEach(Partition::assertLinearizable);
}
Also used : NamedWriteable(org.opensearch.common.io.stream.NamedWriteable) IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) LinearizabilityChecker(org.opensearch.cluster.coordination.LinearizabilityChecker) BytesReference(org.opensearch.common.bytes.BytesReference) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) IndexResponse(org.opensearch.action.index.IndexResponse) ThreadPool(org.opensearch.threadpool.ThreadPool) AbstractDisruptionTestCase(org.opensearch.discovery.AbstractDisruptionTestCase) TimeoutException(java.util.concurrent.TimeoutException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StreamOutput(org.opensearch.common.io.stream.StreamOutput) Random(java.util.Random) InputStreamStreamInput(org.opensearch.common.io.stream.InputStreamStreamInput) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) NamedWriteableRegistry(org.opensearch.common.io.stream.NamedWriteableRegistry) ArrayList(java.util.ArrayList) Matcher(java.util.regex.Matcher) NamedWriteableAwareStreamInput(org.opensearch.common.io.stream.NamedWriteableAwareStreamInput) StreamInput(org.opensearch.common.io.stream.StreamInput) OpenSearchAssertions.assertAcked(org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked) SuppressForbidden(org.opensearch.common.SuppressForbidden) CyclicBarrier(java.util.concurrent.CyclicBarrier) VersionConflictEngineException(org.opensearch.index.engine.VersionConflictEngineException) ExceptionsHelper(org.opensearch.ExceptionsHelper) Settings(org.opensearch.common.settings.Settings) IOException(java.io.IOException) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ScheduledThreadPoolExecutor(java.util.concurrent.ScheduledThreadPoolExecutor) FileInputStream(java.io.FileInputStream) BytesStreamOutput(org.opensearch.common.io.stream.BytesStreamOutput) 
ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) TimeUnit(java.util.concurrent.TimeUnit) Consumer(java.util.function.Consumer) Base64(java.util.Base64) List(java.util.List) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Optional(java.util.Optional) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Pattern(java.util.regex.Pattern) IndexRequest(org.opensearch.action.index.IndexRequest) OpenSearchIntegTestCase(org.opensearch.test.OpenSearchIntegTestCase) Scheduler(org.opensearch.threadpool.Scheduler) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) CyclicBarrier(java.util.concurrent.CyclicBarrier) TimeoutException(java.util.concurrent.TimeoutException)

Example 4 with ServiceDisruptionScheme

use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.

The class AbstractDisruptionTestCase, method addRandomDisruptionScheme.

/**
 * Builds a randomly chosen disruption scheme, registers it via {@code setDisruptionScheme}
 * and hands it back to the caller. The scheme is only registered here; this method does not
 * call {@code startDisrupting()}.
 *
 * <p>The disrupted links and the link disruption type are always drawn, even when the
 * {@code rarely()} branch ends up selecting {@link SlowClusterStateProcessing}, which uses
 * neither of them.
 *
 * @return the scheme that was registered
 */
public ServiceDisruptionScheme addRandomDisruptionScheme() {
    // TODO: add partial partitions
    final DisruptedLinks links = randomBoolean()
        ? TwoPartitions.random(random(), internalCluster().getNodeNames())
        : Bridge.random(random(), internalCluster().getNodeNames());

    // 0 -> unresponsive, 1 -> disconnect, 2 -> random network delay.
    final int typeChoice = randomInt(2);
    final NetworkLinkDisruptionType linkDisruption;
    if (typeChoice == 0) {
        linkDisruption = NetworkDisruption.UNRESPONSIVE;
    } else if (typeChoice == 1) {
        linkDisruption = NetworkDisruption.DISCONNECT;
    } else if (typeChoice == 2) {
        linkDisruption = NetworkDisruption.NetworkDelay.random(random());
    } else {
        // Guards against randomInt(2) handing back anything other than 0, 1 or 2.
        throw new IllegalArgumentException();
    }

    final ServiceDisruptionScheme chosen = rarely()
        ? new SlowClusterStateProcessing(random())
        : new NetworkDisruption(links, linkDisruption);
    setDisruptionScheme(chosen);
    return chosen;
}
Also used : SlowClusterStateProcessing(org.opensearch.test.disruption.SlowClusterStateProcessing) NetworkLinkDisruptionType(org.opensearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) DisruptedLinks(org.opensearch.test.disruption.NetworkDisruption.DisruptedLinks) ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.opensearch.test.disruption.NetworkDisruption)

Example 5 with ServiceDisruptionScheme

use of org.opensearch.test.disruption.ServiceDisruptionScheme in project OpenSearch by opensearch-project.

The class DiscoveryDisruptionIT, method testElectMasterWithLatestVersion.

/**
 * Verifies that index {@code test} is still present in the cluster state after a full
 * re-election.
 *
 * <p>Sequence: start three nodes; apply an {@code IsolateAllNodes}/{@code DISCONNECT}
 * disruption and assert that no node has a master; clear it and record the master elected
 * afterwards; apply the scheme returned by {@code isolateMasterDisruption}; create index
 * {@code test} through a client of one of the other nodes; apply the isolate-all disruption
 * a second time; stop it and fail if the resulting cluster state has no index {@code test}.
 */
public void testElectMasterWithLatestVersion() throws Exception {
    final Set<String> allNodes = new HashSet<>(internalCluster().startNodes(3));
    ensureStableCluster(3);

    ServiceDisruptionScheme fullIsolation = new NetworkDisruption(
        new NetworkDisruption.IsolateAllNodes(allNodes),
        NetworkDisruption.DISCONNECT
    );
    internalCluster().setDisruptionScheme(fullIsolation);
    logger.info("--> forcing a complete election to make sure \"preferred\" master is elected");
    fullIsolation.startDisrupting();
    for (String nodeName : allNodes) {
        assertNoMaster(nodeName);
    }
    internalCluster().clearDisruptionScheme();
    ensureStableCluster(3);

    // Whoever is master once the cluster has stabilised again is the "preferred" master.
    final String preferredName = internalCluster().getMasterName();
    final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredName).localNode();
    logger.info("--> preferred master is {}", preferredMaster);
    final Set<String> otherNodes = new HashSet<>(allNodes);
    otherNodes.remove(preferredName);

    // Create the index through one of the remaining nodes while the second disruption is active.
    final ServiceDisruptionScheme masterIsolation = isolateMasterDisruption(NetworkDisruption.DISCONNECT);
    internalCluster().setDisruptionScheme(masterIsolation);
    masterIsolation.startDisrupting();
    client(randomFrom(otherNodes)).admin()
        .indices()
        .prepareCreate("test")
        .setSettings(
            Settings.builder()
                .put(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1)
                .put(INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0)
        )
        .get();
    internalCluster().clearDisruptionScheme(false);

    // Second full isolation round, reusing the scheme from the first one.
    internalCluster().setDisruptionScheme(fullIsolation);
    logger.info("--> forcing a complete election again");
    fullIsolation.startDisrupting();
    for (String nodeName : allNodes) {
        assertNoMaster(nodeName);
    }
    fullIsolation.stopDisrupting();

    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    final boolean indexSurvived = state.metadata().hasIndex("test");
    if (indexSurvived == false) {
        fail("index 'test' was lost. current cluster state: " + state);
    }
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) ServiceDisruptionScheme(org.opensearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.opensearch.test.disruption.NetworkDisruption) HashSet(java.util.HashSet)

Aggregations

ServiceDisruptionScheme (org.opensearch.test.disruption.ServiceDisruptionScheme)6 ArrayList (java.util.ArrayList)2 List (java.util.List)2 TimeUnit (java.util.concurrent.TimeUnit)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 Collectors (java.util.stream.Collectors)2 IntStream (java.util.stream.IntStream)2 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)2 Matchers.equalTo (org.hamcrest.Matchers.equalTo)2 IndexResponse (org.opensearch.action.index.IndexResponse)2 ClusterState (org.opensearch.cluster.ClusterState)2 IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata)2 NetworkDisruption (org.opensearch.test.disruption.NetworkDisruption)2 FileInputStream (java.io.FileInputStream)1 IOException (java.io.IOException)1 Path (java.nio.file.Path)1 Arrays (java.util.Arrays)1 Base64 (java.util.Base64)1 Collections (java.util.Collections)1