Search in sources :

Example 26 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project crate by crate.

the class MasterDisruptionIT method testIsolateMasterAndVerifyClusterStateConsensus.

/**
 * Isolates the current master from the rest of the cluster, waits for a new master to be elected on the
 * majority side, heals the partition and verifies that all nodes agree on the resulting cluster state.
 * <p>
 * Steps:
 * <ol>
 *   <li>start a 3 node cluster and create table {@code t} with a random number of shards and replicas</li>
 *   <li>isolate the master with a randomly chosen disruption type</li>
 *   <li>wait for the two remaining nodes to form a stable cluster and for the isolated node to report no master</li>
 *   <li>stop the disruption and wait until every node sees a stable 3 node cluster again</li>
 *   <li>trigger a reroute and compare the cluster state of every node against that of the first node</li>
 * </ol>
 *
 * @throws Exception if a cluster operation fails or the busy-wait assertion does not succeed in time
 */
@TestLogging("_root:DEBUG," + "org.elasticsearch.cluster.service:TRACE," + "org.elasticsearch.gateway:TRACE," + "org.elasticsearch.indices.store:TRACE")
@Test
public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
    final List<String> nodes = startCluster(3);
    int numberOfShards = 1 + randomInt(2);
    int numberOfReplicas = randomInt(2);
    logger.info("creating table t with {} shards and {} replicas", numberOfShards, numberOfReplicas);
    execute("create table t (id int primary key, x string) clustered into " + numberOfShards + " shards with " + "(number_of_replicas = " + numberOfReplicas + " )");
    ensureGreen();
    String isolatedNode = internalCluster().getMasterName();
    TwoPartitions partitions = isolateNode(isolatedNode);
    NetworkDisruption networkDisruption = addRandomDisruptionType(partitions);
    networkDisruption.startDisrupting();
    String nonIsolatedNode = partitions.getMajoritySide().iterator().next();
    // make sure the cluster reforms on the majority side
    ensureStableCluster(2, nonIsolatedNode);
    // make sure the isolated node picks up on things.
    assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));
    // heal the partition
    networkDisruption.stopDisrupting();
    for (String node : nodes) {
        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()), true, node);
    }
    logger.info("issue a reroute");
    // trigger a reroute now, instead of waiting for the background reroute of RerouteService
    execute("ALTER CLUSTER REROUTE RETRY FAILED");
    // and wait for it to finish and for the cluster to stabilize
    ensureGreen();
    // verify all cluster states are the same
    // use assert busy to wait for cluster states to be applied (as publish_timeout has low value)
    assertBusy(() -> {
        // cluster state of the first node; every other node is compared against it
        ClusterState state = null;
        for (String node : nodes) {
            ClusterState nodeState = getNodeClusterState(node);
            if (state == null) {
                state = nodeState;
                continue;
            }
            // assert nodes are identical
            try {
                assertEquals("unequal versions", state.version(), nodeState.version());
                assertEquals("unequal node count", state.nodes().getSize(), nodeState.nodes().getSize());
                assertEquals("different masters ", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
                assertEquals("different meta data version", state.metadata().version(), nodeState.metadata().version());
                assertEquals("different routing", state.routingTable().toString(), nodeState.routingTable().toString());
            } catch (AssertionError t) {
                // Re-raise with both cluster states in the message, keeping the original error as the cause so
                // that the stack trace of the failing comparison is not lost. Still an AssertionError, so
                // assertBusy keeps retrying as before.
                throw new AssertionError("failed comparing cluster state: " + t.getMessage() + "\n" + "--- cluster state of node [" + nodes.get(0) + "]: ---\n" + state + "\n--- cluster state [" + node + "]: ---\n" + nodeState, t);
            }
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) TimeValue(io.crate.common.unit.TimeValue) TestLogging(org.elasticsearch.test.junit.annotations.TestLogging) Test(org.junit.Test)

Example 27 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project crate by crate.

the class SnapshotShardsServiceIT method testRetryPostingSnapshotStatusMessages.

/**
 * Starts a master-only and a data-only node, begins a snapshot of table {@code doc.test} against a blocked
 * mock repository, and then puts a random network delay between the two nodes before unblocking the
 * repository.
 * <p>
 * While the disruption is active the test waits until the snapshot shards service on the blocked node
 * reports every shard as {@code DONE}. After the disruption is stopped it waits until {@code sys.snapshots}
 * lists the snapshot as {@code SUCCESS} with no failures.
 *
 * @throws Exception if a cluster operation fails or one of the busy-wait assertions does not succeed in time
 */
public void testRetryPostingSnapshotStatusMessages() throws Exception {
    String masterOnly = internalCluster().startMasterOnlyNode();
    String dataOnly = internalCluster().startDataOnlyNode();

    logger.info("-->  creating repository");
    assertAcked(
        client().admin().cluster()
            .preparePutRepository("repo")
            .setType("mock")
            .setSettings(
                Settings.builder()
                    .put("location", randomRepoPath())
                    .put("compress", randomBoolean())
                    .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));

    // one primary per shard, no replicas: the number of shard snapshots equals the shard count
    final int shardCount = between(1, 10);
    execute(
        "create table doc.test(x integer) clustered into ? shards with (number_of_replicas=0)",
        new Object[] { shardCount });
    ensureGreen();

    final int docCount = scaledRandomIntBetween(50, 100);
    for (int doc = 0; doc < docCount; doc++) {
        execute("insert into doc.test values(?)", new Object[] { doc });
    }

    logger.info("--> blocking repository");
    String nodeWithBlock = blockNodeWithIndex("repo", "test");
    dataNodeClient().admin().cluster()
        .prepareCreateSnapshot("repo", "snapshot")
        .setWaitForCompletion(false)
        .setIndices("test")
        .get();
    waitForBlock(nodeWithBlock, "repo", TimeValue.timeValueSeconds(60));

    final SnapshotId idOfSnapshot = client().admin().cluster()
        .prepareGetSnapshots("repo")
        .setSnapshots("snapshot")
        .get()
        .getSnapshots()
        .get(0)
        .snapshotId();

    logger.info("--> start disrupting cluster");
    NetworkDisruption.TwoPartitions masterVersusData = new NetworkDisruption.TwoPartitions(masterOnly, dataOnly);
    NetworkDisruption.NetworkDelay randomDelay = NetworkDisruption.NetworkDelay.random(random());
    final NetworkDisruption disruption = new NetworkDisruption(masterVersusData, randomDelay);
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    logger.info("--> unblocking repository");
    unblockNode("repo", nodeWithBlock);

    // Read the shard snapshot stages from the service instance on the previously blocked node.
    SnapshotShardsService shardsServiceOnBlockedNode =
        internalCluster().getInstance(SnapshotShardsService.class, nodeWithBlock);
    assertBusy(
        () -> {
            final Snapshot target = new Snapshot("repo", idOfSnapshot);
            List<IndexShardSnapshotStatus.Stage> shardStages = shardsServiceOnBlockedNode
                .currentSnapshotShards(target)
                .values()
                .stream()
                .map(shardStatus -> shardStatus.asCopy().getStage())
                .collect(Collectors.toList());
            assertThat(shardStages, hasSize(shardCount));
            assertThat(shardStages, everyItem(equalTo(IndexShardSnapshotStatus.Stage.DONE)));
        },
        30L,
        TimeUnit.SECONDS);

    logger.info("--> stop disrupting cluster");
    disruption.stopDisrupting();
    internalCluster().clearDisruptionScheme(true);

    assertBusy(
        () -> {
            execute("select state, array_length(failures,0) from sys.snapshots where name='snapshot'");
            assertThat(response.rowCount(), is(1L));
            Object[] snapshotRow = response.rows()[0];
            assertThat(snapshotRow[0], is("SUCCESS"));
            assertThat(snapshotRow[1], is(nullValue()));
        },
        30L,
        TimeUnit.SECONDS);
}
Also used : ByteSizeUnit(org.elasticsearch.common.unit.ByteSizeUnit) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) Collection(java.util.Collection) IndexShardSnapshotStatus(org.elasticsearch.index.snapshots.IndexShardSnapshotStatus) Plugin(org.elasticsearch.plugins.Plugin) MockRepository(org.elasticsearch.snapshots.mockstore.MockRepository) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Settings(org.elasticsearch.common.settings.Settings) Matchers.everyItem(org.hamcrest.Matchers.everyItem) ESIntegTestCase(org.elasticsearch.test.ESIntegTestCase) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Matchers.nullValue(org.hamcrest.Matchers.nullValue) TimeValue(io.crate.common.unit.TimeValue) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Matchers.is(org.hamcrest.Matchers.is) MockTransportService(org.elasticsearch.test.transport.MockTransportService) ElasticsearchAssertions.assertAcked(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption)

Aggregations

NetworkDisruption (org.elasticsearch.test.disruption.NetworkDisruption)27 TwoPartitions (org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions)17 NetworkDisconnect (org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)14 ClusterState (org.elasticsearch.cluster.ClusterState)12 HashSet (java.util.HashSet)8 Test (org.junit.Test)8 ServiceDisruptionScheme (org.elasticsearch.test.disruption.ServiceDisruptionScheme)7 Settings (org.elasticsearch.common.settings.Settings)6 NetworkLinkDisruptionType (org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType)6 TestLogging (org.elasticsearch.test.junit.annotations.TestLogging)6 ArrayList (java.util.ArrayList)5 TimeValue (io.crate.common.unit.TimeValue)4 CountDownLatch (java.util.concurrent.CountDownLatch)4 AtomicReference (java.util.concurrent.atomic.AtomicReference)4 NetworkUnresponsive (org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive)4 Collection (java.util.Collection)3 List (java.util.List)3 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)3 Semaphore (java.util.concurrent.Semaphore)3 TimeUnit (java.util.concurrent.TimeUnit)3