Search in sources :

Example 11 with TwoPartitions

use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testIsolateMasterAndVerifyClusterStateConsensus.

/**
 * This test isolates the master from the rest of the cluster, waits for a new master to be elected, restores the
 * partition and verifies that all nodes agree on the new cluster state.
 * <p>
 * The agreement check compares, for every node against the first node's state: cluster state version, node count,
 * master node id, meta data version and the string form of the routing table.
 *
 * @throws Exception if any of the cluster helpers used by the test fail
 */
@TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.gateway:TRACE,org.elasticsearch.indices.store:TRACE")
public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
    final List<String> nodes = startCluster(3);
    assertAcked(prepareCreate("test").setSettings(Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))));
    ensureGreen();

    // cut the current master off from the other nodes
    String isolatedNode = internalCluster().getMasterName();
    TwoPartitions partitions = isolateNode(isolatedNode);
    NetworkDisruption networkDisruption = addRandomDisruptionType(partitions);
    networkDisruption.startDisrupting();
    String nonIsolatedNode = partitions.getMajoritySide().iterator().next();
    // make sure cluster reforms
    ensureStableCluster(2, nonIsolatedNode);
    // make sure isolated node picks up on things.
    assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));

    // restore isolation
    networkDisruption.stopDisrupting();
    for (String node : nodes) {
        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()), true, node);
    }
    logger.info("issue a reroute");
    // trigger a reroute now, instead of waiting for the background reroute of RerouteService
    assertAcked(client().admin().cluster().prepareReroute());
    // and wait for it to finish and for the cluster to stabilize
    ensureGreen("test");

    // verify all cluster states are the same; the first node's state is the reference
    ClusterState state = null;
    for (String node : nodes) {
        ClusterState nodeState = getNodeClusterState(node);
        if (state == null) {
            state = nodeState;
            continue;
        }
        // assert nodes are identical
        try {
            assertEquals("unequal versions", state.version(), nodeState.version());
            assertEquals("unequal node count", state.nodes().getSize(), nodeState.nodes().getSize());
            assertEquals("different masters ", state.nodes().getMasterNodeId(), nodeState.nodes().getMasterNodeId());
            assertEquals("different meta data version", state.metaData().version(), nodeState.metaData().version());
            if (!state.routingTable().toString().equals(nodeState.routingTable().toString())) {
                fail("different routing");
            }
        } catch (AssertionError e) {
            // Re-raise with both cluster states dumped, keeping the original assertion as the cause so its
            // stack trace (which pinpoints the failing comparison) is not lost.
            throw new AssertionError("failed comparing cluster state: " + e.getMessage() + "\n" + "--- cluster state of node [" + nodes.get(0) + "]: ---\n" + state + "\n--- cluster state [" + node + "]: ---\n" + nodeState, e);
        }
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) TimeValue(org.elasticsearch.common.unit.TimeValue) TestLogging(org.elasticsearch.test.junit.annotations.TestLogging)

Example 12 with TwoPartitions

use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testVerifyApiBlocksDuringPartition.

/**
 * Verify that the proper block is applied when nodes lose their master.
 * <p>
 * A random two-way partition of a three node cluster is disrupted twice: first the isolated node is checked for
 * {@code DiscoverySettings.NO_MASTER_BLOCK_WRITES}, then, after healing and setting the no-master-block setting to
 * {@code "all"}, for {@code DiscoverySettings.NO_MASTER_BLOCK_ALL}.
 *
 * @throws Exception if any of the cluster helpers used by the test fail
 */
public void testVerifyApiBlocksDuringPartition() throws Exception {
    startCluster(3);
    // Makes sure that the get request can be executed on each node locally:
    assertAcked(prepareCreate("test").setSettings(Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)));
    // Everything is stable now, it is now time to simulate evil...
    // but first make sure we have no initializing shards and all is green
    // (waiting for green here, because indexing / search in a yellow index is fine as long as no other nodes go down)
    ensureGreen("test");

    TwoPartitions partitions = TwoPartitions.random(random(), internalCluster().getNodeNames());
    NetworkDisruption networkDisruption = addRandomDisruptionType(partitions);
    assertEquals(1, partitions.getMinoritySide().size());
    final String isolatedNode = partitions.getMinoritySide().iterator().next();
    assertEquals(2, partitions.getMajoritySide().size());
    final String nonIsolatedNode = partitions.getMajoritySide().iterator().next();

    // Simulate a network issue between the unlucky node and the rest of the cluster.
    networkDisruption.startDisrupting();
    // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
    // continuously ping until network failures have been resolved. However,
    // it may take a bit before the node detects it has been cut off from the elected master
    logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
    assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_WRITES, TimeValue.timeValueSeconds(10));

    // the stability check below is issued via the non-isolated node, so that is the node to report in the log
    logger.info("wait until elected master has been removed and a new 2 node cluster was from (via [{}])", nonIsolatedNode);
    ensureStableCluster(2, nonIsolatedNode);

    // every node on the majority side must have a master and carry no global blocks
    for (String node : partitions.getMajoritySide()) {
        ClusterState nodeState = getNodeClusterState(node);
        final boolean hasMaster = nodeState.nodes().getMasterNode() != null;
        final boolean hasGlobalBlocks = !nodeState.blocks().global().isEmpty();
        if (!hasMaster || hasGlobalBlocks) {
            fail("node [" + node + "] has no master or has blocks, despite of being on the right side of the partition. State dump:\n" + nodeState);
        }
    }

    networkDisruption.stopDisrupting();
    // Wait until the master node sees all 3 nodes again.
    ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkDisruption.expectedTimeToHeal().millis()));

    logger.info("Verify no master block with {} set to {}", DiscoverySettings.NO_MASTER_BLOCK_SETTING.getKey(), "all");
    client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(DiscoverySettings.NO_MASTER_BLOCK_SETTING.getKey(), "all")).get();
    networkDisruption.startDisrupting();
    // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
    // continuously ping until network failures have been resolved. However,
    // it may take a bit before the node detects it has been cut off from the elected master
    logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
    assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_ALL, TimeValue.timeValueSeconds(10));
    // make sure we have stable cluster & cross partition recoveries are canceled by the removal of the missing node
    // the unresponsive partition causes recoveries to only time out after 15m (default) and these will cause
    // the test to fail due to unfreed resources
    ensureStableCluster(2, nonIsolatedNode);
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) TimeValue(org.elasticsearch.common.unit.TimeValue)

Example 13 with TwoPartitions

use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testIndicesDeleted.

/**
 * Checks that an index deletion is carried through even when a master transition happens in between.
 * Test for https://github.com/elastic/elasticsearch/issues/11665
 *
 * @throws Exception if any of the cluster helpers used by the test fail
 */
public void testIndicesDeleted() throws Exception {
    final String idxName = "test";
    final Settings settings = Settings.builder()
        .put(DEFAULT_SETTINGS)
        // don't wait on isolated data node
        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s")
        // wait till cluster state is committed
        .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s")
        .build();
    configureCluster(settings, 3, null, 2);

    // two master-only nodes plus a single data-only node
    final List<String> masterEligibleNodes = internalCluster().startMasterOnlyNodes(2);
    final String dataOnlyNode = internalCluster().startDataOnlyNode();
    ensureStableCluster(3);
    assertAcked(prepareCreate(idxName));

    // disrupt the link between the currently elected master and the data node
    final String electedMaster = internalCluster().getMasterName();
    final TwoPartitions masterVersusData = new TwoPartitions(electedMaster, dataOnlyNode);
    final NetworkDisruption disruption = new NetworkDisruption(masterVersusData, new NetworkUnresponsive());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    // We know this will time out due to the partition, we check manually below to not proceed until
    // the delete has been applied to the master node and the master eligible node.
    internalCluster().client(electedMaster).admin().indices().prepareDelete(idxName).setTimeout("0s").get();

    // Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node.
    assertBusy(() -> {
        for (String candidate : masterEligibleNodes) {
            final ClusterState candidateState = internalCluster().clusterService(candidate).state();
            assertFalse("index not deleted on " + candidate, candidateState.metaData().hasIndex(idxName));
        }
    });

    internalCluster().restartNode(electedMaster, InternalTestCluster.EMPTY_CALLBACK);
    ensureYellow();
    final boolean indexStillExists = client().admin().indices().prepareExists(idxName).get().isExists();
    assertFalse(indexStillExists);
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkUnresponsive(org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Example 14 with TwoPartitions

use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.

the class NetworkDisruptionIT method testNetworkPartitionWithNodeShutdown.

/**
 * Starts a network disruption between the first two nodes of the cluster, stops the first of them while the
 * disruption is still active, and then clears the disruption scheme.
 *
 * @throws IOException declared for the cluster helpers invoked by the test
 */
public void testNetworkPartitionWithNodeShutdown() throws IOException {
    internalCluster().ensureAtLeastNumDataNodes(2);

    final String[] clusterNodes = internalCluster().getNodeNames();
    final String firstNode = clusterNodes[0];
    final String secondNode = clusterNodes[1];

    final TwoPartitions topology = new TwoPartitions(firstNode, secondNode);
    final NetworkDisruption disruption = new NetworkDisruption(topology, new NetworkUnresponsive());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    // shut down one side of the partition while the disruption is in effect, then remove the scheme
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(firstNode));
    internalCluster().clearDisruptionScheme();
}
Also used : TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkUnresponsive(org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive)

Example 15 with TwoPartitions

use of org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions in project elasticsearch by elastic.

the class NetworkDisruptionTests method testTwoPartitions.

/**
 * Builds a {@code TwoPartitions} topology from two randomly generated sets of strings and hands it, together
 * with both sets, to {@code checkTwoPartitions} for verification.
 */
public void testTwoPartitions() {
    // generation order is kept: first side, then second side
    final Set<String> firstSide = generateRandomStringSet(1, 10);
    final Set<String> secondSide = generateRandomStringSet(1, 10);
    final TwoPartitions twoPartitions = new TwoPartitions(firstSide, secondSide);
    checkTwoPartitions(twoPartitions, firstSide, secondSide);
}
Also used : TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions)

Aggregations

TwoPartitions (org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) 16, NetworkDisruption (org.elasticsearch.test.disruption.NetworkDisruption) 12, HashSet (java.util.HashSet) 6, NetworkDisconnect (org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) 6, ClusterState (org.elasticsearch.cluster.ClusterState) 5, Settings (org.elasticsearch.common.settings.Settings) 4, IndexResponse (org.elasticsearch.action.index.IndexResponse) 3, TimeValue (org.elasticsearch.common.unit.TimeValue) 3, UnicastZenPing (org.elasticsearch.discovery.zen.UnicastZenPing) 3, ZenDiscovery (org.elasticsearch.discovery.zen.ZenDiscovery) 3, ZenPing (org.elasticsearch.discovery.zen.ZenPing) 3, TestZenDiscovery (org.elasticsearch.test.discovery.TestZenDiscovery) 3, NetworkUnresponsive (org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) 3, ArrayList (java.util.ArrayList) 2, CountDownLatch (java.util.concurrent.CountDownLatch) 2, ExecutionException (java.util.concurrent.ExecutionException) 2, AtomicReference (java.util.concurrent.atomic.AtomicReference) 2, IndexSettings (org.elasticsearch.index.IndexSettings) 2, TestLogging (org.elasticsearch.test.junit.annotations.TestLogging) 2, IOException (java.io.IOException) 1