Search in sources :

Example 1 with NetworkDisconnect

use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testElectMasterWithLatestVersion.

/**
 * Forces a complete master election twice and verifies that an index created between the two
 * elections is still present in the cluster state after the second disruption is lifted.
 * <p>
 * The test first disconnects all three nodes from each other until none of them reports a master,
 * heals the network and records the elected ("preferred") master. It then cuts that master off from
 * the other two nodes, creates the index {@code test} through one of those two nodes, and finally
 * isolates all nodes once more before inspecting the resulting cluster state.
 */
public void testElectMasterWithLatestVersion() throws Exception {
    configureCluster(3, null, 2);
    final Set<String> clusterNodes = new HashSet<>(internalCluster().startNodes(3));
    ensureStableCluster(3);

    // Phase 1: disconnect every node from every other node, wait until nobody sees a master, then heal.
    final ServiceDisruptionScheme fullIsolation = new NetworkDisruption(
        new NetworkDisruption.IsolateAllNodes(clusterNodes),
        new NetworkDisconnect()
    );
    internalCluster().setDisruptionScheme(fullIsolation);
    logger.info("--> forcing a complete election to make sure \"preferred\" master is elected");
    fullIsolation.startDisrupting();
    for (String nodeName : clusterNodes) {
        assertNoMaster(nodeName);
    }
    internalCluster().clearDisruptionScheme();
    ensureStableCluster(3);

    // The master elected after the full isolation must have the smallest node id of the cluster.
    final String preferredMasterName = internalCluster().getMasterName();
    final DiscoveryNode preferredMaster = internalCluster().clusterService(preferredMasterName).localNode();
    for (String nodeName : clusterNodes) {
        final DiscoveryNode candidate = internalCluster().clusterService(nodeName).localNode();
        assertThat(candidate.getId(), greaterThanOrEqualTo(preferredMaster.getId()));
    }
    logger.info("--> preferred master is {}", preferredMaster);

    // Phase 2: separate the preferred master from the rest and create an index through the other side.
    final Set<String> nonPreferredNodes = new HashSet<>(clusterNodes);
    nonPreferredNodes.remove(preferredMasterName);
    final ServiceDisruptionScheme masterIsolation = new NetworkDisruption(
        new NetworkDisruption.TwoPartitions(Collections.singleton(preferredMasterName), nonPreferredNodes),
        new NetworkDisconnect()
    );
    internalCluster().setDisruptionScheme(masterIsolation);
    masterIsolation.startDisrupting();
    final String coordinatingNode = randomFrom(nonPreferredNodes);
    assertAcked(
        client(coordinatingNode).admin()
            .indices()
            .prepareCreate("test")
            .setSettings(INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1, INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 0)
    );

    // Phase 3: swap straight to the full isolation again.
    // NOTE(review): the `false` argument presumably skips the post-heal health check - confirm.
    internalCluster().clearDisruptionScheme(false);
    internalCluster().setDisruptionScheme(fullIsolation);
    logger.info("--> forcing a complete election again");
    fullIsolation.startDisrupting();
    for (String nodeName : clusterNodes) {
        assertNoMaster(nodeName);
    }
    fullIsolation.stopDisrupting();

    // Whatever master comes out of the second election must still know about the index.
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    final boolean indexSurvived = state.metaData().hasIndex("test");
    if (!indexSurvived) {
        fail("index 'test' was lost. current cluster state: " + state);
    }
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) HashSet(java.util.HashSet)

Example 2 with NetworkDisconnect

use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testUnicastSinglePingResponseContainsMaster.

/**
 * A 4 node cluster with minimum_master_nodes set to 3 in which each node has one unicast endpoint.
 * One node is partitioned from the master node while the temporal unicast responses are empty.
 * Once the partition is healed, the single ping response contains a master node; the rejoining
 * node should accept that master and connect to it.
 */
public void testUnicastSinglePingResponseContainsMaster() throws Exception {
    final List<String> clusterNodes = startCluster(4, -1, new int[] { 0 });

    // Determine which node is currently the elected master.
    final String masterNode = internalCluster().getMasterName();
    logger.info("---> legit elected master node={}", masterNode);

    // Candidates for isolation: everything except the master and the first node, which is the
    // unicast endpoint of all the other nodes and therefore must stay connected.
    final List<String> isolationCandidates = new ArrayList<>(clusterNodes);
    isolationCandidates.remove(masterNode);
    final String unicastEndpointNode = clusterNodes.get(0);
    isolationCandidates.remove(unicastEndpointNode);
    final String isolatedNode = isolationCandidates.get(0);

    // Forcefully clear the temporal response list. Otherwise the node in the unicast host list
    // includes all the other nodes that have pinged it and the issue doesn't manifest.
    // NOTE(review): only the single Discovery instance returned by getInstance() is cleared here,
    // although the intent appears to be clearing it on all nodes - confirm.
    final TestZenDiscovery discovery = (TestZenDiscovery) internalCluster().getInstance(Discovery.class);
    final ZenPing ping = discovery.getZenPing();
    if (ping instanceof UnicastZenPing) {
        ((UnicastZenPing) ping).clearTemporalResponses();
    }

    // Simulate a network issue between the isolated node and the elected master in both directions.
    final NetworkDisruption masterLinkCut = new NetworkDisruption(
        new TwoPartitions(masterNode, isolatedNode),
        new NetworkDisconnect()
    );
    setDisruptionScheme(masterLinkCut);
    masterLinkCut.startDisrupting();

    // Wait until the elected master has removed the isolated node from the cluster.
    ensureStableCluster(3, masterNode);
    // The isolated node must report no master, so that it starts pinging.
    assertNoMaster(isolatedNode);

    masterLinkCut.stopDisrupting();
    // Wait until the master node sees all 4 nodes again.
    ensureStableCluster(4);

    // The elected master must not have changed: the isolated node could never have elected itself
    // because minimum_master_nodes of 3 could never be satisfied.
    assertMaster(masterNode, clusterNodes);
}
Also used : UnicastZenPing(org.elasticsearch.discovery.zen.UnicastZenPing) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) ArrayList(java.util.ArrayList) ZenDiscovery(org.elasticsearch.discovery.zen.ZenDiscovery) TestZenDiscovery(org.elasticsearch.test.discovery.TestZenDiscovery) ZenPing(org.elasticsearch.discovery.zen.ZenPing) UnicastZenPing(org.elasticsearch.discovery.zen.UnicastZenPing) TestZenDiscovery(org.elasticsearch.test.discovery.TestZenDiscovery) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)

Example 3 with NetworkDisconnect

use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testFailWithMinimumMasterNodesConfigured.

/**
 * Verifies that no split brain occurs under a partial network partition.
 * See https://github.com/elastic/elasticsearch/issues/2488
 */
public void testFailWithMinimumMasterNodesConfigured() throws Exception {
    final List<String> clusterNodes = startCluster(3);

    // Determine which node is currently the elected master.
    final String masterNode = internalCluster().getMasterName();
    logger.info("---> legit elected master node={}", masterNode);

    // Randomly choose one of the nodes that is not the elected master.
    final Set<String> masterIneligibleForPick = new HashSet<>(clusterNodes);
    masterIneligibleForPick.remove(masterNode);
    final String unluckyNode = randomFrom(masterIneligibleForPick.toArray(Strings.EMPTY_ARRAY));

    // Simulate a network issue between the unlucky node and the elected master in both directions.
    final NetworkDisruption masterLinkCut = new NetworkDisruption(
        new TwoPartitions(masterNode, unluckyNode),
        new NetworkDisconnect()
    );
    setDisruptionScheme(masterLinkCut);
    masterLinkCut.startDisrupting();

    // Wait until the elected master has removed the unlucky node from the cluster.
    ensureStableCluster(2, masterNode);

    // The unlucky node must report *no* master, since it cannot reach the elected master and should
    // keep pinging until the network failure is resolved. It may take a moment before the node
    // notices that it has been cut off.
    assertNoMaster(unluckyNode);

    masterLinkCut.stopDisrupting();
    // Wait until the master node sees all 3 nodes again.
    ensureStableCluster(3);

    // The elected master must not have changed: the unlucky node could never have elected itself
    // because minimum_master_nodes of 2 could never be satisfied.
    assertMaster(masterNode, clusterNodes);
}
Also used : TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) HashSet(java.util.HashSet)

Example 4 with NetworkDisconnect

use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect in project elasticsearch by elastic.

the class PrimaryAllocationIT method createStaleReplicaScenario.

/**
 * Builds a cluster state in which the only remaining shard copy of index {@code test} is stale.
 * <p>
 * Steps: start one master-only and two data-only nodes, create a one-shard/one-replica index and
 * index a document; partition the node holding the primary away from the master and the replica
 * node; index a second document through the former replica node; stop that node; heal the
 * partition so the old primary node rejoins; then trigger reroutes and assert that both shard
 * copies remain unassigned.
 */
private void createStaleReplicaScenario() throws Exception {
    logger.info("--> starting 3 nodes, 1 master, 2 data");
    final String master = internalCluster().startMasterOnlyNode(Settings.EMPTY);
    internalCluster().startDataOnlyNodes(2);

    assertAcked(
        client().admin()
            .indices()
            .prepareCreate("test")
            .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 1))
            .get()
    );
    ensureGreen();

    logger.info("--> indexing...");
    client().prepareIndex("test", "type1")
        .setSource(jsonBuilder().startObject().field("field", "value1").endObject())
        .get();
    refresh();

    final ClusterState state = client().admin().cluster().prepareState().all().get().getState();
    final List<ShardRouting> shards = state.routingTable().allShards("test");
    assertThat(shards.size(), equalTo(2));

    // Exactly two copies exist, so whichever one is not at primaryIdx is treated as the replica.
    final int primaryIdx = shards.get(0).primary() ? 0 : 1;
    final int replicaIdx = 1 - primaryIdx;
    final String primaryNode = state.getRoutingNodes().node(shards.get(primaryIdx).currentNodeId()).node().getName();
    final String replicaNode = state.getRoutingNodes().node(shards.get(replicaIdx).currentNodeId()).node().getName();

    // Put the master and the replica node on one side, the primary node alone on the other.
    final NetworkDisruption partition = new NetworkDisruption(
        new TwoPartitions(Sets.newHashSet(master, replicaNode), Collections.singleton(primaryNode)),
        new NetworkDisconnect()
    );
    internalCluster().setDisruptionScheme(partition);
    logger.info("--> partitioning node with primary shard from rest of cluster");
    partition.startDisrupting();
    ensureStableCluster(2, master);

    logger.info("--> index a document into previous replica shard (that is now primary)");
    client(replicaNode).prepareIndex("test", "type1")
        .setSource(jsonBuilder().startObject().field("field", "value1").endObject())
        .get();

    logger.info("--> shut down node that has new acknowledged document");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(replicaNode));
    ensureStableCluster(1, master);

    partition.stopDisrupting();
    logger.info("--> waiting for node with old primary shard to rejoin the cluster");
    ensureStableCluster(2, master);

    logger.info("--> check that old primary shard does not get promoted to primary again");
    // First reroute: kick off shard state fetching and wait until no fetch is in flight any more.
    client(master).admin().cluster().prepareReroute().get();
    assertBusy(
        () -> assertThat(internalCluster().getInstance(GatewayAllocator.class, master).getNumberOfInFlightFetch(), equalTo(0))
    );
    // Second reroute: with all shard states fetched, both copies must still be unassigned.
    assertThat(
        client(master).admin().cluster().prepareReroute().get().getState().getRoutingNodes().unassigned().size(),
        equalTo(2)
    );
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) GatewayAllocator(org.elasticsearch.gateway.GatewayAllocator) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)

Example 5 with NetworkDisconnect

use of org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect in project elasticsearch by elastic.

the class IndexingMasterFailoverIT method testMasterFailoverDuringIndexingWithMappingChanges.

/**
 * Indexing operations that entail mapping changes require a blocking request to the master node to
 * update the mapping. If the master node is being disrupted, or if it cannot commit cluster state
 * changes, the request has to be retried within the timeout limits. That retry logic is implemented
 * in TransportMasterNodeAction and is exercised by the master failover scenario below.
 */
public void testMasterFailoverDuringIndexingWithMappingChanges() throws Throwable {
    logger.info("--> start 4 nodes, 3 master, 1 data");
    final Settings sharedSettings = Settings.builder()
        // for hitting simulated network failures quickly
        .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s")
        // for hitting simulated network failures quickly
        .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1")
        // still long to induce failures but not too long so test won't time out
        .put("discovery.zen.join_timeout", "10s")
        // for hitting simulated network failures quickly
        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
        .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2)
        .build();
    internalCluster().startMasterOnlyNodes(3, sharedSettings);
    final String dataNode = internalCluster().startDataOnlyNode(sharedSettings);
    logger.info("--> wait for all nodes to join the cluster");
    ensureStableCluster(4);

    // Data with mapping changes is indexed into the cluster while a master failover is in progress.
    client().admin()
        .indices()
        .prepareCreate("myindex")
        .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.number_of_replicas", 0))
        .get();
    ensureGreen("myindex");

    // Two parties: the indexing thread and this test thread start their work together.
    final CyclicBarrier startSignal = new CyclicBarrier(2);
    final Thread indexingThread = new Thread(() -> {
        try {
            startSignal.await();
        } catch (InterruptedException e) {
            logger.warn("Barrier interrupted", e);
            return;
        } catch (BrokenBarrierException e) {
            logger.warn("Broken barrier", e);
            return;
        }
        for (int docNumber = 0; docNumber < 10; docNumber++) {
            // every document carries a new field name, so each request needs a mapping update
            final IndexResponse response = client(dataNode).prepareIndex("myindex", "mytype")
                .setSource("field_" + docNumber, "val")
                .get();
            assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
        }
    });
    indexingThread.setName("indexingThread");
    indexingThread.start();
    startSignal.await();

    // Interrupt communication between the current master and all other nodes in the cluster.
    final String master = internalCluster().getMasterName();
    final Set<String> nodesWithoutMaster = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
    nodesWithoutMaster.remove(master);
    final NetworkDisruption partition = new NetworkDisruption(
        new TwoPartitions(Collections.singleton(master), nodesWithoutMaster),
        new NetworkDisconnect()
    );
    internalCluster().setDisruptionScheme(partition);
    logger.info("--> disrupting network");
    partition.startDisrupting();

    logger.info("--> waiting for new master to be elected");
    ensureStableCluster(3, dataNode);

    partition.stopDisrupting();
    logger.info("--> waiting to heal");
    ensureStableCluster(4);

    // All ten documents must be searchable once the indexing thread has finished.
    indexingThread.join();
    ensureGreen("myindex");
    refresh();
    assertThat(client().prepareSearch("myindex").get().getHits().getTotalHits(), equalTo(10L));
}
Also used : BrokenBarrierException(java.util.concurrent.BrokenBarrierException) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) CyclicBarrier(java.util.concurrent.CyclicBarrier) IndexResponse(org.elasticsearch.action.index.IndexResponse) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) DiscoverySettings(org.elasticsearch.discovery.DiscoverySettings) Settings(org.elasticsearch.common.settings.Settings) HashSet(java.util.HashSet)

Aggregations

NetworkDisruption (org.elasticsearch.test.disruption.NetworkDisruption)9 NetworkDisconnect (org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)9 TwoPartitions (org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions)7 HashSet (java.util.HashSet)5 ClusterState (org.elasticsearch.cluster.ClusterState)3 UnicastZenPing (org.elasticsearch.discovery.zen.UnicastZenPing)3 ZenDiscovery (org.elasticsearch.discovery.zen.ZenDiscovery)3 ZenPing (org.elasticsearch.discovery.zen.ZenPing)3 TestZenDiscovery (org.elasticsearch.test.discovery.TestZenDiscovery)3 NetworkLinkDisruptionType (org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType)3 NetworkUnresponsive (org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive)3 ServiceDisruptionScheme (org.elasticsearch.test.disruption.ServiceDisruptionScheme)3 ArrayList (java.util.ArrayList)2 IndexResponse (org.elasticsearch.action.index.IndexResponse)2 Settings (org.elasticsearch.common.settings.Settings)2 DisruptedLinks (org.elasticsearch.test.disruption.NetworkDisruption.DisruptedLinks)2 SlowClusterStateProcessing (org.elasticsearch.test.disruption.SlowClusterStateProcessing)2 IOException (java.io.IOException)1 Arrays (java.util.Arrays)1 Collection (java.util.Collection)1