Search in sources:

Example 6 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testRejoinDocumentExistsInAllShardCopies.

/**
 * Indexes a document while one node is cut off from the other two, then heals the partition and checks
 * that the document can be fetched with a {@code _local} preference through every node, including the
 * one that was isolated.
 */
public void testRejoinDocumentExistsInAllShardCopies() throws Exception {
    // Work on a private, mutable copy of the node names; it is shuffled further down.
    final List<String> shuffledNodes = new ArrayList<>(startCluster(3));

    // One primary plus two replicas on a three node cluster.
    final Settings.Builder indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2);
    assertAcked(prepareCreate("test").setSettings(indexSettings).get());
    ensureGreen("test");

    // Randomly choose which node gets isolated and which connected node is used for requests.
    Collections.shuffle(shuffledNodes, random());
    final String isolatedNode = shuffledNodes.get(0);
    final String notIsolatedNode = shuffledNodes.get(1);

    final TwoPartitions isolation = isolateNode(isolatedNode);
    final NetworkDisruption disruption = addRandomDisruptionType(isolation);
    disruption.startDisrupting();

    // Wait until the connected node sees a two node cluster and the index is at least yellow.
    ensureStableCluster(2, notIsolatedNode);
    assertFalse(client(notIsolatedNode).admin().cluster().prepareHealth("test").setWaitForYellowStatus().get().isTimedOut());

    // Index through a node that is not isolated.
    final IndexResponse indexed = internalCluster().client(notIsolatedNode)
        .prepareIndex("test", "type")
        .setSource("field", "value")
        .get();
    assertThat(indexed.getVersion(), equalTo(1L));
    final String docId = indexed.getId();

    logger.info("Verifying if document exists via node[{}]", notIsolatedNode);
    GetResponse fetched = internalCluster().client(notIsolatedNode)
        .prepareGet("test", "type", docId)
        .setPreference("_local")
        .get();
    assertThat(fetched.isExists(), is(true));
    assertThat(fetched.getVersion(), equalTo(1L));
    assertThat(fetched.getId(), equalTo(docId));

    // Heal the partition and wait for all three nodes and all shard copies to be back.
    disruption.stopDisrupting();
    ensureStableCluster(3);
    ensureGreen("test");

    // Every node must now be able to serve the document.
    for (String nodeName : shuffledNodes) {
        logger.info("Verifying if document exists after isolating node[{}] via node[{}]", isolatedNode, nodeName);
        fetched = internalCluster().client(nodeName)
            .prepareGet("test", "type", docId)
            .setPreference("_local")
            .get();
        assertThat(fetched.isExists(), is(true));
        assertThat(fetched.getVersion(), equalTo(1L));
        assertThat(fetched.getId(), equalTo(docId));
    }
}
Also used : TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) IndexResponse(org.elasticsearch.action.index.IndexResponse) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) GetResponse(org.elasticsearch.action.get.GetResponse)

Example 7 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project elasticsearch by elastic.

the class PrimaryAllocationIT method createStaleReplicaScenario.

/**
 * Sets up a cluster (one master-only node, two data nodes) holding index {@code test} with one primary and
 * one replica, then:
 * <ol>
 *   <li>partitions the node holding the primary away from the master and the replica node,</li>
 *   <li>indexes a document through the replica node,</li>
 *   <li>stops the replica node,</li>
 *   <li>heals the partition so the node with the old primary rejoins.</li>
 * </ol>
 * Finally asserts that, after a reroute, both shard copies are reported as unassigned.
 */
private void createStaleReplicaScenario() throws Exception {
    logger.info("--> starting 3 nodes, 1 master, 2 data");
    final String master = internalCluster().startMasterOnlyNode(Settings.EMPTY);
    internalCluster().startDataOnlyNodes(2);

    final Settings.Builder indexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 1);
    assertAcked(client().admin().indices().prepareCreate("test").setSettings(indexSettings).get());
    ensureGreen();

    logger.info("--> indexing...");
    client().prepareIndex("test", "type1")
        .setSource(jsonBuilder().startObject().field("field", "value1").endObject())
        .get();
    refresh();

    // Work out which node holds the primary and which one holds the replica.
    final ClusterState state = client().admin().cluster().prepareState().all().get().getState();
    final List<ShardRouting> copies = state.routingTable().allShards("test");
    assertThat(copies.size(), equalTo(2));
    final ShardRouting firstCopy = copies.get(0);
    final ShardRouting secondCopy = copies.get(1);
    final boolean firstIsPrimary = firstCopy.primary();
    final ShardRouting primaryShard = firstIsPrimary ? firstCopy : secondCopy;
    final ShardRouting replicaShard = firstIsPrimary ? secondCopy : firstCopy;
    final String primaryNode = state.getRoutingNodes().node(primaryShard.currentNodeId()).node().getName();
    final String replicaNode = state.getRoutingNodes().node(replicaShard.currentNodeId()).node().getName();

    // Master and replica node on one side, the primary's node alone on the other.
    final TwoPartitions sides = new TwoPartitions(Sets.newHashSet(master, replicaNode), Collections.singleton(primaryNode));
    final NetworkDisruption partition = new NetworkDisruption(sides, new NetworkDisconnect());
    internalCluster().setDisruptionScheme(partition);

    logger.info("--> partitioning node with primary shard from rest of cluster");
    partition.startDisrupting();
    ensureStableCluster(2, master);

    logger.info("--> index a document into previous replica shard (that is now primary)");
    client(replicaNode).prepareIndex("test", "type1")
        .setSource(jsonBuilder().startObject().field("field", "value1").endObject())
        .get();

    logger.info("--> shut down node that has new acknowledged document");
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(replicaNode));
    ensureStableCluster(1, master);

    partition.stopDisrupting();
    logger.info("--> waiting for node with old primary shard to rejoin the cluster");
    ensureStableCluster(2, master);

    logger.info("--> check that old primary shard does not get promoted to primary again");
    // First reroute triggers the shard state fetches; wait until none are in flight any more.
    client(master).admin().cluster().prepareReroute().get();
    assertBusy(() -> assertThat(internalCluster().getInstance(GatewayAllocator.class, master).getNumberOfInFlightFetch(), equalTo(0)));
    // Second reroute: both shard copies must still be unassigned.
    final ClusterState afterReroute = client(master).admin().cluster().prepareReroute().get().getState();
    assertThat(afterReroute.getRoutingNodes().unassigned().size(), equalTo(2));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) GatewayAllocator(org.elasticsearch.gateway.GatewayAllocator) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)

Example 8 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project elasticsearch by elastic.

the class IndexingMasterFailoverIT method testMasterFailoverDuringIndexingWithMappingChanges.

/**
 * Indexing operations that introduce mapping changes need a blocking request to the master node to update
 * the mapping. If the master is being disrupted, or cannot commit cluster state changes, the request has to
 * be retried within its timeout. That retry logic lives in TransportMasterNodeAction and is exercised here by
 * disconnecting the elected master while documents with new fields are being indexed.
 */
public void testMasterFailoverDuringIndexingWithMappingChanges() throws Throwable {
    logger.info("--> start 4 nodes, 3 master, 1 data");
    final Settings sharedSettings = Settings.builder()
        // for hitting simulated network failures quickly
        .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s")
        // for hitting simulated network failures quickly
        .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1")
        // still long to induce failures but not too long so the test won't time out
        .put("discovery.zen.join_timeout", "10s")
        // for hitting simulated network failures quickly
        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
        .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2)
        .build();
    internalCluster().startMasterOnlyNodes(3, sharedSettings);
    final String dataNode = internalCluster().startDataOnlyNode(sharedSettings);

    logger.info("--> wait for all nodes to join the cluster");
    ensureStableCluster(4);

    // Data with mapping changes is indexed while the master fails over.
    final Settings.Builder indexSettings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 0);
    client().admin().indices().prepareCreate("myindex").setSettings(indexSettings).get();
    ensureGreen("myindex");

    // Two parties: the indexing thread and this test thread, so indexing starts right before the disruption.
    final CyclicBarrier startSignal = new CyclicBarrier(2);
    final Thread indexingThread = new Thread(() -> {
        try {
            startSignal.await();
        } catch (InterruptedException e) {
            logger.warn("Barrier interrupted", e);
            return;
        } catch (BrokenBarrierException e) {
            logger.warn("Broken barrier", e);
            return;
        }
        for (int docNumber = 0; docNumber < 10; docNumber++) {
            // each document uses a field name not seen before, which forces a mapping change
            final IndexResponse response = client(dataNode)
                .prepareIndex("myindex", "mytype")
                .setSource("field_" + docNumber, "val")
                .get();
            assertEquals(DocWriteResponse.Result.CREATED, response.getResult());
        }
    }, "indexingThread");
    indexingThread.start();
    startSignal.await();

    // Cut the current master off from every other node in the cluster.
    final String master = internalCluster().getMasterName();
    final Set<String> otherNodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
    otherNodes.remove(master);
    final TwoPartitions sides = new TwoPartitions(Collections.singleton(master), otherNodes);
    final NetworkDisruption partition = new NetworkDisruption(sides, new NetworkDisconnect());
    internalCluster().setDisruptionScheme(partition);

    logger.info("--> disrupting network");
    partition.startDisrupting();

    logger.info("--> waiting for new master to be elected");
    ensureStableCluster(3, dataNode);

    partition.stopDisrupting();
    logger.info("--> waiting to heal");
    ensureStableCluster(4);

    // All ten documents must have made it in despite the failover.
    indexingThread.join();
    ensureGreen("myindex");
    refresh();
    assertThat(client().prepareSearch("myindex").get().getHits().getTotalHits(), equalTo(10L));
}
Also used : BrokenBarrierException(java.util.concurrent.BrokenBarrierException) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) CyclicBarrier(java.util.concurrent.CyclicBarrier) IndexResponse(org.elasticsearch.action.index.IndexResponse) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) DiscoverySettings(org.elasticsearch.discovery.DiscoverySettings) Settings(org.elasticsearch.common.settings.Settings) HashSet(java.util.HashSet)

Example 9 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method testSendingShardFailure.

// Simulates reporting a shard failure from a non-master node while either that node or the master is
// isolated, and checks that the listener is eventually notified of success once the partition heals.
public void testSendingShardFailure() throws Exception {
    final List<String> nodes = startCluster(3, 2);
    final String masterNode = internalCluster().getMasterName();
    final List<String> nonMasterNodes = nodes.stream()
        .filter(name -> !name.equals(masterNode))
        .collect(Collectors.toList());
    final String nonMasterNode = randomFrom(nonMasterNodes);

    final Settings.Builder indexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2);
    assertAcked(prepareCreate("test").setSettings(indexSettings));
    ensureGreen();

    // pick a random started shard located on the chosen non-master node; this is the one reported as failed
    final String nonMasterNodeId = internalCluster().clusterService(nonMasterNode).localNode().getId();
    final ShardRouting failedShard = randomFrom(
        clusterService().state().getRoutingNodes().node(nonMasterNodeId).shardsWithState(ShardRoutingState.STARTED));
    final ShardStateAction service = internalCluster().getInstance(ShardStateAction.class, nonMasterNode);

    final CountDownLatch listenerNotified = new CountDownLatch(1);
    final AtomicBoolean success = new AtomicBoolean();

    final String isolatedNode = randomBoolean() ? masterNode : nonMasterNode;
    final TwoPartitions partitions = isolateNode(isolatedNode);
    // NetworkUnresponsive cannot be used here: it would swallow the "shard failed" request, so neither
    // onSuccess nor onFailure would ever be invoked on the listener below.
    final NetworkLinkDisruptionType disruptionType = new NetworkDisconnect();
    final NetworkDisruption networkDisruption = new NetworkDisruption(partitions, disruptionType);
    setDisruptionScheme(networkDisruption);
    networkDisruption.startDisrupting();

    final ShardStateAction.Listener listener = new ShardStateAction.Listener() {

        @Override
        public void onSuccess() {
            success.set(true);
            listenerNotified.countDown();
        }

        @Override
        public void onFailure(Exception e) {
            success.set(false);
            listenerNotified.countDown();
            // a failure notification is never expected in this scenario
            assert false;
        }
    };
    service.localShardFailed(failedShard, "simulated", new CorruptIndexException("simulated", (String) null), listener);

    if (!isolatedNode.equals(nonMasterNode)) {
        // the master was isolated
        ensureStableCluster(2, nonMasterNode);
    } else {
        // the reporting node itself was isolated
        assertNoMaster(nonMasterNode);
    }

    // heal the partition
    networkDisruption.removeAndEnsureHealthy(internalCluster());

    // the cluster should stabilize
    ensureStableCluster(3);

    // the listener should be notified, and with success
    listenerNotified.await();
    assertTrue(success.get());

    // no shard copy may still carry the allocation id of the failed shard
    final List<ShardRouting> remainingShards = clusterService().state().getRoutingTable().allShards("test");
    for (ShardRouting remaining : remainingShards) {
        assertThat(remaining.allocationId(), not(equalTo(failedShard.allocationId())));
    }
}
Also used : Arrays(java.util.Arrays) Nullable(org.elasticsearch.common.Nullable) ZenDiscovery(org.elasticsearch.discovery.zen.ZenDiscovery) Matchers.not(org.hamcrest.Matchers.not) ZenPing(org.elasticsearch.discovery.zen.ZenPing) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterStateUpdateTask(org.elasticsearch.cluster.ClusterStateUpdateTask) Scope(org.elasticsearch.test.ESIntegTestCase.Scope) ClusterBlock(org.elasticsearch.cluster.block.ClusterBlock) SlowClusterStateProcessing(org.elasticsearch.test.disruption.SlowClusterStateProcessing) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) Priority(org.elasticsearch.common.Priority) TestLogging(org.elasticsearch.test.junit.annotations.TestLogging) Set(java.util.Set) ClusterDiscoveryConfiguration(org.elasticsearch.test.discovery.ClusterDiscoveryConfiguration) CountDownLatch(java.util.concurrent.CountDownLatch) Supplier(org.apache.logging.log4j.util.Supplier) IndexMetaData(org.elasticsearch.cluster.metadata.IndexMetaData) ShardStateAction(org.elasticsearch.cluster.action.shard.ShardStateAction) IntermittentLongGCDisruption(org.elasticsearch.test.disruption.IntermittentLongGCDisruption) Matchers.is(org.hamcrest.Matchers.is) TransportRequestOptions(org.elasticsearch.transport.TransportRequestOptions) TestZenDiscovery(org.elasticsearch.test.discovery.TestZenDiscovery) ClusterService(org.elasticsearch.cluster.service.ClusterService) ShardRoutingState(org.elasticsearch.cluster.routing.ShardRoutingState) MembershipAction(org.elasticsearch.discovery.zen.MembershipAction) ArrayList(java.util.ArrayList) DiscoveryNode(org.elasticsearch.cluster.node.DiscoveryNode) TcpTransport(org.elasticsearch.transport.TcpTransport) IndicesStoreIntegrationIT(org.elasticsearch.indices.store.IndicesStoreIntegrationIT) TransportService(org.elasticsearch.transport.TransportService) Before(org.junit.Before) 
ConnectionProfile(org.elasticsearch.transport.ConnectionProfile) DiscoveryNodes(org.elasticsearch.cluster.node.DiscoveryNodes) ClusterScope(org.elasticsearch.test.ESIntegTestCase.ClusterScope) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) Client(org.elasticsearch.client.Client) IOException(java.io.IOException) DocWriteResponse(org.elasticsearch.action.DocWriteResponse) ExecutionException(java.util.concurrent.ExecutionException) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) Tuple(org.elasticsearch.common.collect.Tuple) NetworkDelay(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDelay) ElasticsearchAssertions.assertAcked(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked) ElasticsearchException(org.elasticsearch.ElasticsearchException) GetResponse(org.elasticsearch.action.get.GetResponse) UnicastZenPing(org.elasticsearch.discovery.zen.UnicastZenPing) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) TransportRequest(org.elasticsearch.transport.TransportRequest) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) Settings(org.elasticsearch.common.settings.Settings) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) LongGCDisruption(org.elasticsearch.test.disruption.LongGCDisruption) XContentFactory.jsonBuilder(org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder) Bridge(org.elasticsearch.test.disruption.NetworkDisruption.Bridge) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HotThreads(org.elasticsearch.monitor.jvm.HotThreads) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ESIntegTestCase(org.elasticsearch.test.ESIntegTestCase) NetworkUnresponsive(org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) 
INDEX_NUMBER_OF_REPLICAS_SETTING(org.elasticsearch.cluster.metadata.IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING) PublishClusterStateAction(org.elasticsearch.discovery.zen.PublishClusterStateAction) Matchers.equalTo(org.hamcrest.Matchers.equalTo) ElectMasterService(org.elasticsearch.discovery.zen.ElectMasterService) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) InternalTestCluster(org.elasticsearch.test.InternalTestCluster) XContentType(org.elasticsearch.common.xcontent.XContentType) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) FaultDetection(org.elasticsearch.discovery.zen.FaultDetection) AtomicReference(java.util.concurrent.atomic.AtomicReference) Strings(org.elasticsearch.common.Strings) HashSet(java.util.HashSet) TimeValue(org.elasticsearch.common.unit.TimeValue) IndexSettings(org.elasticsearch.index.IndexSettings) IndexResponse(org.elasticsearch.action.index.IndexResponse) MockTransportService(org.elasticsearch.test.transport.MockTransportService) ClusterBlockLevel(org.elasticsearch.cluster.block.ClusterBlockLevel) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) INDEX_NUMBER_OF_SHARDS_SETTING(org.elasticsearch.cluster.metadata.IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING) DisruptedLinks(org.elasticsearch.test.disruption.NetworkDisruption.DisruptedLinks) Semaphore(java.util.concurrent.Semaphore) NetworkLinkDisruptionType(org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) Plugin(org.elasticsearch.plugins.Plugin) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) Murmur3HashFunction(org.elasticsearch.cluster.routing.Murmur3HashFunction) TimeUnit(java.util.concurrent.TimeUnit) SingleNodeDisruption(org.elasticsearch.test.disruption.SingleNodeDisruption) NodeEnvironment(org.elasticsearch.env.NodeEnvironment) 
Collections(java.util.Collections) NetworkLinkDisruptionType(org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) TwoPartitions(org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) ShardStateAction(org.elasticsearch.cluster.action.shard.ShardStateAction) CountDownLatch(java.util.concurrent.CountDownLatch) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) ElasticsearchException(org.elasticsearch.ElasticsearchException) CorruptIndexException(org.apache.lucene.index.CorruptIndexException) NoShardAvailableActionException(org.elasticsearch.action.NoShardAvailableActionException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption)

Example 10 with NetworkDisruption

use of org.elasticsearch.test.disruption.NetworkDisruption in project elasticsearch by elastic.

the class DiscoveryWithServiceDisruptionsIT method addRandomDisruptionScheme.

/**
 * Builds a randomly chosen disruption scheme, registers it through {@code setDisruptionScheme} and returns it.
 * The set of disrupted links and the link disruption type are always drawn, even when the rarely chosen
 * {@link SlowClusterStateProcessing} scheme ends up being used instead of a {@link NetworkDisruption}.
 *
 * @return the scheme that was registered
 */
private ServiceDisruptionScheme addRandomDisruptionScheme() {
    // TODO: add partial partitions
    final DisruptedLinks disruptedLinks = randomBoolean()
        ? TwoPartitions.random(random(), internalCluster().getNodeNames())
        : Bridge.random(random(), internalCluster().getNodeNames());

    final int typeChoice = randomInt(2);
    final NetworkLinkDisruptionType disruptionType;
    if (typeChoice == 0) {
        disruptionType = new NetworkUnresponsive();
    } else if (typeChoice == 1) {
        disruptionType = new NetworkDisconnect();
    } else if (typeChoice == 2) {
        disruptionType = NetworkDelay.random(random());
    } else {
        throw new IllegalArgumentException();
    }

    final ServiceDisruptionScheme scheme = rarely()
        ? new SlowClusterStateProcessing(random())
        : new NetworkDisruption(disruptedLinks, disruptionType);
    setDisruptionScheme(scheme);
    return scheme;
}
Also used : SlowClusterStateProcessing(org.elasticsearch.test.disruption.SlowClusterStateProcessing) NetworkLinkDisruptionType(org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) DisruptedLinks(org.elasticsearch.test.disruption.NetworkDisruption.DisruptedLinks) NetworkUnresponsive(org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) ServiceDisruptionScheme(org.elasticsearch.test.disruption.ServiceDisruptionScheme) NetworkDisruption(org.elasticsearch.test.disruption.NetworkDisruption) NetworkDisconnect(org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect)

Aggregations

NetworkDisruption (org.elasticsearch.test.disruption.NetworkDisruption) 15, TwoPartitions (org.elasticsearch.test.disruption.NetworkDisruption.TwoPartitions) 13, NetworkDisconnect (org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect) 9, HashSet (java.util.HashSet) 6, ClusterState (org.elasticsearch.cluster.ClusterState) 6, Settings (org.elasticsearch.common.settings.Settings) 4, NetworkUnresponsive (org.elasticsearch.test.disruption.NetworkDisruption.NetworkUnresponsive) 4, IndexResponse (org.elasticsearch.action.index.IndexResponse) 3, TimeValue (org.elasticsearch.common.unit.TimeValue) 3, UnicastZenPing (org.elasticsearch.discovery.zen.UnicastZenPing) 3, ZenDiscovery (org.elasticsearch.discovery.zen.ZenDiscovery) 3, ZenPing (org.elasticsearch.discovery.zen.ZenPing) 3, TestZenDiscovery (org.elasticsearch.test.discovery.TestZenDiscovery) 3, NetworkLinkDisruptionType (org.elasticsearch.test.disruption.NetworkDisruption.NetworkLinkDisruptionType) 3, ServiceDisruptionScheme (org.elasticsearch.test.disruption.ServiceDisruptionScheme) 3, ArrayList (java.util.ArrayList) 2, CountDownLatch (java.util.concurrent.CountDownLatch) 2, ExecutionException (java.util.concurrent.ExecutionException) 2, AtomicReference (java.util.concurrent.atomic.AtomicReference) 2, IndexSettings (org.elasticsearch.index.IndexSettings) 2