Search in sources :

Example 1 with BlockClusterStateProcessing

use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in project elasticsearch by elastic.

the class RareClusterStateIT method testDeleteCreateInOneBulk.

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/14932")
public void testDeleteCreateInOneBulk() throws Exception {
    internalCluster().startNodes(2);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    prepareCreate("test").setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true).addMapping("type").get();
    ensureGreen("test");
    // now that the cluster is stable, remove publishing timeout
    assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0")));
    Set<String> nodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
    nodes.remove(internalCluster().getMasterName());
    // block none master node.
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodes.iterator().next(), random());
    internalCluster().setDisruptionScheme(disruption);
    logger.info("--> indexing a doc");
    index("test", "type", "1");
    refresh();
    disruption.startDisrupting();
    logger.info("--> delete index and recreate it");
    assertFalse(client().admin().indices().prepareDelete("test").setTimeout("200ms").get().isAcknowledged());
    assertFalse(prepareCreate("test").setTimeout("200ms").setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true).get().isAcknowledged());
    logger.info("--> letting cluster proceed");
    disruption.stopDisrupting();
    ensureGreen(TimeValue.timeValueMinutes(30), "test");
    assertHitCount(client().prepareSearch("test").get(), 0);
}
Also used : BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) HashSet(java.util.HashSet)

Example 2 with BlockClusterStateProcessing

use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in project elasticsearch by elastic.

the class RareClusterStateIT method testDelayedMappingPropagationOnPrimary.

public void testDelayedMappingPropagationOnPrimary() throws Exception {
    // Here we want to test that things go well if there is a first request
    // that adds mappings but before mappings are propagated to all nodes
    // another index request introduces the same mapping. The master node
    // will reply immediately since it did not change the cluster state
    // but the change might not be on the node that performed the indexing
    // operation yet
    Settings settings = Settings.builder().put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), // explicitly set so it won't default to publish timeout
    "30s").put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), // don't wait post commit as we are blocking things by design
    "0s").build();
    final List<String> nodeNames = internalCluster().startNodes(2, settings);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));
    String otherNode = null;
    for (String node : nodeNames) {
        if (node.equals(master) == false) {
            otherNode = node;
            break;
        }
    }
    assertNotNull(otherNode);
    // Don't allocate the shard on the master node
    assertAcked(prepareCreate("index").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).put("index.routing.allocation.exclude._name", master)).get());
    ensureGreen();
    // Check routing tables
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(1));
    for (ShardRouting shard : shards) {
        if (shard.primary()) {
            // primary must not be on the master node
            assertFalse(state.nodes().getMasterNodeId().equals(shard.currentNodeId()));
        } else {
            // only primaries
            fail();
        }
    }
    // Block cluster state processing where our shard is
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();
    // Add a new mapping...
    final AtomicReference<Object> putMappingResponse = new AtomicReference<>();
    client().admin().indices().preparePutMapping("index").setType("type").setSource("field", "type=long").execute(new ActionListener<PutMappingResponse>() {

        @Override
        public void onResponse(PutMappingResponse response) {
            putMappingResponse.set(response);
        }

        @Override
        public void onFailure(Exception e) {
            putMappingResponse.set(e);
        }
    });
    // ...and wait for mappings to be available on master
    assertBusy(new Runnable() {

        @Override
        public void run() {
            ImmutableOpenMap<String, MappingMetaData> indexMappings = client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
            assertNotNull(indexMappings);
            MappingMetaData typeMappings = indexMappings.get("type");
            assertNotNull(typeMappings);
            Object properties;
            try {
                properties = typeMappings.getSourceAsMap().get("properties");
            } catch (IOException e) {
                throw new AssertionError(e);
            }
            assertNotNull(properties);
            Object fieldMapping = ((Map<String, Object>) properties).get("field");
            assertNotNull(fieldMapping);
        }
    });
    final AtomicReference<Object> docIndexResponse = new AtomicReference<>();
    client().prepareIndex("index", "type", "1").setSource("field", 42).execute(new ActionListener<IndexResponse>() {

        @Override
        public void onResponse(IndexResponse response) {
            docIndexResponse.set(response);
        }

        @Override
        public void onFailure(Exception e) {
            docIndexResponse.set(e);
        }
    });
    // Wait a bit to make sure that the reason why we did not get a response
    // is that cluster state processing is blocked and not just that it takes
    // time to process the indexing request
    Thread.sleep(100);
    assertThat(putMappingResponse.get(), equalTo(null));
    assertThat(docIndexResponse.get(), equalTo(null));
    // Now make sure the indexing request finishes successfully
    disruption.stopDisrupting();
    assertBusy(new Runnable() {

        @Override
        public void run() {
            assertThat(putMappingResponse.get(), instanceOf(PutMappingResponse.class));
            PutMappingResponse resp = (PutMappingResponse) putMappingResponse.get();
            assertTrue(resp.isAcknowledged());
            assertThat(docIndexResponse.get(), instanceOf(IndexResponse.class));
            IndexResponse docResp = (IndexResponse) docIndexResponse.get();
            assertEquals(Arrays.toString(docResp.getShardInfo().getFailures()), 1, docResp.getShardInfo().getTotal());
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ImmutableOpenMap(org.elasticsearch.common.collect.ImmutableOpenMap) MappingMetaData(org.elasticsearch.cluster.metadata.MappingMetaData) IOException(java.io.IOException) IndexResponse(org.elasticsearch.action.index.IndexResponse) PutMappingResponse(org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) DiscoverySettings(org.elasticsearch.discovery.DiscoverySettings)

Example 3 with BlockClusterStateProcessing

use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in project elasticsearch by elastic.

the class IndicesStoreIntegrationIT method testIndexCleanup.

public void testIndexCleanup() throws Exception {
    final String masterNode = internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false));
    final String node_1 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    final String node_2 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    logger.info("--> creating index [test] with one shard and on replica");
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(indexSettings()).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)));
    ensureGreen("test");
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    Index index = state.metaData().index("test").getIndex();
    logger.info("--> making sure that shard and its replica are allocated on node_1 and node_2");
    assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    logger.info("--> starting node server3");
    final String node_3 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    logger.info("--> running cluster_health");
    ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForNodes("4").setWaitForNoRelocatingShards(true).get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));
    assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(false));
    assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(false));
    logger.info("--> move shard from node_1 to node_3, and wait for relocation to finish");
    if (randomBoolean()) {
        // sometimes add cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler
        BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, node_1, node_3);
        // wait a little so that cluster state observer is registered
        sleep(50);
        logger.info("--> stopping disruption");
        disruption.stopDisrupting();
    } else {
        internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, node_1, node_3)).get();
    }
    clusterHealth = client().admin().cluster().prepareHealth().setWaitForNoRelocatingShards(true).get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));
    assertThat(waitForShardDeletion(node_1, index, 0), equalTo(false));
    assertThat(waitForIndexDeletion(node_1, index), equalTo(false));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(true));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) MoveAllocationCommand(org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand) Index(org.elasticsearch.index.Index)

Example 4 with BlockClusterStateProcessing

use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in project elasticsearch by elastic.

the class IndicesStoreIntegrationIT method relocateAndBlockCompletion.

/**
     * relocate a shard and block cluster state processing on the relocation target node to activate the shard
     */
public static BlockClusterStateProcessing relocateAndBlockCompletion(Logger logger, String index, int shard, String nodeFrom, String nodeTo) throws InterruptedException {
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodeTo, random());
    internalCluster().setDisruptionScheme(disruption);
    MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeTo);
    ClusterService clusterService = internalCluster().getInstance(ClusterService.class, nodeTo);
    CountDownLatch beginRelocationLatch = new CountDownLatch(1);
    CountDownLatch receivedShardExistsRequestLatch = new CountDownLatch(1);
    // use a tracer on the target node to track relocation start and end
    transportService.addTracer(new MockTransportService.Tracer() {

        @Override
        public void receivedRequest(long requestId, String action) {
            if (action.equals(PeerRecoveryTargetService.Actions.FILES_INFO)) {
                logger.info("received: {}, relocation starts", action);
                beginRelocationLatch.countDown();
            } else if (action.equals(IndicesStore.ACTION_SHARD_EXISTS)) {
                // Whenever a node deletes a shard because it was relocated somewhere else, it first
                // checks if enough other copies are started somewhere else. The node sends a ShardActiveRequest
                // to the other nodes that should have a copy according to cluster state.
                receivedShardExistsRequestLatch.countDown();
                logger.info("received: {}, relocation done", action);
            } else if (action.equals(PeerRecoveryTargetService.Actions.WAIT_CLUSTERSTATE)) {
                logger.info("received: {}, waiting on cluster state", action);
                // a race with the BlockClusterStateProcessing block that is added below.
                try {
                    assertBusy(() -> assertTrue(clusterService.state().routingTable().index(index).shard(shard).primaryShard().relocating()));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }
    });
    internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(index, shard, nodeFrom, nodeTo)).get();
    logger.info("--> waiting for relocation to start");
    beginRelocationLatch.await();
    logger.info("--> starting disruption");
    disruption.startDisrupting();
    logger.info("--> waiting for relocation to finish");
    receivedShardExistsRequestLatch.await();
    logger.info("--> relocation completed (but cluster state processing block still in place)");
    return disruption;
}
Also used : BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) ClusterService(org.elasticsearch.cluster.service.ClusterService) MockTransportService(org.elasticsearch.test.transport.MockTransportService) MockTransportService(org.elasticsearch.test.transport.MockTransportService) TransportService(org.elasticsearch.transport.TransportService) MoveAllocationCommand(org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand) CountDownLatch(java.util.concurrent.CountDownLatch) ConnectTransportException(org.elasticsearch.transport.ConnectTransportException) IOException(java.io.IOException)

Example 5 with BlockClusterStateProcessing

use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in project elasticsearch by elastic.

the class RareClusterStateIT method testDelayedMappingPropagationOnReplica.

public void testDelayedMappingPropagationOnReplica() throws Exception {
    // This is essentially the same thing as testDelayedMappingPropagationOnPrimary
    // but for replicas
    // Here we want to test that everything goes well if the mappings that
    // are needed for a document are not available on the replica at the
    // time of indexing it
    final List<String> nodeNames = internalCluster().startNodes(2, Settings.builder().put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), // explicitly set so it won't default to publish timeout
    "30s").put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), // don't wait post commit as we are blocking things by design
    "0s").build());
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));
    String otherNode = null;
    for (String node : nodeNames) {
        if (node.equals(master) == false) {
            otherNode = node;
            break;
        }
    }
    assertNotNull(otherNode);
    // Force allocation of the primary on the master node by first only allocating on the master
    // and then allowing all nodes so that the replica gets allocated on the other node
    assertAcked(prepareCreate("index").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1).put("index.routing.allocation.include._name", master)).get());
    assertAcked(client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder().put("index.routing.allocation.include._name", "")).get());
    ensureGreen();
    // Check routing tables
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(2));
    for (ShardRouting shard : shards) {
        if (shard.primary()) {
            // primary must be on the master
            assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
        } else {
            assertTrue(shard.active());
        }
    }
    // Block cluster state processing on the replica
    BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();
    final AtomicReference<Object> putMappingResponse = new AtomicReference<>();
    client().admin().indices().preparePutMapping("index").setType("type").setSource("field", "type=long").execute(new ActionListener<PutMappingResponse>() {

        @Override
        public void onResponse(PutMappingResponse response) {
            putMappingResponse.set(response);
        }

        @Override
        public void onFailure(Exception e) {
            putMappingResponse.set(e);
        }
    });
    final Index index = resolveIndex("index");
    // Wait for mappings to be available on master
    assertBusy(new Runnable() {

        @Override
        public void run() {
            final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
            final IndexService indexService = indicesService.indexServiceSafe(index);
            assertNotNull(indexService);
            final MapperService mapperService = indexService.mapperService();
            DocumentMapper mapper = mapperService.documentMapper("type");
            assertNotNull(mapper);
            assertNotNull(mapper.mappers().getMapper("field"));
        }
    });
    final AtomicReference<Object> docIndexResponse = new AtomicReference<>();
    client().prepareIndex("index", "type", "1").setSource("field", 42).execute(new ActionListener<IndexResponse>() {

        @Override
        public void onResponse(IndexResponse response) {
            docIndexResponse.set(response);
        }

        @Override
        public void onFailure(Exception e) {
            docIndexResponse.set(e);
        }
    });
    // Wait for document to be indexed on primary
    assertBusy(new Runnable() {

        @Override
        public void run() {
            assertTrue(client().prepareGet("index", "type", "1").setPreference("_primary").get().isExists());
        }
    });
    // The mappings have not been propagated to the replica yet as a consequence the document count not be indexed
    // We wait on purpose to make sure that the document is not indexed because the shard operation is stalled
    // and not just because it takes time to replicate the indexing request to the replica
    Thread.sleep(100);
    assertThat(putMappingResponse.get(), equalTo(null));
    assertThat(docIndexResponse.get(), equalTo(null));
    // Now make sure the indexing request finishes successfully
    disruption.stopDisrupting();
    assertBusy(new Runnable() {

        @Override
        public void run() {
            assertThat(putMappingResponse.get(), instanceOf(PutMappingResponse.class));
            PutMappingResponse resp = (PutMappingResponse) putMappingResponse.get();
            assertTrue(resp.isAcknowledged());
            assertThat(docIndexResponse.get(), instanceOf(IndexResponse.class));
            IndexResponse docResp = (IndexResponse) docIndexResponse.get();
            assertEquals(Arrays.toString(docResp.getShardInfo().getFailures()), 2, // both shards should have succeeded
            docResp.getShardInfo().getTotal());
        }
    });
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) IndexService(org.elasticsearch.index.IndexService) DocumentMapper(org.elasticsearch.index.mapper.DocumentMapper) IndicesService(org.elasticsearch.indices.IndicesService) AtomicReference(java.util.concurrent.atomic.AtomicReference) Index(org.elasticsearch.index.Index) IOException(java.io.IOException) IndexResponse(org.elasticsearch.action.index.IndexResponse) PutMappingResponse(org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) MapperService(org.elasticsearch.index.mapper.MapperService)

Aggregations

BlockClusterStateProcessing (org.elasticsearch.test.disruption.BlockClusterStateProcessing)5 IOException (java.io.IOException)3 ClusterState (org.elasticsearch.cluster.ClusterState)3 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 PutMappingResponse (org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse)2 IndexResponse (org.elasticsearch.action.index.IndexResponse)2 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)2 MoveAllocationCommand (org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand)2 Index (org.elasticsearch.index.Index)2 HashSet (java.util.HashSet)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse)1 MappingMetaData (org.elasticsearch.cluster.metadata.MappingMetaData)1 ClusterService (org.elasticsearch.cluster.service.ClusterService)1 ImmutableOpenMap (org.elasticsearch.common.collect.ImmutableOpenMap)1 Settings (org.elasticsearch.common.settings.Settings)1 DiscoverySettings (org.elasticsearch.discovery.DiscoverySettings)1 IndexService (org.elasticsearch.index.IndexService)1 DocumentMapper (org.elasticsearch.index.mapper.DocumentMapper)1 MapperService (org.elasticsearch.index.mapper.MapperService)1