Search in sources :

Example 1 with BlockClusterStateProcessing

use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.

the class RareClusterStateIT method testDelayedMappingPropagationOnPrimary.

/**
 * Checks that an indexing request relying on a freshly added mapping still completes when the
 * node holding the primary shard has not yet processed the cluster state carrying that mapping.
 *
 * @throws Exception if any cluster operation, busy-wait or future retrieval fails
 */
public void testDelayedMappingPropagationOnPrimary() throws Exception {
    // Scenario under test: a first request adds a mapping, and before that mapping has reached
    // every node a second (index) request introduces the very same mapping. The master answers
    // right away because nothing changes in the cluster state, yet the node that runs the
    // indexing operation may not have seen the mapping so far.
    final List<String> startedNodes = internalCluster().startNodes(2);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(startedNodes, hasItem(master));

    // The first started node that is not the elected master (null if there is none).
    final String otherNode = startedNodes.stream().filter(name -> name.equals(master) == false).findFirst().orElse(null);
    assertNotNull(otherNode);

    // One shard, no replicas, and the master excluded from allocation so the primary lands on the other node.
    final Settings.Builder settings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
        .put("index.routing.allocation.exclude._name", master);
    assertAcked(prepareCreate("index").setSettings(settings).get());
    ensureGreen();

    // Verify the routing table: exactly one shard copy, a primary, and not on the master node.
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    final List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(1));
    final String masterNodeId = state.nodes().getMasterNodeId();
    for (ShardRouting routing : shards) {
        if (routing.primary() == false) {
            // the index was created without replicas, so anything but a primary is unexpected
            fail();
        }
        assertFalse(masterNodeId.equals(routing.currentNodeId()));
    }

    // Block cluster state processing on the node that hosts the shard.
    final BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();

    // Submit the new mapping ...
    final ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(
        client().admin().indices().preparePutMapping("index").setSource("field", "type=long")
    );

    // ... and poll until a get-mappings call shows the "field" property.
    assertBusy(() -> {
        final MappingMetadata mapping = client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
        assertNotNull(mapping);
        final Map<String, Object> source;
        try {
            source = mapping.getSourceAsMap();
        } catch (OpenSearchParseException e) {
            throw new AssertionError(e);
        }
        final Object properties = source.get("properties");
        assertNotNull(properties);
        final Object fieldMapping = ((Map<String, Object>) properties).get("field");
        assertNotNull(fieldMapping);
    });

    // The mapping already exists, so this request does not change the cluster state; it is
    // therefore sent directly rather than through executeAndCancelCommittedPublication.
    final ActionFuture<IndexResponse> docIndexResponse = client().prepareIndex("index").setId("1").setSource("field", 42).execute();

    // Give the requests a moment, so that a missing response can be attributed to the blocked
    // cluster state processing rather than to ordinary request latency.
    Thread.sleep(100);
    assertFalse(putMappingResponse.isDone());
    assertFalse(docIndexResponse.isDone());

    // Lift the block; both pending requests must now finish successfully.
    disruption.stopDisrupting();
    assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
    assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
    final int totalShards = docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal();
    assertEquals(1, totalShards);
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing) AcknowledgedResponse(org.opensearch.action.support.master.AcknowledgedResponse) OpenSearchParseException(org.opensearch.OpenSearchParseException) IndexResponse(org.opensearch.action.index.IndexResponse) ShardRouting(org.opensearch.cluster.routing.ShardRouting) MappingMetadata(org.opensearch.cluster.metadata.MappingMetadata) Map(java.util.Map) Collections.emptyMap(java.util.Collections.emptyMap)

Example 2 with BlockClusterStateProcessing

use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.

the class RareClusterStateIT method testDelayedMappingPropagationOnReplica.

/**
 * Replica-side counterpart of the primary test: checks that indexing completes when the mappings
 * a document needs are not yet available on the node holding the replica at indexing time.
 *
 * @throws Exception if any cluster operation, busy-wait or future retrieval fails
 */
public void testDelayedMappingPropagationOnReplica() throws Exception {
    final List<String> startedNodes = internalCluster().startNodes(2);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
    final String master = internalCluster().getMasterName();
    assertThat(startedNodes, hasItem(master));

    // The first started node that is not the elected master (null if there is none).
    final String otherNode = startedNodes.stream().filter(name -> name.equals(master) == false).findFirst().orElse(null);
    assertNotNull(otherNode);

    // Pin the primary to the master by initially allowing allocation there only, then clear
    // the filter so that the replica can be allocated on the other node.
    final Settings.Builder createSettings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
        .put("index.routing.allocation.include._name", master);
    prepareCreate("index").setSettings(createSettings).get();
    final Settings.Builder clearedFilter = Settings.builder().put("index.routing.allocation.include._name", "");
    client().admin().indices().prepareUpdateSettings("index").setSettings(clearedFilter).get();
    ensureGreen();

    // Verify the routing table: two copies, the primary on the master, the replica active.
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    final List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(2));
    final String masterNodeId = state.nodes().getMasterNodeId();
    for (ShardRouting routing : shards) {
        if (routing.primary() == false) {
            assertTrue(routing.active());
            continue;
        }
        assertEquals(masterNodeId, routing.currentNodeId());
    }

    // Block cluster state processing on the node that holds the replica.
    final BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(disruption);
    disruption.startDisrupting();
    final ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(
        client().admin().indices().preparePutMapping("index").setSource("field", "type=long")
    );
    final Index index = resolveIndex("index");

    // Poll the master's own mapper service until it knows about "field".
    assertBusy(() -> {
        final IndexService masterIndexService = internalCluster().getInstance(IndicesService.class, master).indexServiceSafe(index);
        assertNotNull(masterIndexService);
        final DocumentMapper docMapper = masterIndexService.mapperService().documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(docMapper);
        assertNotNull(docMapper.mappers().getMapper("field"));
    });

    final ActionFuture<IndexResponse> docIndexResponse = client().prepareIndex("index").setId("1").setSource("field", 42).execute();
    // A get for document 1 must eventually succeed even though the index request itself is still pending.
    assertBusy(() -> assertTrue(client().prepareGet("index", "1").get().isExists()));

    // Index a second document, this time through a dynamic mapping ("field2" is not mapped yet).
    // Original author's note: the ack timeout of 0 on dynamic mapping updates lets the document be
    // indexed on the primary even if the mapping update has not been applied on the replica.
    // Like the put-mapping call above, this request goes through executeAndCancelCommittedPublication.
    final ActionFuture<IndexResponse> dynamicMappingsFut = executeAndCancelCommittedPublication(
        client().prepareIndex("index").setId("2").setSource("field2", 42)
    );

    // Poll the master's mapper service until the second field shows up as well.
    assertBusy(() -> {
        final IndexService masterIndexService = internalCluster().getInstance(IndicesService.class, master).indexServiceSafe(index);
        assertNotNull(masterIndexService);
        final DocumentMapper docMapper = masterIndexService.mapperService().documentMapper(MapperService.SINGLE_MAPPING_NAME);
        assertNotNull(docMapper);
        assertNotNull(docMapper.mappers().getMapper("field2"));
    });
    assertBusy(() -> assertTrue(client().prepareGet("index", "2").get().isExists()));

    // The mappings have not reached the replica, so the index request cannot complete there.
    // Wait deliberately, so that a pending response can be attributed to the stalled shard
    // operation rather than to ordinary replication latency.
    Thread.sleep(100);
    assertFalse(putMappingResponse.isDone());
    assertFalse(docIndexResponse.isDone());

    // Lift the block; every pending request must now finish successfully.
    disruption.stopDisrupting();
    assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
    assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
    // primary and replica are both expected in the shard info
    final int totalShards = docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal();
    assertEquals(2, totalShards);
    assertThat(dynamicMappingsFut.get(10, TimeUnit.SECONDS).getResult(), equalTo(CREATED));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing) IndexService(org.opensearch.index.IndexService) DocumentMapper(org.opensearch.index.mapper.DocumentMapper) AcknowledgedResponse(org.opensearch.action.support.master.AcknowledgedResponse) IndicesService(org.opensearch.indices.IndicesService) Index(org.opensearch.index.Index) IndexResponse(org.opensearch.action.index.IndexResponse) ShardRouting(org.opensearch.cluster.routing.ShardRouting) MapperService(org.opensearch.index.mapper.MapperService)

Example 3 with BlockClusterStateProcessing

use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.

the class RareClusterStateIT method testDeleteCreateInOneBulk.

/**
 * Deletes and re-creates an index while the data node has cluster state processing blocked,
 * then checks that the re-created index is green and contains no documents.
 *
 * @throws Exception if any cluster operation, busy-wait or future retrieval fails
 */
public void testDeleteCreateInOneBulk() throws Exception {
    internalCluster().startMasterOnlyNode();
    final String dataNode = internalCluster().startDataOnlyNode();
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());

    final Settings.Builder noReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0);
    prepareCreate("test").setSettings(noReplicas).addMapping(MapperService.SINGLE_MAPPING_NAME).get();
    ensureGreen("test");

    // The disruption targets the non-master (data) node; it is registered now but started only after the first document is indexed.
    final BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(dataNode, random());
    internalCluster().setDisruptionScheme(disruption);

    logger.info("--> indexing a doc");
    index("test", MapperService.SINGLE_MAPPING_NAME, "1");
    refresh();

    disruption.startDisrupting();
    logger.info("--> delete index and recreate it");
    // Both requests use a 0s timeout and are each awaited for at most ten seconds.
    executeAndCancelCommittedPublication(
        client().admin().indices().prepareDelete("test").setTimeout("0s")
    ).get(10, TimeUnit.SECONDS);
    final Settings.Builder recreateSettings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
        .put(IndexMetadata.SETTING_WAIT_FOR_ACTIVE_SHARDS.getKey(), "0");
    executeAndCancelCommittedPublication(
        prepareCreate("test").setSettings(recreateSettings).setTimeout("0s")
    ).get(10, TimeUnit.SECONDS);

    logger.info("--> letting cluster proceed");
    disruption.stopDisrupting();
    ensureGreen(TimeValue.timeValueMinutes(30), "test");

    // The requests above did not wait for the data node (0s timeout), so poll until its
    // applied cluster state version has caught up with the master's.
    assertBusy(() -> {
        final long versionOnMaster = internalCluster().clusterService(internalCluster().getMasterName()).state().version();
        final long versionOnDataNode = internalCluster().clusterService(dataNode).state().version();
        assertThat(versionOnMaster, equalTo(versionOnDataNode));
    });

    // The re-created index must not contain the document indexed before the delete.
    assertHitCount(client().prepareSearch("test").get(), 0);
}
Also used : BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing)

Example 4 with BlockClusterStateProcessing

use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.

the class IndicesStoreIntegrationIT method testIndexCleanup.

/**
 * Relocates a shard from one data node to a newly started one and checks that the shard and
 * index directories are removed from the source node while remaining on the other two nodes.
 *
 * @throws Exception if any cluster operation or the relocation helper fails
 */
public void testIndexCleanup() throws Exception {
    internalCluster().startNode(nonDataNode());
    final String firstNode = internalCluster().startNode(nonMasterNode());
    final String secondNode = internalCluster().startNode(nonMasterNode());

    logger.info("--> creating index [test] with one shard and on replica");
    final Settings.Builder settings = Settings.builder()
        .put(indexSettings())
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1);
    assertAcked(prepareCreate("test").setSettings(settings));
    ensureGreen("test");

    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    final Index index = state.metadata().index("test").getIndex();

    logger.info("--> making sure that shard and its replica are allocated on node_1 and node_2");
    assertThat(Files.exists(shardDirectory(firstNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(firstNode, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(secondNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(secondNode, index)), equalTo(true));

    logger.info("--> starting node server3");
    final String thirdNode = internalCluster().startNode(nonMasterNode());

    logger.info("--> running cluster_health");
    final ClusterHealthResponse healthAfterJoin = client().admin()
        .cluster()
        .prepareHealth()
        .setWaitForNodes("4")
        .setWaitForNoRelocatingShards(true)
        .get();
    assertThat(healthAfterJoin.isTimedOut(), equalTo(false));

    // Nothing has moved yet: both copies stay where they were and the new node holds no data for the index.
    assertThat(Files.exists(shardDirectory(firstNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(firstNode, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(secondNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(secondNode, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(thirdNode, index, 0)), equalTo(false));
    assertThat(Files.exists(indexDirectory(thirdNode, index)), equalTo(false));

    logger.info("--> move shard from node_1 to node_3, and wait for relocation to finish");
    if (randomBoolean() == false) {
        // plain relocation without any cluster state disruption
        final MoveAllocationCommand move = new MoveAllocationCommand("test", 0, firstNode, thirdNode);
        internalCluster().client().admin().cluster().prepareReroute().add(move).get();
    } else {
        // sometimes add a cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler
        final BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, firstNode, thirdNode);
        // short pause so that the cluster state observer gets registered
        sleep(50);
        logger.info("--> stopping disruption");
        disruption.stopDisrupting();
    }

    final ClusterHealthResponse healthAfterMove = client().admin().cluster().prepareHealth().setWaitForNoRelocatingShards(true).get();
    assertThat(healthAfterMove.isTimedOut(), equalTo(false));

    // The source node must have cleaned up; the untouched copy and the relocation target must have data.
    assertShardDeleted(firstNode, index, 0);
    assertIndexDeleted(firstNode, index);
    assertThat(Files.exists(shardDirectory(secondNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(secondNode, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(thirdNode, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(thirdNode, index)), equalTo(true));
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing) ClusterHealthResponse(org.opensearch.action.admin.cluster.health.ClusterHealthResponse) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) Index(org.opensearch.index.Index)

Example 5 with BlockClusterStateProcessing

use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.

the class IndicesStoreIntegrationIT method relocateAndBlockCompletion.

/**
 * Moves a shard to {@code nodeTo} and, once the relocation has begun, blocks cluster state
 * processing on that target node. Returns only after the target node has received a
 * shard-exists request, with the block still in place.
 *
 * @param logger   logger used for progress messages
 * @param index    name of the index whose shard is moved
 * @param shard    id of the shard to move
 * @param nodeFrom name of the node currently holding the shard
 * @param nodeTo   name of the relocation target node, on which cluster state processing is blocked
 * @return the started disruption; the caller decides when to stop it
 * @throws InterruptedException if the thread is interrupted while waiting on either latch
 */
public static BlockClusterStateProcessing relocateAndBlockCompletion(Logger logger, String index, int shard, String nodeFrom, String nodeTo) throws InterruptedException {
    final BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodeTo, random());
    internalCluster().setDisruptionScheme(disruption);

    final MockTransportService targetTransport = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeTo);
    final CountDownLatch relocationStarted = new CountDownLatch(1);
    final CountDownLatch shardExistsRequested = new CountDownLatch(1);

    // Watch incoming requests on the target node to learn when relocation starts and when it ends.
    targetTransport.addMessageListener(new TransportMessageListener() {

        @Override
        public void onRequestReceived(long requestId, String action) {
            if (action.equals(PeerRecoveryTargetService.Actions.FILES_INFO)) {
                logger.info("received: {}, relocation starts", action);
                relocationStarted.countDown();
                return;
            }
            if (action.equals(IndicesStore.ACTION_SHARD_EXISTS)) {
                // Before a node deletes a shard that was relocated elsewhere, it first checks
                // that enough other copies are started. To do so it sends a ShardActiveRequest
                // to the nodes that should hold a copy according to the cluster state.
                shardExistsRequested.countDown();
                logger.info("received: {}, relocation done", action);
            }
        }
    });

    final MoveAllocationCommand move = new MoveAllocationCommand(index, shard, nodeFrom, nodeTo);
    internalCluster().client().admin().cluster().prepareReroute().add(move).get();

    logger.info("--> waiting for relocation to start");
    relocationStarted.await();

    logger.info("--> starting disruption");
    disruption.startDisrupting();

    logger.info("--> waiting for relocation to finish");
    shardExistsRequested.await();

    logger.info("--> relocation completed (but cluster state processing block still in place)");
    return disruption;
}
Also used : BlockClusterStateProcessing(org.opensearch.test.disruption.BlockClusterStateProcessing) MockTransportService(org.opensearch.test.transport.MockTransportService) TransportService(org.opensearch.transport.TransportService) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) CountDownLatch(java.util.concurrent.CountDownLatch) TransportMessageListener(org.opensearch.transport.TransportMessageListener)

Aggregations

BlockClusterStateProcessing (org.opensearch.test.disruption.BlockClusterStateProcessing)5 ClusterState (org.opensearch.cluster.ClusterState)3 IndexResponse (org.opensearch.action.index.IndexResponse)2 AcknowledgedResponse (org.opensearch.action.support.master.AcknowledgedResponse)2 ShardRouting (org.opensearch.cluster.routing.ShardRouting)2 MoveAllocationCommand (org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand)2 Index (org.opensearch.index.Index)2 Collections.emptyMap (java.util.Collections.emptyMap)1 Map (java.util.Map)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 OpenSearchParseException (org.opensearch.OpenSearchParseException)1 ClusterHealthResponse (org.opensearch.action.admin.cluster.health.ClusterHealthResponse)1 MappingMetadata (org.opensearch.cluster.metadata.MappingMetadata)1 IndexService (org.opensearch.index.IndexService)1 DocumentMapper (org.opensearch.index.mapper.DocumentMapper)1 MapperService (org.opensearch.index.mapper.MapperService)1 IndicesService (org.opensearch.indices.IndicesService)1 MockTransportService (org.opensearch.test.transport.MockTransportService)1 TransportMessageListener (org.opensearch.transport.TransportMessageListener)1 TransportService (org.opensearch.transport.TransportService)1