Use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.
The class RareClusterStateIT, method testDelayedMappingPropagationOnPrimary.
public void testDelayedMappingPropagationOnPrimary() throws Exception {
// Here we want to test that things go well if a first request adds mappings, but,
// before those mappings are propagated to all nodes, another index request introduces
// the same mapping. The master node will reply immediately since it did not change
// the cluster state, but the change might not yet be on the node that performed the
// indexing operation.
final List<String> nodeNames = internalCluster().startNodes(2);
assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
final String master = internalCluster().getMasterName();
assertThat(nodeNames, hasItem(master));
String otherNode = null;
for (String node : nodeNames) {
if (node.equals(master) == false) {
otherNode = node;
break;
}
}
assertNotNull(otherNode);
// Don't allocate the shard on the master node
assertAcked(prepareCreate("index").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put("index.routing.allocation.exclude._name", master)).get());
ensureGreen();
// Check routing tables
ClusterState state = client().admin().cluster().prepareState().get().getState();
assertEquals(master, state.nodes().getMasterNode().getName());
List<ShardRouting> shards = state.routingTable().allShards("index");
assertThat(shards, hasSize(1));
for (ShardRouting shard : shards) {
if (shard.primary()) {
// primary must not be on the master node
assertFalse(state.nodes().getMasterNodeId().equals(shard.currentNodeId()));
} else {
// only primaries
fail();
}
}
// Block cluster state processing where our shard is
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
internalCluster().setDisruptionScheme(disruption);
disruption.startDisrupting();
// Add a new mapping...
ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(client().admin().indices().preparePutMapping("index").setSource("field", "type=long"));
// ...and wait for mappings to be available on master
assertBusy(() -> {
MappingMetadata typeMappings = client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
assertNotNull(typeMappings);
Object properties;
try {
properties = typeMappings.getSourceAsMap().get("properties");
} catch (OpenSearchParseException e) {
throw new AssertionError(e);
}
assertNotNull(properties);
Object fieldMapping = ((Map<String, Object>) properties).get("field");
assertNotNull(fieldMapping);
});
// This request does not change the cluster state because the mapping is already created,
// so we do not await and cancel a committed publication.
ActionFuture<IndexResponse> docIndexResponse = client().prepareIndex("index").setId("1").setSource("field", 42).execute();
// Wait a bit to make sure that the reason why we did not get a response
// is that cluster state processing is blocked and not just that it takes
// time to process the indexing request
Thread.sleep(100);
assertFalse(putMappingResponse.isDone());
assertFalse(docIndexResponse.isDone());
// Now make sure the indexing request finishes successfully
disruption.stopDisrupting();
assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
assertEquals(1, docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal());
}
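The executeAndCancelCommittedPublication helper used above is not part of this excerpt. Below is a minimal sketch of what such a helper can look like; the Coordinator cast, the getCurrentMasterNodeInstance accessor, and the publicationInProgress/cancelCommittedPublication calls are assumptions about the test framework, not a copy of the real method.
// Sketch only: the real helper lives in RareClusterStateIT and may differ in detail.
// Idea: fire the request, then cancel the committed (but not yet applied) cluster state
// publication, so the test controls when other nodes observe the new state.
private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(ActionRequestBuilder<Req, Res> req) throws Exception {
    // Assumption: the elected master's Discovery implementation is a Coordinator
    final Coordinator coordinator = (Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class);
    // Make sure no other publication is in flight, so we do not cancel the wrong one
    assertBusy(() -> assertFalse(coordinator.publicationInProgress()));
    final ActionFuture<Res> future = req.execute();
    // Cancel the publication triggered by the request once it has been committed
    assertBusy(() -> assertTrue(coordinator.cancelCommittedPublication()));
    return future;
}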
Use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.
The class RareClusterStateIT, method testDelayedMappingPropagationOnReplica.
public void testDelayedMappingPropagationOnReplica() throws Exception {
// This is essentially the same thing as testDelayedMappingPropagationOnPrimary
// but for replicas
// Here we want to test that everything goes well if the mappings that
// are needed for a document are not available on the replica at the
// time of indexing it
final List<String> nodeNames = internalCluster().startNodes(2);
assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
final String master = internalCluster().getMasterName();
assertThat(nodeNames, hasItem(master));
String otherNode = null;
for (String node : nodeNames) {
if (node.equals(master) == false) {
otherNode = node;
break;
}
}
assertNotNull(otherNode);
// Force allocation of the primary on the master node by first only allocating on the master
// and then allowing all nodes so that the replica gets allocated on the other node
prepareCreate("index").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put("index.routing.allocation.include._name", master)).get();
client().admin().indices().prepareUpdateSettings("index").setSettings(Settings.builder().put("index.routing.allocation.include._name", "")).get();
ensureGreen();
// Check routing tables
ClusterState state = client().admin().cluster().prepareState().get().getState();
assertEquals(master, state.nodes().getMasterNode().getName());
List<ShardRouting> shards = state.routingTable().allShards("index");
assertThat(shards, hasSize(2));
for (ShardRouting shard : shards) {
if (shard.primary()) {
// primary must be on the master
assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
} else {
assertTrue(shard.active());
}
}
// Block cluster state processing on the replica
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
internalCluster().setDisruptionScheme(disruption);
disruption.startDisrupting();
final ActionFuture<AcknowledgedResponse> putMappingResponse = executeAndCancelCommittedPublication(client().admin().indices().preparePutMapping("index").setSource("field", "type=long"));
final Index index = resolveIndex("index");
// Wait for mappings to be available on master
assertBusy(() -> {
final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
final IndexService indexService = indicesService.indexServiceSafe(index);
assertNotNull(indexService);
final MapperService mapperService = indexService.mapperService();
DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
assertNotNull(mapper);
assertNotNull(mapper.mappers().getMapper("field"));
});
final ActionFuture<IndexResponse> docIndexResponse = client().prepareIndex("index").setId("1").setSource("field", 42).execute();
assertBusy(() -> assertTrue(client().prepareGet("index", "1").get().isExists()));
// Index another document, this time relying on a dynamic mapping update.
// The ack timeout of 0 on dynamic mapping updates makes it possible for the document to be indexed on the primary, even
// if the dynamic mapping update is not applied on the replica yet.
// This request does change the cluster state, because the dynamic mapping for field2 is new,
// so we need to await and cancel the committed publication.
ActionFuture<IndexResponse> dynamicMappingsFut = executeAndCancelCommittedPublication(client().prepareIndex("index").setId("2").setSource("field2", 42));
// ...and wait for second mapping to be available on master
assertBusy(() -> {
final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, master);
final IndexService indexService = indicesService.indexServiceSafe(index);
assertNotNull(indexService);
final MapperService mapperService = indexService.mapperService();
DocumentMapper mapper = mapperService.documentMapper(MapperService.SINGLE_MAPPING_NAME);
assertNotNull(mapper);
assertNotNull(mapper.mappers().getMapper("field2"));
});
assertBusy(() -> assertTrue(client().prepareGet("index", "2").get().isExists()));
// The mappings have not been propagated to the replica yet, and as a consequence the document cannot be indexed.
// We wait on purpose to make sure that the document is not indexed because the shard operation is stalled,
// and not just because it takes time to replicate the indexing request to the replica.
Thread.sleep(100);
assertFalse(putMappingResponse.isDone());
assertFalse(docIndexResponse.isDone());
// Now make sure the indexing request finishes successfully
disruption.stopDisrupting();
assertTrue(putMappingResponse.get(10, TimeUnit.SECONDS).isAcknowledged());
assertThat(docIndexResponse.get(10, TimeUnit.SECONDS), instanceOf(IndexResponse.class));
// both shards should have succeeded
assertEquals(2, docIndexResponse.get(10, TimeUnit.SECONDS).getShardInfo().getTotal());
assertThat(dynamicMappingsFut.get(10, TimeUnit.SECONDS).getResult(), equalTo(CREATED));
}
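The tests above release the block explicitly before the final assertions. As a general pattern (not taken from RareClusterStateIT), a try/finally guard keeps a failed assertion inside the blocked section from leaving cluster state processing blocked for the rest of the suite:
// Pattern sketch: always release the block, even if the blocked section throws.
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(otherNode, random());
internalCluster().setDisruptionScheme(disruption);
disruption.startDisrupting();
try {
    // issue requests that must not observe new cluster states on otherNode
} finally {
    disruption.stopDisrupting();
}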
Use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.
The class RareClusterStateIT, method testDeleteCreateInOneBulk.
public void testDeleteCreateInOneBulk() throws Exception {
internalCluster().startMasterOnlyNode();
String dataNode = internalCluster().startDataOnlyNode();
assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());
prepareCreate("test").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)).addMapping(MapperService.SINGLE_MAPPING_NAME).get();
ensureGreen("test");
// Block cluster state processing on the non-master (data) node.
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(dataNode, random());
internalCluster().setDisruptionScheme(disruption);
logger.info("--> indexing a doc");
index("test", MapperService.SINGLE_MAPPING_NAME, "1");
refresh();
disruption.startDisrupting();
logger.info("--> delete index and recreate it");
executeAndCancelCommittedPublication(client().admin().indices().prepareDelete("test").setTimeout("0s")).get(10, TimeUnit.SECONDS);
executeAndCancelCommittedPublication(prepareCreate("test").setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).put(IndexMetadata.SETTING_WAIT_FOR_ACTIVE_SHARDS.getKey(), "0")).setTimeout("0s")).get(10, TimeUnit.SECONDS);
logger.info("--> letting cluster proceed");
disruption.stopDisrupting();
ensureGreen(TimeValue.timeValueMinutes(30), "test");
// due to publish_timeout of 0, wait for data node to have cluster state fully applied
assertBusy(() -> {
long masterClusterStateVersion = internalCluster().clusterService(internalCluster().getMasterName()).state().version();
long dataClusterStateVersion = internalCluster().clusterService(dataNode).state().version();
assertThat(masterClusterStateVersion, equalTo(dataClusterStateVersion));
});
assertHitCount(client().prepareSearch("test").get(), 0);
}
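The version-convergence check at the end of this test can be factored into a small helper. The sketch below only reuses the internalCluster().clusterService(...) accessor already shown above and is an illustration, not part of the original class:
// Sketch: wait until two nodes have applied the same cluster state version.
private void assertSameAppliedClusterStateVersion(String nodeA, String nodeB) throws Exception {
    assertBusy(() -> {
        long versionA = internalCluster().clusterService(nodeA).state().version();
        long versionB = internalCluster().clusterService(nodeB).state().version();
        assertThat(versionA, equalTo(versionB));
    });
}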
Use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.
The class IndicesStoreIntegrationIT, method testIndexCleanup.
public void testIndexCleanup() throws Exception {
internalCluster().startNode(nonDataNode());
final String node_1 = internalCluster().startNode(nonMasterNode());
final String node_2 = internalCluster().startNode(nonMasterNode());
logger.info("--> creating index [test] with one shard and on replica");
assertAcked(prepareCreate("test").setSettings(Settings.builder().put(indexSettings()).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)));
ensureGreen("test");
ClusterState state = client().admin().cluster().prepareState().get().getState();
Index index = state.metadata().index("test").getIndex();
logger.info("--> making sure that shard and its replica are allocated on node_1 and node_2");
assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
logger.info("--> starting node server3");
final String node_3 = internalCluster().startNode(nonMasterNode());
logger.info("--> running cluster_health");
ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForNodes("4").setWaitForNoRelocatingShards(true).get();
assertThat(clusterHealth.isTimedOut(), equalTo(false));
assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(false));
assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(false));
logger.info("--> move shard from node_1 to node_3, and wait for relocation to finish");
if (randomBoolean()) {
// sometimes add cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler
BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, node_1, node_3);
// wait a little so that cluster state observer is registered
sleep(50);
logger.info("--> stopping disruption");
disruption.stopDisrupting();
} else {
internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, node_1, node_3)).get();
}
clusterHealth = client().admin().cluster().prepareHealth().setWaitForNoRelocatingShards(true).get();
assertThat(clusterHealth.isTimedOut(), equalTo(false));
assertShardDeleted(node_1, index, 0);
assertIndexDeleted(node_1, index);
assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(true));
assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(true));
}
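The indexDirectory and shardDirectory helpers referenced above are not shown in this excerpt. A plausible sketch, assuming they resolve on-disk paths through the node's NodeEnvironment (the indexPaths and availableShardPaths accessors are assumptions):
// Sketch only: the real helpers live in IndicesStoreIntegrationIT and may differ.
private Path indexDirectory(String server, Index index) {
    NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server);
    final Path[] paths = env.indexPaths(index);
    assert paths.length == 1;
    return paths[0];
}

private Path shardDirectory(String server, Index index, int shard) {
    NodeEnvironment env = internalCluster().getInstance(NodeEnvironment.class, server);
    final Path[] paths = env.availableShardPaths(new ShardId(index, shard));
    assert paths.length == 1;
    return paths[0];
}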
Use of org.opensearch.test.disruption.BlockClusterStateProcessing in project OpenSearch by opensearch-project.
The class IndicesStoreIntegrationIT, method relocateAndBlockCompletion.
/**
 * Relocate a shard and block cluster state processing on the relocation target node, so that the
 * cluster state update that would activate (start) the relocated shard has not yet been applied
 * when this method returns.
 */
public static BlockClusterStateProcessing relocateAndBlockCompletion(Logger logger, String index, int shard, String nodeFrom, String nodeTo) throws InterruptedException {
BlockClusterStateProcessing disruption = new BlockClusterStateProcessing(nodeTo, random());
internalCluster().setDisruptionScheme(disruption);
MockTransportService transportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nodeTo);
CountDownLatch beginRelocationLatch = new CountDownLatch(1);
CountDownLatch receivedShardExistsRequestLatch = new CountDownLatch(1);
// use a tracer on the target node to track relocation start and end
transportService.addMessageListener(new TransportMessageListener() {
@Override
public void onRequestReceived(long requestId, String action) {
if (action.equals(PeerRecoveryTargetService.Actions.FILES_INFO)) {
logger.info("received: {}, relocation starts", action);
beginRelocationLatch.countDown();
} else if (action.equals(IndicesStore.ACTION_SHARD_EXISTS)) {
// Whenever a node deletes a shard because it was relocated somewhere else, it first
// checks if enough other copies are started somewhere else. The node sends a ShardActiveRequest
// to the other nodes that should have a copy according to cluster state.
receivedShardExistsRequestLatch.countDown();
logger.info("received: {}, relocation done", action);
}
}
});
internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(index, shard, nodeFrom, nodeTo)).get();
logger.info("--> waiting for relocation to start");
beginRelocationLatch.await();
logger.info("--> starting disruption");
disruption.startDisrupting();
logger.info("--> waiting for relocation to finish");
receivedShardExistsRequestLatch.await();
logger.info("--> relocation completed (but cluster state processing block still in place)");
return disruption;
}
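For reference, a caller of relocateAndBlockCompletion (as in testIndexCleanup above) is responsible for releasing the block once it has exercised the behaviour under test:
// Usage sketch, mirroring testIndexCleanup: relocate shard 0 of "test" from node_1 to node_3,
// give the ShardActiveRequestHandler's cluster state observer a moment to register,
// then unblock cluster state processing on the target node.
BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, node_1, node_3);
sleep(50);
disruption.stopDisrupting();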