Use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in the elasticsearch project by elastic.
From the class RareClusterStateIT, method testDeleteCreateInOneBulk.
/**
 * Deletes and re-creates an index while one non-master node has its cluster state
 * processing blocked, then checks that once the block is lifted the cluster goes
 * green again and the re-created index contains no documents.
 */
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/14932")
public void testDeleteCreateInOneBulk() throws Exception {
    internalCluster().startNodes(2);
    assertFalse(
        client().admin().cluster()
            .prepareHealth()
            .setWaitForNodes("2")
            .get()
            .isTimedOut());
    prepareCreate("test")
        .setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true)
        .addMapping("type")
        .get();
    ensureGreen("test");

    // now that the cluster is stable, remove publishing timeout
    final String publishTimeoutKey = DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey();
    assertAcked(
        client().admin().cluster()
            .prepareUpdateSettings()
            .setTransientSettings(Settings.builder().put(publishTimeoutKey, "0")));

    // every node except the elected master is a candidate for the block
    final Set<String> nonMasterNodes = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
    nonMasterNodes.remove(internalCluster().getMasterName());

    // block a non-master node.
    final String blockedNode = nonMasterNodes.iterator().next();
    final BlockClusterStateProcessing clusterStateBlock = new BlockClusterStateProcessing(blockedNode, random());
    internalCluster().setDisruptionScheme(clusterStateBlock);

    logger.info("--> indexing a doc");
    index("test", "type", "1");
    refresh();

    clusterStateBlock.startDisrupting();
    logger.info("--> delete index and recreate it");
    // with one node blocked, neither request is expected to be acknowledged within the 200ms timeout
    assertFalse(
        client().admin().indices()
            .prepareDelete("test")
            .setTimeout("200ms")
            .get()
            .isAcknowledged());
    assertFalse(
        prepareCreate("test")
            .setTimeout("200ms")
            .setSettings(IndexMetaData.SETTING_AUTO_EXPAND_REPLICAS, true)
            .get()
            .isAcknowledged());

    logger.info("--> letting cluster proceed");
    clusterStateBlock.stopDisrupting();
    ensureGreen(TimeValue.timeValueMinutes(30), "test");
    // the document indexed before the delete must not be visible in the re-created index
    assertHitCount(client().prepareSearch("test").get(), 0);
}
Use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in the elasticsearch project by elastic.
From the class RareClusterStateIT, method testDelayedMappingPropagationOnPrimary.
/**
 * Verifies that an index request completes successfully when the mapping it needs
 * has been applied on the master but has not yet reached the node holding the primary,
 * because that node's cluster state processing is blocked.
 */
public void testDelayedMappingPropagationOnPrimary() throws Exception {
    // Here we want to test that things go well if there is a first request
    // that adds mappings but before mappings are propagated to all nodes
    // another index request introduces the same mapping. The master node
    // will reply immediately since it did not change the cluster state
    // but the change might not be on the node that performed the indexing
    // operation yet
    final Settings nodeSettings = Settings.builder()
        // explicitly set so it won't default to publish timeout
        .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s")
        // don't wait post commit as we are blocking things by design
        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s")
        .build();
    final List<String> nodeNames = internalCluster().startNodes(2, nodeSettings);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());

    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));

    // first started node that is not the elected master
    final String otherNode = nodeNames.stream()
        .filter(name -> name.equals(master) == false)
        .findFirst()
        .orElse(null);
    assertNotNull(otherNode);

    // Don't allocate the shard on the master node
    final Settings.Builder shardOffMaster = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
        .put("index.routing.allocation.exclude._name", master);
    assertAcked(prepareCreate("index").setSettings(shardOffMaster).get());
    ensureGreen();

    // Check routing tables
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    final List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(1));
    for (ShardRouting shard : shards) {
        if (shard.primary() == false) {
            // only primaries
            fail();
        }
        // primary must not be on the master node
        assertFalse(state.nodes().getMasterNodeId().equals(shard.currentNodeId()));
    }

    // Block cluster state processing where our shard is
    final BlockClusterStateProcessing clusterStateBlock = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(clusterStateBlock);
    clusterStateBlock.startDisrupting();

    // Add a new mapping...
    // (the reference holds either the response or the failure, whichever arrives)
    final AtomicReference<Object> mappingOutcome = new AtomicReference<>();
    client().admin().indices()
        .preparePutMapping("index")
        .setType("type")
        .setSource("field", "type=long")
        .execute(new ActionListener<PutMappingResponse>() {
            @Override
            public void onResponse(PutMappingResponse response) {
                mappingOutcome.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                mappingOutcome.set(e);
            }
        });

    // ...and wait for mappings to be available on master
    final Runnable fieldMappedOnMaster = () -> {
        ImmutableOpenMap<String, MappingMetaData> mappingsByType =
            client().admin().indices().prepareGetMappings("index").get().getMappings().get("index");
        assertNotNull(mappingsByType);
        MappingMetaData typeMapping = mappingsByType.get("type");
        assertNotNull(typeMapping);
        Object properties;
        try {
            properties = typeMapping.getSourceAsMap().get("properties");
        } catch (IOException e) {
            throw new AssertionError(e);
        }
        assertNotNull(properties);
        Object fieldMapping = ((Map<String, Object>) properties).get("field");
        assertNotNull(fieldMapping);
    };
    assertBusy(fieldMappedOnMaster);

    final AtomicReference<Object> indexOutcome = new AtomicReference<>();
    client().prepareIndex("index", "type", "1")
        .setSource("field", 42)
        .execute(new ActionListener<IndexResponse>() {
            @Override
            public void onResponse(IndexResponse response) {
                indexOutcome.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                indexOutcome.set(e);
            }
        });

    // Wait a bit to make sure that the reason why we did not get a response
    // is that cluster state processing is blocked and not just that it takes
    // time to process the indexing request
    Thread.sleep(100);
    assertThat(mappingOutcome.get(), equalTo(null));
    assertThat(indexOutcome.get(), equalTo(null));

    // Now make sure the indexing request finishes successfully
    clusterStateBlock.stopDisrupting();
    final Runnable bothRequestsSucceeded = () -> {
        assertThat(mappingOutcome.get(), instanceOf(PutMappingResponse.class));
        PutMappingResponse mappingResponse = (PutMappingResponse) mappingOutcome.get();
        assertTrue(mappingResponse.isAcknowledged());
        assertThat(indexOutcome.get(), instanceOf(IndexResponse.class));
        IndexResponse indexResponse = (IndexResponse) indexOutcome.get();
        // single primary, no replicas: exactly one shard copy in total
        assertEquals(
            Arrays.toString(indexResponse.getShardInfo().getFailures()),
            1,
            indexResponse.getShardInfo().getTotal());
    };
    assertBusy(bothRequestsSucceeded);
}
Use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in the elasticsearch project by elastic.
From the class IndicesStoreIntegrationIT, method testIndexCleanup.
/**
 * Starts one master-only node and two data nodes holding a one-shard/one-replica index,
 * adds a third data node, relocates the shard copy from the first data node to the third,
 * and checks the shard and index directories on disk of each data node before and after.
 */
public void testIndexCleanup() throws Exception {
    // dedicated master: holds no data (its name is not needed afterwards)
    internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false));
    final String firstDataNode = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    final String secondDataNode = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));

    logger.info("--> creating index [test] with one shard and on replica");
    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(indexSettings())
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)));
    ensureGreen("test");

    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    final Index index = state.metaData().index("test").getIndex();

    logger.info("--> making sure that shard and its replica are allocated on node_1 and node_2");
    assertTrue(Files.exists(shardDirectory(firstDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(firstDataNode, index)));
    assertTrue(Files.exists(shardDirectory(secondDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(secondDataNode, index)));

    logger.info("--> starting node server3");
    final String thirdDataNode = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));

    logger.info("--> running cluster_health");
    ClusterHealthResponse clusterHealth = client().admin().cluster()
        .prepareHealth()
        .setWaitForNodes("4")
        .setWaitForNoRelocatingShards(true)
        .get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));

    // the new node joined but must not hold any data for the index yet
    assertTrue(Files.exists(shardDirectory(firstDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(firstDataNode, index)));
    assertTrue(Files.exists(shardDirectory(secondDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(secondDataNode, index)));
    assertFalse(Files.exists(shardDirectory(thirdDataNode, index, 0)));
    assertFalse(Files.exists(indexDirectory(thirdDataNode, index)));

    logger.info("--> move shard from node_1 to node_3, and wait for relocation to finish");
    if (randomBoolean()) {
        // sometimes add cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler
        final BlockClusterStateProcessing clusterStateBlock =
            relocateAndBlockCompletion(logger, "test", 0, firstDataNode, thirdDataNode);
        // wait a little so that cluster state observer is registered
        sleep(50);
        logger.info("--> stopping disruption");
        clusterStateBlock.stopDisrupting();
    } else {
        final MoveAllocationCommand moveToThirdNode = new MoveAllocationCommand("test", 0, firstDataNode, thirdDataNode);
        internalCluster().client().admin().cluster().prepareReroute().add(moveToThirdNode).get();
    }

    clusterHealth = client().admin().cluster()
        .prepareHealth()
        .setWaitForNoRelocatingShards(true)
        .get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));

    // the relocation source is expected to have been cleaned up, the other two keep their copies
    assertThat(waitForShardDeletion(firstDataNode, index, 0), equalTo(false));
    assertThat(waitForIndexDeletion(firstDataNode, index), equalTo(false));
    assertTrue(Files.exists(shardDirectory(secondDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(secondDataNode, index)));
    assertTrue(Files.exists(shardDirectory(thirdDataNode, index, 0)));
    assertTrue(Files.exists(indexDirectory(thirdDataNode, index)));
}
Use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in the elasticsearch project by elastic.
From the class IndicesStoreIntegrationIT, method relocateAndBlockCompletion.
/**
 * Relocates a shard and blocks cluster state processing on the relocation target node,
 * so that the target cannot process the cluster state that activates the shard.
 * <p>
 * The reroute is issued first; the block is only started once the target node has received
 * the recovery {@code FILES_INFO} request, and the method returns once the target has received
 * a shard-exists request. The returned disruption is still active at that point; the caller
 * is responsible for calling {@code stopDisrupting()} on it.
 *
 * @param logger   logger used for progress messages
 * @param index    name of the index whose shard is moved
 * @param shard    id of the shard to move
 * @param nodeFrom name of the node currently holding the shard copy
 * @param nodeTo   name of the relocation target node, on which cluster state processing is blocked
 * @return the started disruption scheme installed on the internal cluster
 * @throws InterruptedException if interrupted while waiting on one of the latches
 */
public static BlockClusterStateProcessing relocateAndBlockCompletion(Logger logger, String index, int shard, String nodeFrom, String nodeTo) throws InterruptedException {
    final BlockClusterStateProcessing clusterStateBlock = new BlockClusterStateProcessing(nodeTo, random());
    internalCluster().setDisruptionScheme(clusterStateBlock);

    final MockTransportService targetTransport =
        (MockTransportService) internalCluster().getInstance(TransportService.class, nodeTo);
    final ClusterService targetClusterService = internalCluster().getInstance(ClusterService.class, nodeTo);

    final CountDownLatch relocationStarted = new CountDownLatch(1);
    final CountDownLatch shardExistsRequested = new CountDownLatch(1);

    // use a tracer on the target node to track relocation start and end
    targetTransport.addTracer(new MockTransportService.Tracer() {
        @Override
        public void receivedRequest(long requestId, String action) {
            if (action.equals(PeerRecoveryTargetService.Actions.FILES_INFO)) {
                logger.info("received: {}, relocation starts", action);
                relocationStarted.countDown();
                return;
            }
            if (action.equals(IndicesStore.ACTION_SHARD_EXISTS)) {
                // Whenever a node deletes a shard because it was relocated somewhere else, it first
                // checks if enough other copies are started somewhere else. The node sends a ShardActiveRequest
                // to the other nodes that should have a copy according to cluster state.
                shardExistsRequested.countDown();
                logger.info("received: {}, relocation done", action);
                return;
            }
            if (action.equals(PeerRecoveryTargetService.Actions.WAIT_CLUSTERSTATE)) {
                logger.info("received: {}, waiting on cluster state", action);
                // a race with the BlockClusterStateProcessing block that is added below.
                try {
                    assertBusy(() -> assertTrue(
                        targetClusterService.state().routingTable().index(index).shard(shard).primaryShard().relocating()));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
        }
    });

    final MoveAllocationCommand moveShard = new MoveAllocationCommand(index, shard, nodeFrom, nodeTo);
    internalCluster().client().admin().cluster().prepareReroute().add(moveShard).get();

    logger.info("--> waiting for relocation to start");
    relocationStarted.await();
    logger.info("--> starting disruption");
    clusterStateBlock.startDisrupting();
    logger.info("--> waiting for relocation to finish");
    shardExistsRequested.await();
    logger.info("--> relocation completed (but cluster state processing block still in place)");
    return clusterStateBlock;
}
Use of org.elasticsearch.test.disruption.BlockClusterStateProcessing in the elasticsearch project by elastic.
From the class RareClusterStateIT, method testDelayedMappingPropagationOnReplica.
/**
 * Verifies that an index request completes successfully on both shard copies when the
 * mapping it needs is not yet available on the node holding the replica, because that
 * node's cluster state processing is blocked.
 */
public void testDelayedMappingPropagationOnReplica() throws Exception {
    // This is essentially the same thing as testDelayedMappingPropagationOnPrimary
    // but for replicas
    // Here we want to test that everything goes well if the mappings that
    // are needed for a document are not available on the replica at the
    // time of indexing it
    final Settings nodeSettings = Settings.builder()
        // explicitly set so it won't default to publish timeout
        .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s")
        // don't wait post commit as we are blocking things by design
        .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s")
        .build();
    final List<String> nodeNames = internalCluster().startNodes(2, nodeSettings);
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("2").get().isTimedOut());

    final String master = internalCluster().getMasterName();
    assertThat(nodeNames, hasItem(master));

    // first started node that is not the elected master
    final String otherNode = nodeNames.stream()
        .filter(name -> name.equals(master) == false)
        .findFirst()
        .orElse(null);
    assertNotNull(otherNode);

    // Force allocation of the primary on the master node by first only allocating on the master
    // and then allowing all nodes so that the replica gets allocated on the other node
    final Settings.Builder masterOnlyAllocation = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
        .put("index.routing.allocation.include._name", master);
    assertAcked(prepareCreate("index").setSettings(masterOnlyAllocation).get());
    final Settings.Builder anyNodeAllocation = Settings.builder()
        .put("index.routing.allocation.include._name", "");
    assertAcked(client().admin().indices().prepareUpdateSettings("index").setSettings(anyNodeAllocation).get());
    ensureGreen();

    // Check routing tables
    final ClusterState state = client().admin().cluster().prepareState().get().getState();
    assertEquals(master, state.nodes().getMasterNode().getName());
    final List<ShardRouting> shards = state.routingTable().allShards("index");
    assertThat(shards, hasSize(2));
    for (ShardRouting shard : shards) {
        if (shard.primary() == false) {
            assertTrue(shard.active());
            continue;
        }
        // primary must be on the master
        assertEquals(state.nodes().getMasterNodeId(), shard.currentNodeId());
    }

    // Block cluster state processing on the replica
    final BlockClusterStateProcessing clusterStateBlock = new BlockClusterStateProcessing(otherNode, random());
    internalCluster().setDisruptionScheme(clusterStateBlock);
    clusterStateBlock.startDisrupting();

    // the reference holds either the response or the failure, whichever arrives
    final AtomicReference<Object> mappingOutcome = new AtomicReference<>();
    client().admin().indices()
        .preparePutMapping("index")
        .setType("type")
        .setSource("field", "type=long")
        .execute(new ActionListener<PutMappingResponse>() {
            @Override
            public void onResponse(PutMappingResponse response) {
                mappingOutcome.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                mappingOutcome.set(e);
            }
        });

    final Index index = resolveIndex("index");
    // Wait for mappings to be available on master
    final Runnable fieldMappedOnMaster = () -> {
        final IndicesService masterIndices = internalCluster().getInstance(IndicesService.class, master);
        final IndexService masterIndexService = masterIndices.indexServiceSafe(index);
        assertNotNull(masterIndexService);
        final MapperService masterMappers = masterIndexService.mapperService();
        DocumentMapper typeMapper = masterMappers.documentMapper("type");
        assertNotNull(typeMapper);
        assertNotNull(typeMapper.mappers().getMapper("field"));
    };
    assertBusy(fieldMappedOnMaster);

    final AtomicReference<Object> indexOutcome = new AtomicReference<>();
    client().prepareIndex("index", "type", "1")
        .setSource("field", 42)
        .execute(new ActionListener<IndexResponse>() {
            @Override
            public void onResponse(IndexResponse response) {
                indexOutcome.set(response);
            }

            @Override
            public void onFailure(Exception e) {
                indexOutcome.set(e);
            }
        });

    // Wait for document to be indexed on primary
    final Runnable docVisibleOnPrimary = () ->
        assertTrue(client().prepareGet("index", "type", "1").setPreference("_primary").get().isExists());
    assertBusy(docVisibleOnPrimary);

    // The mappings have not been propagated to the replica yet; as a consequence the document could not be indexed
    // We wait on purpose to make sure that the document is not indexed because the shard operation is stalled
    // and not just because it takes time to replicate the indexing request to the replica
    Thread.sleep(100);
    assertThat(mappingOutcome.get(), equalTo(null));
    assertThat(indexOutcome.get(), equalTo(null));

    // Now make sure the indexing request finishes successfully
    clusterStateBlock.stopDisrupting();
    final Runnable bothRequestsSucceeded = () -> {
        assertThat(mappingOutcome.get(), instanceOf(PutMappingResponse.class));
        PutMappingResponse mappingResponse = (PutMappingResponse) mappingOutcome.get();
        assertTrue(mappingResponse.isAcknowledged());
        assertThat(indexOutcome.get(), instanceOf(IndexResponse.class));
        IndexResponse indexResponse = (IndexResponse) indexOutcome.get();
        // both shards should have succeeded
        assertEquals(
            Arrays.toString(indexResponse.getShardInfo().getFailures()),
            2,
            indexResponse.getShardInfo().getTotal());
    };
    assertBusy(bothRequestsSucceeded);
}
Aggregations