use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.
the class GatewayIndexStateIT method testTwoNodesSingleDoc.
public void testTwoNodesSingleDoc() throws Exception {
logger.info("--> cleaning nodes");
logger.info("--> starting 2 nodes");
internalCluster().startNodes(2);
logger.info("--> indexing a simple document");
client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefreshPolicy(IMMEDIATE).get();
logger.info("--> waiting for green status");
ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
logger.info("--> verify 1 doc in the index");
for (int i = 0; i < 10; i++) {
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
}
logger.info("--> closing test index...");
client().admin().indices().prepareClose("test").execute().actionGet();
ClusterStateResponse stateResponse = client().admin().cluster().prepareState().execute().actionGet();
assertThat(stateResponse.getState().metaData().index("test").getState(), equalTo(IndexMetaData.State.CLOSE));
assertThat(stateResponse.getState().routingTable().index("test"), nullValue());
logger.info("--> opening the index...");
client().admin().indices().prepareOpen("test").execute().actionGet();
logger.info("--> waiting for green status");
health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForGreenStatus().setWaitForNodes("2").execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
logger.info("--> verify 1 doc in the index");
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
for (int i = 0; i < 10; i++) {
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
}
}
use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.
the class GatewayIndexStateIT method testJustMasterNode.
public void testJustMasterNode() throws Exception {
logger.info("--> cleaning nodes");
logger.info("--> starting 1 master node non data");
internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build());
logger.info("--> create an index");
client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).execute().actionGet();
logger.info("--> closing master node");
internalCluster().closeNonSharedNodes(false);
logger.info("--> starting 1 master node non data again");
internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build());
logger.info("--> waiting for test index to be created");
ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setIndices("test").execute().actionGet();
assertThat(health.isTimedOut(), equalTo(false));
logger.info("--> verify we have an index");
ClusterStateResponse clusterStateResponse = client().admin().cluster().prepareState().setIndices("test").execute().actionGet();
assertThat(clusterStateResponse.getState().metaData().hasIndex("test"), equalTo(true));
}
use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.
the class IndexWithShadowReplicasIT method testNodeJoinsWithoutShadowReplicaConfigured.
public void testNodeJoinsWithoutShadowReplicaConfigured() throws Exception {
Path dataPath = createTempDir();
Settings nodeSettings = nodeSettings(dataPath);
internalCluster().startNodes(2, nodeSettings);
String IDX = "test";
Settings idxSettings = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2).put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString()).put(IndexMetaData.SETTING_SHADOW_REPLICAS, true).put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true).build();
prepareCreate(IDX).setSettings(idxSettings).addMapping("doc", "foo", "type=text").get();
client().prepareIndex(IDX, "doc", "1").setSource("foo", "bar").get();
client().prepareIndex(IDX, "doc", "2").setSource("foo", "bar").get();
flushAndRefresh(IDX);
internalCluster().startNodes(1);
ensureYellow(IDX);
final ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).execute().actionGet();
assertThat(clusterHealth.getNumberOfNodes(), equalTo(3));
// the new node is not configured for a shadow replica index, so no shards should have been assigned to it
assertThat(clusterHealth.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
}
use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.
the class CorruptedFileIT method testCorruptFileAndRecover.
/**
* Tests that we can actually recover from a corruption on the primary given that we have replica shards around.
*/
public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException {
int numDocs = scaledRandomIntBetween(100, 1000);
// have enough space for 3 copies
internalCluster().ensureAtLeastNumDataNodes(3);
if (cluster().numDataNodes() == 3) {
logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
}
assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));
assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
new ByteSizeValue(1, ByteSizeUnit.PB))));
ensureGreen();
disableAllocation("test");
IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
}
indexRandom(true, builders);
ensureGreen();
assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
// we have to flush at least once here since we don't corrupt the translog
SearchResponse countResponse = client().prepareSearch().setSize(0).get();
assertHitCount(countResponse, numDocs);
final int numShards = numShards("test");
ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
logger.info("--> {} corrupted", corruptedShardRouting);
enableAllocation("test");
/*
* we corrupted the primary shard - now lets make sure we never recover from it successfully
*/
Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
ClusterHealthResponse health = client().admin().cluster().health(Requests.clusterHealthRequest("test").waitForGreenStatus().timeout(// sometimes due to cluster rebalacing and random settings default timeout is just not enough.
"5m").waitForNoRelocatingShards(true)).actionGet();
if (health.isTimedOut()) {
logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
}
assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
final int numIterations = scaledRandomIntBetween(5, 20);
for (int i = 0; i < numIterations; i++) {
SearchResponse response = client().prepareSearch().setSize(numDocs).get();
assertHitCount(response, numDocs);
}
/*
* now hook into the IndicesService and register a close listener to
* run the checkindex. if the corruption is still there we will catch it.
*/
// primary + 2 replicas
final CountDownLatch latch = new CountDownLatch(numShards * 3);
final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
final IndexEventListener listener = new IndexEventListener() {
@Override
public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
if (indexShard != null) {
Store store = indexShard.store();
store.incRef();
try {
if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) {
return;
}
try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
BytesStreamOutput os = new BytesStreamOutput();
PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
checkIndex.setInfoStream(out);
out.flush();
CheckIndex.Status status = checkIndex.checkIndex();
if (!status.clean) {
logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
throw new IOException("index check failure");
}
}
} catch (Exception e) {
exception.add(e);
} finally {
store.decRef();
latch.countDown();
}
}
}
};
for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
eventListener.setNewDelegate(listener);
}
try {
client().admin().indices().prepareDelete("test").get();
latch.await();
assertThat(exception, empty());
} finally {
for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
eventListener.setNewDelegate(null);
}
}
}
use of org.graylog.shaded.elasticsearch7.org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.
the class CorruptedFileIT method testCorruptPrimaryNoReplica.
/**
* Tests corruption that happens on a single shard when no replicas are present. We make sure that the primary stays unassigned
* and all other replicas for the healthy shards happens
*/
public void testCorruptPrimaryNoReplica() throws ExecutionException, InterruptedException, IOException {
int numDocs = scaledRandomIntBetween(100, 1000);
internalCluster().ensureAtLeastNumDataNodes(2);
assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
new ByteSizeValue(1, ByteSizeUnit.PB))));
ensureGreen();
IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
}
indexRandom(true, builders);
ensureGreen();
assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
// we have to flush at least once here since we don't corrupt the translog
SearchResponse countResponse = client().prepareSearch().setSize(0).get();
assertHitCount(countResponse, numDocs);
ShardRouting shardRouting = corruptRandomPrimaryFile();
/*
* we corrupted the primary shard - now lets make sure we never recover from it successfully
*/
Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").build();
client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
client().admin().cluster().prepareReroute().get();
boolean didClusterTurnRed = awaitBusy(() -> {
ClusterHealthStatus test = client().admin().cluster().health(Requests.clusterHealthRequest("test")).actionGet().getStatus();
return test == ClusterHealthStatus.RED;
}, 5, // sometimes on slow nodes the replication / recovery is just dead slow
TimeUnit.MINUTES);
final ClusterHealthResponse response = client().admin().cluster().health(Requests.clusterHealthRequest("test")).get();
if (response.getStatus() != ClusterHealthStatus.RED) {
logger.info("Cluster turned red in busy loop: {}", didClusterTurnRed);
logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
}
assertThat(response.getStatus(), is(ClusterHealthStatus.RED));
ClusterState state = client().admin().cluster().prepareState().get().getState();
GroupShardsIterator shardIterators = state.getRoutingTable().activePrimaryShardsGrouped(new String[] { "test" }, false);
for (ShardIterator iterator : shardIterators) {
ShardRouting routing;
while ((routing = iterator.nextOrNull()) != null) {
if (routing.getId() == shardRouting.getId()) {
assertThat(routing.state(), equalTo(ShardRoutingState.UNASSIGNED));
} else {
assertThat(routing.state(), anyOf(equalTo(ShardRoutingState.RELOCATING), equalTo(ShardRoutingState.STARTED)));
}
}
}
final List<Path> files = listShardFiles(shardRouting);
Path corruptedFile = null;
for (Path file : files) {
if (file.getFileName().toString().startsWith("corrupted_")) {
corruptedFile = file;
break;
}
}
assertThat(corruptedFile, notNullValue());
}
Aggregations