Search in sources :

Example 26 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class GatewayIndexStateIT method testTwoNodesSingleDoc.

/**
 * Starts two nodes, indexes a single document, and checks that the document is still
 * searchable after the index has been closed and re-opened.
 */
public void testTwoNodesSingleDoc() throws Exception {
    logger.info("--> cleaning nodes");
    logger.info("--> starting 2 nodes");
    internalCluster().startNodes(2);

    logger.info("--> indexing a simple document");
    client().prepareIndex("test", "type1", "1")
        .setSource("field1", "value1")
        .setRefreshPolicy(IMMEDIATE)
        .get();

    logger.info("--> waiting for green status");
    final ClusterHealthResponse healthBeforeClose = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("2")
        .execute()
        .actionGet();
    assertThat(healthBeforeClose.isTimedOut(), equalTo(false));

    logger.info("--> verify 1 doc in the index");
    // Repeat the search several times; every round must report exactly one hit.
    int round = 0;
    while (round < 10) {
        assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
        round++;
    }

    logger.info("--> closing test index...");
    client().admin().indices().prepareClose("test").execute().actionGet();

    // A closed index keeps its metadata (state CLOSE) but has no routing table entry.
    final ClusterStateResponse closedState = client().admin().cluster().prepareState().execute().actionGet();
    assertThat(closedState.getState().metaData().index("test").getState(), equalTo(IndexMetaData.State.CLOSE));
    assertThat(closedState.getState().routingTable().index("test"), nullValue());

    logger.info("--> opening the index...");
    client().admin().indices().prepareOpen("test").execute().actionGet();

    logger.info("--> waiting for green status");
    final ClusterHealthResponse healthAfterOpen = client().admin().cluster().prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForGreenStatus()
        .setWaitForNodes("2")
        .execute()
        .actionGet();
    assertThat(healthAfterOpen.isTimedOut(), equalTo(false));

    logger.info("--> verify 1 doc in the index");
    // One search up front, then ten more rounds, all expecting a single hit.
    assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
    round = 0;
    while (round < 10) {
        assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), 1L);
        round++;
    }
}
Also used : ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ClusterStateResponse(org.elasticsearch.action.admin.cluster.state.ClusterStateResponse)

Example 27 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class GatewayIndexStateIT method testJustMasterNode.

/**
 * Creates an index on a single node that has the data setting disabled, restarts that node,
 * and checks that the index is still present in the cluster state metadata.
 */
public void testJustMasterNode() throws Exception {
    logger.info("--> cleaning nodes");

    logger.info("--> starting 1 master node non data");
    // Node settings with the data role switched off; reused for the restart below.
    final Settings nonDataNodeSettings = Settings.builder()
        .put(Node.NODE_DATA_SETTING.getKey(), false)
        .build();
    internalCluster().startNode(nonDataNodeSettings);

    logger.info("--> create an index");
    // ActiveShardCount.NONE: do not wait for any shard copy to become active.
    client().admin().indices()
        .prepareCreate("test")
        .setWaitForActiveShards(ActiveShardCount.NONE)
        .execute()
        .actionGet();

    logger.info("--> closing master node");
    internalCluster().closeNonSharedNodes(false);

    logger.info("--> starting 1 master node non data again");
    internalCluster().startNode(nonDataNodeSettings);

    logger.info("--> waiting for test index to be created");
    final ClusterHealthResponse indexHealth = client().admin().cluster()
        .prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setIndices("test")
        .execute()
        .actionGet();
    assertThat(indexHealth.isTimedOut(), equalTo(false));

    logger.info("--> verify we have an index");
    final ClusterStateResponse stateAfterRestart = client().admin().cluster()
        .prepareState()
        .setIndices("test")
        .execute()
        .actionGet();
    assertThat(stateAfterRestart.getState().metaData().hasIndex("test"), equalTo(true));
}
Also used : ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ClusterStateResponse(org.elasticsearch.action.admin.cluster.state.ClusterStateResponse)

Example 28 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class IndexWithShadowReplicasIT method testNodeJoinsWithoutShadowReplicaConfigured.

/**
 * Starts two nodes with the shared-path node settings, creates a shadow-replica index with
 * one shard and two replicas, then adds a third node started without those settings and
 * checks that the cluster reports three nodes while staying YELLOW.
 */
public void testNodeJoinsWithoutShadowReplicaConfigured() throws Exception {
    final Path sharedDataPath = createTempDir();
    final Settings configuredNodeSettings = nodeSettings(sharedDataPath);
    internalCluster().startNodes(2, configuredNodeSettings);

    final String indexName = "test";
    final Settings shadowIndexSettings = Settings.builder()
        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
        .put(IndexMetaData.SETTING_DATA_PATH, sharedDataPath.toAbsolutePath().toString())
        .put(IndexMetaData.SETTING_SHADOW_REPLICAS, true)
        .put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true)
        .build();
    prepareCreate(indexName)
        .setSettings(shadowIndexSettings)
        .addMapping("doc", "foo", "type=text")
        .get();

    // Index two identical documents under ids "1" and "2".
    for (String docId : new String[] { "1", "2" }) {
        client().prepareIndex(indexName, "doc", docId).setSource("foo", "bar").get();
    }
    flushAndRefresh(indexName);

    // Third node joins with default settings (no node settings passed).
    internalCluster().startNodes(1);
    ensureYellow(indexName);

    final ClusterHealthResponse healthWithThreeNodes = client().admin().cluster()
        .prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .execute()
        .actionGet();
    assertThat(healthWithThreeNodes.getNumberOfNodes(), equalTo(3));
    // the new node is not configured for a shadow replica index, so no shards should have been assigned to it
    assertThat(healthWithThreeNodes.getStatus(), equalTo(ClusterHealthStatus.YELLOW));
}
Also used : Path(java.nio.file.Path) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) Settings(org.elasticsearch.common.settings.Settings)

Example 29 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class CorruptedFileIT method testCorruptFileAndRecover.

/**
 * Tests that we can actually recover from a corruption on the primary given that we have replica shards around.
 * <p>
 * Outline of what the method does:
 * <ol>
 *   <li>ensures at least 3 data nodes and creates index "test" with 1 shard and 1 replica;</li>
 *   <li>indexes a random number of documents, force-flushes and verifies the hit count;</li>
 *   <li>calls {@code corruptRandomPrimaryFile()}, raises the replica count to 2 and waits (up to 5 minutes)
 *       for GREEN;</li>
 *   <li>installs an {@link IndexEventListener} that runs Lucene's {@link CheckIndex} on every closed shard,
 *       deletes the index, and asserts that no check failed.</li>
 * </ol>
 *
 * @throws ExecutionException   declared by the test; propagated from the helpers it calls
 * @throws InterruptedException if the thread is interrupted, e.g. while waiting on the latch
 * @throws IOException          declared by the test; propagated from the helpers it calls
 */
public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    // have enough space for 3 copies
    internalCluster().ensureAtLeastNumDataNodes(3);
    if (cluster().numDataNodes() == 3) {
        logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
    }
    assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));
    // Single shard, one replica, merging disabled; the trailing comments below explain the remaining two settings.
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
    false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
    new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    // Allocation is switched off while indexing and corrupting; it is re-enabled right after the corruption.
    disableAllocation("test");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    // we have to flush at least once here since we don't corrupt the translog
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // Sanity check before corrupting anything: all indexed documents are searchable.
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    final int numShards = numShards("test");
    ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
    logger.info("--> {} corrupted", corruptedShardRouting);
    enableAllocation("test");
    /*
     * we corrupted the primary shard - now raise the replica count to 2 and make sure the
     * cluster still reaches green, i.e. that we do recover from the corruption
     */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    ClusterHealthResponse health = client().admin().cluster().health(Requests.clusterHealthRequest("test").waitForGreenStatus().timeout(// sometimes due to cluster rebalancing and random settings default timeout is just not enough.
    "5m").waitForNoRelocatingShards(true)).actionGet();
    if (health.isTimedOut()) {
        // Dump cluster state and pending tasks for diagnosis; the assertion below then fails the test.
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
        assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
    }
    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
    // Search repeatedly after recovery; every round must return all documents.
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
        SearchResponse response = client().prepareSearch().setSize(numDocs).get();
        assertHitCount(response, numDocs);
    }
    /*
     * now hook into the IndicesService and register a close listener to
     * run the checkindex. if the corruption is still there we will catch it.
     */
    // primary + 2 replicas
    final CountDownLatch latch = new CountDownLatch(numShards * 3);
    // Failures raised inside the listener are collected here and asserted on after the index is deleted.
    final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
    final IndexEventListener listener = new IndexEventListener() {

        @Override
        public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
            // NOTE(review): the latch is only counted down when indexShard is non-null.
            if (indexShard != null) {
                Store store = indexShard.store();
                // Hold a reference on the store while it is being checked; released in the finally block.
                store.incRef();
                try {
                    // Skip the check when the directory holds no Lucene index and the shard state is STARTED.
                    // The finally block still runs on this early return, so the latch is counted down.
                    if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) {
                        return;
                    }
                    try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
                        // CheckIndex diagnostics are captured in memory so they can be logged on failure.
                        BytesStreamOutput os = new BytesStreamOutput();
                        PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                        checkIndex.setInfoStream(out);
                        // NOTE(review): this flush happens before checkIndex() has written anything to the stream.
                        out.flush();
                        CheckIndex.Status status = checkIndex.checkIndex();
                        if (!status.clean) {
                            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                            throw new IOException("index check failure");
                        }
                    }
                } catch (Exception e) {
                    // Record instead of rethrowing; the test thread asserts the list is empty later.
                    exception.add(e);
                } finally {
                    store.decRef();
                    latch.countDown();
                }
            }
        }
    };
    // Install the checking listener on the TestEventListener instance of every data node.
    for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(listener);
    }
    try {
        // Deleting the index closes its shards, which triggers the listener above.
        client().admin().indices().prepareDelete("test").get();
        // NOTE(review): await() has no timeout; if fewer than numShards * 3 callbacks count down, this blocks forever.
        latch.await();
        assertThat(exception, empty());
    } finally {
        // Always remove the delegate again, whether or not the assertions passed.
        for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
            eventListener.setNewDelegate(null);
        }
    }
}
Also used : MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) PrintStream(java.io.PrintStream) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) IndexShard(org.elasticsearch.index.shard.IndexShard) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) MockFSIndexStore(org.elasticsearch.test.store.MockFSIndexStore) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) TransportException(org.elasticsearch.transport.TransportException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ShardId(org.elasticsearch.index.shard.ShardId) MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) IndexEventListener(org.elasticsearch.index.shard.IndexEventListener) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) Nullable(org.elasticsearch.common.Nullable) CheckIndex(org.apache.lucene.index.CheckIndex) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 30 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class CorruptedFileIT method testCorruptPrimaryNoReplica.

/**
 * Tests corruption of a primary shard in an index that was created without replicas.
 * After the corruption the replica count is raised to 1 and a reroute is requested; the test
 * then expects the index health to be RED, checks the routing state of the primaries returned
 * for the index, and finally expects a file whose name starts with {@code corrupted_} among
 * the files of the corrupted shard.
 */
public void testCorruptPrimaryNoReplica() throws ExecutionException, InterruptedException, IOException {
    final int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);

    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0")
                .put(MergePolicyConfig.INDEX_MERGE_ENABLED, false)
                // no checkindex - we corrupt shards on purpose
                .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
                // no translog based flush - it might change the .liv / segments.N files
                .put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(1, ByteSizeUnit.PB))
        )
    );
    ensureGreen();

    final IndexRequestBuilder[] indexRequests = new IndexRequestBuilder[numDocs];
    for (int doc = 0; doc < indexRequests.length; doc++) {
        indexRequests[doc] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, indexRequests);
    ensureGreen();

    // we have to flush at least once here since we don't corrupt the translog
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    final SearchResponse totalHits = client().prepareSearch().setSize(0).get();
    assertHitCount(totalHits, numDocs);

    final ShardRouting shardRouting = corruptRandomPrimaryFile();

    /*
     * we corrupted the primary shard - now lets make sure we never recover from it successfully
     */
    final Settings oneReplica = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(oneReplica).get();
    client().admin().cluster().prepareReroute().get();

    // sometimes on slow nodes the replication / recovery is just dead slow, hence the 5 minute budget
    final boolean didClusterTurnRed = awaitBusy(
        () -> client().admin().cluster().health(Requests.clusterHealthRequest("test")).actionGet().getStatus() == ClusterHealthStatus.RED,
        5,
        TimeUnit.MINUTES
    );

    final ClusterHealthResponse finalHealth = client().admin().cluster().health(Requests.clusterHealthRequest("test")).get();
    final boolean isRed = finalHealth.getStatus() == ClusterHealthStatus.RED;
    if (!isRed) {
        // Log diagnostics before the assertion below fails.
        logger.info("Cluster turned red in busy loop: {}", didClusterTurnRed);
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
    }
    assertThat(finalHealth.getStatus(), is(ClusterHealthStatus.RED));

    final ClusterState clusterState = client().admin().cluster().prepareState().get().getState();
    final GroupShardsIterator primaryGroups = clusterState.getRoutingTable().activePrimaryShardsGrouped(new String[] { "test" }, false);
    for (ShardIterator group : primaryGroups) {
        // Drain the iterator; nextOrNull() signals exhaustion with null.
        for (ShardRouting current = group.nextOrNull(); current != null; current = group.nextOrNull()) {
            if (current.getId() != shardRouting.getId()) {
                assertThat(current.state(), anyOf(equalTo(ShardRoutingState.RELOCATING), equalTo(ShardRoutingState.STARTED)));
            } else {
                assertThat(current.state(), equalTo(ShardRoutingState.UNASSIGNED));
            }
        }
    }

    // Look for the first shard file whose name carries the "corrupted_" prefix.
    final List<Path> shardFiles = listShardFiles(shardRouting);
    final Path corruptedFile = shardFiles.stream()
        .filter(candidate -> candidate.getFileName().toString().startsWith("corrupted_"))
        .findFirst()
        .orElse(null);
    assertThat(corruptedFile, notNullValue());
}
Also used : Path(java.nio.file.Path) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ClusterHealthStatus(org.elasticsearch.cluster.health.ClusterHealthStatus) GroupShardsIterator(org.elasticsearch.cluster.routing.GroupShardsIterator) ShardIterator(org.elasticsearch.cluster.routing.ShardIterator) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Aggregations

ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse): 93; Settings (org.elasticsearch.common.settings.Settings): 25; Client (org.elasticsearch.client.Client): 22; ClusterState (org.elasticsearch.cluster.ClusterState): 16; IOException (java.io.IOException): 11; SearchResponse (org.elasticsearch.action.search.SearchResponse): 10; MoveAllocationCommand (org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand): 10; CloseIndexResponse (org.elasticsearch.action.admin.indices.close.CloseIndexResponse): 9; OpenIndexResponse (org.elasticsearch.action.admin.indices.open.OpenIndexResponse): 9; ClusterHealthRequest (org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest): 8; ClusterStateResponse (org.elasticsearch.action.admin.cluster.state.ClusterStateResponse): 7; IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder): 7; ShardRouting (org.elasticsearch.cluster.routing.ShardRouting): 7; Index (org.elasticsearch.index.Index): 7; ArrayList (java.util.ArrayList): 6; CountDownLatch (java.util.concurrent.CountDownLatch): 6; Path (java.nio.file.Path): 5; ExecutionException (java.util.concurrent.ExecutionException): 5; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 4; NodesInfoResponse (org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse): 4