Search in sources :

Example 16 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class CorruptedFileIT method testCorruptFileAndRecover.

/**
     * Tests that we can actually recover from a corruption on the primary given that we have replica shards around.
     */
public void testCorruptFileAndRecover() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    // have enough space for 3 copies
    internalCluster().ensureAtLeastNumDataNodes(3);
    if (cluster().numDataNodes() == 3) {
        logger.info("--> cluster has [3] data nodes, corrupted primary will be overwritten");
    }
    assertThat(cluster().numDataNodes(), greaterThanOrEqualTo(3));
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, "1").put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
    false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
    new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    disableAllocation("test");
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    final int numShards = numShards("test");
    ShardRouting corruptedShardRouting = corruptRandomPrimaryFile();
    logger.info("--> {} corrupted", corruptedShardRouting);
    enableAllocation("test");
    /*
         * we corrupted the primary shard - now lets make sure we never recover from it successfully
         */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "2").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    ClusterHealthResponse health = client().admin().cluster().health(Requests.clusterHealthRequest("test").waitForGreenStatus().timeout(// sometimes due to cluster rebalacing and random settings default timeout is just not enough.
    "5m").waitForNoRelocatingShards(true)).actionGet();
    if (health.isTimedOut()) {
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
        assertThat("timed out waiting for green state", health.isTimedOut(), equalTo(false));
    }
    assertThat(health.getStatus(), equalTo(ClusterHealthStatus.GREEN));
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
        SearchResponse response = client().prepareSearch().setSize(numDocs).get();
        assertHitCount(response, numDocs);
    }
    /*
         * now hook into the IndicesService and register a close listener to
         * run the checkindex. if the corruption is still there we will catch it.
         */
    // primary + 2 replicas
    final CountDownLatch latch = new CountDownLatch(numShards * 3);
    final CopyOnWriteArrayList<Exception> exception = new CopyOnWriteArrayList<>();
    final IndexEventListener listener = new IndexEventListener() {

        @Override
        public void afterIndexShardClosed(ShardId sid, @Nullable IndexShard indexShard, Settings indexSettings) {
            if (indexShard != null) {
                Store store = indexShard.store();
                store.incRef();
                try {
                    if (!Lucene.indexExists(store.directory()) && indexShard.state() == IndexShardState.STARTED) {
                        return;
                    }
                    try (CheckIndex checkIndex = new CheckIndex(store.directory())) {
                        BytesStreamOutput os = new BytesStreamOutput();
                        PrintStream out = new PrintStream(os, false, StandardCharsets.UTF_8.name());
                        checkIndex.setInfoStream(out);
                        out.flush();
                        CheckIndex.Status status = checkIndex.checkIndex();
                        if (!status.clean) {
                            logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
                            throw new IOException("index check failure");
                        }
                    }
                } catch (Exception e) {
                    exception.add(e);
                } finally {
                    store.decRef();
                    latch.countDown();
                }
            }
        }
    };
    for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
        eventListener.setNewDelegate(listener);
    }
    try {
        client().admin().indices().prepareDelete("test").get();
        latch.await();
        assertThat(exception, empty());
    } finally {
        for (MockIndexEventListener.TestEventListener eventListener : internalCluster().getDataNodeInstances(MockIndexEventListener.TestEventListener.class)) {
            eventListener.setNewDelegate(null);
        }
    }
}
Also used : MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) PrintStream(java.io.PrintStream) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) IndexShard(org.elasticsearch.index.shard.IndexShard) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) MockFSIndexStore(org.elasticsearch.test.store.MockFSIndexStore) IOException(java.io.IOException) CountDownLatch(java.util.concurrent.CountDownLatch) TransportException(org.elasticsearch.transport.TransportException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) BytesStreamOutput(org.elasticsearch.common.io.stream.BytesStreamOutput) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ShardId(org.elasticsearch.index.shard.ShardId) MockIndexEventListener(org.elasticsearch.test.MockIndexEventListener) IndexEventListener(org.elasticsearch.index.shard.IndexEventListener) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings) Nullable(org.elasticsearch.common.Nullable) CheckIndex(org.apache.lucene.index.CheckIndex) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList)

Example 17 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class CorruptedFileIT method testCorruptPrimaryNoReplica.

/**
     * Tests corruption that happens on a single shard when no replicas are present. We make sure that the primary stays unassigned
     * and all other replicas for the healthy shards happens
     */
public void testCorruptPrimaryNoReplica() throws ExecutionException, InterruptedException, IOException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "0").put(MergePolicyConfig.INDEX_MERGE_ENABLED, false).put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), // no checkindex - we corrupt shards on purpose
    false).put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), // no translog based flush - it might change the .liv / segments.N files
    new ByteSizeValue(1, ByteSizeUnit.PB))));
    ensureGreen();
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test", "type").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    // we have to flush at least once here since we don't corrupt the translog
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);
    ShardRouting shardRouting = corruptRandomPrimaryFile();
    /*
         * we corrupted the primary shard - now lets make sure we never recover from it successfully
         */
    Settings build = Settings.builder().put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, "1").build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    client().admin().cluster().prepareReroute().get();
    boolean didClusterTurnRed = awaitBusy(() -> {
        ClusterHealthStatus test = client().admin().cluster().health(Requests.clusterHealthRequest("test")).actionGet().getStatus();
        return test == ClusterHealthStatus.RED;
    }, 5, // sometimes on slow nodes the replication / recovery is just dead slow
    TimeUnit.MINUTES);
    final ClusterHealthResponse response = client().admin().cluster().health(Requests.clusterHealthRequest("test")).get();
    if (response.getStatus() != ClusterHealthStatus.RED) {
        logger.info("Cluster turned red in busy loop: {}", didClusterTurnRed);
        logger.info("cluster state:\n{}\n{}", client().admin().cluster().prepareState().get().getState(), client().admin().cluster().preparePendingClusterTasks().get());
    }
    assertThat(response.getStatus(), is(ClusterHealthStatus.RED));
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    GroupShardsIterator shardIterators = state.getRoutingTable().activePrimaryShardsGrouped(new String[] { "test" }, false);
    for (ShardIterator iterator : shardIterators) {
        ShardRouting routing;
        while ((routing = iterator.nextOrNull()) != null) {
            if (routing.getId() == shardRouting.getId()) {
                assertThat(routing.state(), equalTo(ShardRoutingState.UNASSIGNED));
            } else {
                assertThat(routing.state(), anyOf(equalTo(ShardRoutingState.RELOCATING), equalTo(ShardRoutingState.STARTED)));
            }
        }
    }
    final List<Path> files = listShardFiles(shardRouting);
    Path corruptedFile = null;
    for (Path file : files) {
        if (file.getFileName().toString().startsWith("corrupted_")) {
            corruptedFile = file;
            break;
        }
    }
    assertThat(corruptedFile, notNullValue());
}
Also used : Path(java.nio.file.Path) ClusterState(org.elasticsearch.cluster.ClusterState) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) ByteSizeValue(org.elasticsearch.common.unit.ByteSizeValue) SearchResponse(org.elasticsearch.action.search.SearchResponse) IndexRequestBuilder(org.elasticsearch.action.index.IndexRequestBuilder) ClusterHealthStatus(org.elasticsearch.cluster.health.ClusterHealthStatus) GroupShardsIterator(org.elasticsearch.cluster.routing.GroupShardsIterator) ShardIterator(org.elasticsearch.cluster.routing.ShardIterator) ShardRouting(org.elasticsearch.cluster.routing.ShardRouting) Settings(org.elasticsearch.common.settings.Settings) IndexSettings(org.elasticsearch.index.IndexSettings)

Example 18 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class IndicesStoreIntegrationIT method testIndexCleanup.

public void testIndexCleanup() throws Exception {
    final String masterNode = internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false));
    final String node_1 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    final String node_2 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    logger.info("--> creating index [test] with one shard and on replica");
    assertAcked(prepareCreate("test").setSettings(Settings.builder().put(indexSettings()).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)));
    ensureGreen("test");
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    Index index = state.metaData().index("test").getIndex();
    logger.info("--> making sure that shard and its replica are allocated on node_1 and node_2");
    assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    logger.info("--> starting node server3");
    final String node_3 = internalCluster().startNode(Settings.builder().put(Node.NODE_MASTER_SETTING.getKey(), false));
    logger.info("--> running cluster_health");
    ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForNodes("4").setWaitForNoRelocatingShards(true).get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));
    assertThat(Files.exists(shardDirectory(node_1, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_1, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(false));
    assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(false));
    logger.info("--> move shard from node_1 to node_3, and wait for relocation to finish");
    if (randomBoolean()) {
        // sometimes add cluster-state delay to trigger observers in IndicesStore.ShardActiveRequestHandler
        BlockClusterStateProcessing disruption = relocateAndBlockCompletion(logger, "test", 0, node_1, node_3);
        // wait a little so that cluster state observer is registered
        sleep(50);
        logger.info("--> stopping disruption");
        disruption.stopDisrupting();
    } else {
        internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, node_1, node_3)).get();
    }
    clusterHealth = client().admin().cluster().prepareHealth().setWaitForNoRelocatingShards(true).get();
    assertThat(clusterHealth.isTimedOut(), equalTo(false));
    assertThat(waitForShardDeletion(node_1, index, 0), equalTo(false));
    assertThat(waitForIndexDeletion(node_1, index), equalTo(false));
    assertThat(Files.exists(shardDirectory(node_2, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_2, index)), equalTo(true));
    assertThat(Files.exists(shardDirectory(node_3, index, 0)), equalTo(true));
    assertThat(Files.exists(indexDirectory(node_3, index)), equalTo(true));
}
Also used : ClusterState(org.elasticsearch.cluster.ClusterState) BlockClusterStateProcessing(org.elasticsearch.test.disruption.BlockClusterStateProcessing) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse) MoveAllocationCommand(org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand) Index(org.elasticsearch.index.Index)

Example 19 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class SimpleNodesInfoIT method testAllocatedProcessors.

public void testAllocatedProcessors() throws Exception {
    List<String> nodesIds = internalCluster().startNodes(Settings.builder().put(EsExecutors.PROCESSORS_SETTING.getKey(), 3).build(), Settings.builder().put(EsExecutors.PROCESSORS_SETTING.getKey(), 6).build());
    final String node_1 = nodesIds.get(0);
    final String node_2 = nodesIds.get(1);
    ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForGreenStatus().setWaitForNodes("2").get();
    logger.info("--> done cluster_health, status {}", clusterHealth.getStatus());
    String server1NodeId = internalCluster().getInstance(ClusterService.class, node_1).state().nodes().getLocalNodeId();
    String server2NodeId = internalCluster().getInstance(ClusterService.class, node_2).state().nodes().getLocalNodeId();
    logger.info("--> started nodes: {} and {}", server1NodeId, server2NodeId);
    NodesInfoResponse response = client().admin().cluster().prepareNodesInfo().execute().actionGet();
    assertThat(response.getNodes().size(), is(2));
    assertThat(response.getNodesMap().get(server1NodeId), notNullValue());
    assertThat(response.getNodesMap().get(server2NodeId), notNullValue());
    assertThat(response.getNodesMap().get(server1NodeId).getOs().getAvailableProcessors(), equalTo(Runtime.getRuntime().availableProcessors()));
    assertThat(response.getNodesMap().get(server2NodeId).getOs().getAvailableProcessors(), equalTo(Runtime.getRuntime().availableProcessors()));
    assertThat(response.getNodesMap().get(server1NodeId).getOs().getAllocatedProcessors(), equalTo(3));
    assertThat(response.getNodesMap().get(server2NodeId).getOs().getAllocatedProcessors(), equalTo(6));
}
Also used : NodesInfoResponse(org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse) ClusterService(org.elasticsearch.cluster.service.ClusterService) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse)

Example 20 with ClusterHealthResponse

use of org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse in project elasticsearch by elastic.

the class SimpleNodesInfoIT method testNodesInfos.

public void testNodesInfos() throws Exception {
    List<String> nodesIds = internalCluster().startNodes(2);
    final String node_1 = nodesIds.get(0);
    final String node_2 = nodesIds.get(1);
    ClusterHealthResponse clusterHealth = client().admin().cluster().prepareHealth().setWaitForGreenStatus().setWaitForNodes("2").get();
    logger.info("--> done cluster_health, status {}", clusterHealth.getStatus());
    String server1NodeId = internalCluster().getInstance(ClusterService.class, node_1).state().nodes().getLocalNodeId();
    String server2NodeId = internalCluster().getInstance(ClusterService.class, node_2).state().nodes().getLocalNodeId();
    logger.info("--> started nodes: {} and {}", server1NodeId, server2NodeId);
    NodesInfoResponse response = client().admin().cluster().prepareNodesInfo().execute().actionGet();
    assertThat(response.getNodes().size(), is(2));
    assertThat(response.getNodesMap().get(server1NodeId), notNullValue());
    assertThat(response.getNodesMap().get(server2NodeId), notNullValue());
    response = client().admin().cluster().nodesInfo(nodesInfoRequest()).actionGet();
    assertThat(response.getNodes().size(), is(2));
    assertThat(response.getNodesMap().get(server1NodeId), notNullValue());
    assertThat(response.getNodesMap().get(server2NodeId), notNullValue());
    response = client().admin().cluster().nodesInfo(nodesInfoRequest(server1NodeId)).actionGet();
    assertThat(response.getNodes().size(), is(1));
    assertThat(response.getNodesMap().get(server1NodeId), notNullValue());
    response = client().admin().cluster().nodesInfo(nodesInfoRequest(server1NodeId)).actionGet();
    assertThat(response.getNodes().size(), is(1));
    assertThat(response.getNodesMap().get(server1NodeId), notNullValue());
    response = client().admin().cluster().nodesInfo(nodesInfoRequest(server2NodeId)).actionGet();
    assertThat(response.getNodes().size(), is(1));
    assertThat(response.getNodesMap().get(server2NodeId), notNullValue());
    response = client().admin().cluster().nodesInfo(nodesInfoRequest(server2NodeId)).actionGet();
    assertThat(response.getNodes().size(), is(1));
    assertThat(response.getNodesMap().get(server2NodeId), notNullValue());
}
Also used : NodesInfoResponse(org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse) ClusterService(org.elasticsearch.cluster.service.ClusterService) ClusterHealthResponse(org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse)

Aggregations

ClusterHealthResponse (org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse)93 Settings (org.elasticsearch.common.settings.Settings)25 Client (org.elasticsearch.client.Client)22 ClusterState (org.elasticsearch.cluster.ClusterState)16 IOException (java.io.IOException)11 SearchResponse (org.elasticsearch.action.search.SearchResponse)10 MoveAllocationCommand (org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand)10 CloseIndexResponse (org.elasticsearch.action.admin.indices.close.CloseIndexResponse)9 OpenIndexResponse (org.elasticsearch.action.admin.indices.open.OpenIndexResponse)9 ClusterHealthRequest (org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest)8 ClusterStateResponse (org.elasticsearch.action.admin.cluster.state.ClusterStateResponse)7 IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder)7 ShardRouting (org.elasticsearch.cluster.routing.ShardRouting)7 Index (org.elasticsearch.index.Index)7 ArrayList (java.util.ArrayList)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 Path (java.nio.file.Path)5 ExecutionException (java.util.concurrent.ExecutionException)5 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)4 NodesInfoResponse (org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse)4