Search in sources :

Example 1 with RecoveryResponse

use of org.opensearch.action.admin.indices.recovery.RecoveryResponse in project OpenSearch by opensearch-project.

the class ZenDiscoveryIT method testNoShardRelocationsOccurWhenElectedMasterNodeFails.

/**
 * Starts a four-node cluster (two nodes from {@code masterOnlyNode()} settings, two from
 * {@code dataNode()} settings), stops the currently elected master and verifies that the number
 * of recoveries reported for the {@code test} index is the same before and after a different
 * master has taken over.
 *
 * @throws Exception if waiting for the new master or any cluster call fails
 */
public void testNoShardRelocationsOccurWhenElectedMasterNodeFails() throws Exception {
    final String index = "test";

    final Settings masterOnlySettings = masterOnlyNode();
    internalCluster().startNodes(2, masterOnlySettings);
    final Settings dataOnlySettings = dataNode();
    internalCluster().startNodes(2, dataOnlySettings);

    // Wait until all four nodes have joined and nothing is relocating before creating the index.
    final ClusterHealthResponse health = client().admin()
        .cluster()
        .prepareHealth()
        .setWaitForEvents(Priority.LANGUID)
        .setWaitForNodes("4")
        .setWaitForNoRelocatingShards(true)
        .get();
    assertThat(health.isTimedOut(), is(false));

    createIndex(index);
    ensureSearchable(index);
    final RecoveryResponse recoveriesBefore = client().admin().indices().prepareRecoveries(index).get();
    final int recoveryCountBefore = recoveriesBefore.shardRecoveryStates().get(index).size();

    // Take down the elected master and wait for a different node to be reported as master.
    final String previousMaster = internalCluster().getMasterName();
    internalCluster().stopCurrentMasterNode();
    assertBusy(() -> {
        final String electedMaster = internalCluster().getMasterName();
        assertThat(electedMaster, notNullValue());
        assertThat(electedMaster, not(equalTo(previousMaster)));
    });

    ensureSearchable(index);
    final RecoveryResponse recoveriesAfter = client().admin().indices().prepareRecoveries(index).get();
    final int recoveryCountAfter = recoveriesAfter.shardRecoveryStates().get(index).size();

    // The master change must not have added any recovery for the index.
    assertThat(recoveryCountAfter, equalTo(recoveryCountBefore));
}
Also used : ClusterHealthResponse(org.opensearch.action.admin.cluster.health.ClusterHealthResponse) Matchers.containsString(org.hamcrest.Matchers.containsString) Settings(org.opensearch.common.settings.Settings) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse)

Example 2 with RecoveryResponse

use of org.opensearch.action.admin.indices.recovery.RecoveryResponse in project OpenSearch by opensearch-project.

the class RemoveCorruptedShardDataCommandIT method testCorruptTranslogTruncationOfReplica.

/**
 * Corrupts a translog file in the shard data directory on {@code node2}, runs
 * {@link RemoveCorruptedShardDataCommand} against that directory while the node is stopped,
 * then restarts the node and checks that the index goes green, every node holding the index
 * returns all documents, the non-primary copy recovered at least one file, and the global and
 * local checkpoints equal the max sequence number.
 *
 * @throws Exception if any cluster operation, the command execution or a busy-wait fails
 */
public void testCorruptTranslogTruncationOfReplica() throws Exception {
    internalCluster().startMasterOnlyNode();
    final String node1 = internalCluster().startDataOnlyNode();
    final String node2 = internalCluster().startDataOnlyNode();
    logger.info("--> nodes name: {}, {}", node1, node2);
    final String indexName = "test";
    // One shard with one replica; node2 is excluded from allocation at first, presumably so the
    // primary is allocated on node1 (only yellow health is awaited at this point).
    assertAcked(prepareCreate(indexName).setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "-1").put(MockEngineSupport.DISABLE_FLUSH_ON_CLOSE.getKey(), // never flush - always recover from translog
    true).put("index.routing.allocation.exclude._name", node2)));
    ensureYellow();
    // Clear the allocation exclusion and wait for green, i.e. for the replica to be assigned.
    assertAcked(client().admin().indices().prepareUpdateSettings(indexName).setSettings(Settings.builder().put("index.routing.allocation.exclude._name", (String) null)));
    ensureGreen();
    // Index some documents
    int numDocsToKeep = randomIntBetween(0, 100);
    logger.info("--> indexing [{}] docs to be kept", numDocsToKeep);
    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocsToKeep];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex(indexName).setSource("foo", "bar");
    }
    indexRandom(false, false, false, Arrays.asList(builders));
    // Flush the first batch, then disable translog flushing before indexing the second batch.
    flush(indexName);
    disableTranslogFlush(indexName);
    // having no extra docs is an interesting case for seq no based recoveries - test it more often
    int numDocsToTruncate = randomBoolean() ? 0 : randomIntBetween(0, 100);
    logger.info("--> indexing [{}] more docs to be truncated", numDocsToTruncate);
    builders = new IndexRequestBuilder[numDocsToTruncate];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex(indexName).setSource("foo", "bar");
    }
    indexRandom(false, false, false, Arrays.asList(builders));
    final int totalDocs = numDocsToKeep + numDocsToTruncate;
    // sample the replica node translog dirs
    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    final Path translogDir = getPathToShardData(node2, shardId, ShardPath.TRANSLOG_FOLDER_NAME);
    // Remember both nodes' data path settings so they can be restarted on the same data below.
    final Settings node1PathSettings = internalCluster().dataPathSettings(node1);
    final Settings node2PathSettings = internalCluster().dataPathSettings(node2);
    // Wait until every GatewayMetaState instance reports all pending async states as written.
    assertBusy(() -> internalCluster().getInstances(GatewayMetaState.class).forEach(gw -> assertTrue(gw.allPendingAsyncStatesWritten())));
    // stop data nodes
    internalCluster().stopRandomDataNode();
    internalCluster().stopRandomDataNode();
    // Corrupt the translog file(s) on the replica
    logger.info("--> corrupting translog");
    TestTranslog.corruptRandomTranslogFile(logger, random(), translogDir);
    // Start the node with the non-corrupted data path
    logger.info("--> starting node");
    internalCluster().startNode(node1PathSettings);
    ensureYellow();
    // Run a search and make sure it succeeds
    assertHitCount(client().prepareSearch(indexName).setQuery(matchAllQuery()).get(), totalDocs);
    // check replica corruption
    final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
    final MockTerminal terminal = new MockTerminal();
    final OptionParser parser = command.getParser();
    // Environment built from the cluster defaults plus node2's data paths, so the command runs
    // against the stopped node's data.
    final Environment environment = TestEnvironment.newEnvironment(Settings.builder().put(internalCluster().getDefaultSettings()).put(node2PathSettings).build());
    // Queue "y" as terminal input (presumably the answer to the command's confirmation prompt).
    terminal.addTextInput("y");
    OptionSet options = parser.parse("-d", translogDir.toAbsolutePath().toString());
    logger.info("--> running command for [{}]", translogDir.toAbsolutePath());
    command.execute(terminal, options, environment);
    logger.info("--> output:\n{}", terminal.getOutput());
    logger.info("--> starting the replica node to test recovery");
    internalCluster().startNode(node2PathSettings);
    ensureGreen(indexName);
    // Every node that holds the index must return the full document count on its own.
    for (String node : internalCluster().nodesInclude(indexName)) {
        assertHitCount(client().prepareSearch(indexName).setPreference("_only_nodes:" + node).setQuery(matchAllQuery()).get(), totalDocs);
    }
    final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(indexName).setActiveOnly(false).get();
    // Pick the first recovery state that is not for a primary; get() throws if there is none.
    final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get(indexName).stream().filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
    // the replica translog was disabled so it doesn't know what the global checkpoint is and thus can't do ops based recovery
    assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
    // Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
    final SeqNoStats seqNoStats = getSeqNoStats(indexName, 0);
    assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
    assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
}
Also used : Path(java.nio.file.Path) IndexRequestBuilder(org.opensearch.action.index.IndexRequestBuilder) SeqNoStats(org.opensearch.index.seqno.SeqNoStats) Arrays(java.util.Arrays) TranslogCorruptedException(org.opensearch.index.translog.TranslogCorruptedException) MockTerminal(org.opensearch.cli.MockTerminal) Matchers.not(org.hamcrest.Matchers.not) ByteSizeUnit(org.opensearch.common.unit.ByteSizeUnit) AllocateStalePrimaryAllocationCommand(org.opensearch.cluster.routing.allocation.command.AllocateStalePrimaryAllocationCommand) ObjectObjectCursor(com.carrotsearch.hppc.cursors.ObjectObjectCursor) ClusterAllocationExplanation(org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplanation) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) Matcher(java.util.regex.Matcher) OpenSearchAssertions.assertHitCount(org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount) QueryBuilders.matchAllQuery(org.opensearch.index.query.QueryBuilders.matchAllQuery) RecoveryState(org.opensearch.indices.recovery.RecoveryState) Directory(org.apache.lucene.store.Directory) Map(java.util.Map) Matchers.nullValue(org.hamcrest.Matchers.nullValue) OptionParser(joptsimple.OptionParser) SearchRequestBuilder(org.opensearch.action.search.SearchRequestBuilder) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) Path(java.nio.file.Path) OptionSet(joptsimple.OptionSet) NodeEnvironment(org.opensearch.env.NodeEnvironment) CollectionUtils.iterableAsArrayList(org.opensearch.common.util.CollectionUtils.iterableAsArrayList) Matchers.notNullValue(org.hamcrest.Matchers.notNullValue) Index(org.opensearch.index.Index) Matchers.allOf(org.hamcrest.Matchers.allOf) Collection(java.util.Collection) ShardAllocationDecision(org.opensearch.cluster.routing.allocation.ShardAllocationDecision) IndicesService(org.opensearch.indices.IndicesService) ExceptionsHelper(org.opensearch.ExceptionsHelper) Set(java.util.Set) Settings(org.opensearch.common.settings.Settings) 
Collectors(java.util.stream.Collectors) Matchers.startsWith(org.hamcrest.Matchers.startsWith) IndexWriter(org.apache.lucene.index.IndexWriter) GatewayMetaState(org.opensearch.gateway.GatewayMetaState) List(java.util.List) Stream(java.util.stream.Stream) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexSettings(org.opensearch.index.IndexSettings) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Pattern(java.util.regex.Pattern) OpenSearchIntegTestCase(org.opensearch.test.OpenSearchIntegTestCase) Matchers.containsString(org.hamcrest.Matchers.containsString) PathUtils(org.opensearch.common.io.PathUtils) FS(org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest.Metric.FS) TestEnvironment(org.opensearch.env.TestEnvironment) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) RandomPicks(com.carrotsearch.randomizedtesting.generators.RandomPicks) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) FlushRequest(org.opensearch.action.admin.indices.flush.FlushRequest) ShardIterator(org.opensearch.cluster.routing.ShardIterator) LockObtainFailedException(org.apache.lucene.store.LockObtainFailedException) NativeFSLockFactory(org.apache.lucene.store.NativeFSLockFactory) ByteSizeValue(org.opensearch.common.unit.ByteSizeValue) HashMap(java.util.HashMap) MockTransportService(org.opensearch.test.transport.MockTransportService) InternalTestCluster(org.opensearch.test.InternalTestCluster) ClusterState(org.opensearch.cluster.ClusterState) Lock(org.apache.lucene.store.Lock) InternalSettingsPlugin(org.opensearch.test.InternalSettingsPlugin) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) Matchers.hasSize(org.hamcrest.Matchers.hasSize) StreamSupport(java.util.stream.StreamSupport) CorruptionUtils(org.opensearch.test.CorruptionUtils) FSDirectory(org.apache.lucene.store.FSDirectory) AllocationDecision(org.opensearch.cluster.routing.allocation.AllocationDecision) Environment(org.opensearch.env.Environment) 
OpenSearchAssertions.assertAcked(org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked) MockEngineFactoryPlugin(org.opensearch.index.MockEngineFactoryPlugin) Files(java.nio.file.Files) TestTranslog(org.opensearch.index.translog.TestTranslog) IOException(java.io.IOException) MockEngineSupport(org.opensearch.test.engine.MockEngineSupport) Plugin(org.opensearch.plugins.Plugin) ShardRouting(org.opensearch.cluster.routing.ShardRouting) GroupShardsIterator(org.opensearch.cluster.routing.GroupShardsIterator) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse) NodesStatsResponse(org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse) ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) MergePolicyConfig(org.opensearch.index.MergePolicyConfig) Matchers.containsString(org.hamcrest.Matchers.containsString) MockTerminal(org.opensearch.cli.MockTerminal) OptionParser(joptsimple.OptionParser) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse) IndexRequestBuilder(org.opensearch.action.index.IndexRequestBuilder) SeqNoStats(org.opensearch.index.seqno.SeqNoStats) NodeEnvironment(org.opensearch.env.NodeEnvironment) TestEnvironment(org.opensearch.env.TestEnvironment) Environment(org.opensearch.env.Environment) OptionSet(joptsimple.OptionSet) RecoveryState(org.opensearch.indices.recovery.RecoveryState) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings)

Example 3 with RecoveryResponse

use of org.opensearch.action.admin.indices.recovery.RecoveryResponse in project OpenSearch by opensearch-project.

the class AbortedRestoreIT method testAbortedRestoreAlsoAbortFileRestores.

/**
 * Starts a snapshot restore while the repository is blocked on all data nodes, waits until the
 * shard recovery is reported in the {@code INDEX} stage and the snapshot thread pool is at its
 * maximum, then deletes the index being restored. After unblocking the repository the restore
 * must report one failed and zero successful shards, and the number of active snapshot threads
 * on the data node must drop back to zero.
 *
 * @throws Exception if a busy-wait, the restore future or any cluster call fails
 */
public void testAbortedRestoreAlsoAbortFileRestores() throws Exception {
    final String indexName = "test-abort-restore";
    final String repositoryName = "repository";
    final String snapshotName = "snapshot";

    internalCluster().startMasterOnlyNode();
    final String dataNode = internalCluster().startDataOnlyNode();

    // Populate and force-merge the index, snapshot it, then delete it so it can be restored.
    createIndex(indexName, indexSettingsNoReplicas(1).build());
    indexRandomDocs(indexName, scaledRandomIntBetween(10, 1_000));
    ensureGreen();
    forceMerge();
    createRepository(repositoryName, "mock");
    createFullSnapshot(repositoryName, snapshotName);
    assertAcked(client().admin().indices().prepareDelete(indexName));

    logger.info("--> blocking all data nodes for repository [{}]", repositoryName);
    blockAllDataNodes(repositoryName);
    failReadsAllDataNodes(repositoryName);

    logger.info("--> starting restore");
    final ActionFuture<RestoreSnapshotResponse> restoreFuture = client().admin()
        .cluster()
        .prepareRestoreSnapshot(repositoryName, snapshotName)
        .setWaitForCompletion(true)
        .setIndices(indexName)
        .execute();

    // The restore must be visible as exactly one active snapshot recovery that is still copying files.
    assertBusy(() -> {
        final RecoveryResponse activeRecoveries = client().admin()
            .indices()
            .prepareRecoveries(indexName)
            .setIndicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN)
            .setActiveOnly(true)
            .get();
        assertThat(activeRecoveries.hasRecoveries(), is(true));
        final List<RecoveryState> states = activeRecoveries.shardRecoveryStates().get(indexName);
        assertThat(states, hasSize(1));
        assertThat(restoreFuture.isDone(), is(false));
        states.forEach(state -> {
            assertThat(state.getRecoverySource().getType(), equalTo(RecoverySource.Type.SNAPSHOT));
            assertThat(state.getStage(), equalTo(RecoveryState.Stage.INDEX));
        });
    });

    final ThreadPool.Info snapshotPool = threadPool(dataNode).info(ThreadPool.Names.SNAPSHOT);
    final int maxSnapshotThreads = snapshotPool.getMax();
    assertThat(maxSnapshotThreads, greaterThan(0));
    logger.info("--> waiting for snapshot thread [max={}] pool to be full", maxSnapshotThreads);
    waitForMaxActiveSnapshotThreads(dataNode, equalTo(maxSnapshotThreads));

    logger.info("--> aborting restore by deleting the index");
    assertAcked(client().admin().indices().prepareDelete(indexName));

    logger.info("--> unblocking repository [{}]", repositoryName);
    unblockAllDataNodes(repositoryName);

    logger.info("--> restore should have failed");
    final RestoreSnapshotResponse restoreResult = restoreFuture.get();
    assertThat(restoreResult.getRestoreInfo().failedShards(), equalTo(1));
    assertThat(restoreResult.getRestoreInfo().successfulShards(), equalTo(0));

    logger.info("--> waiting for snapshot thread pool to be empty");
    waitForMaxActiveSnapshotThreads(dataNode, equalTo(0));
}
Also used : ThreadPool(org.opensearch.threadpool.ThreadPool) RecoveryState(org.opensearch.indices.recovery.RecoveryState) RestoreSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse)

Example 4 with RecoveryResponse

use of org.opensearch.action.admin.indices.recovery.RecoveryResponse in project OpenSearch by opensearch-project.

the class IndexRecoveryIT method testRerouteRecovery.

/**
 * Checks what the recoveries API and the node recovery stats report while shard 0 of
 * {@code INDEX_NAME} is moved between nodes with explicit reroute commands.
 * <p>
 * Phase one moves the shard from node A to node B with recoveries slowed down, inspects the
 * in-flight state (recovery states per target node, current source/target counters, growing
 * throttle time) and then the final state once recoveries are sped up again. Phase two raises
 * the replica count to 1, starts node C, moves the copy on node A to node C (again slowed down),
 * optionally stops node A mid-relocation, and verifies the reported recovery states before and
 * after the relocation completes.
 *
 * @throws Exception if a busy-wait or any cluster call fails
 */
public void testRerouteRecovery() throws Exception {
    logger.info("--> start node A");
    final String nodeA = internalCluster().startNode();
    logger.info("--> create index on node: {}", nodeA);
    // Store size of the first shard returned; it is handed to slowDownRecovery(...) below.
    ByteSizeValue shardSize = createAndPopulateIndex(INDEX_NAME, 1, SHARD_COUNT, REPLICA_COUNT).getShards()[0].getStats().getStore().size();
    logger.info("--> start node B");
    final String nodeB = internalCluster().startNode();
    ensureGreen();
    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);
    logger.info("--> move shard from: {} to: {}", nodeA, nodeB);
    client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeB)).execute().actionGet().getState();
    logger.info("--> waiting for recovery to start both on source and target");
    final Index index = resolveIndex(INDEX_NAME);
    // Node A must count one recovery as source and node B one as target before continuing.
    assertBusy(() -> {
        IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeA);
        assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsSource(), equalTo(1));
        indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
        assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsTarget(), equalTo(1));
    });
    logger.info("--> request recoveries");
    RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    List<RecoveryState> recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    // Split the reported recovery states by the node they target; one is expected for each node.
    List<RecoveryState> nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    // Node A: completed empty-store recovery of the primary. Node B: ongoing peer recovery from A.
    assertRecoveryState(nodeARecoveryStates.get(0), 0, RecoverySource.EmptyStoreRecoverySource.INSTANCE, true, Stage.DONE, null, nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());
    assertOnGoingRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
    logger.info("--> request node recovery stats");
    NodesStatsResponse statsResponse = client().admin().cluster().prepareNodesStats().clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
    // Initialised to Long.MAX_VALUE, presumably so the later greaterThan(...) checks cannot pass
    // unless a real throttle time was read for the node in the loop below.
    long nodeAThrottling = Long.MAX_VALUE;
    long nodeBThrottling = Long.MAX_VALUE;
    for (NodeStats nodeStats : statsResponse.getNodes()) {
        final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
        if (nodeStats.getNode().getName().equals(nodeA)) {
            assertThat("node A should have ongoing recovery as source", recoveryStats.currentAsSource(), equalTo(1));
            assertThat("node A should not have ongoing recovery as target", recoveryStats.currentAsTarget(), equalTo(0));
            nodeAThrottling = recoveryStats.throttleTime().millis();
        }
        if (nodeStats.getNode().getName().equals(nodeB)) {
            assertThat("node B should not have ongoing recovery as source", recoveryStats.currentAsSource(), equalTo(0));
            assertThat("node B should have ongoing recovery as target", recoveryStats.currentAsTarget(), equalTo(1));
            nodeBThrottling = recoveryStats.throttleTime().millis();
        }
    }
    logger.info("--> checking throttling increases");
    // Effectively-final copies so the lambda below can capture the baseline values.
    final long finalNodeAThrottling = nodeAThrottling;
    final long finalNodeBThrottling = nodeBThrottling;
    assertBusy(() -> {
        NodesStatsResponse statsResponse1 = client().admin().cluster().prepareNodesStats().clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
        assertThat(statsResponse1.getNodes(), hasSize(2));
        for (NodeStats nodeStats : statsResponse1.getNodes()) {
            final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
            if (nodeStats.getNode().getName().equals(nodeA)) {
                assertThat("node A throttling should increase", recoveryStats.throttleTime().millis(), greaterThan(finalNodeAThrottling));
            }
            if (nodeStats.getNode().getName().equals(nodeB)) {
                assertThat("node B throttling should increase", recoveryStats.throttleTime().millis(), greaterThan(finalNodeBThrottling));
            }
        }
    });
    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();
    // wait for it to be finished
    ensureGreen();
    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    // Only the finished relocation A -> B is expected to be reported now.
    assertThat(recoveryStates.size(), equalTo(1));
    assertRecoveryState(recoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(recoveryStates.get(0).getIndex());
    // Reusable check: the named node reports no current recoveries (neither as source nor as
    // target) and a throttle time greater than zero.
    Consumer<String> assertNodeHasThrottleTimeAndNoRecoveries = nodeName -> {
        NodesStatsResponse nodesStatsResponse = client().admin().cluster().prepareNodesStats().setNodesIds(nodeName).clear().setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)).get();
        assertThat(nodesStatsResponse.getNodes(), hasSize(1));
        NodeStats nodeStats = nodesStatsResponse.getNodes().get(0);
        final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats();
        assertThat(recoveryStats.currentAsSource(), equalTo(0));
        assertThat(recoveryStats.currentAsTarget(), equalTo(0));
        assertThat(nodeName + " throttling should be >0", recoveryStats.throttleTime().millis(), greaterThan(0L));
    };
    // we have to use assertBusy as recovery counters are decremented only when the last reference to the RecoveryTarget
    // is decremented, which may happen after the recovery was done.
    assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeA));
    assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeB));
    logger.info("--> bump replica count");
    client().admin().indices().prepareUpdateSettings(INDEX_NAME).setSettings(Settings.builder().put("number_of_replicas", 1)).execute().actionGet();
    ensureGreen();
    assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeA));
    assertBusy(() -> assertNodeHasThrottleTimeAndNoRecoveries.accept(nodeB));
    logger.info("--> start node C");
    String nodeC = internalCluster().startNode();
    assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("3").get().isTimedOut());
    logger.info("--> slowing down recoveries");
    slowDownRecovery(shardSize);
    logger.info("--> move replica shard from: {} to: {}", nodeA, nodeC);
    client().admin().cluster().prepareReroute().add(new MoveAllocationCommand(INDEX_NAME, 0, nodeA, nodeC)).execute().actionGet().getState();
    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    // While the relocation is in flight each of the three nodes is expected to be the target of
    // exactly one reported recovery.
    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(1));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    List<RecoveryState> nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));
    assertRecoveryState(nodeARecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, Stage.DONE, nodeB, nodeA);
    validateIndexRecoveryState(nodeARecoveryStates.get(0).getIndex());
    assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
    // relocations of replicas are marked as REPLICA and the source node is the node holding the primary (B)
    assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
    if (randomBoolean()) {
        // shutdown node with relocation source of replica shard and check if recovery continues
        internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeA));
        ensureStableCluster(2);
        response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
        recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
        // With node A gone, no recovery targeting it may be reported any more.
        nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
        assertThat(nodeARecoveryStates.size(), equalTo(0));
        nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
        assertThat(nodeBRecoveryStates.size(), equalTo(1));
        nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
        assertThat(nodeCRecoveryStates.size(), equalTo(1));
        assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
        validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
        assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC);
        validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
    }
    logger.info("--> speeding up recoveries");
    restoreRecoverySpeed();
    ensureGreen();
    response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
    recoveryStates = response.shardRecoveryStates().get(INDEX_NAME);
    // Final state: nothing targets node A; nodes B and C each report one completed recovery.
    nodeARecoveryStates = findRecoveriesForTargetNode(nodeA, recoveryStates);
    assertThat(nodeARecoveryStates.size(), equalTo(0));
    nodeBRecoveryStates = findRecoveriesForTargetNode(nodeB, recoveryStates);
    assertThat(nodeBRecoveryStates.size(), equalTo(1));
    nodeCRecoveryStates = findRecoveriesForTargetNode(nodeC, recoveryStates);
    assertThat(nodeCRecoveryStates.size(), equalTo(1));
    assertRecoveryState(nodeBRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, true, Stage.DONE, nodeA, nodeB);
    validateIndexRecoveryState(nodeBRecoveryStates.get(0).getIndex());
    // relocations of replicas are marked as REPLICA and the source node is the node holding the primary (B)
    assertRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, Stage.DONE, nodeB, nodeC);
    validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex());
}
Also used : OpenSearchRejectedExecutionException(org.opensearch.common.util.concurrent.OpenSearchRejectedExecutionException) SequenceNumbers(org.opensearch.index.seqno.SequenceNumbers) Arrays(java.util.Arrays) IndexResponse(org.opensearch.action.index.IndexResponse) ClusterStateResponse(org.opensearch.action.admin.cluster.state.ClusterStateResponse) SnapshotRecoverySource(org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource) Matchers.not(org.hamcrest.Matchers.not) SnapshotState(org.opensearch.snapshots.SnapshotState) ClusterScope(org.opensearch.test.OpenSearchIntegTestCase.ClusterScope) Version(org.opensearch.Version) Strings(org.opensearch.common.Strings) Transport(org.opensearch.transport.Transport) PlainActionFuture(org.opensearch.action.support.PlainActionFuture) Map(java.util.Map) ShardStateAction(org.opensearch.cluster.action.shard.ShardStateAction) Repository(org.opensearch.repositories.Repository) TimeValue(org.opensearch.common.unit.TimeValue) Index(org.opensearch.index.Index) AbstractTokenFilterFactory(org.opensearch.index.analysis.AbstractTokenFilterFactory) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Settings(org.opensearch.common.settings.Settings) ReplicationTracker(org.opensearch.index.seqno.ReplicationTracker) Scope(org.opensearch.test.OpenSearchIntegTestCase.Scope) TransportService(org.opensearch.transport.TransportService) Engine(org.opensearch.index.engine.Engine) CountDownLatch(java.util.concurrent.CountDownLatch) UPDATED(org.opensearch.action.DocWriteResponse.Result.UPDATED) NodeStats(org.opensearch.action.admin.cluster.node.stats.NodeStats) IndicesStatsResponse(org.opensearch.action.admin.indices.stats.IndicesStatsResponse) XContentType(org.opensearch.common.xcontent.XContentType) Matchers.greaterThan(org.hamcrest.Matchers.greaterThan) Matchers.is(org.hamcrest.Matchers.is) RepositoriesService(org.opensearch.repositories.RepositoriesService) 
TransportRequestHandler(org.opensearch.transport.TransportRequestHandler) CHUNK_SIZE_SETTING(org.opensearch.node.RecoverySettingsChunkSizePlugin.CHUNK_SIZE_SETTING) MapperParsingException(org.opensearch.index.mapper.MapperParsingException) Priority(org.opensearch.common.Priority) MockTransportService(org.opensearch.test.transport.MockTransportService) ArrayList(java.util.ArrayList) RecoverySource(org.opensearch.cluster.routing.RecoverySource) RestoreSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse) ClusterState(org.opensearch.cluster.ClusterState) ShardRoutingState(org.opensearch.cluster.routing.ShardRoutingState) BiConsumer(java.util.function.BiConsumer) Matchers.hasSize(org.hamcrest.Matchers.hasSize) StreamSupport(java.util.stream.StreamSupport) CircuitBreakingException(org.opensearch.common.breaker.CircuitBreakingException) Matchers.greaterThanOrEqualTo(org.hamcrest.Matchers.greaterThanOrEqualTo) TokenStream(org.apache.lucene.analysis.TokenStream) SetOnce(org.apache.lucene.util.SetOnce) IOException(java.io.IOException) IndexService(org.opensearch.index.IndexService) Plugin(org.opensearch.plugins.Plugin) ExecutionException(java.util.concurrent.ExecutionException) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse) AnalysisModule(org.opensearch.indices.analysis.AnalysisModule) PluginsService(org.opensearch.plugins.PluginsService) RecoveryStats(org.opensearch.index.recovery.RecoveryStats) RetentionLeases(org.opensearch.index.seqno.RetentionLeases) ClusterService(org.opensearch.cluster.service.ClusterService) ShardStats(org.opensearch.action.admin.indices.stats.ShardStats) IndexRequestBuilder(org.opensearch.action.index.IndexRequestBuilder) MockFSIndexStore(org.opensearch.test.store.MockFSIndexStore) StubbableTransport(org.opensearch.test.transport.StubbableTransport) ByteSizeUnit(org.opensearch.common.unit.ByteSizeUnit) OpenSearchException(org.opensearch.OpenSearchException) 
MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) CircuitBreaker(org.opensearch.common.breaker.CircuitBreaker) CommonStatsFlags(org.opensearch.action.admin.indices.stats.CommonStatsFlags) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) OpenSearchAssertions.assertHitCount(org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount) Matchers.everyItem(org.hamcrest.Matchers.everyItem) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) NodeClosedException(org.opensearch.node.NodeClosedException) RecoveryRequest(org.opensearch.action.admin.indices.recovery.RecoveryRequest) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) Matchers.isOneOf(org.hamcrest.Matchers.isOneOf) ReplicaShardAllocatorIT(org.opensearch.gateway.ReplicaShardAllocatorIT) TransportChannel(org.opensearch.transport.TransportChannel) Matchers.lessThanOrEqualTo(org.hamcrest.Matchers.lessThanOrEqualTo) Collection(java.util.Collection) IndicesService(org.opensearch.indices.IndicesService) Task(org.opensearch.tasks.Task) Store(org.opensearch.index.store.Store) NodeIndicesStats(org.opensearch.indices.NodeIndicesStats) Collectors(java.util.stream.Collectors) List(java.util.List) ClusterHealthResponse(org.opensearch.action.admin.cluster.health.ClusterHealthResponse) CREATED(org.opensearch.action.DocWriteResponse.Result.CREATED) Matchers.equalTo(org.hamcrest.Matchers.equalTo) IndexSettings(org.opensearch.index.IndexSettings) ReplicationResponse(org.opensearch.action.support.replication.ReplicationResponse) ConnectTransportException(org.opensearch.transport.ConnectTransportException) OpenSearchIntegTestCase(org.opensearch.test.OpenSearchIntegTestCase) AllocateEmptyPrimaryAllocationCommand(org.opensearch.cluster.routing.allocation.command.AllocateEmptyPrimaryAllocationCommand) RefreshPolicy(org.opensearch.action.support.WriteRequest.RefreshPolicy) 
IntStream(java.util.stream.IntStream) DiscoveryNodes(org.opensearch.cluster.node.DiscoveryNodes) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Stage(org.opensearch.indices.recovery.RecoveryState.Stage) TokenFilterFactory(org.opensearch.index.analysis.TokenFilterFactory) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ByteSizeValue(org.opensearch.common.unit.ByteSizeValue) InternalTestCluster(org.opensearch.test.InternalTestCluster) NodeConnectionsService(org.opensearch.cluster.NodeConnectionsService) IndexShard(org.opensearch.index.shard.IndexShard) InternalSettingsPlugin(org.opensearch.test.InternalSettingsPlugin) BackgroundIndexer(org.opensearch.test.BackgroundIndexer) Collections.singletonMap(java.util.Collections.singletonMap) SearchResponse(org.opensearch.action.search.SearchResponse) StoreStats(org.opensearch.index.store.StoreStats) OpenSearchAssertions.assertAcked(org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked) RepositoryData(org.opensearch.repositories.RepositoryData) CreateSnapshotResponse(org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse) PeerRecoverySource(org.opensearch.cluster.routing.RecoverySource.PeerRecoverySource) Matchers.empty(org.hamcrest.Matchers.empty) MockEngineFactoryPlugin(org.opensearch.index.MockEngineFactoryPlugin) TransportRequest(org.opensearch.transport.TransportRequest) Semaphore(java.util.concurrent.Semaphore) MockEngineSupport(org.opensearch.test.engine.MockEngineSupport) ActiveShardCount(org.opensearch.action.support.ActiveShardCount) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) Consumer(java.util.function.Consumer) AnalysisPlugin(org.opensearch.plugins.AnalysisPlugin) Collectors.toList(java.util.stream.Collectors.toList) Snapshot(org.opensearch.snapshots.Snapshot) NodesStatsResponse(org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse) 
RecoverySettingsChunkSizePlugin(org.opensearch.node.RecoverySettingsChunkSizePlugin) Collections(java.util.Collections) CommonStatsFlags(org.opensearch.action.admin.indices.stats.CommonStatsFlags) ByteSizeValue(org.opensearch.common.unit.ByteSizeValue) MoveAllocationCommand(org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand) IndicesService(org.opensearch.indices.IndicesService) Index(org.opensearch.index.Index) RecoveryStats(org.opensearch.index.recovery.RecoveryStats) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse) NodesStatsResponse(org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse) NodeStats(org.opensearch.action.admin.cluster.node.stats.NodeStats)

Example 5 with RecoveryResponse

use of org.opensearch.action.admin.indices.recovery.RecoveryResponse in project OpenSearch by opensearch-project.

the class IndexRecoveryIT method testHistoryRetention.

/**
 * Starts three nodes, creates a one-shard/two-replica index, stops two of the nodes,
 * indexes more documents while they are down, then brings one stopped node back (reusing
 * its data path) and checks how the returning copy recovered.
 *
 * <p>The final assertions require that exactly one recovery started after the nodes were
 * stopped, that it reports a total file count of zero, and that it recovered at least one
 * translog operation.
 *
 * @throws Exception if any cluster operation or assertion fails
 */
public void testHistoryRetention() throws Exception {
    internalCluster().startNodes(3);

    final String indexName = "test";
    final Settings.Builder creationSettings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2)
        .put(IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.getKey(), 1.0);
    client().admin().indices().prepareCreate(indexName).setSettings(creationSettings).get();
    ensureGreen(indexName);

    // Seed the index with replicated documents first: with an empty replica, an ops-based
    // recovery would have no advantage over a file-based one.
    final int replicatedDocCount = scaledRandomIntBetween(25, 250);
    final List<IndexRequestBuilder> seedRequests = new ArrayList<>();
    for (int added = 0; added < replicatedDocCount; added++) {
        seedRequests.add(client().prepareIndex(indexName).setSource("{}", XContentType.JSON));
    }
    indexRandom(true, seedRequests);

    if (randomBoolean()) {
        flush(indexName);
    }

    // Stop two nodes one after the other, capturing each node's data path settings before
    // it goes away so that one of them can be restarted on the same data later.
    final String firstNodeToStop = randomFrom(internalCluster().getNodeNames());
    final Settings firstDataPathSettings = internalCluster().dataPathSettings(firstNodeToStop);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(firstNodeToStop));

    final String secondNodeToStop = randomFrom(internalCluster().getNodeNames());
    final Settings secondDataPathSettings = internalCluster().dataPathSettings(secondNodeToStop);
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(secondNodeToStop));

    // Spin until the nano clock has moved strictly past this instant, so that recoveries
    // started from here on can be told apart from earlier ones by their start time.
    final long desyncNanoTime = System.nanoTime();
    // noinspection StatementWithEmptyBody
    while (System.nanoTime() <= desyncNanoTime) {
        // time passes
    }

    // Index further documents while the two stopped nodes are absent.
    final int numNewDocs = scaledRandomIntBetween(25, 250);
    int indexedSoFar = 0;
    while (indexedSoFar < numNewDocs) {
        client().prepareIndex(indexName)
            .setSource("{}", XContentType.JSON)
            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
            .get();
        indexedSoFar++;
    }

    // Flush twice to update the safe commit's local checkpoint
    for (int flushPass = 0; flushPass < 2; flushPass++) {
        assertThat(
            client().admin().indices().prepareFlush(indexName).setForce(true).execute().get().getFailedShards(),
            equalTo(0)
        );
    }

    // Lower the replica count to one, then restart a single node on one of the two saved data paths.
    final Settings.Builder oneReplica = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1);
    assertAcked(client().admin().indices().prepareUpdateSettings(indexName).setSettings(oneReplica));

    internalCluster().startNode(randomFrom(firstDataPathSettings, secondDataPathSettings));
    ensureGreen(indexName);

    final RecoveryRequest recoveryRequest = new RecoveryRequest(indexName);
    final RecoveryResponse recoveryResponse = client().admin().indices().recoveries(recoveryRequest).get();
    final List<RecoveryState> recentRecoveries = recoveryResponse.shardRecoveryStates().get(indexName);

    // Discard every recovery that started at or before the desync instant.
    recentRecoveries.removeIf(state -> state.getTimer().getStartNanoTime() <= desyncNanoTime);
    assertThat(recentRecoveries, hasSize(1));

    final RecoveryState soleRecovery = recentRecoveries.get(0);
    assertThat(soleRecovery.getIndex().totalFileCount(), is(0));
    assertThat(soleRecovery.getTranslog().recoveredOperations(), greaterThan(0));
}
Also used : IndexRequestBuilder(org.opensearch.action.index.IndexRequestBuilder) RecoveryRequest(org.opensearch.action.admin.indices.recovery.RecoveryRequest) ArrayList(java.util.ArrayList) Settings(org.opensearch.common.settings.Settings) IndexSettings(org.opensearch.index.IndexSettings) RecoveryResponse(org.opensearch.action.admin.indices.recovery.RecoveryResponse)

Aggregations

RecoveryResponse (org.opensearch.action.admin.indices.recovery.RecoveryResponse)17 Settings (org.opensearch.common.settings.Settings)7 RecoveryState (org.opensearch.indices.recovery.RecoveryState)7 List (java.util.List)6 ClusterState (org.opensearch.cluster.ClusterState)6 IndexSettings (org.opensearch.index.IndexSettings)6 ArrayList (java.util.ArrayList)5 IndexRequestBuilder (org.opensearch.action.index.IndexRequestBuilder)5 SnapshotRecoverySource (org.opensearch.cluster.routing.RecoverySource.SnapshotRecoverySource)5 IOException (java.io.IOException)4 Matchers.containsString (org.hamcrest.Matchers.containsString)4 DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode)4 Map (java.util.Map)3 Collectors.toList (java.util.stream.Collectors.toList)3 ClusterHealthResponse (org.opensearch.action.admin.cluster.health.ClusterHealthResponse)3 RestoreSnapshotResponse (org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse)3 RecoveryRequest (org.opensearch.action.admin.indices.recovery.RecoveryRequest)3 DiscoveryNodes (org.opensearch.cluster.node.DiscoveryNodes)3 Index (org.opensearch.index.Index)3 Arrays (java.util.Arrays)2