Use of org.opensearch.indices.recovery.StartRecoveryRequest in the OpenSearch project by opensearch-project.
Class IndexShardTestCase, method recoverUnstartedReplica.
/**
 * Recovers a replica from the given primary, allowing the caller to supply a custom recovery target. A typical usage of a custom
 * recovery target is to assert things in the various stages of recovery.
 *
 * Note: this method keeps the shard in {@link IndexShardState#POST_RECOVERY} and doesn't start it.
 *
 * @param replica          the recovery target shard
 * @param primary          the recovery source shard
 * @param targetSupplier   supplies an instance of {@link RecoveryTarget}
 * @param markAsRecovering set to {@code false} if the replica is already marked as recovering; in that case the replica is
 *                         asserted to be in {@link IndexShardState#RECOVERING}
 * @param inSyncIds        the in-sync ids handed to the primary's {@code updateShardState} call before recovery is started
 * @param routingTable     the routing table handed to the primary's {@code updateShardState} call before recovery is started
 * @throws IOException declared for the underlying shard and recovery operations invoked here
 */
protected final void recoverUnstartedReplica(final IndexShard replica, final IndexShard primary, final BiFunction<IndexShard, DiscoveryNode, RecoveryTarget> targetSupplier, final boolean markAsRecovering, final Set<String> inSyncIds, final IndexShardRoutingTable routingTable) throws IOException {
    final DiscoveryNode pNode = getFakeDiscoNode(primary.routingEntry().currentNodeId());
    final DiscoveryNode rNode = getFakeDiscoNode(replica.routingEntry().currentNodeId());
    if (markAsRecovering) {
        replica.markAsRecovering("remote", new RecoveryState(replica.routingEntry(), pNode, rNode));
    } else {
        // Expected value goes first so that a failure reports "expected RECOVERING but was <actual state>".
        assertEquals(IndexShardState.RECOVERING, replica.state());
    }
    replica.prepareForIndexRecovery();
    final RecoveryTarget recoveryTarget = targetSupplier.apply(replica, pNode);
    final long startingSeqNo = recoveryTarget.indexShard().recoverLocallyUpToGlobalCheckpoint();
    final StartRecoveryRequest request = PeerRecoveryTargetService.getStartRecoveryRequest(logger, rNode, recoveryTarget, startingSeqNo);
    // Randomize the chunk size: either the default or anything between 1 byte and 10 MiB.
    final int fileChunkSizeInBytes = Math.toIntExact(randomBoolean() ? RecoverySettings.DEFAULT_CHUNK_SIZE.getBytes() : randomIntBetween(1, 10 * 1024 * 1024));
    final RecoverySourceHandler recovery = new RecoverySourceHandler(primary, new AsyncRecoveryTarget(recoveryTarget, threadPool.generic()), threadPool, request, fileChunkSizeInBytes, between(1, 8), between(1, 8));
    // Push the supplied in-sync ids and routing table to the primary before the recovery is run.
    primary.updateShardState(primary.routingEntry(), primary.getPendingPrimaryTerm(), null, currentClusterStateVersion.incrementAndGet(), inSyncIds, routingTable);
    try {
        final PlainActionFuture<RecoveryResponse> future = new PlainActionFuture<>();
        recovery.recoverToTarget(future);
        // Block until the source handler has completed (or failed) the recovery.
        future.actionGet();
        recoveryTarget.markAsDone();
    } catch (Exception e) {
        // Fail the target with the original cause attached, then let the caller see the failure as well.
        recoveryTarget.fail(new RecoveryFailedException(request, e), false);
        throw e;
    }
}
Use of org.opensearch.indices.recovery.StartRecoveryRequest in the OpenSearch project by opensearch-project.
Class CloseWhileRelocatingShardsIT, method testCloseWhileRelocatingShards.
/**
 * Closes several indices while one shard of each is being relocated to a freshly started data node.
 *
 * Outline of the test:
 * <ol>
 * <li>Create between 3 and 5 indices: an empty one, one with a random number of documents, and the rest fed by a
 * {@link BackgroundIndexer}.</li>
 * <li>Set the rebalance-enable cluster setting to {@code NONE} (reset in the {@code finally} block), start a data-only target
 * node and build one {@link MoveAllocationCommand} per index towards that node.</li>
 * <li>Install a send behavior on the target node's transport service that holds back the start-recovery requests of the moved
 * shards until the index-closing threads are running.</li>
 * <li>Close every index concurrently, then check that each index is closed if and only if its close was acknowledged, and that
 * the acknowledged indices contain the expected number of documents after being reopened.</li>
 * </ol>
 */
public void testCloseWhileRelocatingShards() throws Exception {
final String[] indices = new String[randomIntBetween(3, 5)];
// expected document count per index; entries of background-indexed indices are topped up once their indexer has stopped
final Map<String, Long> docsPerIndex = new HashMap<>();
final Map<String, BackgroundIndexer> indexers = new HashMap<>();
for (int i = 0; i < indices.length; i++) {
final String indexName = "index-" + i;
int nbDocs = 0;
switch(i) {
case 0:
// first index: created empty
logger.debug("creating empty index {}", indexName);
createIndex(indexName);
break;
case 1:
// second index: filled up front with a random number of documents
nbDocs = scaledRandomIntBetween(1, 100);
logger.debug("creating index {} with {} documents", indexName, nbDocs);
createIndex(indexName);
indexRandom(randomBoolean(), IntStream.range(0, nbDocs).mapToObj(n -> client().prepareIndex(indexName).setSource("num", n)).collect(Collectors.toList()));
break;
default:
// remaining indices: written to by a background indexer (no createIndex call is made here for them)
logger.debug("creating index {} with background indexing", indexName);
final BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), -1, 1);
indexers.put(indexName, indexer);
indexer.setFailureAssertion(t -> assertException(t, indexName));
waitForDocs(1, indexer);
}
docsPerIndex.put(indexName, (long) nbDocs);
indices[i] = indexName;
}
ensureGreen(TimeValue.timeValueSeconds(60L), indices);
// set the rebalance-enable setting to NONE for the duration of the test; the finally block below resets it
assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), Rebalance.NONE.toString())));
final String targetNode = internalCluster().startDataOnlyNode();
// wait for the cluster-manager to finish processing join.
ensureClusterSizeConsistency();
try {
final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
final ClusterState state = clusterService.state();
// latch: counted down by each blocked start-recovery request; the closing threads wait on it
final CountDownLatch latch = new CountDownLatch(indices.length);
// release: counted down by each closing thread; the blocked start-recovery requests wait on it
final CountDownLatch release = new CountDownLatch(indices.length);
// relocate one shard for every index to be closed
final AllocationCommands commands = new AllocationCommands();
for (final String index : indices) {
final NumShards numShards = getNumShards(index);
final int shardId = numShards.numPrimaries == 1 ? 0 : randomIntBetween(0, numShards.numPrimaries - 1);
final IndexRoutingTable indexRoutingTable = state.routingTable().index(index);
final ShardRouting primary = indexRoutingTable.shard(shardId).primaryShard();
assertTrue(primary.started());
// move the primary copy by default; when a replica exists, randomly move that copy instead
String currentNodeId = primary.currentNodeId();
if (numShards.numReplicas > 0) {
final ShardRouting replica = indexRoutingTable.shard(shardId).replicaShards().iterator().next();
assertTrue(replica.started());
if (randomBoolean()) {
currentNodeId = replica.currentNodeId();
}
}
commands.add(new MoveAllocationCommand(index, shardId, state.nodes().resolveNode(currentNodeId).getName(), targetNode));
}
// Build the list of shards for which recoveries will be blocked
final Set<ShardId> blockedShards = commands.commands().stream().map(c -> (MoveAllocationCommand) c).map(c -> new ShardId(clusterService.state().metadata().index(c.index()).getIndex(), c.shardId())).collect(Collectors.toSet());
assertThat(blockedShards, hasSize(indices.length));
// both sets are concurrent because they are filled from the closing threads / the send behavior
final Set<String> acknowledgedCloses = ConcurrentCollections.newConcurrentSet();
final Set<String> interruptedRecoveries = ConcurrentCollections.newConcurrentSet();
// Create a SendRequestBehavior that will block outgoing start recovery request
final StubbableTransport.SendRequestBehavior sendBehavior = (connection, requestId, action, request, options) -> {
if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action)) {
final StartRecoveryRequest startRecoveryRequest = ((StartRecoveryRequest) request);
if (blockedShards.contains(startRecoveryRequest.shardId())) {
logger.debug("blocking recovery of shard {}", startRecoveryRequest.shardId());
// signal that this recovery has reached the blocking point, then wait until every closing thread has counted down
latch.countDown();
try {
release.await();
logger.debug("releasing recovery of shard {}", startRecoveryRequest.shardId());
} catch (final InterruptedException e) {
// interrupted while blocked: remember the index, restore the interrupt flag and drop the request (it is never sent)
logger.warn(() -> new ParameterizedMessage("exception when releasing recovery of shard {}", startRecoveryRequest.shardId()), e);
interruptedRecoveries.add(startRecoveryRequest.shardId().getIndexName());
Thread.currentThread().interrupt();
return;
}
}
}
// any other action, and every released recovery request, is forwarded unchanged
connection.sendRequest(requestId, action, request, options);
};
// register the behavior on the target node's transport service for every other data node
final MockTransportService targetTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, targetNode);
for (DiscoveryNode node : state.getNodes()) {
if (node.isDataNode() && node.getName().equals(targetNode) == false) {
final TransportService sourceTransportService = internalCluster().getInstance(TransportService.class, node.getName());
targetTransportService.addSendBehavior(sourceTransportService, sendBehavior);
}
}
// submit the move commands built above
assertAcked(client().admin().cluster().reroute(new ClusterRerouteRequest().commands(commands)).get());
// start index closing threads
final List<Thread> threads = new ArrayList<>();
for (final String indexToClose : indices) {
final Thread thread = new Thread(() -> {
try {
latch.await();
} catch (InterruptedException e) {
throw new AssertionError(e);
} finally {
// always count down so that blocked recoveries are not left waiting on this thread
release.countDown();
}
// Closing is not always acknowledged when shards are relocating: this is the case when the target shard is initializing
// or is catching up operations. In these cases the TransportVerifyShardBeforeCloseAction will detect that the global
// and max sequence number don't match and will not ack the close.
AcknowledgedResponse closeResponse = client().admin().indices().prepareClose(indexToClose).get();
if (closeResponse.isAcknowledged()) {
assertTrue("Index closing should not be acknowledged twice", acknowledgedCloses.add(indexToClose));
}
});
threads.add(thread);
thread.start();
}
// NOTE(review): the latch is sized to indices.length and is also counted down once per blocked recovery; the purpose of this
// additional count-down from the test thread is not visible here - confirm before changing it
latch.countDown();
for (Thread thread : threads) {
thread.join();
}
// stop indexers first without waiting for stop to not redundantly index on some while waiting for another one to stop
for (BackgroundIndexer indexer : indexers.values()) {
indexer.stop();
}
// once an indexer has stopped, add the number of documents it indexed to the expected count of its index
for (Map.Entry<String, BackgroundIndexer> entry : indexers.entrySet()) {
final BackgroundIndexer indexer = entry.getValue();
indexer.awaitStopped();
final String indexName = entry.getKey();
docsPerIndex.computeIfPresent(indexName, (key, value) -> value + indexer.totalIndexedDocs());
}
// an index must be closed exactly when its close request was acknowledged
for (String index : indices) {
if (acknowledgedCloses.contains(index)) {
assertIndexIsClosed(index);
} else {
assertIndexIsOpened(index);
}
}
// remove the send behavior installed above
targetTransportService.clearAllRules();
// If a shard recovery has been interrupted, we expect its index to be closed
interruptedRecoveries.forEach(CloseIndexIT::assertIndexIsClosed);
assertThat("Consider that the test failed if no indices were successfully closed", acknowledgedCloses.size(), greaterThan(0));
// reopen everything and verify the document counts of the indices whose close was acknowledged
assertAcked(client().admin().indices().prepareOpen("index-*"));
ensureGreen(indices);
for (String index : acknowledgedCloses) {
long docsCount = client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get().getHits().getTotalHits().value;
assertEquals("Expected " + docsPerIndex.get(index) + " docs in index " + index + " but got " + docsCount + " (close acknowledged=" + acknowledgedCloses.contains(index) + ")", (long) docsPerIndex.get(index), docsCount);
}
} finally {
// reset the rebalance-enable setting changed at the start of the test
assertAcked(client().admin().cluster().prepareUpdateSettings().setTransientSettings(Settings.builder().putNull(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey())));
}
}
Aggregations