use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndexRecoveryIT method testUsesFileBasedRecoveryIfRetentionLeaseAheadOfGlobalCheckpoint.
/**
 * Restarts the node holding the replica of a single-shard index and, while that node is down, indexes more
 * documents and renews the replica's peer-recovery retention lease to one past the primary's max sequence number.
 * Once the cluster is green again, asserts that the replica's recovery reports at least one file.
 */
public void testUsesFileBasedRecoveryIfRetentionLeaseAheadOfGlobalCheckpoint() throws Exception {
    internalCluster().ensureAtLeastNumDataNodes(2);

    String indexName = "test-index";
    final Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "12h")
        .build();
    createIndex(indexName, indexSettings);

    // the random flags are drawn before the document count, exactly as when they are passed inline
    final boolean initialForceRefresh = randomBoolean();
    final boolean initialDummyDocuments = randomBoolean();
    final boolean initialMaybeFlush = randomBoolean();
    indexRandom(
        initialForceRefresh,
        initialDummyDocuments,
        initialMaybeFlush,
        IntStream.range(0, between(0, 100))
            .mapToObj(n -> client().prepareIndex(indexName).setSource("num", n))
            .collect(toList())
    );
    ensureGreen(indexName);

    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    final DiscoveryNodes clusterNodes = clusterService().state().nodes();
    final IndexShardRoutingTable shardCopies = clusterService().state().routingTable().shardRoutingTable(shardId);

    final String primaryNodeName = clusterNodes.get(shardCopies.primaryShard().currentNodeId()).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryNodeName).getShardOrNull(shardId);

    final ShardRouting replicaShardRouting = shardCopies.replicaShards().get(0);
    final String replicaNodeName = clusterNodes.get(replicaShardRouting.currentNodeId()).getName();

    internalCluster().restartNode(replicaNodeName, new InternalTestCluster.RestartCallback() {
        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            // wait until the cluster has registered that one node is gone
            final String remainingNodeCount = Integer.toString(clusterNodes.getSize() - 1);
            assertFalse(
                client().admin()
                    .cluster()
                    .prepareHealth()
                    .setWaitForNodes(remainingNodeCount)
                    .setWaitForEvents(Priority.LANGUID)
                    .get()
                    .isTimedOut()
            );

            final boolean forceRefresh = randomBoolean();
            final boolean dummyDocuments = randomBoolean();
            final boolean maybeFlush = randomBoolean();
            indexRandom(
                forceRefresh,
                dummyDocuments,
                maybeFlush,
                IntStream.range(0, between(1, 100))
                    .mapToObj(n -> client().prepareIndex(indexName).setSource("num", n))
                    .collect(toList())
            );

            // We do not guarantee that the replica can recover locally all the way to its own global checkpoint before starting
            // to recover from the primary, so we must be careful not to perform an operations-based recovery if this would require
            // some operations that are not being retained. Emulate this by advancing the lease ahead of the replica's GCP:
            final String replicaLeaseId = ReplicationTracker.getPeerRecoveryRetentionLeaseId(replicaShardRouting);
            final long retainingSeqNo = primary.seqNoStats().getMaxSeqNo() + 1;
            primary.renewRetentionLease(replicaLeaseId, retainingSeqNo, ReplicationTracker.PEER_RECOVERY_RETENTION_LEASE_SOURCE);

            return super.onNodeStopped(nodeName);
        }
    });
    ensureGreen(indexName);

    // noinspection OptionalGetWithoutIsPresent because it fails the test if absent
    final RecoveryState replicaRecovery = client().admin()
        .indices()
        .prepareRecoveries(indexName)
        .get()
        .shardRecoveryStates()
        .get(indexName)
        .stream()
        .filter(rs -> rs.getPrimary() == false)
        .findFirst()
        .get();
    assertThat(replicaRecovery.getIndex().totalFileCount(), greaterThan(0));
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class IndexRecoveryIT method testUsesFileBasedRecoveryIfOperationsBasedRecoveryWouldBeUnreasonable.
/**
 * Restarts the node holding the replica of a single-shard index and, while that node is down, indexes enough
 * new documents (see the derivation inside the restart callback) that replaying them would exceed the
 * file-based recovery threshold. It waits for the replica's retention lease to disappear, and once the cluster
 * is green again asserts that the replica's recovery reports at least one file.
 */
public void testUsesFileBasedRecoveryIfOperationsBasedRecoveryWouldBeUnreasonable() throws Exception {
    internalCluster().ensureAtLeastNumDataNodes(2);

    String indexName = "test-index";
    final Settings.Builder settings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "12h")
        .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms");

    // randomly either set an explicit threshold on the index or rely on the setting's default value
    final boolean overrideThreshold = randomBoolean();
    final double reasonableOperationsBasedRecoveryProportion = overrideThreshold
        ? randomDoubleBetween(0.05, 0.99, true)
        : IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.get(Settings.EMPTY);
    if (overrideThreshold) {
        settings.put(IndexSettings.FILE_BASED_RECOVERY_THRESHOLD_SETTING.getKey(), reasonableOperationsBasedRecoveryProportion);
    }
    logger.info("--> performing ops-based recoveries up to [{}%] of docs", reasonableOperationsBasedRecoveryProportion * 100.0);

    createIndex(indexName, settings.build());
    final boolean initialForceRefresh = randomBoolean();
    final boolean initialMaybeFlush = randomBoolean();
    indexRandom(
        initialForceRefresh,
        false,
        initialMaybeFlush,
        IntStream.range(0, between(0, 100))
            .mapToObj(n -> client().prepareIndex(indexName).setSource("num", n))
            .collect(toList())
    );
    ensureGreen(indexName);
    flush(indexName);

    // wait for all history to be discarded
    assertBusy(() -> {
        for (ShardStats copyStats : client().admin().indices().prepareStats(indexName).get().getShards()) {
            final long maxSeqNo = copyStats.getSeqNoStats().getMaxSeqNo();
            final String failureMessage = copyStats.getRetentionLeaseStats().retentionLeases()
                + " should discard history up to "
                + maxSeqNo;
            final boolean allLeasesAdvanced = copyStats.getRetentionLeaseStats()
                .retentionLeases()
                .leases()
                .stream()
                .allMatch(l -> l.retainingSequenceNumber() == maxSeqNo + 1);
            assertTrue(failureMessage, allLeasesAdvanced);
        }
    });

    // ensure that all operations are in the safe commit
    flush(indexName);

    final ShardStats firstCopyStats = client().admin().indices().prepareStats(indexName).get().getShards()[0];
    final long docCount = firstCopyStats.getStats().docs.getCount();
    assertThat(firstCopyStats.getStats().docs.getDeleted(), equalTo(0L));
    assertThat(firstCopyStats.getSeqNoStats().getMaxSeqNo() + 1, equalTo(docCount));

    final ShardId shardId = new ShardId(resolveIndex(indexName), 0);
    final DiscoveryNodes clusterNodes = clusterService().state().nodes();
    final IndexShardRoutingTable shardCopies = clusterService().state().routingTable().shardRoutingTable(shardId);
    final ShardRouting replicaShardRouting = shardCopies.replicaShards().get(0);

    assertTrue(
        "should have lease for " + replicaShardRouting,
        client().admin()
            .indices()
            .prepareStats(indexName)
            .get()
            .getShards()[0].getRetentionLeaseStats()
            .retentionLeases()
            .contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(replicaShardRouting))
    );

    final String replicaNodeName = clusterNodes.get(replicaShardRouting.currentNodeId()).getName();
    internalCluster().restartNode(replicaNodeName, new InternalTestCluster.RestartCallback() {
        @Override
        public Settings onNodeStopped(String nodeName) throws Exception {
            // wait until the cluster has registered that one node is gone
            final String remainingNodeCount = Integer.toString(clusterNodes.getSize() - 1);
            assertFalse(
                client().admin()
                    .cluster()
                    .prepareHealth()
                    .setWaitForNodes(remainingNodeCount)
                    .setWaitForEvents(Priority.LANGUID)
                    .get()
                    .isTimedOut()
            );

            final double retainedOps = Math.ceil(docCount * reasonableOperationsBasedRecoveryProportion);
            final double minimumNewDocs = Math.ceil((1 + retainedOps) / (1 - reasonableOperationsBasedRecoveryProportion));
            final int newDocCount = Math.toIntExact(Math.round(minimumNewDocs));

            /*
             *     newDocCount >= (ceil(docCount * p) + 1) / (1-p)
             *
             * ==> 0 <= newDocCount * (1-p) - ceil(docCount * p) - 1
             *       =  newDocCount - (newDocCount * p + ceil(docCount * p) + 1)
             *       <  newDocCount - (ceil(newDocCount * p) + ceil(docCount * p))
             *       <= newDocCount -  ceil(newDocCount * p + docCount * p)
             *
             * ==> docCount <  newDocCount + docCount - ceil((newDocCount + docCount) * p)
             *              == localCheckpoint + 1    - ceil((newDocCount + docCount) * p)
             *              == firstReasonableSeqNo
             *
             * The replica has docCount docs, i.e. has operations with seqnos [0..docCount-1], so a seqno-based recovery will start
             * from docCount < firstReasonableSeqNo
             *
             * ==> it is unreasonable to recover the replica using a seqno-based recovery
             */

            final boolean forceRefresh = randomBoolean();
            final boolean dummyDocuments = randomBoolean();
            final boolean maybeFlush = randomBoolean();
            indexRandom(
                forceRefresh,
                dummyDocuments,
                maybeFlush,
                IntStream.range(0, newDocCount)
                    .mapToObj(n -> client().prepareIndex(indexName).setSource("num", n))
                    .collect(toList())
            );
            flush(indexName);

            assertBusy(
                () -> assertFalse(
                    "should no longer have lease for " + replicaShardRouting,
                    client().admin()
                        .indices()
                        .prepareStats(indexName)
                        .get()
                        .getShards()[0].getRetentionLeaseStats()
                        .retentionLeases()
                        .contains(ReplicationTracker.getPeerRecoveryRetentionLeaseId(replicaShardRouting))
                )
            );

            return super.onNodeStopped(nodeName);
        }
    });
    ensureGreen(indexName);

    // noinspection OptionalGetWithoutIsPresent because it fails the test if absent
    final RecoveryState replicaRecovery = client().admin()
        .indices()
        .prepareRecoveries(indexName)
        .get()
        .shardRecoveryStates()
        .get(indexName)
        .stream()
        .filter(rs -> rs.getPrimary() == false)
        .findFirst()
        .get();
    assertThat(replicaRecovery.getIndex().totalFileCount(), greaterThan(0));
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class SuggestStatsIT method nodeIdsWithIndex.
/**
 * Collects the ids of the nodes that hold at least one active shard copy of the given indices,
 * based on the cluster state fetched through the test client.
 *
 * @param indices names of the indices whose shard copies are inspected
 * @return the current node id of every active shard routing found; empty if there is none
 */
private Set<String> nodeIdsWithIndex(String... indices) {
    final ClusterState clusterState = client().admin().cluster().prepareState().execute().actionGet().getState();
    final Set<String> nodeIds = new HashSet<>();
    for (ShardIterator shardCopies : clusterState.routingTable().allAssignedShardsGrouped(indices, true)) {
        for (ShardRouting copy : shardCopies) {
            if (copy.active() == false) {
                // only copies that are currently active count
                continue;
            }
            nodeIds.add(copy.currentNodeId());
        }
    }
    return nodeIds;
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class CorruptedFileIT method testCorruptionOnNetworkLayer.
/**
 * Tests corruption that happens on the network layer and that the primary does not get affected by corruption that happens on the way
 * to the replica. The file on disk stays uncorrupted.
 * <p>
 * Every data node gets a send behavior that tampers with recovery file chunks addressed to one randomly chosen
 * "unlucky" node: each chunk is either truncated by one byte or has one byte of its content flipped. The index is
 * first created with no replicas on a single node, then a replica is requested; the test asserts that the cluster
 * still turns green, that no shard copy is started on or relocating from the unlucky node, and that searches keep
 * returning every document.
 */
public void testCorruptionOnNetworkLayer() throws ExecutionException, InterruptedException {
    int numDocs = scaledRandomIntBetween(100, 1000);
    internalCluster().ensureAtLeastNumDataNodes(2);
    if (cluster().numDataNodes() < 3) {
        internalCluster().startDataOnlyNode();
    }

    NodesStatsResponse nodeStats = client().admin().cluster().prepareNodesStats().get();
    List<NodeStats> dataNodeStats = new ArrayList<>();
    for (NodeStats stat : nodeStats.getNodes()) {
        if (stat.getNode().isDataNode()) {
            dataNodeStats.add(stat);
        }
    }
    assertThat(dataNodeStats.size(), greaterThanOrEqualTo(2));

    // after shuffling, the first node hosts all primaries and the second one receives the corrupted chunks
    Collections.shuffle(dataNodeStats, random());
    NodeStats primariesNode = dataNodeStats.get(0);
    NodeStats unluckyNode = dataNodeStats.get(1);

    assertAcked(
        prepareCreate("test").setSettings(
            Settings.builder()
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0")
                // keep the number of shards small: the recoveries must finish quickly
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 4))
                .put(MockFSIndexStore.INDEX_CHECK_INDEX_ON_CLOSE_SETTING.getKey(), false)
                .put("index.routing.allocation.include._name", primariesNode.getNode().getName())
                .put(EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE)
        )
    );
    ensureGreen();

    IndexRequestBuilder[] builders = new IndexRequestBuilder[numDocs];
    for (int i = 0; i < builders.length; i++) {
        builders[i] = client().prepareIndex("test").setSource("field", "value");
    }
    indexRandom(true, builders);
    ensureGreen();

    // we have to flush at least once here since we don't corrupt the translog
    assertAllSuccessful(client().admin().indices().prepareFlush().setForce(true).execute().actionGet());
    SearchResponse countResponse = client().prepareSearch().setSize(0).get();
    assertHitCount(countResponse, numDocs);

    final boolean truncate = randomBoolean();
    for (NodeStats dataNode : dataNodeStats) {
        MockTransportService mockTransportService = (MockTransportService) internalCluster().getInstance(
            TransportService.class,
            dataNode.getNode().getName()
        );
        mockTransportService.addSendBehavior(
            internalCluster().getInstance(TransportService.class, unluckyNode.getNode().getName()),
            (connection, requestId, action, request, options) -> {
                if (action.equals(PeerRecoveryTargetService.Actions.FILE_CHUNK)) {
                    RecoveryFileChunkRequest req = (RecoveryFileChunkRequest) request;
                    if (truncate && req.length() > 1) {
                        // replace the chunk with a copy of the request whose content is one byte shorter
                        BytesRef bytesRef = req.content().toBytesRef();
                        BytesArray array = new BytesArray(bytesRef.bytes, bytesRef.offset, (int) req.length() - 1);
                        request = new RecoveryFileChunkRequest(
                            req.recoveryId(),
                            req.requestSeqNo(),
                            req.shardId(),
                            req.metadata(),
                            req.position(),
                            array,
                            req.lastChunk(),
                            req.totalTranslogOps(),
                            req.sourceThrottleTimeInNanos()
                        );
                    } else {
                        final BytesRef bytesRef = req.content().toBytesRef();
                        // the in-place flip below only reaches the request if toBytesRef() exposes the same backing array each time
                        assert bytesRef.bytes == req.content().toBytesRef().bytes : "no internal reference!!";
                        int i = randomIntBetween(0, req.content().length() - 1);
                        // flip one byte in the content; the content starts at bytesRef.offset within the backing array,
                        // so the content-relative index has to be shifted by that offset
                        final int flipped = bytesRef.offset + i;
                        bytesRef.bytes[flipped] = (byte) ~bytesRef.bytes[flipped];
                    }
                }
                connection.sendRequest(requestId, action, request, options);
            }
        );
    }

    // ask for a replica and allow allocation on every node, so that recoveries towards the unlucky node are attempted
    Settings build = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "1")
        .put("index.routing.allocation.include._name", "*")
        .build();
    client().admin().indices().prepareUpdateSettings("test").setSettings(build).get();
    client().admin().cluster().prepareReroute().get();
    ClusterHealthResponse actionGet = client().admin()
        .cluster()
        .health(Requests.clusterHealthRequest("test").waitForGreenStatus())
        .actionGet();
    if (actionGet.isTimedOut()) {
        logger.info(
            "ensureGreen timed out, cluster state:\n{}\n{}",
            client().admin().cluster().prepareState().get().getState(),
            client().admin().cluster().preparePendingClusterTasks().get()
        );
        assertThat("timed out waiting for green state", actionGet.isTimedOut(), equalTo(false));
    }

    // we are green, so the primaries did not get corrupted.
    // ensure that no shard is actually allocated on the unlucky node
    ClusterStateResponse clusterStateResponse = client().admin().cluster().prepareState().get();
    for (IndexShardRoutingTable table : clusterStateResponse.getState().getRoutingTable().index("test")) {
        for (ShardRouting routing : table) {
            if (unluckyNode.getNode().getId().equals(routing.currentNodeId())) {
                assertThat(routing.state(), not(equalTo(ShardRoutingState.STARTED)));
                assertThat(routing.state(), not(equalTo(ShardRoutingState.RELOCATING)));
            }
        }
    }

    // searches must keep returning every document
    final int numIterations = scaledRandomIntBetween(5, 20);
    for (int i = 0; i < numIterations; i++) {
        SearchResponse response = client().prepareSearch().setSize(numDocs).get();
        assertHitCount(response, numDocs);
    }
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class CorruptedFileIT method corruptRandomPrimaryFile.
/**
 * Picks a random active primary shard of the {@code test} index, gathers the files of that shard found under the
 * data paths of the node hosting it, and hands them to {@link CorruptionUtils#corruptFile} together with the
 * test's random source.
 *
 * @param includePerCommitFiles if {@code true}, the segments file and every file of each segment are candidates;
 *                              otherwise only files accepted by {@code isPerSegmentFile} are
 * @return the routing entry of the primary shard whose files were passed on for corruption
 * @throws IOException if opening a shard directory or reading its segment infos fails
 */
private ShardRouting corruptRandomPrimaryFile(final boolean includePerCommitFiles) throws IOException {
    ClusterState state = client().admin().cluster().prepareState().get().getState();
    Index test = state.metadata().index("test").getIndex();

    // parameterized rather than raw, so that the conversion to a list below is type-checked
    GroupShardsIterator<ShardIterator> shardIterators = state.getRoutingTable()
        .activePrimaryShardsGrouped(new String[] { "test" }, false);
    List<ShardIterator> iterators = iterableAsArrayList(shardIterators);
    ShardIterator shardIterator = RandomPicks.randomFrom(random(), iterators);
    ShardRouting shardRouting = shardIterator.nextOrNull();
    assertNotNull(shardRouting);
    assertTrue(shardRouting.primary());
    assertTrue(shardRouting.assignedToNode());

    String nodeId = shardRouting.currentNodeId();
    NodesStatsResponse nodeStatses = client().admin().cluster().prepareNodesStats(nodeId).addMetric(FS.metricName()).get();

    // treeset makes sure iteration order is deterministic
    Set<Path> files = new TreeSet<>();
    for (FsInfo.Path info : nodeStatses.getNodes().get(0).getFs()) {
        String path = info.getPath();
        Path file = PathUtils.get(path)
            .resolve("indices")
            .resolve(test.getUUID())
            .resolve(Integer.toString(shardRouting.getId()))
            .resolve("index");
        if (Files.exists(file)) {
            // multi data path might only have one path in use
            try (Directory dir = FSDirectory.open(file)) {
                SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir);
                if (includePerCommitFiles) {
                    files.add(file.resolve(segmentCommitInfos.getSegmentsFileName()));
                }
                for (SegmentCommitInfo commitInfo : segmentCommitInfos) {
                    if (commitInfo.getDelCount() + commitInfo.getSoftDelCount() == commitInfo.info.maxDoc()) {
                        // don't corrupt fully deleted segments - they might be removed on snapshot
                        continue;
                    }
                    for (String commitFile : commitInfo.files()) {
                        if (includePerCommitFiles || isPerSegmentFile(commitFile)) {
                            files.add(file.resolve(commitFile));
                        }
                    }
                }
            }
        }
    }

    CorruptionUtils.corruptFile(random(), files.toArray(new Path[0]));
    return shardRouting;
}
Aggregations