use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class RetentionLeaseIT method testRetentionLeasesSyncOnRecovery.
/**
 * Verifies that retention leases added on the primary reach the replicas through peer recovery.
 *
 * <p>The index is created with a single shard and no replicas, and the background retention lease
 * sync interval is set to 24 hours so that background syncing does not interfere. A random number
 * of retention leases is added to the primary, a random number of recoveries is disrupted while
 * the FINALIZE recovery action is being sent, and only then are the replicas allowed to allocate.
 * Once the index is green, every replica must expose (and have persisted) exactly the retention
 * leases that were added to the primary, ignoring peer-recovery retention leases.
 *
 * @throws Exception if waiting for a retention lease to be added is interrupted or the cluster
 *                   operations fail
 */
public void testRetentionLeasesSyncOnRecovery() throws Exception {
    final int numberOfReplicas = 2 - scaledRandomIntBetween(0, 2);
    internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
    /*
     * We effectively disable the background sync to ensure that the retention leases are not synced in the background so that the only
     * source of retention leases on the replicas would be from recovery.
     */
    final Settings.Builder settings = Settings.builder()
        .put("index.number_of_shards", 1)
        .put("index.number_of_replicas", 0)
        .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
        .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24));
    // when we increase the number of replicas below we want to exclude the replicas from being allocated so that they do not recover
    // NOTE(review): the "1" presumably restricts allocation to a single node until allowNodes(...) is called below - confirm
    assertAcked(prepareCreate("index", 1, settings));
    ensureYellow("index");
    final AcknowledgedResponse response = client().admin()
        .indices()
        .prepareUpdateSettings("index")
        .setSettings(Settings.builder().put("index.number_of_replicas", numberOfReplicas).build())
        .get();
    assertTrue(response.isAcknowledged());

    // resolve the primary shard instance so that retention leases can be added to it directly
    final String primaryShardNodeId = clusterService().state().routingTable().index("index").shard(0).primaryShard().currentNodeId();
    final String primaryShardNodeName = clusterService().state().nodes().get(primaryShardNodeId).getName();
    final IndexShard primary = internalCluster().getInstance(IndicesService.class, primaryShardNodeName)
        .getShardOrNull(new ShardId(resolveIndex("index"), 0));

    final int length = randomIntBetween(1, 8);
    // insertion-ordered map of lease id -> lease; this is the expected state on every replica
    final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
    logger.info("adding [{}] retention leases", length);
    for (int i = 0; i < length; i++) {
        // lease ids must be unique, so keep drawing until we get one that is not in use yet
        final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
        final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
        final String source = randomAlphaOfLength(8);
        final CountDownLatch latch = new CountDownLatch(1);
        final ActionListener<ReplicationResponse> listener = countDownLatchListener(latch);
        currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
        // wait for the listener to be notified before adding the next lease
        latch.await();
    }
    logger.info("finished adding [{}] retention leases", length);

    // cause some recoveries to fail to ensure that retention leases are handled properly when retrying a recovery
    assertAcked(
        client().admin()
            .cluster()
            .prepareUpdateSettings()
            .setPersistentSettings(
                Settings.builder().put(INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), TimeValue.timeValueMillis(100))
            )
    );
    // each permit allows exactly one FINALIZE request to be disrupted
    final Semaphore recoveriesToDisrupt = new Semaphore(scaledRandomIntBetween(0, 4));
    final MockTransportService primaryTransportService = (MockTransportService) internalCluster().getInstance(
        TransportService.class,
        primaryShardNodeName
    );
    primaryTransportService.addSendBehavior((connection, requestId, action, request, options) -> {
        if (action.equals(PeerRecoveryTargetService.Actions.FINALIZE) && recoveriesToDisrupt.tryAcquire()) {
            if (randomBoolean()) {
                // return a ConnectTransportException to the START_RECOVERY action
                final TransportService replicaTransportService = internalCluster().getInstance(
                    TransportService.class,
                    connection.getNode().getName()
                );
                final DiscoveryNode primaryNode = primaryTransportService.getLocalNode();
                replicaTransportService.disconnectFromNode(primaryNode);
                replicaTransportService.connectToNode(primaryNode);
            } else {
                // return an exception to the FINALIZE action
                throw new OpenSearchException("failing recovery for test purposes");
            }
        }
        // in the disconnect/reconnect case (and for every undisrupted request) the request is still forwarded
        connection.sendRequest(requestId, action, request, options);
    });

    logger.info("allow [{}] replicas to allocate", numberOfReplicas);
    // now allow the replicas to be allocated and wait for recovery to finalize
    allowNodes("index", 1 + numberOfReplicas);
    ensureGreen("index");

    // check current retention leases have been synced to all replicas
    for (final ShardRouting replicaShard : clusterService().state().routingTable().index("index").shard(0).replicaShards()) {
        final String replicaShardNodeId = replicaShard.currentNodeId();
        final String replicaShardNodeName = clusterService().state().nodes().get(replicaShardNodeId).getName();
        final IndexShard replica = internalCluster().getInstance(IndicesService.class, replicaShardNodeName)
            .getShardOrNull(new ShardId(resolveIndex("index"), 0));
        final Map<String, RetentionLease> retentionLeasesOnReplica = RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(
            replica.getRetentionLeases()
        );
        assertThat(retentionLeasesOnReplica, equalTo(currentRetentionLeases));
        // check retention leases have been written on the replica; see RecoveryTarget#finalizeRecovery
        assertThat(
            currentRetentionLeases,
            equalTo(RetentionLeaseUtils.toMapExcludingPeerRecoveryRetentionLeases(replica.loadRetentionLeases()))
        );
    }
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class ShardIndexingPressureSettingsIT method getPrimaryReplicaNodeNames.
/**
 * Resolves the names of the nodes that hold a primary copy and a non-primary copy of the given index.
 *
 * <p>The shard copies are taken from the indices stats response and the node names from the current
 * cluster state. If the stats contain no primary or no non-primary shard copy, the unchecked
 * {@code Optional.get()} fails with a {@link java.util.NoSuchElementException}.
 *
 * @param indexName name of the index whose shard copies are inspected
 * @return a tuple of (primary node name, replica node name)
 */
private Tuple<String, String> getPrimaryReplicaNodeNames(String indexName) {
    final ShardStats[] shardStats = client().admin().indices().prepareStats(indexName).get().getShards();

    // pick any shard copy flagged as primary
    final ShardRouting primaryRouting = Stream.of(shardStats)
        .map(ShardStats::getShardRouting)
        .filter(ShardRouting::primary)
        .findAny()
        .get();
    final String primaryNodeId = primaryRouting.currentNodeId();

    // pick any shard copy that is not a primary
    final ShardRouting replicaRouting = Stream.of(shardStats)
        .map(ShardStats::getShardRouting)
        .filter(routing -> routing.primary() == false)
        .findAny()
        .get();
    final String replicaNodeId = replicaRouting.currentNodeId();

    // translate node ids into node names using the current cluster state
    final DiscoveryNodes discoveryNodes = client().admin().cluster().prepareState().get().getState().nodes();
    final String primaryNodeName = discoveryNodes.get(primaryNodeId).getName();
    final String replicaNodeName = discoveryNodes.get(replicaNodeId).getName();
    return new Tuple<>(primaryNodeName, replicaNodeName);
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class SearchStatsIT method nodeIdsWithIndex.
/**
 * Collects the ids of all nodes that currently hold an active shard copy of any of the given indices.
 *
 * @param indices the index names to look up in the routing table
 * @return the set of node ids hosting at least one active shard of those indices
 */
private Set<String> nodeIdsWithIndex(String... indices) {
    final GroupShardsIterator<ShardIterator> shardGroups = client().admin()
        .cluster()
        .prepareState()
        .get()
        .getState()
        .routingTable()
        .allAssignedShardsGrouped(indices, true);

    final Set<String> activeNodeIds = new HashSet<>();
    for (ShardIterator shardGroup : shardGroups) {
        for (ShardRouting shard : shardGroup) {
            // only shard copies that are active contribute their node
            if (shard.active() == false) {
                continue;
            }
            activeNodeIds.add(shard.currentNodeId());
        }
    }
    return activeNodeIds;
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class InternalClusterInfoService method buildShardLevelInfo.
/**
 * Populates the per-shard builders from the given shard stats.
 *
 * <p>For every shard stats entry the shard routing is mapped to its data path. If store stats are
 * available, the shard size is recorded under the shard identifier and, when the reserved size is
 * known, the reserved bytes are added to the builder for the shard's (node id, data path) pair.
 *
 * @param logger                    logger used for trace output of each shard's size and reserved bytes
 * @param stats                     the shard stats to process
 * @param shardSizes                receives shard identifier to size in bytes
 * @param newShardRoutingToDataPath receives shard routing to data path
 * @param reservedSpaceByShard      receives the reserved space builders, keyed by node id and data path
 */
static void buildShardLevelInfo(
    Logger logger,
    ShardStats[] stats,
    ImmutableOpenMap.Builder<String, Long> shardSizes,
    ImmutableOpenMap.Builder<ShardRouting, String> newShardRoutingToDataPath,
    Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceByShard
) {
    for (final ShardStats shardStats : stats) {
        final ShardRouting routing = shardStats.getShardRouting();
        final String dataPath = shardStats.getDataPath();
        // the data path is recorded even when no store stats are available
        newShardRoutingToDataPath.put(routing, dataPath);

        final StoreStats store = shardStats.getStats().getStore();
        if (store != null) {
            final long sizeInBytes = store.sizeInBytes();
            final long reservedBytes = store.getReservedSize().getBytes();
            final String identifier = ClusterInfo.shardIdentifierFromRouting(routing);
            logger.trace("shard: {} size: {} reserved: {}", identifier, sizeInBytes, reservedBytes);
            shardSizes.put(identifier, sizeInBytes);

            // skip shards whose reserved size is not known
            if (reservedBytes != StoreStats.UNKNOWN_RESERVED_BYTES) {
                final ClusterInfo.NodeAndPath nodeAndPath = new ClusterInfo.NodeAndPath(routing.currentNodeId(), dataPath);
                reservedSpaceByShard.computeIfAbsent(nodeAndPath, ignored -> new ClusterInfo.ReservedSpace.Builder())
                    .add(routing.shardId(), reservedBytes);
            }
        }
    }
}
use of org.opensearch.cluster.routing.ShardRouting in project OpenSearch by opensearch-project.
the class InternalClusterInfoService method refresh.
/**
 * Refreshes the ClusterInfo in a blocking fashion.
 *
 * <p>Node stats and indices stats updates are both started, then each is awaited for at most
 * {@code fetchTimeout}. Afterwards all registered listeners are notified with the resulting
 * cluster info; a listener that throws is logged and does not prevent the others from running.
 *
 * @return the cluster info that was passed to the listeners
 */
public final ClusterInfo refresh() {
    logger.trace("refreshing cluster info");

    final CountDownLatch nodeLatch = updateNodeStats(new ActionListener<NodesStatsResponse>() {
        @Override
        public void onResponse(NodesStatsResponse nodesStatsResponse) {
            final ImmutableOpenMap.Builder<String, DiskUsage> leastAvailable = ImmutableOpenMap.builder();
            final ImmutableOpenMap.Builder<String, DiskUsage> mostAvailable = ImmutableOpenMap.builder();
            fillDiskUsagePerNode(logger, adjustNodesStats(nodesStatsResponse.getNodes()), leastAvailable, mostAvailable);
            leastAvailableSpaceUsages = leastAvailable.build();
            mostAvailableSpaceUsages = mostAvailable.build();
        }

        @Override
        public void onFailure(Exception e) {
            if (e instanceof ReceiveTimeoutTransportException) {
                // on a timeout the previously collected usages are kept as they are
                logger.error("NodeStatsAction timed out for ClusterInfoUpdateJob", e);
                return;
            }
            if (e instanceof ClusterBlockException == false) {
                logger.warn("Failed to execute NodeStatsAction for ClusterInfoUpdateJob", e);
            } else if (logger.isTraceEnabled()) {
                logger.trace("Failed to execute NodeStatsAction for ClusterInfoUpdateJob", e);
            }
            // we empty the usages list, to be safe - we don't know what's going on.
            leastAvailableSpaceUsages = ImmutableOpenMap.of();
            mostAvailableSpaceUsages = ImmutableOpenMap.of();
        }
    });

    final CountDownLatch indicesLatch = updateIndicesStats(new ActionListener<IndicesStatsResponse>() {
        @Override
        public void onResponse(IndicesStatsResponse indicesStatsResponse) {
            final ImmutableOpenMap.Builder<String, Long> shardSizes = ImmutableOpenMap.builder();
            final ImmutableOpenMap.Builder<ShardRouting, String> dataPaths = ImmutableOpenMap.builder();
            final Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedBuilders = new HashMap<>();
            buildShardLevelInfo(logger, indicesStatsResponse.getShards(), shardSizes, dataPaths, reservedBuilders);

            // turn every per-path builder into its built reserved-space value
            final ImmutableOpenMap.Builder<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace = ImmutableOpenMap.builder();
            for (Map.Entry<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> entry : reservedBuilders.entrySet()) {
                reservedSpace.put(entry.getKey(), entry.getValue().build());
            }
            indicesStatsSummary = new IndicesStatsSummary(shardSizes.build(), dataPaths.build(), reservedSpace.build());
        }

        @Override
        public void onFailure(Exception e) {
            if (e instanceof ReceiveTimeoutTransportException) {
                // on a timeout the previously collected summary is kept as it is
                logger.error("IndicesStatsAction timed out for ClusterInfoUpdateJob", e);
                return;
            }
            if (e instanceof ClusterBlockException == false) {
                logger.warn("Failed to execute IndicesStatsAction for ClusterInfoUpdateJob", e);
            } else if (logger.isTraceEnabled()) {
                logger.trace("Failed to execute IndicesStatsAction for ClusterInfoUpdateJob", e);
            }
            // we empty the usages list, to be safe - we don't know what's going on.
            indicesStatsSummary = IndicesStatsSummary.EMPTY;
        }
    });

    try {
        final boolean nodeStatsCompleted = nodeLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
        if (nodeStatsCompleted == false) {
            logger.warn("Failed to update node information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }
    } catch (InterruptedException e) {
        // restore interrupt status
        Thread.currentThread().interrupt();
    }

    try {
        final boolean indicesStatsCompleted = indicesLatch.await(fetchTimeout.getMillis(), TimeUnit.MILLISECONDS);
        if (indicesStatsCompleted == false) {
            logger.warn("Failed to update shard information for ClusterInfoUpdateJob within {} timeout", fetchTimeout);
        }
    } catch (InterruptedException e) {
        // restore interrupt status
        Thread.currentThread().interrupt();
    }

    final ClusterInfo latestInfo = getClusterInfo();
    boolean notifiedAny = false;
    for (final Consumer<ClusterInfo> subscriber : listeners) {
        notifiedAny = true;
        try {
            logger.trace("notifying [{}] of new cluster info", subscriber);
            subscriber.accept(latestInfo);
        } catch (Exception e) {
            // a failing listener must not prevent the remaining listeners from being notified
            logger.info(new ParameterizedMessage("failed to notify [{}] of new cluster info", subscriber), e);
        }
    }
    assert notifiedAny : "expected to notify at least one listener";
    return latestInfo;
}
Aggregations