use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.
the class TransportBroadcastReplicationAction method shards.
/**
* @return all shard ids the request should run on
*/
protected List<ShardId> shards(Request request, ClusterState clusterState) {
List<ShardId> shardIds = new ArrayList<>();
String[] concreteIndices = indexNameExpressionResolver.concreteIndexNames(clusterState, request);
for (String index : concreteIndices) {
IndexMetadata indexMetadata = clusterState.metadata().getIndices().get(index);
if (indexMetadata != null) {
for (IntObjectCursor<IndexShardRoutingTable> shardRouting : clusterState.getRoutingTable().indicesRouting().get(index).getShards()) {
shardIds.add(shardRouting.value.shardId());
}
}
}
return shardIds;
}
use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.
the class IndexMetadataUpdater method updateInSyncAllocations.
/**
* Updates in-sync allocations with routing changes that were made to the routing table.
*/
private IndexMetadata.Builder updateInSyncAllocations(RoutingTable newRoutingTable, IndexMetadata oldIndexMetadata, IndexMetadata.Builder indexMetadataBuilder, ShardId shardId, Updates updates) {
assert Sets.haveEmptyIntersection(updates.addedAllocationIds, updates.removedAllocationIds) : "allocation ids cannot be both added and removed in the same allocation round, added ids: " + updates.addedAllocationIds + ", removed ids: " + updates.removedAllocationIds;
Set<String> oldInSyncAllocationIds = oldIndexMetadata.inSyncAllocationIds(shardId.id());
// check if we have been force-initializing an empty primary or a stale primary
if (updates.initializedPrimary != null && oldInSyncAllocationIds.isEmpty() == false && oldInSyncAllocationIds.contains(updates.initializedPrimary.allocationId().getId()) == false) {
// we're not reusing an existing in-sync allocation id to initialize a primary, which means that we're either force-allocating
// an empty or a stale primary (see AllocateEmptyPrimaryAllocationCommand or AllocateStalePrimaryAllocationCommand).
RecoverySource recoverySource = updates.initializedPrimary.recoverySource();
RecoverySource.Type recoverySourceType = recoverySource.getType();
boolean emptyPrimary = recoverySourceType == RecoverySource.Type.EMPTY_STORE;
assert updates.addedAllocationIds.isEmpty() : (emptyPrimary ? "empty" : "stale") + " primary is not force-initialized in same allocation round where shards are started";
if (indexMetadataBuilder == null) {
indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
}
if (emptyPrimary) {
// forcing an empty primary resets the in-sync allocations to the empty set (ShardRouting.allocatedPostIndexCreate)
indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.emptySet());
} else {
final String allocationId;
if (recoverySource == RecoverySource.ExistingStoreRecoverySource.FORCE_STALE_PRIMARY_INSTANCE) {
allocationId = RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID;
} else {
assert recoverySource instanceof RecoverySource.SnapshotRecoverySource : recoverySource;
allocationId = updates.initializedPrimary.allocationId().getId();
}
// forcing a stale primary resets the in-sync allocations to the singleton set with the stale id
indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.singleton(allocationId));
}
} else {
// standard path for updating in-sync ids
Set<String> inSyncAllocationIds = new HashSet<>(oldInSyncAllocationIds);
inSyncAllocationIds.addAll(updates.addedAllocationIds);
inSyncAllocationIds.removeAll(updates.removedAllocationIds);
assert oldInSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false || inSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false : "fake allocation id has to be removed, inSyncAllocationIds:" + inSyncAllocationIds;
// Prevent set of inSyncAllocationIds to grow unboundedly. This can happen for example if we don't write to a primary
// but repeatedly shut down nodes that have active replicas.
// We use number_of_replicas + 1 (= possible active shard copies) to bound the inSyncAllocationIds set
// Only trim the set of allocation ids when it grows, otherwise we might trim too eagerly when the number
// of replicas was decreased while shards were unassigned.
// +1 for the primary
int maxActiveShards = oldIndexMetadata.getNumberOfReplicas() + 1;
IndexShardRoutingTable newShardRoutingTable = newRoutingTable.shardRoutingTable(shardId);
assert newShardRoutingTable.assignedShards().stream().filter(ShardRouting::isRelocationTarget).map(s -> s.allocationId().getId()).noneMatch(inSyncAllocationIds::contains) : newShardRoutingTable.assignedShards() + " vs " + inSyncAllocationIds;
if (inSyncAllocationIds.size() > oldInSyncAllocationIds.size() && inSyncAllocationIds.size() > maxActiveShards) {
// trim entries that have no corresponding shard routing in the cluster state (i.e. trim unavailable copies)
List<ShardRouting> assignedShards = newShardRoutingTable.assignedShards().stream().filter(s -> s.isRelocationTarget() == false).collect(Collectors.toList());
assert assignedShards.size() <= maxActiveShards : "cannot have more assigned shards " + assignedShards + " than maximum possible active shards " + maxActiveShards;
Set<String> assignedAllocations = assignedShards.stream().map(s -> s.allocationId().getId()).collect(Collectors.toSet());
inSyncAllocationIds = inSyncAllocationIds.stream().sorted(// values with routing entries first
Comparator.comparing(assignedAllocations::contains).reversed()).limit(maxActiveShards).collect(Collectors.toSet());
}
// in-sync set, this could create an empty primary on the next allocation.
if (newShardRoutingTable.activeShards().isEmpty() && updates.firstFailedPrimary != null) {
// add back allocation id of failed primary
inSyncAllocationIds.add(updates.firstFailedPrimary.allocationId().getId());
}
assert inSyncAllocationIds.isEmpty() == false || oldInSyncAllocationIds.isEmpty() : "in-sync allocations cannot become empty after they have been non-empty: " + oldInSyncAllocationIds;
// be extra safe here and only update in-sync set if it is non-empty
if (inSyncAllocationIds.isEmpty() == false) {
if (indexMetadataBuilder == null) {
indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
}
indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), inSyncAllocationIds);
}
}
return indexMetadataBuilder;
}
use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.
the class ClusterStateDiffIT method randomChangeToIndexRoutingTable.
/**
* Randomly updates index routing table in the cluster state
*/
private IndexRoutingTable randomChangeToIndexRoutingTable(IndexRoutingTable original, String[] nodes) {
IndexRoutingTable.Builder builder = IndexRoutingTable.builder(original.getIndex());
for (ObjectCursor<IndexShardRoutingTable> indexShardRoutingTable : original.shards().values()) {
Set<String> availableNodes = Sets.newHashSet(nodes);
for (ShardRouting shardRouting : indexShardRoutingTable.value.shards()) {
availableNodes.remove(shardRouting.currentNodeId());
if (shardRouting.relocating()) {
availableNodes.remove(shardRouting.relocatingNodeId());
}
}
for (ShardRouting shardRouting : indexShardRoutingTable.value.shards()) {
final ShardRouting updatedShardRouting = randomChange(shardRouting, availableNodes);
availableNodes.remove(updatedShardRouting.currentNodeId());
if (shardRouting.relocating()) {
availableNodes.remove(updatedShardRouting.relocatingNodeId());
}
builder.addShard(updatedShardRouting);
}
}
return builder.build();
}
use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.
the class ReplicaToPrimaryPromotionIT method testPromoteReplicaToPrimary.
public void testPromoteReplicaToPrimary() throws Exception {
final String indexName = randomAlphaOfLength(5).toLowerCase(Locale.ROOT);
createIndex(indexName);
final int numOfDocs = scaledRandomIntBetween(0, 200);
if (numOfDocs > 0) {
try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numOfDocs)) {
waitForDocs(numOfDocs, indexer);
}
refresh(indexName);
}
assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs);
ensureGreen(indexName);
// sometimes test with a closed index
final IndexMetadata.State indexState = randomFrom(IndexMetadata.State.OPEN, IndexMetadata.State.CLOSE);
if (indexState == IndexMetadata.State.CLOSE) {
CloseIndexResponse closeIndexResponse = client().admin().indices().prepareClose(indexName).get();
assertThat("close index not acked - " + closeIndexResponse, closeIndexResponse.isAcknowledged(), equalTo(true));
ensureGreen(indexName);
}
// pick up a data node that contains a random primary shard
ClusterState state = client(internalCluster().getMasterName()).admin().cluster().prepareState().get().getState();
final int numShards = state.metadata().index(indexName).getNumberOfShards();
final ShardRouting primaryShard = state.routingTable().index(indexName).shard(randomIntBetween(0, numShards - 1)).primaryShard();
final DiscoveryNode randomNode = state.nodes().resolveNode(primaryShard.currentNodeId());
// stop the random data node, all remaining shards are promoted to primaries
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(randomNode.getName()));
ensureYellowAndNoInitializingShards(indexName);
state = client(internalCluster().getMasterName()).admin().cluster().prepareState().get().getState();
for (IndexShardRoutingTable shardRoutingTable : state.routingTable().index(indexName)) {
for (ShardRouting shardRouting : shardRoutingTable.activeShards()) {
assertThat(shardRouting + " should be promoted as a primary", shardRouting.primary(), is(true));
}
}
if (indexState == IndexMetadata.State.CLOSE) {
assertAcked(client().admin().indices().prepareOpen(indexName));
ensureYellowAndNoInitializingShards(indexName);
}
assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs);
}
use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.
the class IndexShardTestCase method startReplicaAfterRecovery.
protected void startReplicaAfterRecovery(IndexShard replica, IndexShard primary, Set<String> inSyncIds, IndexShardRoutingTable routingTable) throws IOException {
ShardRouting initializingReplicaRouting = replica.routingEntry();
IndexShardRoutingTable newRoutingTable = initializingReplicaRouting.isRelocationTarget() ? new IndexShardRoutingTable.Builder(routingTable).removeShard(primary.routingEntry()).addShard(replica.routingEntry()).build() : new IndexShardRoutingTable.Builder(routingTable).removeShard(initializingReplicaRouting).addShard(replica.routingEntry()).build();
Set<String> inSyncIdsWithReplica = new HashSet<>(inSyncIds);
inSyncIdsWithReplica.add(replica.routingEntry().allocationId().getId());
// update both primary and replica shard state
primary.updateShardState(primary.routingEntry(), primary.getPendingPrimaryTerm(), null, currentClusterStateVersion.incrementAndGet(), inSyncIdsWithReplica, newRoutingTable);
replica.updateShardState(replica.routingEntry().moveToStarted(), replica.getPendingPrimaryTerm(), null, currentClusterStateVersion.get(), inSyncIdsWithReplica, newRoutingTable);
}
Aggregations