Search in sources :

Example 11 with IndexShardRoutingTable

use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.

the class TransportBroadcastReplicationAction method shards.

/**
 * Collects the ids of every shard that belongs to one of the concrete indices the request resolves to.
 *
 * @param request      the request whose index expressions are resolved against the cluster state
 * @param clusterState the cluster state that supplies the index metadata and the routing table
 * @return all shard ids the request should run on
 */
protected List<ShardId> shards(Request request, ClusterState clusterState) {
    final List<ShardId> result = new ArrayList<>();
    for (String indexName : indexNameExpressionResolver.concreteIndexNames(clusterState, request)) {
        // an index without metadata in this cluster state contributes no shards
        if (clusterState.metadata().getIndices().get(indexName) == null) {
            continue;
        }
        for (IntObjectCursor<IndexShardRoutingTable> cursor : clusterState.getRoutingTable().indicesRouting().get(indexName).getShards()) {
            result.add(cursor.value.shardId());
        }
    }
    return result;
}
Also used : ShardId(org.opensearch.index.shard.ShardId) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata)

Example 12 with IndexShardRoutingTable

use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.

the class IndexMetadataUpdater method updateInSyncAllocations.

/**
 * Updates in-sync allocations with routing changes that were made to the routing table.
 *
 * @param newRoutingTable      routing table after the changes; used to look up the shard's current routing entries
 * @param oldIndexMetadata     index metadata before the update; source of the previous in-sync allocation ids and
 *                             of the number of replicas
 * @param indexMetadataBuilder builder on which the update is recorded, or {@code null} if none has been created yet
 * @param shardId              the shard whose in-sync allocation ids are updated
 * @param updates              the routing changes to apply (added and removed allocation ids, initialized primary,
 *                             first failed primary)
 * @return the builder holding the update: the builder that was passed in, a builder newly created from
 *         {@code oldIndexMetadata} if {@code null} was passed in and an update was written, or {@code null} if
 *         {@code null} was passed in and nothing was written
 */
private IndexMetadata.Builder updateInSyncAllocations(RoutingTable newRoutingTable, IndexMetadata oldIndexMetadata, IndexMetadata.Builder indexMetadataBuilder, ShardId shardId, Updates updates) {
    assert Sets.haveEmptyIntersection(updates.addedAllocationIds, updates.removedAllocationIds) : "allocation ids cannot be both added and removed in the same allocation round, added ids: " + updates.addedAllocationIds + ", removed ids: " + updates.removedAllocationIds;
    Set<String> oldInSyncAllocationIds = oldIndexMetadata.inSyncAllocationIds(shardId.id());
    // check if we have been force-initializing an empty primary or a stale primary
    if (updates.initializedPrimary != null && oldInSyncAllocationIds.isEmpty() == false && oldInSyncAllocationIds.contains(updates.initializedPrimary.allocationId().getId()) == false) {
        // we're not reusing an existing in-sync allocation id to initialize a primary, which means that we're either force-allocating
        // an empty or a stale primary (see AllocateEmptyPrimaryAllocationCommand or AllocateStalePrimaryAllocationCommand).
        RecoverySource recoverySource = updates.initializedPrimary.recoverySource();
        RecoverySource.Type recoverySourceType = recoverySource.getType();
        boolean emptyPrimary = recoverySourceType == RecoverySource.Type.EMPTY_STORE;
        assert updates.addedAllocationIds.isEmpty() : (emptyPrimary ? "empty" : "stale") + " primary is not force-initialized in same allocation round where shards are started";
        // create the builder lazily: it is only needed once there is something to write
        if (indexMetadataBuilder == null) {
            indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
        }
        if (emptyPrimary) {
            // forcing an empty primary resets the in-sync allocations to the empty set (ShardRouting.allocatedPostIndexCreate)
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.emptySet());
        } else {
            final String allocationId;
            if (recoverySource == RecoverySource.ExistingStoreRecoverySource.FORCE_STALE_PRIMARY_INSTANCE) {
                // a force-allocated stale primary is recorded under the fixed FORCED_ALLOCATION_ID rather than its own id
                allocationId = RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID;
            } else {
                // the only other recovery source expected here is a snapshot restore, which keeps the primary's own id
                assert recoverySource instanceof RecoverySource.SnapshotRecoverySource : recoverySource;
                allocationId = updates.initializedPrimary.allocationId().getId();
            }
            // forcing a stale primary resets the in-sync allocations to the singleton set with the stale id
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.singleton(allocationId));
        }
    } else {
        // standard path for updating in-sync ids
        Set<String> inSyncAllocationIds = new HashSet<>(oldInSyncAllocationIds);
        inSyncAllocationIds.addAll(updates.addedAllocationIds);
        inSyncAllocationIds.removeAll(updates.removedAllocationIds);
        assert oldInSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false || inSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false : "fake allocation id has to be removed, inSyncAllocationIds:" + inSyncAllocationIds;
        // Prevent set of inSyncAllocationIds to grow unboundedly. This can happen for example if we don't write to a primary
        // but repeatedly shut down nodes that have active replicas.
        // We use number_of_replicas + 1 (= possible active shard copies) to bound the inSyncAllocationIds set
        // Only trim the set of allocation ids when it grows, otherwise we might trim too eagerly when the number
        // of replicas was decreased while shards were unassigned.
        // +1 for the primary
        int maxActiveShards = oldIndexMetadata.getNumberOfReplicas() + 1;
        IndexShardRoutingTable newShardRoutingTable = newRoutingTable.shardRoutingTable(shardId);
        // sanity check: no relocation target may already be part of the in-sync set
        assert newShardRoutingTable.assignedShards().stream().filter(ShardRouting::isRelocationTarget).map(s -> s.allocationId().getId()).noneMatch(inSyncAllocationIds::contains) : newShardRoutingTable.assignedShards() + " vs " + inSyncAllocationIds;
        if (inSyncAllocationIds.size() > oldInSyncAllocationIds.size() && inSyncAllocationIds.size() > maxActiveShards) {
            // trim entries that have no corresponding shard routing in the cluster state (i.e. trim unavailable copies)
            List<ShardRouting> assignedShards = newShardRoutingTable.assignedShards().stream().filter(s -> s.isRelocationTarget() == false).collect(Collectors.toList());
            assert assignedShards.size() <= maxActiveShards : "cannot have more assigned shards " + assignedShards + " than maximum possible active shards " + maxActiveShards;
            Set<String> assignedAllocations = assignedShards.stream().map(s -> s.allocationId().getId()).collect(Collectors.toSet());
            // keep at most maxActiveShards ids, ordering the ids that still have a routing entry ahead of the others
            inSyncAllocationIds = inSyncAllocationIds.stream().sorted(// values with routing entries first
            Comparator.comparing(assignedAllocations::contains).reversed()).limit(maxActiveShards).collect(Collectors.toSet());
        }
        // If the new routing table has no active copy of this shard and a primary failed, keep the allocation id of the
        // failed primary in the in-sync set. If we were to remove the allocation id of the failed primary from the
        // in-sync set, this could create an empty primary on the next allocation.
        if (newShardRoutingTable.activeShards().isEmpty() && updates.firstFailedPrimary != null) {
            // add back allocation id of failed primary
            inSyncAllocationIds.add(updates.firstFailedPrimary.allocationId().getId());
        }
        assert inSyncAllocationIds.isEmpty() == false || oldInSyncAllocationIds.isEmpty() : "in-sync allocations cannot become empty after they have been non-empty: " + oldInSyncAllocationIds;
        // be extra safe here and only update in-sync set if it is non-empty
        if (inSyncAllocationIds.isEmpty() == false) {
            if (indexMetadataBuilder == null) {
                indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
            }
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), inSyncAllocationIds);
        }
    }
    return indexMetadataBuilder;
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RoutingChangesObserver(org.opensearch.cluster.routing.RoutingChangesObserver) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) RecoverySource(org.opensearch.cluster.routing.RecoverySource) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) HashSet(java.util.HashSet) Objects(java.util.Objects) ClusterState(org.opensearch.cluster.ClusterState) Sets(org.opensearch.common.util.set.Sets) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Map(java.util.Map) RoutingTable(org.opensearch.cluster.routing.RoutingTable) Comparator(java.util.Comparator) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) Collections(java.util.Collections) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RecoverySource(org.opensearch.cluster.routing.RecoverySource) ShardRouting(org.opensearch.cluster.routing.ShardRouting) HashSet(java.util.HashSet)

Example 13 with IndexShardRoutingTable

use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.

the class ClusterStateDiffIT method randomChangeToIndexRoutingTable.

/**
 * Builds a new routing table for the same index in which every shard routing of {@code original} has been
 * passed through {@code randomChange}.
 *
 * @param original the index routing table to derive the changed table from
 * @param nodes    the ids of the nodes that shard copies may be placed on
 * @return the newly built index routing table
 */
private IndexRoutingTable randomChangeToIndexRoutingTable(IndexRoutingTable original, String[] nodes) {
    final IndexRoutingTable.Builder changedTable = IndexRoutingTable.builder(original.getIndex());
    for (ObjectCursor<IndexShardRoutingTable> cursor : original.shards().values()) {
        final IndexShardRoutingTable shardTable = cursor.value;
        // start from all nodes, then drop every node that already holds (or is receiving) a copy of this shard
        final Set<String> freeNodes = Sets.newHashSet(nodes);
        for (ShardRouting existing : shardTable.shards()) {
            freeNodes.remove(existing.currentNodeId());
            if (existing.relocating()) {
                freeNodes.remove(existing.relocatingNodeId());
            }
        }
        for (ShardRouting existing : shardTable.shards()) {
            final ShardRouting changed = randomChange(existing, freeNodes);
            // nodes taken by the changed routing are no longer available to the remaining copies
            freeNodes.remove(changed.currentNodeId());
            // NOTE(review): the relocation check is made on the routing as it was BEFORE the change, while the
            // node id comes from the changed routing - confirm this asymmetry is intended
            if (existing.relocating()) {
                freeNodes.remove(changed.relocatingNodeId());
            }
            changedTable.addShard(changed);
        }
    }
    return changedTable.build();
}
Also used : IndexRoutingTable(org.opensearch.cluster.routing.IndexRoutingTable) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) ShardRouting(org.opensearch.cluster.routing.ShardRouting) TestShardRouting(org.opensearch.cluster.routing.TestShardRouting)

Example 14 with IndexShardRoutingTable

use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.

the class ReplicaToPrimaryPromotionIT method testPromoteReplicaToPrimary.

/**
 * Verifies that after stopping the node that holds a randomly chosen primary shard, every active shard copy of
 * the index is a primary and the search hit count still equals the number of indexed documents. The index is
 * randomly either left open or closed before the node is stopped; a closed index is reopened before the final
 * hit count check.
 */
public void testPromoteReplicaToPrimary() throws Exception {
    final String indexName = randomAlphaOfLength(5).toLowerCase(Locale.ROOT);
    createIndex(indexName);
    // the index may stay empty: zero documents is a valid draw
    final int numOfDocs = scaledRandomIntBetween(0, 200);
    if (numOfDocs > 0) {
        try (BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), numOfDocs)) {
            waitForDocs(numOfDocs, indexer);
        }
        refresh(indexName);
    }
    // baseline: the hit count matches the number of indexed documents before the cluster is disturbed
    assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs);
    ensureGreen(indexName);
    // sometimes test with a closed index
    final IndexMetadata.State indexState = randomFrom(IndexMetadata.State.OPEN, IndexMetadata.State.CLOSE);
    if (indexState == IndexMetadata.State.CLOSE) {
        CloseIndexResponse closeIndexResponse = client().admin().indices().prepareClose(indexName).get();
        assertThat("close index not acked - " + closeIndexResponse, closeIndexResponse.isAcknowledged(), equalTo(true));
        ensureGreen(indexName);
    }
    // pick up a data node that contains a random primary shard
    ClusterState state = client(internalCluster().getMasterName()).admin().cluster().prepareState().get().getState();
    final int numShards = state.metadata().index(indexName).getNumberOfShards();
    final ShardRouting primaryShard = state.routingTable().index(indexName).shard(randomIntBetween(0, numShards - 1)).primaryShard();
    final DiscoveryNode randomNode = state.nodes().resolveNode(primaryShard.currentNodeId());
    // stop the random data node, all remaining shards are promoted to primaries
    internalCluster().stopRandomNode(InternalTestCluster.nameFilter(randomNode.getName()));
    ensureYellowAndNoInitializingShards(indexName);
    // re-read the cluster state and check that no active shard copy of the index is a replica
    state = client(internalCluster().getMasterName()).admin().cluster().prepareState().get().getState();
    for (IndexShardRoutingTable shardRoutingTable : state.routingTable().index(indexName)) {
        for (ShardRouting shardRouting : shardRoutingTable.activeShards()) {
            assertThat(shardRouting + " should be promoted as a primary", shardRouting.primary(), is(true));
        }
    }
    // a closed index has to be reopened before the final search below
    if (indexState == IndexMetadata.State.CLOSE) {
        assertAcked(client().admin().indices().prepareOpen(indexName));
        ensureYellowAndNoInitializingShards(indexName);
    }
    // the hit count must be the same as before the node was stopped
    assertHitCount(client().prepareSearch(indexName).setSize(0).get(), numOfDocs);
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) BackgroundIndexer(org.opensearch.test.BackgroundIndexer) CloseIndexResponse(org.opensearch.action.admin.indices.close.CloseIndexResponse) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) ShardRouting(org.opensearch.cluster.routing.ShardRouting)

Example 15 with IndexShardRoutingTable

use of org.opensearch.cluster.routing.IndexShardRoutingTable in project OpenSearch by opensearch-project.

the class IndexShardTestCase method startReplicaAfterRecovery.

/**
 * Pushes the post-recovery state to a primary/replica pair: builds a routing table that contains the replica's
 * routing entry, adds the replica's allocation id to the in-sync set, and updates the shard state of the primary
 * and then of the replica (the latter with its routing entry moved to started).
 *
 * @param replica      the replica shard that has finished recovery
 * @param primary      the primary shard of the replication group
 * @param inSyncIds    the in-sync allocation ids before the replica is added; not modified
 * @param routingTable the shard routing table to derive the new routing table from
 * @throws IOException if updating the shard state of either shard fails
 */
protected void startReplicaAfterRecovery(IndexShard replica, IndexShard primary, Set<String> inSyncIds, IndexShardRoutingTable routingTable) throws IOException {
    final ShardRouting replicaEntry = replica.routingEntry();
    final IndexShardRoutingTable updatedTable;
    if (replicaEntry.isRelocationTarget()) {
        // a relocation target takes the place of the primary's routing entry
        updatedTable = new IndexShardRoutingTable.Builder(routingTable)
            .removeShard(primary.routingEntry())
            .addShard(replica.routingEntry())
            .build();
    } else {
        // otherwise the replica's initializing entry is replaced by its current entry
        updatedTable = new IndexShardRoutingTable.Builder(routingTable)
            .removeShard(replicaEntry)
            .addShard(replica.routingEntry())
            .build();
    }
    final Set<String> expandedInSyncIds = new HashSet<>(inSyncIds);
    expandedInSyncIds.add(replica.routingEntry().allocationId().getId());
    // update both primary and replica shard state; the primary bumps the cluster state version, the replica reuses it
    primary.updateShardState(
        primary.routingEntry(),
        primary.getPendingPrimaryTerm(),
        null,
        currentClusterStateVersion.incrementAndGet(),
        expandedInSyncIds,
        updatedTable
    );
    replica.updateShardState(
        replica.routingEntry().moveToStarted(),
        replica.getPendingPrimaryTerm(),
        null,
        currentClusterStateVersion.get(),
        expandedInSyncIds,
        updatedTable
    );
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) TestShardRouting.newShardRouting(org.opensearch.cluster.routing.TestShardRouting.newShardRouting) ShardRouting(org.opensearch.cluster.routing.ShardRouting) TestShardRouting(org.opensearch.cluster.routing.TestShardRouting) HashSet(java.util.HashSet)

Aggregations

IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 84; ShardRouting (org.opensearch.cluster.routing.ShardRouting): 60; ClusterState (org.opensearch.cluster.ClusterState): 50; ShardId (org.opensearch.index.shard.ShardId): 36; IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 32; IndexRoutingTable (org.opensearch.cluster.routing.IndexRoutingTable): 27; Settings (org.opensearch.common.settings.Settings): 21; HashSet (java.util.HashSet): 20; PlainActionFuture (org.opensearch.action.support.PlainActionFuture): 18; RoutingTable (org.opensearch.cluster.routing.RoutingTable): 18; ArrayList (java.util.ArrayList): 16; List (java.util.List): 16; Matchers.equalTo (org.hamcrest.Matchers.equalTo): 16; ShardRoutingState (org.opensearch.cluster.routing.ShardRoutingState): 16; Collections (java.util.Collections): 15; Collectors (java.util.stream.Collectors): 14; ActionListener (org.opensearch.action.ActionListener): 14; IndexShard (org.opensearch.index.shard.IndexShard): 14; Set (java.util.Set): 13; IOException (java.io.IOException): 12