Search in sources :

Example 51 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

Method shards of the class TransportBroadcastReplicationAction.

/**
 * Resolves the request to concrete index names and gathers the shard id of every shard routing table
 * entry belonging to those indices. Resolved names that have no index metadata in the given cluster
 * state are skipped.
 *
 * @param request      the request whose indices are resolved
 * @param clusterState the cluster state used for index resolution, metadata lookup and routing lookup
 * @return all shard ids the request should run on
 */
protected List<ShardId> shards(Request request, ClusterState clusterState) {
    final List<ShardId> targets = new ArrayList<>();
    for (String indexName : indexNameExpressionResolver.concreteIndexNames(clusterState, request)) {
        // nothing to collect for a resolved name that has no metadata in this cluster state
        if (clusterState.metadata().getIndices().get(indexName) == null) {
            continue;
        }
        for (IntObjectCursor<IndexShardRoutingTable> cursor : clusterState.getRoutingTable().indicesRouting().get(indexName).getShards()) {
            targets.add(cursor.value.shardId());
        }
    }
    return targets;
}
Also used : ShardId(org.opensearch.index.shard.ShardId) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata)

Example 52 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

Method removeStaleIdsWithoutRoutings of the class IndexMetadataUpdater.

/**
 * Removes allocation ids from the in-sync set for shard copies for which there is no routing entries in the routing table.
 * This method is called in AllocationService before any changes to the routing table are made.
 *
 * @param clusterState the cluster state whose index metadata is inspected
 * @param staleShards  the stale shard copies (shard id plus allocation id) to drop from the in-sync sets
 * @param logger       the logger that receives a warning for every shard that has stale ids
 * @return a cluster state with rebuilt metadata if at least one in-sync set was written, otherwise the
 *         {@code clusterState} instance that was passed in
 */
public static ClusterState removeStaleIdsWithoutRoutings(ClusterState clusterState, List<StaleShard> staleShards, Logger logger) {
    final Metadata currentMetadata = clusterState.metadata();
    final RoutingTable currentRoutingTable = clusterState.routingTable();
    Metadata.Builder updatedMetadata = null;
    // first level of grouping: stale entries per index
    final Map<Index, List<StaleShard>> staleByIndex = staleShards.stream().collect(Collectors.groupingBy(stale -> stale.getShardId().getIndex()));
    for (Map.Entry<Index, List<StaleShard>> perIndex : staleByIndex.entrySet()) {
        final IndexMetadata currentIndexMetadata = currentMetadata.getIndexSafe(perIndex.getKey());
        IndexMetadata.Builder updatedIndexMetadata = null;
        // second level of grouping: stale entries per shard id
        final Map<ShardId, List<StaleShard>> staleByShard = perIndex.getValue().stream().collect(Collectors.groupingBy(StaleShard::getShardId));
        for (Map.Entry<ShardId, List<StaleShard>> perShard : staleByShard.entrySet()) {
            final ShardId staleShardId = perShard.getKey();
            final int shardNumber = staleShardId.getId();
            final Set<String> inSyncBefore = currentIndexMetadata.inSyncAllocationIds(shardNumber);
            final Set<String> staleIds = perShard.getValue().stream().map(StaleShard::getAllocationId).collect(Collectors.toSet());
            assert staleIds.stream().allMatch(id -> currentRoutingTable.getByAllocationId(staleShardId, id) == null) : "removing stale ids: " + staleIds + ", some of which have still a routing entry: " + currentRoutingTable;
            final Set<String> inSyncAfter = Sets.difference(inSyncBefore, staleIds);
            assert inSyncAfter.isEmpty() == false : "Set of in-sync ids cannot become empty for shard " + staleShardId + " (before: " + inSyncBefore + ", ids to remove: " + staleIds + ")";
            // never write an empty in-sync set, even when assertions are disabled
            // (see ShardRouting#allocatedPostIndexCreate)
            if (inSyncAfter.isEmpty() == false) {
                if (updatedIndexMetadata == null) {
                    updatedIndexMetadata = IndexMetadata.builder(currentIndexMetadata);
                }
                updatedIndexMetadata.putInSyncAllocationIds(shardNumber, inSyncAfter);
            }
            logger.warn("{} marking unavailable shards as stale: {}", staleShardId, staleIds);
        }
        if (updatedIndexMetadata != null) {
            if (updatedMetadata == null) {
                updatedMetadata = Metadata.builder(currentMetadata);
            }
            updatedMetadata.put(updatedIndexMetadata);
        }
    }
    // no builder was ever created, so nothing changed: hand back the very same instance
    if (updatedMetadata == null) {
        return clusterState;
    }
    return ClusterState.builder(clusterState).metadata(updatedMetadata).build();
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RoutingChangesObserver(org.opensearch.cluster.routing.RoutingChangesObserver) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) RecoverySource(org.opensearch.cluster.routing.RecoverySource) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) HashSet(java.util.HashSet) Objects(java.util.Objects) ClusterState(org.opensearch.cluster.ClusterState) Sets(org.opensearch.common.util.set.Sets) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Map(java.util.Map) RoutingTable(org.opensearch.cluster.routing.RoutingTable) Comparator(java.util.Comparator) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) Collections(java.util.Collections) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) ShardId(org.opensearch.index.shard.ShardId) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RoutingTable(org.opensearch.cluster.routing.RoutingTable) List(java.util.List) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) HashMap(java.util.HashMap) Map(java.util.Map)

Example 53 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

Method updateInSyncAllocations of the class IndexMetadataUpdater.

/**
 * Updates in-sync allocations with routing changes that were made to the routing table.
 *
 * <p>Two paths exist: when a primary was initialized with an allocation id that is not part of a non-empty
 * old in-sync set, the in-sync set of the shard is reset (to the empty set or to a singleton set); otherwise
 * the added/removed allocation ids recorded in {@code updates} are applied to a copy of the old in-sync set,
 * which may then be trimmed before it is written.</p>
 *
 * @param newRoutingTable      the routing table used to look up the shard's current routing entries
 * @param oldIndexMetadata     the index metadata providing the old in-sync set and the number of replicas
 * @param indexMetadataBuilder the builder to write into; may be {@code null}, in which case a builder is created
 *                             from {@code oldIndexMetadata} the first time a write is needed
 * @param shardId              the shard whose in-sync set is updated
 * @param updates              the changes recorded for this shard (added/removed allocation ids, initialized
 *                             primary, first failed primary)
 * @return the builder that was passed in or created here; {@code null} if {@code null} was passed in and nothing
 *         was written
 */
private IndexMetadata.Builder updateInSyncAllocations(RoutingTable newRoutingTable, IndexMetadata oldIndexMetadata, IndexMetadata.Builder indexMetadataBuilder, ShardId shardId, Updates updates) {
    assert Sets.haveEmptyIntersection(updates.addedAllocationIds, updates.removedAllocationIds) : "allocation ids cannot be both added and removed in the same allocation round, added ids: " + updates.addedAllocationIds + ", removed ids: " + updates.removedAllocationIds;
    Set<String> oldInSyncAllocationIds = oldIndexMetadata.inSyncAllocationIds(shardId.id());
    // check if we have been force-initializing an empty primary or a stale primary
    if (updates.initializedPrimary != null && oldInSyncAllocationIds.isEmpty() == false && oldInSyncAllocationIds.contains(updates.initializedPrimary.allocationId().getId()) == false) {
        // we're not reusing an existing in-sync allocation id to initialize a primary, which means that we're either force-allocating
        // an empty or a stale primary (see AllocateEmptyPrimaryAllocationCommand or AllocateStalePrimaryAllocationCommand).
        RecoverySource recoverySource = updates.initializedPrimary.recoverySource();
        RecoverySource.Type recoverySourceType = recoverySource.getType();
        boolean emptyPrimary = recoverySourceType == RecoverySource.Type.EMPTY_STORE;
        assert updates.addedAllocationIds.isEmpty() : (emptyPrimary ? "empty" : "stale") + " primary is not force-initialized in same allocation round where shards are started";
        if (indexMetadataBuilder == null) {
            indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
        }
        if (emptyPrimary) {
            // forcing an empty primary resets the in-sync allocations to the empty set (ShardRouting.allocatedPostIndexCreate)
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.emptySet());
        } else {
            final String allocationId;
            // the forced-stale-primary recovery source is recognized by identity and maps to the placeholder allocation id;
            // any other recovery source reaching this branch is asserted to be a snapshot recovery source
            if (recoverySource == RecoverySource.ExistingStoreRecoverySource.FORCE_STALE_PRIMARY_INSTANCE) {
                allocationId = RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID;
            } else {
                assert recoverySource instanceof RecoverySource.SnapshotRecoverySource : recoverySource;
                allocationId = updates.initializedPrimary.allocationId().getId();
            }
            // forcing a stale primary resets the in-sync allocations to the singleton set with the stale id
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), Collections.singleton(allocationId));
        }
    } else {
        // standard path for updating in-sync ids
        Set<String> inSyncAllocationIds = new HashSet<>(oldInSyncAllocationIds);
        inSyncAllocationIds.addAll(updates.addedAllocationIds);
        inSyncAllocationIds.removeAll(updates.removedAllocationIds);
        assert oldInSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false || inSyncAllocationIds.contains(RecoverySource.ExistingStoreRecoverySource.FORCED_ALLOCATION_ID) == false : "fake allocation id has to be removed, inSyncAllocationIds:" + inSyncAllocationIds;
        // Prevent set of inSyncAllocationIds to grow unboundedly. This can happen for example if we don't write to a primary
        // but repeatedly shut down nodes that have active replicas.
        // We use number_of_replicas + 1 (= possible active shard copies) to bound the inSyncAllocationIds set
        // Only trim the set of allocation ids when it grows, otherwise we might trim too eagerly when the number
        // of replicas was decreased while shards were unassigned.
        // +1 for the primary
        int maxActiveShards = oldIndexMetadata.getNumberOfReplicas() + 1;
        IndexShardRoutingTable newShardRoutingTable = newRoutingTable.shardRoutingTable(shardId);
        // relocation targets must not be part of the in-sync set at this point
        assert newShardRoutingTable.assignedShards().stream().filter(ShardRouting::isRelocationTarget).map(s -> s.allocationId().getId()).noneMatch(inSyncAllocationIds::contains) : newShardRoutingTable.assignedShards() + " vs " + inSyncAllocationIds;
        if (inSyncAllocationIds.size() > oldInSyncAllocationIds.size() && inSyncAllocationIds.size() > maxActiveShards) {
            // trim entries that have no corresponding shard routing in the cluster state (i.e. trim unavailable copies)
            List<ShardRouting> assignedShards = newShardRoutingTable.assignedShards().stream().filter(s -> s.isRelocationTarget() == false).collect(Collectors.toList());
            assert assignedShards.size() <= maxActiveShards : "cannot have more assigned shards " + assignedShards + " than maximum possible active shards " + maxActiveShards;
            Set<String> assignedAllocations = assignedShards.stream().map(s -> s.allocationId().getId()).collect(Collectors.toSet());
            inSyncAllocationIds = inSyncAllocationIds.stream().sorted(// values with routing entries first
            Comparator.comparing(assignedAllocations::contains).reversed()).limit(maxActiveShards).collect(Collectors.toSet());
        }
        // If no active copy of this shard is left and a primary failed in this round, keep the failed primary's allocation id
        // in the in-sync set. Without it the set could end up empty, and an empty in-sync set could create an empty primary
        // on the next allocation.
        if (newShardRoutingTable.activeShards().isEmpty() && updates.firstFailedPrimary != null) {
            // add back allocation id of failed primary
            inSyncAllocationIds.add(updates.firstFailedPrimary.allocationId().getId());
        }
        assert inSyncAllocationIds.isEmpty() == false || oldInSyncAllocationIds.isEmpty() : "in-sync allocations cannot become empty after they have been non-empty: " + oldInSyncAllocationIds;
        // be extra safe here and only update in-sync set if it is non-empty
        if (inSyncAllocationIds.isEmpty() == false) {
            if (indexMetadataBuilder == null) {
                indexMetadataBuilder = IndexMetadata.builder(oldIndexMetadata);
            }
            indexMetadataBuilder.putInSyncAllocationIds(shardId.id(), inSyncAllocationIds);
        }
    }
    return indexMetadataBuilder;
}
Also used : IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RoutingChangesObserver(org.opensearch.cluster.routing.RoutingChangesObserver) Metadata(org.opensearch.cluster.metadata.Metadata) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata) Index(org.opensearch.index.Index) Set(java.util.Set) HashMap(java.util.HashMap) Collectors(java.util.stream.Collectors) RecoverySource(org.opensearch.cluster.routing.RecoverySource) ShardRouting(org.opensearch.cluster.routing.ShardRouting) ShardId(org.opensearch.index.shard.ShardId) HashSet(java.util.HashSet) Objects(java.util.Objects) ClusterState(org.opensearch.cluster.ClusterState) Sets(org.opensearch.common.util.set.Sets) List(java.util.List) Logger(org.apache.logging.log4j.Logger) Map(java.util.Map) RoutingTable(org.opensearch.cluster.routing.RoutingTable) Comparator(java.util.Comparator) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) Collections(java.util.Collections) IndexShardRoutingTable(org.opensearch.cluster.routing.IndexShardRoutingTable) RecoverySource(org.opensearch.cluster.routing.RecoverySource) ShardRouting(org.opensearch.cluster.routing.ShardRouting) HashSet(java.util.HashSet)

Example 54 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

Method applyFailedShards of the class AllocationService.

/**
 * Applies the failed shards. Note, only assigned ShardRouting instances that exist in the routing table should be
 * provided as parameter. Also applies a list of allocation ids to remove from the in-sync set for shard copies for which there
 * are no routing entries in the routing table.
 *
 * <p>
 * If the same instance of ClusterState is returned, then no change has been made.</p>
 *
 * @param clusterState the cluster state to apply the failures to
 * @param failedShards the shard copies to fail, each with its routing entry, message and failure
 * @param staleShards  the shard copies whose allocation ids are removed from the in-sync sets
 * @return the resulting cluster state; the given {@code clusterState} itself when both lists are empty
 */
public ClusterState applyFailedShards(final ClusterState clusterState, final List<FailedShard> failedShards, final List<StaleShard> staleShards) {
    assert assertInitialized();
    if (staleShards.isEmpty() && failedShards.isEmpty()) {
        return clusterState;
    }
    final ClusterState stateWithoutStaleIds = IndexMetadataUpdater.removeStaleIdsWithoutRoutings(clusterState, staleShards, logger);
    final RoutingNodes routingNodes = getMutableRoutingNodes(stateWithoutStaleIds);
    // shuffle the unassigned nodes, just so we won't have things like poison failed shards
    routingNodes.unassigned().shuffle();
    final long nowNanos = currentNanoTime();
    final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, stateWithoutStaleIds, clusterInfoService.getClusterInfo(), snapshotsInfoService.snapshotShardSizes(), nowNanos);
    for (FailedShard entry : failedShards) {
        final ShardRouting requested = entry.getRoutingEntry();
        final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(requested.shardId().getIndex());
        allocation.addIgnoreShardForNode(requested.shardId(), requested.currentNodeId());
        // failing a primary also fails initializing replica shards, re-resolve ShardRouting
        final ShardRouting current = routingNodes.getByAllocationId(requested.shardId(), requested.allocationId().getId());
        if (current == null) {
            logger.trace("{} shard routing failed in an earlier iteration (routing: {})", requested.shardId(), requested);
            continue;
        }
        if (current != requested) {
            logger.trace("{} shard routing modified in an earlier iteration (previous: {}, current: {})", requested.shardId(), requested, current);
        }
        // carry over the failure history (if any) of the routing entry that is about to be failed
        final UnassignedInfo previousInfo = current.unassignedInfo();
        final int previousFailures;
        final Set<String> failedNodeIds;
        if (previousInfo == null) {
            previousFailures = 0;
            failedNodeIds = Collections.emptySet();
        } else {
            previousFailures = previousInfo.getNumFailedAllocations();
            failedNodeIds = new HashSet<>(previousInfo.getFailedNodeIds().size() + 1);
            failedNodeIds.addAll(previousInfo.getFailedNodeIds());
            failedNodeIds.add(current.currentNodeId());
        }
        final String message = "failed shard on node [" + requested.currentNodeId() + "]: " + entry.getMessage();
        final UnassignedInfo failureInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, message, entry.getFailure(), previousFailures + 1, nowNanos, System.currentTimeMillis(), false, UnassignedInfo.AllocationStatus.NO_ATTEMPT, failedNodeIds);
        if (entry.markAsStale()) {
            allocation.removeAllocationId(current);
        }
        logger.warn(new ParameterizedMessage("failing shard [{}]", entry), entry.getFailure());
        routingNodes.failShard(logger, current, failureInfo, indexMetadata, allocation.changes());
    }
    // let every existing-shards allocator react to the failures before rerouting
    for (final ExistingShardsAllocator shardsAllocator : existingShardsAllocators.values()) {
        shardsAllocator.applyFailedShards(failedShards, allocation);
    }
    reroute(allocation);
    final String failedShardsSummary = firstListElementsToCommaDelimitedString(failedShards, s -> s.getRoutingEntry().shardId().toString(), logger.isDebugEnabled());
    return buildResultAndLogHealthChange(clusterState, allocation, "shards failed [" + failedShardsSummary + "]");
}
Also used : ClusterState(org.opensearch.cluster.ClusterState) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) ShardRouting(org.opensearch.cluster.routing.ShardRouting) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata)

Example 55 with IndexMetadata

use of org.opensearch.cluster.metadata.IndexMetadata in project OpenSearch by opensearch-project.

Method execute of the class CancelAllocationCommand.

/**
 * Cancels the allocation of this command's shard on this command's node by failing the shard's routing
 * entry and removing its allocation id.
 *
 * @param allocation the routing allocation that is inspected and modified
 * @param explain    if {@code true}, a refusal is reported as a {@code NO} explanation instead of being thrown
 * @return the explanation carrying the {@code YES} or {@code NO} decision
 * @throws IndexNotFoundException   if the node has routing information but the index has no metadata
 * @throws IllegalArgumentException if {@code explain} is {@code false} and the shard is not found on the node, or
 *                                  it is a primary that may not be cancelled without the allow-primary flag
 */
@Override
public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) {
    final DiscoveryNode targetNode = allocation.nodes().resolveNode(node);
    final RoutingNodes routingNodes = allocation.routingNodes();
    final RoutingNode routingNode = routingNodes.node(targetNode.getId());
    IndexMetadata indexMetadata = null;
    ShardRouting candidate = null;
    if (routingNode != null) {
        indexMetadata = allocation.metadata().index(index());
        if (indexMetadata == null) {
            throw new IndexNotFoundException(index());
        }
        candidate = routingNode.getByShardId(new ShardId(indexMetadata.getIndex(), shardId()));
    }
    if (candidate == null) {
        final String reason = "can't cancel " + shardId + ", failed to find it on node " + targetNode;
        if (explain) {
            return new RerouteExplanation(this, allocation.decision(Decision.NO, "cancel_allocation_command", reason));
        }
        throw new IllegalArgumentException("[cancel_allocation] " + reason);
    }
    if (candidate.primary() && allowPrimary == false) {
        // only allow cancelling initializing shard of primary relocation without allowPrimary flag
        final boolean initializingRelocationTarget = candidate.initializing() && candidate.relocatingNodeId() != null;
        if (initializingRelocationTarget == false) {
            final String reason = "can't cancel " + shardId + " on node " + targetNode + ", shard is primary and " + candidate.state().name().toLowerCase(Locale.ROOT);
            if (explain) {
                return new RerouteExplanation(this, allocation.decision(Decision.NO, "cancel_allocation_command", reason));
            }
            throw new IllegalArgumentException("[cancel_allocation] " + reason);
        }
    }
    final UnassignedInfo cancelledInfo = new UnassignedInfo(UnassignedInfo.Reason.REROUTE_CANCELLED, null);
    routingNodes.failShard(LogManager.getLogger(CancelAllocationCommand.class), candidate, cancelledInfo, indexMetadata, allocation.changes());
    // TODO: We don't have to remove a cancelled shard from in-sync set once we have a strict resync implementation.
    allocation.removeAllocationId(candidate);
    return new RerouteExplanation(this, allocation.decision(Decision.YES, "cancel_allocation_command", "shard " + shardId + " on node " + targetNode + " can be cancelled"));
}
Also used : ShardId(org.opensearch.index.shard.ShardId) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) RoutingNode(org.opensearch.cluster.routing.RoutingNode) RoutingNodes(org.opensearch.cluster.routing.RoutingNodes) UnassignedInfo(org.opensearch.cluster.routing.UnassignedInfo) IndexNotFoundException(org.opensearch.index.IndexNotFoundException) RerouteExplanation(org.opensearch.cluster.routing.allocation.RerouteExplanation) ShardRouting(org.opensearch.cluster.routing.ShardRouting) IndexMetadata(org.opensearch.cluster.metadata.IndexMetadata)

Aggregations

IndexMetadata (org.opensearch.cluster.metadata.IndexMetadata): 403, ClusterState (org.opensearch.cluster.ClusterState): 144, Metadata (org.opensearch.cluster.metadata.Metadata): 126, Settings (org.opensearch.common.settings.Settings): 125, IndexSettings (org.opensearch.index.IndexSettings): 87, Index (org.opensearch.index.Index): 80, ShardRouting (org.opensearch.cluster.routing.ShardRouting): 65, ShardId (org.opensearch.index.shard.ShardId): 61, DiscoveryNode (org.opensearch.cluster.node.DiscoveryNode): 53, IOException (java.io.IOException): 49, ArrayList (java.util.ArrayList): 49, HashMap (java.util.HashMap): 49, HashSet (java.util.HashSet): 48, Matchers.containsString (org.hamcrest.Matchers.containsString): 47, IndexShardRoutingTable (org.opensearch.cluster.routing.IndexShardRoutingTable): 46, RoutingTable (org.opensearch.cluster.routing.RoutingTable): 46, Map (java.util.Map): 42, ClusterService (org.opensearch.cluster.service.ClusterService): 40, List (java.util.List): 38, ActionListener (org.opensearch.action.ActionListener): 35