use of org.opensearch.repositories.ShardGenerations in project OpenSearch by opensearch-project.
the class SnapshotsService method shards.
/**
 * Calculates the assignment of shards to data nodes for a new snapshot based on the given cluster state and the
 * indices that should be included in the snapshot.
 *
 * @param snapshotsInProgress snapshots currently in progress; used to derive the in-flight shard states for
 *                            {@code repoName}
 * @param deletionsInProgress snapshot deletions currently in progress, may be {@code null}; if a deletion for
 *                            {@code repoName} is in state {@code STARTED}, shards are queued instead of assigned
 * @param metadata            cluster metadata used to look up each index
 * @param routingTable        routing table used to look up the primary of each shard
 * @param indices             Indices to snapshot
 * @param useShardGenerations whether to write {@link ShardGenerations} during the snapshot
 * @param repositoryData      repository data providing the known indices and shard generations
 * @param repoName            name of the repository the snapshot is written to
 * @return map from shard id to the initial snapshot status of every shard to be included into the current snapshot
 */
private static ImmutableOpenMap<ShardId, SnapshotsInProgress.ShardSnapshotStatus> shards(
    SnapshotsInProgress snapshotsInProgress,
    @Nullable SnapshotDeletionsInProgress deletionsInProgress,
    Metadata metadata,
    RoutingTable routingTable,
    List<IndexId> indices,
    boolean useShardGenerations,
    RepositoryData repositoryData,
    String repoName
) {
    ImmutableOpenMap.Builder<ShardId, SnapshotsInProgress.ShardSnapshotStatus> builder = ImmutableOpenMap.builder();
    final ShardGenerations shardGenerations = repositoryData.shardGenerations();
    final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(
        repoName,
        snapshotsInProgress.entries()
    );
    // Shards can only be assigned right away when no delete is currently running against the same repository.
    final boolean readyToExecute = deletionsInProgress == null
        || deletionsInProgress.getEntries()
            .stream()
            .noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED);
    for (IndexId index : indices) {
        final String indexName = index.getName();
        final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
        IndexMetadata indexMetadata = metadata.index(indexName);
        if (indexMetadata == null) {
            // The index was deleted before we managed to start the snapshot - mark it as missing.
            builder.put(new ShardId(indexName, IndexMetadata.INDEX_UUID_NA_VALUE, 0), ShardSnapshotStatus.MISSING);
        } else {
            final IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
            for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
                // The routing table may be absent for this index (handled below), so it must not be dereferenced
                // unconditionally just to obtain the shard id; fall back to the index metadata in that case.
                final ShardId shardId = indexRoutingTable == null
                    ? new ShardId(indexMetadata.getIndex(), i)
                    : indexRoutingTable.shard(i).shardId();
                final String shardRepoGeneration;
                if (useShardGenerations) {
                    final String inFlightGeneration = inFlightShardStates.generationForShard(index, shardId.id(), shardGenerations);
                    if (inFlightGeneration == null && isNewIndex) {
                        assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index ["
                            + index
                            + "]";
                        shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
                    } else {
                        shardRepoGeneration = inFlightGeneration;
                    }
                } else {
                    shardRepoGeneration = null;
                }
                final ShardSnapshotStatus shardSnapshotStatus;
                if (indexRoutingTable == null) {
                    shardSnapshotStatus = new SnapshotsInProgress.ShardSnapshotStatus(
                        null,
                        ShardState.MISSING,
                        "missing routing table",
                        shardRepoGeneration
                    );
                } else {
                    ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
                    if (readyToExecute == false || inFlightShardStates.isActive(indexName, i)) {
                        // Either a delete is running or another operation is already working on this shard: queue it.
                        shardSnapshotStatus = ShardSnapshotStatus.UNASSIGNED_QUEUED;
                    } else if (primary == null || !primary.assignedToNode()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(
                            null,
                            ShardState.MISSING,
                            "primary shard is not allocated",
                            shardRepoGeneration
                        );
                    } else if (primary.relocating() || primary.initializing()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardRepoGeneration);
                    } else if (!primary.started()) {
                        shardSnapshotStatus = new ShardSnapshotStatus(
                            primary.currentNodeId(),
                            ShardState.MISSING,
                            "primary shard hasn't been started yet",
                            shardRepoGeneration
                        );
                    } else {
                        shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardRepoGeneration);
                    }
                }
                builder.put(shardId, shardSnapshotStatus);
            }
        }
    }
    return builder.build();
}
use of org.opensearch.repositories.ShardGenerations in project OpenSearch by opensearch-project.
the class SnapshotsService method buildGenerations.
/**
 * Builds the {@link ShardGenerations} recorded by the given snapshot entry.
 * <p>
 * For a clone, the generation of every clone shard is taken as is. For a regular snapshot, shards whose index is no
 * longer present in {@code metadata} are skipped (asserting that the snapshot is partial), as are shards whose index
 * name is not among the entry's indices.
 *
 * @param snapshot snapshot entry to read the shard (or clone) states from
 * @param metadata metadata used to check whether an index of the snapshot still exists
 * @return the shard generations collected from the entry
 */
private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, Metadata metadata) {
    // Index name -> IndexId for every index that is part of the entry.
    final Map<String, IndexId> indexIdsByName = new HashMap<>();
    for (IndexId indexId : snapshot.indices()) {
        indexIdsByName.put(indexId.getName(), indexId);
    }

    final ShardGenerations.Builder generations = ShardGenerations.builder();
    if (snapshot.isClone()) {
        snapshot.clones()
            .forEach(
                clone -> generations.put(indexIdsByName.get(clone.key.indexName()), clone.key.shardId(), clone.value.generation())
            );
        return generations.build();
    }

    snapshot.shards().forEach(shard -> {
        if (metadata.index(shard.key.getIndex()) != null) {
            final IndexId indexId = indexIdsByName.get(shard.key.getIndexName());
            if (indexId != null) {
                generations.put(indexId, shard.key.id(), shard.value.generation());
            }
        } else {
            assert snapshot.partial() : "Index [" + shard.key.getIndex() + "] was deleted during a snapshot but snapshot was not partial.";
        }
    });
    return generations.build();
}
use of org.opensearch.repositories.ShardGenerations in project OpenSearch by opensearch-project.
the class SnapshotsService method startCloning.
/**
* Determine the number of shards in each index of a clone operation and update the cluster state accordingly.
* <p>
* The work is done in three chained asynchronous steps: (1) load the {@code SnapshotInfo} of the source snapshot and
* reject the clone if the source snapshot failed for any of the indices to clone, (2) load the shard count of every
* index from the index metadata stored in the repository, (3) submit a cluster state update that adds the per-shard
* clone jobs to the clone entry and then starts every shard clone that is in state {@code INIT}.
* Repository reads in steps 1 and 2 are submitted to the {@code SNAPSHOT} thread pool executor.
*
* @param repository repository to run operation on
* @param cloneEntry clone operation in the cluster state
*/
private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) {
final List<IndexId> indices = cloneEntry.indices();
final SnapshotId sourceSnapshot = cloneEntry.source();
final Snapshot targetSnapshot = cloneEntry.snapshot();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// Exception handler for IO exceptions with loading index and repo metadata
final Consumer<Exception> onFailure = e -> {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null);
};
// 1. step, load SnapshotInfo to make sure that source snapshot was successful for the indices we want to clone
// TODO: we could skip this step for snapshots with state SUCCESS
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshot)));
// Collects one (index, shard count) tuple per index; the grouped listener is sized to the number of indices and
// feeds the collected tuples into allShardCountsListener.
final StepListener<Collection<Tuple<IndexId, Integer>>> allShardCountsListener = new StepListener<>();
final GroupedActionListener<Tuple<IndexId, Integer>> shardCountListener = new GroupedActionListener<>(allShardCountsListener, indices.size());
snapshotInfoListener.whenComplete(snapshotInfo -> {
// Refuse to clone any index for which the source snapshot was not successful.
for (IndexId indexId : indices) {
if (RestoreService.failed(snapshotInfo, indexId.getName())) {
throw new SnapshotException(targetSnapshot, "Can't clone index [" + indexId + "] because its snapshot was not successful.");
}
}
// 2. step, load the number of shards we have in each index to be cloned from the index metadata.
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
for (IndexId index : indices) {
executor.execute(ActionRunnable.supply(shardCountListener, () -> {
final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index);
return Tuple.tuple(index, metadata.getNumberOfShards());
}));
}
}, onFailure));
}, onFailure);
// 3. step, we have all the shard counts, now update the cluster state to have clone jobs in the snap entry
allShardCountsListener.whenComplete(counts -> repository.executeConsistentStateUpdate(repoData -> new ClusterStateUpdateTask() {
// Set by execute() when the clone entry was found and updated; stays null otherwise.
private SnapshotsInProgress.Entry updatedEntry;
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> updatedEntries = new ArrayList<>(snapshotsInProgress.entries());
boolean changed = false;
final String localNodeId = currentState.nodes().getLocalNodeId();
final String repoName = cloneEntry.repository();
final ShardGenerations shardGenerations = repoData.shardGenerations();
// Locate the entry of this clone in the current cluster state and replace it with one that carries the clone jobs.
for (int i = 0; i < updatedEntries.size(); i++) {
if (cloneEntry.snapshot().equals(updatedEntries.get(i).snapshot())) {
final ImmutableOpenMap.Builder<RepositoryShardId, ShardSnapshotStatus> clonesBuilder = ImmutableOpenMap.builder();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(repoName, snapshotsInProgress.entries());
for (Tuple<IndexId, Integer> count : counts) {
for (int shardId = 0; shardId < count.v2(); shardId++) {
final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId);
final String indexName = repoShardId.indexName();
// A shard that is active in another in-flight operation is queued; otherwise the clone is assigned to the
// local node using the generation reported by the in-flight states.
if (inFlightShardStates.isActive(indexName, shardId)) {
clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED);
} else {
clonesBuilder.put(repoShardId, new ShardSnapshotStatus(localNodeId, inFlightShardStates.generationForShard(repoShardId.index(), shardId, shardGenerations)));
}
}
}
updatedEntry = cloneEntry.withClones(clonesBuilder.build());
updatedEntries.set(i, updatedEntry);
changed = true;
break;
}
}
// Passes null instead of new snapshots-in-progress when the clone entry was not found.
return updateWithSnapshots(currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null);
}
@Override
public void onFailure(String source, Exception e) {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
initializingClones.remove(targetSnapshot);
if (updatedEntry != null) {
final Snapshot target = updatedEntry.snapshot();
final SnapshotId sourceSnapshot = updatedEntry.source();
for (ObjectObjectCursor<RepositoryShardId, ShardSnapshotStatus> indexClone : updatedEntry.clones()) {
final ShardSnapshotStatus shardStatusBefore = indexClone.value;
// Only shard clones in state INIT are started here; all others are skipped.
if (shardStatusBefore.state() != ShardState.INIT) {
continue;
}
final RepositoryShardId repoShardId = indexClone.key;
runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository);
}
} else {
// Extremely unlikely corner case of master failing over between starting the clone and
// starting shard clones.
logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry);
}
}
}, "start snapshot clone", onFailure), onFailure);
}
use of org.opensearch.repositories.ShardGenerations in project OpenSearch by opensearch-project.
the class SnapshotsService method finalizeSnapshotEntry.
/**
 * Finalizes the given snapshot entry in its repository.
 * <p>
 * Collects a {@code SnapshotShardFailure} for every shard that failed or did not complete, builds the
 * {@code SnapshotInfo} and shard generations for the entry, resolves the metadata to store (loaded from the source
 * snapshot for clones, otherwise the given {@code metadata}) and then asks the repository to finalize the snapshot.
 * Any failure is routed to {@code handleFinalizationFailure}.
 *
 * @param entry          snapshot entry to finalize
 * @param metadata       metadata to store with the snapshot when the entry is not a clone
 * @param repositoryData repository data the finalization is based on
 */
private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata metadata, RepositoryData repositoryData) {
    assert currentlyFinalizing.contains(entry.repository());
    try {
        final String failure = entry.failure();
        final Snapshot snapshot = entry.snapshot();
        logger.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure);

        // Every shard that failed or never completed is reported as a shard failure.
        final List<SnapshotShardFailure> failures = new ArrayList<>();
        for (ObjectObjectCursor<ShardId, ShardSnapshotStatus> cursor : entry.shards()) {
            final ShardSnapshotStatus shardStatus = cursor.value;
            final ShardState shardState = shardStatus.state();
            if (shardState.failed()) {
                failures.add(new SnapshotShardFailure(shardStatus.nodeId(), cursor.key, shardStatus.reason()));
            } else if (shardState.completed()) {
                assert shardState == ShardState.SUCCESS;
            } else {
                failures.add(new SnapshotShardFailure(shardStatus.nodeId(), cursor.key, "skipped"));
            }
        }

        final ShardGenerations shardGenerations = buildGenerations(entry, metadata);
        final String repository = snapshot.getRepository();
        final List<String> indexNames = shardGenerations.indices().stream().map(IndexId::getName).collect(Collectors.toList());
        final SnapshotInfo snapshotInfo = new SnapshotInfo(
            snapshot.getSnapshotId(),
            indexNames,
            entry.dataStreams(),
            entry.startTime(),
            failure,
            threadPool.absoluteTimeInMillis(),
            entry.partial() ? shardGenerations.totalShards() : entry.shards().size(),
            failures,
            entry.includeGlobalState(),
            entry.userMetadata()
        );

        // Shared handler for failures while resolving metadata and while finalizing in the repository.
        final Consumer<Exception> onFinalizationFailure = e -> handleFinalizationFailure(e, entry, repositoryData);
        final StepListener<Metadata> metadataListener = new StepListener<>();
        final Repository repo = repositoriesService.repository(repository);
        if (entry.isClone() == false) {
            metadataListener.onResponse(metadata);
        } else {
            // For a clone the metadata comes from the source snapshot in the repository, loaded on the snapshot pool.
            threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(metadataListener, () -> {
                final Metadata.Builder sourceMetadata = Metadata.builder(repo.getSnapshotGlobalMetadata(entry.source()));
                for (IndexId index : entry.indices()) {
                    sourceMetadata.put(repo.getSnapshotIndexMetaData(repositoryData, entry.source(), index), false);
                }
                return sourceMetadata.build();
            }));
        }

        metadataListener.whenComplete(
            meta -> repo.finalizeSnapshot(
                shardGenerations,
                repositoryData.getGenId(),
                metadataForSnapshot(entry, meta),
                snapshotInfo,
                entry.version(),
                state -> stateWithoutSnapshot(state, snapshot),
                ActionListener.wrap(newRepoData -> {
                    completeListenersIgnoringException(endAndGetListenersToResolve(snapshot), Tuple.tuple(newRepoData, snapshotInfo));
                    logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state());
                    runNextQueuedOperation(newRepoData, repository, true);
                }, onFinalizationFailure)
            ),
            onFinalizationFailure
        );
    } catch (Exception e) {
        assert false : new AssertionError(e);
        handleFinalizationFailure(e, entry, repositoryData);
    }
}
use of org.opensearch.repositories.ShardGenerations in project OpenSearch by opensearch-project.
the class BlobStoreRepository method finalizeSnapshot.
/**
 * Writes the global metadata, the metadata of every index in {@code shardGenerations} and the snapshot info for the
 * given snapshot, then writes updated repository data that includes the snapshot.
 * <p>
 * The metadata blobs are written concurrently on the {@code SNAPSHOT} executor; the repository data is only updated
 * once all of them have been written. Failures are reported to {@code listener} wrapped in a
 * {@code SnapshotException}.
 *
 * @param shardGenerations      shard generations of the snapshot; its indices determine which index metadata is written
 * @param repositoryStateId     repository generation the finalization is based on, must be a valid generation
 * @param clusterMetadata       metadata to write for the snapshot
 * @param snapshotInfo          snapshot info to write
 * @param repositoryMetaVersion version used to decide whether shard and index generations are used
 * @param stateTransformer      cluster state transformation passed on to the repository data update
 * @param listener              notified with the new repository data on success, or with the failure
 */
@Override
public void finalizeSnapshot(
    final ShardGenerations shardGenerations,
    final long repositoryStateId,
    final Metadata clusterMetadata,
    SnapshotInfo snapshotInfo,
    Version repositoryMetaVersion,
    Function<ClusterState, ClusterState> stateTransformer,
    final ActionListener<RepositoryData> listener
) {
    assert repositoryStateId > RepositoryData.UNKNOWN_REPO_GEN : "Must finalize based on a valid repository generation but received ["
        + repositoryStateId
        + "]";
    final Collection<IndexId> indices = shardGenerations.indices();
    final SnapshotId snapshotId = snapshotInfo.snapshotId();
    // Once we are done writing the updated index-N blob we remove the now unreferenced index-${uuid} blobs in each shard
    // directory if all nodes are at least at version SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION
    // If there are older version nodes in the cluster, we don't need to run this cleanup as it will have already happened
    // when writing the index-${N} to each shard directory.
    final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
    final Consumer<Exception> onUpdateFailure = e -> listener.onFailure(
        new SnapshotException(metadata.name(), snapshotId, "failed to update snapshot in repository", e)
    );
    final Executor snapshotExecutor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
    final boolean writeIndexGens = SnapshotsService.useIndexGenerations(repositoryMetaVersion);

    final StepListener<RepositoryData> repoDataListener = new StepListener<>();
    getRepositoryData(repoDataListener);
    repoDataListener.whenComplete(existingRepositoryData -> {
        // Only tracked when index generations are in use; filled concurrently by the index metadata writers below.
        final Map<String, String> indexMetaIdentifiers = writeIndexGens ? ConcurrentCollections.newConcurrentMap() : null;
        final Map<IndexId, String> indexMetas = writeIndexGens ? ConcurrentCollections.newConcurrentMap() : null;

        // One completion per index metadata write, plus the global metadata write and the snapshot info write.
        final int expectedWrites = indices.size() + 2;
        final ActionListener<Void> allMetaListener = new GroupedActionListener<>(ActionListener.wrap(ignored -> {
            final RepositoryData updatedRepositoryData = existingRepositoryData.addSnapshot(
                snapshotId,
                snapshotInfo.state(),
                Version.CURRENT,
                shardGenerations,
                indexMetas,
                indexMetaIdentifiers
            );
            writeIndexGen(
                updatedRepositoryData,
                repositoryStateId,
                repositoryMetaVersion,
                stateTransformer,
                ActionListener.wrap(newRepoData -> {
                    if (writeShardGens) {
                        cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
                    }
                    listener.onResponse(newRepoData);
                }, onUpdateFailure)
            );
        }, onUpdateFailure), expectedWrites);

        // We ignore all FileAlreadyExistsException when writing metadata since otherwise a master failover while in this method will
        // mean that no snap-${uuid}.dat blob is ever written for this snapshot. This is safe because any updated version of the
        // index or global metadata will be compatible with the segments written in this snapshot as well.
        // Failing on an already existing index-${repoGeneration} below ensures that the index.latest blob is not updated in a way
        // that decrements the generation it points at

        // Write Global MetaData
        snapshotExecutor.execute(
            ActionRunnable.run(
                allMetaListener,
                () -> GLOBAL_METADATA_FORMAT.write(clusterMetadata, blobContainer(), snapshotId.getUUID(), compress)
            )
        );

        // write the index metadata for each index in the snapshot
        for (IndexId index : indices) {
            snapshotExecutor.execute(ActionRunnable.run(allMetaListener, () -> {
                final IndexMetadata indexMetaData = clusterMetadata.index(index.getName());
                if (writeIndexGens == false) {
                    INDEX_METADATA_FORMAT.write(indexMetaData, indexContainer(index), snapshotId.getUUID(), compress);
                    return;
                }
                final String identifiers = IndexMetaDataGenerations.buildUniqueIdentifier(indexMetaData);
                final String existingBlobId = existingRepositoryData.indexMetaDataGenerations().getIndexMetaBlobId(identifiers);
                if (existingBlobId == null) {
                    // We don't yet have this version of the metadata so we write it
                    final String newBlobId = UUIDs.base64UUID();
                    INDEX_METADATA_FORMAT.write(indexMetaData, indexContainer(index), newBlobId, compress);
                    indexMetaIdentifiers.put(identifiers, newBlobId);
                }
                indexMetas.put(index, identifiers);
            }));
        }

        // Write the snapshot info
        snapshotExecutor.execute(
            ActionRunnable.run(allMetaListener, () -> SNAPSHOT_FORMAT.write(snapshotInfo, blobContainer(), snapshotId.getUUID(), compress))
        );
    }, onUpdateFailure);
}
Aggregations