use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class BlobStoreRepository method finalizeSnapshot.
@Override
public void finalizeSnapshot(final ShardGenerations shardGenerations, final long repositoryStateId, final Metadata clusterMetadata, SnapshotInfo snapshotInfo, Version repositoryMetaVersion, Function<ClusterState, ClusterState> stateTransformer, final ActionListener<RepositoryData> listener) {
assert repositoryStateId > RepositoryData.UNKNOWN_REPO_GEN : "Must finalize based on a valid repository generation but received [" + repositoryStateId + "]";
final Collection<IndexId> indices = shardGenerations.indices();
final SnapshotId snapshotId = snapshotInfo.snapshotId();
// Once we are done writing the updated index-N blob we remove the now unreferenced index-${uuid} blobs in each shard
// directory if all nodes are at least at version SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION
// If there are older version nodes in the cluster, we don't need to run this cleanup as it will have already happened
// when writing the index-${N} to each shard directory.
final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
final Consumer<Exception> onUpdateFailure = e -> listener.onFailure(new SnapshotException(metadata.name(), snapshotId, "failed to update snapshot in repository", e));
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
final boolean writeIndexGens = SnapshotsService.useIndexGenerations(repositoryMetaVersion);
final StepListener<RepositoryData> repoDataListener = new StepListener<>();
getRepositoryData(repoDataListener);
repoDataListener.whenComplete(existingRepositoryData -> {
final Map<IndexId, String> indexMetas;
final Map<String, String> indexMetaIdentifiers;
if (writeIndexGens) {
indexMetaIdentifiers = ConcurrentCollections.newConcurrentMap();
indexMetas = ConcurrentCollections.newConcurrentMap();
} else {
indexMetas = null;
indexMetaIdentifiers = null;
}
final ActionListener<Void> allMetaListener = new GroupedActionListener<>(ActionListener.wrap(v -> {
final RepositoryData updatedRepositoryData = existingRepositoryData.addSnapshot(snapshotId, snapshotInfo.state(), Version.CURRENT, shardGenerations, indexMetas, indexMetaIdentifiers);
writeIndexGen(updatedRepositoryData, repositoryStateId, repositoryMetaVersion, stateTransformer, ActionListener.wrap(newRepoData -> {
if (writeShardGens) {
cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
}
listener.onResponse(newRepoData);
}, onUpdateFailure));
}, onUpdateFailure), 2 + indices.size());
// We ignore all FileAlreadyExistsException when writing metadata since otherwise a master failover while in this method will
// mean that no snap-${uuid}.dat blob is ever written for this snapshot. This is safe because any updated version of the
// index or global metadata will be compatible with the segments written in this snapshot as well.
// Failing on an already existing index-${repoGeneration} below ensures that the index.latest blob is not updated in a way
// that decrements the generation it points at
// Write Global MetaData
executor.execute(ActionRunnable.run(allMetaListener, () -> GLOBAL_METADATA_FORMAT.write(clusterMetadata, blobContainer(), snapshotId.getUUID(), compress)));
// write the index metadata for each index in the snapshot
for (IndexId index : indices) {
executor.execute(ActionRunnable.run(allMetaListener, () -> {
final IndexMetadata indexMetaData = clusterMetadata.index(index.getName());
if (writeIndexGens) {
final String identifiers = IndexMetaDataGenerations.buildUniqueIdentifier(indexMetaData);
String metaUUID = existingRepositoryData.indexMetaDataGenerations().getIndexMetaBlobId(identifiers);
if (metaUUID == null) {
// We don't yet have this version of the metadata so we write it
metaUUID = UUIDs.base64UUID();
INDEX_METADATA_FORMAT.write(indexMetaData, indexContainer(index), metaUUID, compress);
indexMetaIdentifiers.put(identifiers, metaUUID);
}
indexMetas.put(index, identifiers);
} else {
INDEX_METADATA_FORMAT.write(clusterMetadata.index(index.getName()), indexContainer(index), snapshotId.getUUID(), compress);
}
}));
}
executor.execute(ActionRunnable.run(allMetaListener, () -> SNAPSHOT_FORMAT.write(snapshotInfo, blobContainer(), snapshotId.getUUID(), compress)));
}, onUpdateFailure);
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class RestoreService method restoreSnapshot.
/**
* Restores snapshot specified in the restore request.
*
* @param request restore request
* @param listener restore listener
*/
public void restoreSnapshot(final RestoreSnapshotRequest request, final ActionListener<RestoreCompletionResponse> listener) {
try {
// Read snapshot info and metadata from the repository
final String repositoryName = request.repository();
Repository repository = repositoriesService.repository(repositoryName);
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repository.getRepositoryData(repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
final String snapshotName = request.snapshot();
final Optional<SnapshotId> matchingSnapshotId = repositoryData.getSnapshotIds().stream().filter(s -> snapshotName.equals(s.getName())).findFirst();
if (matchingSnapshotId.isPresent() == false) {
throw new SnapshotRestoreException(repositoryName, snapshotName, "snapshot does not exist");
}
final SnapshotId snapshotId = matchingSnapshotId.get();
if (request.snapshotUuid() != null && request.snapshotUuid().equals(snapshotId.getUUID()) == false) {
throw new SnapshotRestoreException(repositoryName, snapshotName, "snapshot UUID mismatch: expected [" + request.snapshotUuid() + "] but got [" + snapshotId.getUUID() + "]");
}
final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId);
final Snapshot snapshot = new Snapshot(repositoryName, snapshotId);
// Make sure that we can restore from this snapshot
validateSnapshotRestorable(repositoryName, snapshotInfo);
Metadata globalMetadata = null;
// Resolve the indices from the snapshot that need to be restored
Map<String, DataStream> dataStreams;
List<String> requestIndices = new ArrayList<>(Arrays.asList(request.indices()));
List<String> requestedDataStreams = filterIndices(snapshotInfo.dataStreams(), requestIndices.toArray(new String[0]), IndicesOptions.fromOptions(true, true, true, true));
if (requestedDataStreams.isEmpty()) {
dataStreams = new HashMap<>();
} else {
globalMetadata = repository.getSnapshotGlobalMetadata(snapshotId);
final Map<String, DataStream> dataStreamsInSnapshot = globalMetadata.dataStreams();
dataStreams = new HashMap<>(requestedDataStreams.size());
for (String requestedDataStream : requestedDataStreams) {
final DataStream dataStreamInSnapshot = dataStreamsInSnapshot.get(requestedDataStream);
assert dataStreamInSnapshot != null : "DataStream [" + requestedDataStream + "] not found in snapshot";
dataStreams.put(requestedDataStream, dataStreamInSnapshot);
}
}
requestIndices.removeAll(dataStreams.keySet());
Set<String> dataStreamIndices = dataStreams.values().stream().flatMap(ds -> ds.getIndices().stream()).map(Index::getName).collect(Collectors.toSet());
requestIndices.addAll(dataStreamIndices);
final List<String> indicesInSnapshot = filterIndices(snapshotInfo.indices(), requestIndices.toArray(new String[0]), request.indicesOptions());
final Metadata.Builder metadataBuilder;
if (request.includeGlobalState()) {
if (globalMetadata == null) {
globalMetadata = repository.getSnapshotGlobalMetadata(snapshotId);
}
metadataBuilder = Metadata.builder(globalMetadata);
} else {
metadataBuilder = Metadata.builder();
}
final List<IndexId> indexIdsInSnapshot = repositoryData.resolveIndices(indicesInSnapshot);
for (IndexId indexId : indexIdsInSnapshot) {
metadataBuilder.put(repository.getSnapshotIndexMetaData(repositoryData, snapshotId, indexId), false);
}
final Metadata metadata = metadataBuilder.build();
// Apply renaming on index names, returning a map of names where
// the key is the renamed index and the value is the original name
final Map<String, String> indices = renamedIndices(request, indicesInSnapshot, dataStreamIndices);
// Now we can start the actual restore process by adding shards to be recovered in the cluster state
// and updating cluster metadata (global and index) as needed
clusterService.submitStateUpdateTask("restore_snapshot[" + snapshotName + ']', new ClusterStateUpdateTask() {
final String restoreUUID = UUIDs.randomBase64UUID();
RestoreInfo restoreInfo = null;
@Override
public ClusterState execute(ClusterState currentState) {
RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY);
if (currentState.getNodes().getMinNodeVersion().before(LegacyESVersion.V_7_0_0)) {
// same time in versions prior to 7.0
if (restoreInProgress.isEmpty() == false) {
throw new ConcurrentSnapshotExecutionException(snapshot, "Restore process is already running in this cluster");
}
}
// Check if the snapshot to restore is currently being deleted
SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY);
if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(snapshotId))) {
throw new ConcurrentSnapshotExecutionException(snapshot, "cannot restore a snapshot while a snapshot deletion is in-progress [" + deletionsInProgress.getEntries().get(0) + "]");
}
// Updating cluster state
ClusterState.Builder builder = ClusterState.builder(currentState);
Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata());
ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
RoutingTable.Builder rtBuilder = RoutingTable.builder(currentState.routingTable());
ImmutableOpenMap<ShardId, RestoreInProgress.ShardRestoreStatus> shards;
Set<String> aliases = new HashSet<>();
if (indices.isEmpty() == false) {
// We have some indices to restore
ImmutableOpenMap.Builder<ShardId, RestoreInProgress.ShardRestoreStatus> shardsBuilder = ImmutableOpenMap.builder();
final Version minIndexCompatibilityVersion = currentState.getNodes().getMaxNodeVersion().minimumIndexCompatibilityVersion();
for (Map.Entry<String, String> indexEntry : indices.entrySet()) {
String index = indexEntry.getValue();
boolean partial = checkPartial(index);
SnapshotRecoverySource recoverySource = new SnapshotRecoverySource(restoreUUID, snapshot, snapshotInfo.version(), repositoryData.resolveIndexId(index));
String renamedIndexName = indexEntry.getKey();
IndexMetadata snapshotIndexMetadata = metadata.index(index);
snapshotIndexMetadata = updateIndexSettings(snapshotIndexMetadata, request.indexSettings(), request.ignoreIndexSettings());
try {
snapshotIndexMetadata = metadataIndexUpgradeService.upgradeIndexMetadata(snapshotIndexMetadata, minIndexCompatibilityVersion);
} catch (Exception ex) {
throw new SnapshotRestoreException(snapshot, "cannot restore index [" + index + "] because it cannot be upgraded", ex);
}
// Check that the index is closed or doesn't exist
IndexMetadata currentIndexMetadata = currentState.metadata().index(renamedIndexName);
IntSet ignoreShards = new IntHashSet();
final Index renamedIndex;
if (currentIndexMetadata == null) {
// Index doesn't exist - create it and start recovery
// Make sure that the index we are about to create has a validate name
boolean isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(snapshotIndexMetadata.getSettings());
createIndexService.validateIndexName(renamedIndexName, currentState);
createIndexService.validateDotIndex(renamedIndexName, isHidden);
createIndexService.validateIndexSettings(renamedIndexName, snapshotIndexMetadata.getSettings(), false);
IndexMetadata.Builder indexMdBuilder = IndexMetadata.builder(snapshotIndexMetadata).state(IndexMetadata.State.OPEN).index(renamedIndexName);
indexMdBuilder.settings(Settings.builder().put(snapshotIndexMetadata.getSettings()).put(IndexMetadata.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()));
shardLimitValidator.validateShardLimit(snapshotIndexMetadata.getSettings(), currentState);
if (!request.includeAliases() && !snapshotIndexMetadata.getAliases().isEmpty()) {
// Remove all aliases - they shouldn't be restored
indexMdBuilder.removeAllAliases();
} else {
for (ObjectCursor<String> alias : snapshotIndexMetadata.getAliases().keys()) {
aliases.add(alias.value);
}
}
IndexMetadata updatedIndexMetadata = indexMdBuilder.build();
if (partial) {
populateIgnoredShards(index, ignoreShards);
}
rtBuilder.addAsNewRestore(updatedIndexMetadata, recoverySource, ignoreShards);
blocks.addBlocks(updatedIndexMetadata);
mdBuilder.put(updatedIndexMetadata, true);
renamedIndex = updatedIndexMetadata.getIndex();
} else {
validateExistingIndex(currentIndexMetadata, snapshotIndexMetadata, renamedIndexName, partial);
// Index exists and it's closed - open it in metadata and start recovery
IndexMetadata.Builder indexMdBuilder = IndexMetadata.builder(snapshotIndexMetadata).state(IndexMetadata.State.OPEN);
indexMdBuilder.version(Math.max(snapshotIndexMetadata.getVersion(), 1 + currentIndexMetadata.getVersion()));
indexMdBuilder.mappingVersion(Math.max(snapshotIndexMetadata.getMappingVersion(), 1 + currentIndexMetadata.getMappingVersion()));
indexMdBuilder.settingsVersion(Math.max(snapshotIndexMetadata.getSettingsVersion(), 1 + currentIndexMetadata.getSettingsVersion()));
indexMdBuilder.aliasesVersion(Math.max(snapshotIndexMetadata.getAliasesVersion(), 1 + currentIndexMetadata.getAliasesVersion()));
for (int shard = 0; shard < snapshotIndexMetadata.getNumberOfShards(); shard++) {
indexMdBuilder.primaryTerm(shard, Math.max(snapshotIndexMetadata.primaryTerm(shard), currentIndexMetadata.primaryTerm(shard)));
}
if (!request.includeAliases()) {
// Remove all snapshot aliases
if (!snapshotIndexMetadata.getAliases().isEmpty()) {
indexMdBuilder.removeAllAliases();
}
// / Add existing aliases
for (ObjectCursor<AliasMetadata> alias : currentIndexMetadata.getAliases().values()) {
indexMdBuilder.putAlias(alias.value);
}
} else {
for (ObjectCursor<String> alias : snapshotIndexMetadata.getAliases().keys()) {
aliases.add(alias.value);
}
}
final Settings.Builder indexSettingsBuilder = Settings.builder().put(snapshotIndexMetadata.getSettings()).put(IndexMetadata.SETTING_INDEX_UUID, currentIndexMetadata.getIndexUUID());
// setting anyway
if (snapshotIndexMetadata.getCreationVersion().onOrAfter(LegacyESVersion.V_7_9_0) || currentState.nodes().getMinNodeVersion().onOrAfter(LegacyESVersion.V_7_9_0)) {
indexSettingsBuilder.put(SETTING_HISTORY_UUID, UUIDs.randomBase64UUID());
}
indexMdBuilder.settings(indexSettingsBuilder);
IndexMetadata updatedIndexMetadata = indexMdBuilder.index(renamedIndexName).build();
rtBuilder.addAsRestore(updatedIndexMetadata, recoverySource);
blocks.updateBlocks(updatedIndexMetadata);
mdBuilder.put(updatedIndexMetadata, true);
renamedIndex = updatedIndexMetadata.getIndex();
}
for (int shard = 0; shard < snapshotIndexMetadata.getNumberOfShards(); shard++) {
if (!ignoreShards.contains(shard)) {
shardsBuilder.put(new ShardId(renamedIndex, shard), new RestoreInProgress.ShardRestoreStatus(clusterService.state().nodes().getLocalNodeId()));
} else {
shardsBuilder.put(new ShardId(renamedIndex, shard), new RestoreInProgress.ShardRestoreStatus(clusterService.state().nodes().getLocalNodeId(), RestoreInProgress.State.FAILURE));
}
}
}
shards = shardsBuilder.build();
RestoreInProgress.Entry restoreEntry = new RestoreInProgress.Entry(restoreUUID, snapshot, overallState(RestoreInProgress.State.INIT, shards), Collections.unmodifiableList(new ArrayList<>(indices.keySet())), shards);
builder.putCustom(RestoreInProgress.TYPE, new RestoreInProgress.Builder(currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY)).add(restoreEntry).build());
} else {
shards = ImmutableOpenMap.of();
}
checkAliasNameConflicts(indices, aliases);
Map<String, DataStream> updatedDataStreams = new HashMap<>(currentState.metadata().dataStreams());
updatedDataStreams.putAll(dataStreams.values().stream().map(ds -> updateDataStream(ds, mdBuilder, request)).collect(Collectors.toMap(DataStream::getName, Function.identity())));
mdBuilder.dataStreams(updatedDataStreams);
// Restore global state if needed
if (request.includeGlobalState()) {
if (metadata.persistentSettings() != null) {
Settings settings = metadata.persistentSettings();
clusterSettings.validateUpdate(settings);
mdBuilder.persistentSettings(settings);
}
if (metadata.templates() != null) {
// TODO: Should all existing templates be deleted first?
for (ObjectCursor<IndexTemplateMetadata> cursor : metadata.templates().values()) {
mdBuilder.put(cursor.value);
}
}
if (metadata.customs() != null) {
for (ObjectObjectCursor<String, Metadata.Custom> cursor : metadata.customs()) {
if (RepositoriesMetadata.TYPE.equals(cursor.key) == false && DataStreamMetadata.TYPE.equals(cursor.key) == false) {
// Don't restore repositories while we are working with them
// TODO: Should we restore them at the end?
// Also, don't restore data streams here, we already added them to the metadata builder above
mdBuilder.putCustom(cursor.key, cursor.value);
}
}
}
}
if (completed(shards)) {
// We don't have any indices to restore - we are done
restoreInfo = new RestoreInfo(snapshotId.getName(), Collections.unmodifiableList(new ArrayList<>(indices.keySet())), shards.size(), shards.size() - failedShards(shards));
}
RoutingTable rt = rtBuilder.build();
ClusterState updatedState = builder.metadata(mdBuilder).blocks(blocks).routingTable(rt).build();
return allocationService.reroute(updatedState, "restored snapshot [" + snapshot + "]");
}
private void checkAliasNameConflicts(Map<String, String> renamedIndices, Set<String> aliases) {
for (Map.Entry<String, String> renamedIndex : renamedIndices.entrySet()) {
if (aliases.contains(renamedIndex.getKey())) {
throw new SnapshotRestoreException(snapshot, "cannot rename index [" + renamedIndex.getValue() + "] into [" + renamedIndex.getKey() + "] because of conflict with an alias with the same name");
}
}
}
private void populateIgnoredShards(String index, IntSet ignoreShards) {
for (SnapshotShardFailure failure : snapshotInfo.shardFailures()) {
if (index.equals(failure.index())) {
ignoreShards.add(failure.shardId());
}
}
}
private boolean checkPartial(String index) {
// Make sure that index was fully snapshotted
if (failed(snapshotInfo, index)) {
if (request.partial()) {
return true;
} else {
throw new SnapshotRestoreException(snapshot, "index [" + index + "] wasn't fully snapshotted - cannot " + "restore");
}
} else {
return false;
}
}
private void validateExistingIndex(IndexMetadata currentIndexMetadata, IndexMetadata snapshotIndexMetadata, String renamedIndex, boolean partial) {
// Index exist - checking that it's closed
if (currentIndexMetadata.getState() != IndexMetadata.State.CLOSE) {
// TODO: Enable restore for open indices
throw new SnapshotRestoreException(snapshot, "cannot restore index [" + renamedIndex + "] because an open index " + "with same name already exists in the cluster. Either close or delete the existing index or restore the " + "index under a different name by providing a rename pattern and replacement name");
}
// Index exist - checking if it's partial restore
if (partial) {
throw new SnapshotRestoreException(snapshot, "cannot restore partial index [" + renamedIndex + "] because such index already exists");
}
// Make sure that the number of shards is the same. That's the only thing that we cannot change
if (currentIndexMetadata.getNumberOfShards() != snapshotIndexMetadata.getNumberOfShards()) {
throw new SnapshotRestoreException(snapshot, "cannot restore index [" + renamedIndex + "] with [" + currentIndexMetadata.getNumberOfShards() + "] shards from a snapshot of index [" + snapshotIndexMetadata.getIndex().getName() + "] with [" + snapshotIndexMetadata.getNumberOfShards() + "] shards");
}
}
/**
* Optionally updates index settings in indexMetadata by removing settings listed in ignoreSettings and
* merging them with settings in changeSettings.
*/
private IndexMetadata updateIndexSettings(IndexMetadata indexMetadata, Settings changeSettings, String[] ignoreSettings) {
Settings normalizedChangeSettings = Settings.builder().put(changeSettings).normalizePrefix(IndexMetadata.INDEX_SETTING_PREFIX).build();
if (IndexSettings.INDEX_SOFT_DELETES_SETTING.get(indexMetadata.getSettings()) && IndexSettings.INDEX_SOFT_DELETES_SETTING.exists(changeSettings) && IndexSettings.INDEX_SOFT_DELETES_SETTING.get(changeSettings) == false) {
throw new SnapshotRestoreException(snapshot, "cannot disable setting [" + IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey() + "] on restore");
}
IndexMetadata.Builder builder = IndexMetadata.builder(indexMetadata);
Settings settings = indexMetadata.getSettings();
Set<String> keyFilters = new HashSet<>();
List<String> simpleMatchPatterns = new ArrayList<>();
for (String ignoredSetting : ignoreSettings) {
if (!Regex.isSimpleMatchPattern(ignoredSetting)) {
if (UNREMOVABLE_SETTINGS.contains(ignoredSetting)) {
throw new SnapshotRestoreException(snapshot, "cannot remove setting [" + ignoredSetting + "] on restore");
} else {
keyFilters.add(ignoredSetting);
}
} else {
simpleMatchPatterns.add(ignoredSetting);
}
}
Predicate<String> settingsFilter = k -> {
if (UNREMOVABLE_SETTINGS.contains(k) == false) {
for (String filterKey : keyFilters) {
if (k.equals(filterKey)) {
return false;
}
}
for (String pattern : simpleMatchPatterns) {
if (Regex.simpleMatch(pattern, k)) {
return false;
}
}
}
return true;
};
Settings.Builder settingsBuilder = Settings.builder().put(settings.filter(settingsFilter)).put(normalizedChangeSettings.filter(k -> {
if (UNMODIFIABLE_SETTINGS.contains(k)) {
throw new SnapshotRestoreException(snapshot, "cannot modify setting [" + k + "] on restore");
} else {
return true;
}
}));
settingsBuilder.remove(MetadataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING.getKey());
return builder.settings(settingsBuilder).build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to restore snapshot", snapshotId), e);
listener.onFailure(e);
}
@Override
public TimeValue timeout() {
return request.masterNodeTimeout();
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
listener.onResponse(new RestoreCompletionResponse(restoreUUID, snapshot, restoreInfo));
}
});
}, listener::onFailure);
} catch (Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to restore snapshot", request.repository() + ":" + request.snapshot()), e);
listener.onFailure(e);
}
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method finalizeRecovery.
void finalizeRecovery(long targetLocalCheckpoint, long trimAboveSeqNo, ActionListener<Void> listener) throws IOException {
if (shard.state() == IndexShardState.CLOSED) {
throw new IndexShardClosedException(request.shardId());
}
cancellableThreads.checkForCancel();
StopWatch stopWatch = new StopWatch().start();
logger.trace("finalizing recovery");
/*
* Before marking the shard as in-sync we acquire an operation permit. We do this so that there is a barrier between marking a
* shard as in-sync and relocating a shard. If we acquire the permit then no relocation handoff can complete before we are done
* marking the shard as in-sync. If the relocation handoff holds all the permits then after the handoff completes and we acquire
* the permit then the state of the shard will be relocated and this recovery will fail.
*/
runUnderPrimaryPermit(() -> shard.markAllocationIdAsInSync(request.targetAllocationId(), targetLocalCheckpoint), shardId + " marking " + request.targetAllocationId() + " as in sync", shard, cancellableThreads, logger);
// this global checkpoint is persisted in finalizeRecovery
final long globalCheckpoint = shard.getLastKnownGlobalCheckpoint();
final StepListener<Void> finalizeListener = new StepListener<>();
cancellableThreads.checkForCancel();
recoveryTarget.finalizeRecovery(globalCheckpoint, trimAboveSeqNo, finalizeListener);
finalizeListener.whenComplete(r -> {
runUnderPrimaryPermit(() -> shard.updateGlobalCheckpointForShard(request.targetAllocationId(), globalCheckpoint), shardId + " updating " + request.targetAllocationId() + "'s global checkpoint", shard, cancellableThreads, logger);
if (request.isPrimaryRelocation()) {
logger.trace("performing relocation hand-off");
// TODO: make relocated async
// this acquires all IndexShard operation permits and will thus delay new recoveries until it is done
cancellableThreads.execute(() -> shard.relocated(request.targetAllocationId(), recoveryTarget::handoffPrimaryContext));
/*
* if the recovery process fails after disabling primary mode on the source shard, both relocation source and
* target are failed (see {@link IndexShard#updateRoutingEntry}).
*/
}
stopWatch.stop();
logger.trace("finalizing recovery took [{}]", stopWatch.totalTime());
listener.onResponse(null);
}, listener::onFailure);
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class RecoverySourceHandler method phase1.
/**
* Perform phase1 of the recovery operations. Once this {@link IndexCommit}
* snapshot has been performed no commit operations (files being fsync'd)
* are effectively allowed on this index until all recovery phases are done
* <p>
* Phase1 examines the segment files on the target node and copies over the
* segments that are missing. Only segments that have the same size and
* checksum can be reused
*/
void phase1(IndexCommit snapshot, long startingSeqNo, IntSupplier translogOps, ActionListener<SendFileResult> listener) {
cancellableThreads.checkForCancel();
final Store store = shard.store();
try {
StopWatch stopWatch = new StopWatch().start();
final Store.MetadataSnapshot recoverySourceMetadata;
try {
recoverySourceMetadata = store.getMetadata(snapshot);
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
shard.failShard("recovery", ex);
throw ex;
}
for (String name : snapshot.getFileNames()) {
final StoreFileMetadata md = recoverySourceMetadata.get(name);
if (md == null) {
logger.info("Snapshot differs from actual index for file: {} meta: {}", name, recoverySourceMetadata.asMap());
throw new CorruptIndexException("Snapshot differs from actual index - maybe index was removed metadata has " + recoverySourceMetadata.asMap().size() + " files", name);
}
}
if (canSkipPhase1(recoverySourceMetadata, request.metadataSnapshot()) == false) {
final List<String> phase1FileNames = new ArrayList<>();
final List<Long> phase1FileSizes = new ArrayList<>();
final List<String> phase1ExistingFileNames = new ArrayList<>();
final List<Long> phase1ExistingFileSizes = new ArrayList<>();
// Total size of segment files that are recovered
long totalSizeInBytes = 0;
// Total size of segment files that were able to be re-used
long existingTotalSizeInBytes = 0;
// Generate a "diff" of all the identical, different, and missing
// segment files on the target node, using the existing files on
// the source node
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
for (StoreFileMetadata md : diff.identical) {
phase1ExistingFileNames.add(md.name());
phase1ExistingFileSizes.add(md.length());
existingTotalSizeInBytes += md.length();
if (logger.isTraceEnabled()) {
logger.trace("recovery [phase1]: not recovering [{}], exist in local store and has checksum [{}]," + " size [{}]", md.name(), md.checksum(), md.length());
}
totalSizeInBytes += md.length();
}
List<StoreFileMetadata> phase1Files = new ArrayList<>(diff.different.size() + diff.missing.size());
phase1Files.addAll(diff.different);
phase1Files.addAll(diff.missing);
for (StoreFileMetadata md : phase1Files) {
if (request.metadataSnapshot().asMap().containsKey(md.name())) {
logger.trace("recovery [phase1]: recovering [{}], exists in local store, but is different: remote [{}], local [{}]", md.name(), request.metadataSnapshot().asMap().get(md.name()), md);
} else {
logger.trace("recovery [phase1]: recovering [{}], does not exist in remote", md.name());
}
phase1FileNames.add(md.name());
phase1FileSizes.add(md.length());
totalSizeInBytes += md.length();
}
logger.trace("recovery [phase1]: recovering_files [{}] with total_size [{}], reusing_files [{}] with total_size [{}]", phase1FileNames.size(), new ByteSizeValue(totalSizeInBytes), phase1ExistingFileNames.size(), new ByteSizeValue(existingTotalSizeInBytes));
final StepListener<Void> sendFileInfoStep = new StepListener<>();
final StepListener<Void> sendFilesStep = new StepListener<>();
final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
final StepListener<Void> cleanFilesStep = new StepListener<>();
cancellableThreads.checkForCancel();
recoveryTarget.receiveFileInfo(phase1FileNames, phase1FileSizes, phase1ExistingFileNames, phase1ExistingFileSizes, translogOps.getAsInt(), sendFileInfoStep);
sendFileInfoStep.whenComplete(r -> sendFiles(store, phase1Files.toArray(new StoreFileMetadata[0]), translogOps, sendFilesStep), listener::onFailure);
sendFilesStep.whenComplete(r -> createRetentionLease(startingSeqNo, createRetentionLeaseStep), listener::onFailure);
createRetentionLeaseStep.whenComplete(retentionLease -> {
final long lastKnownGlobalCheckpoint = shard.getLastKnownGlobalCheckpoint();
assert retentionLease == null || retentionLease.retainingSequenceNumber() - 1 <= lastKnownGlobalCheckpoint : retentionLease + " vs " + lastKnownGlobalCheckpoint;
// Establishes new empty translog on the replica with global checkpoint set to lastKnownGlobalCheckpoint. We want
// the commit we just copied to be a safe commit on the replica, so why not set the global checkpoint on the replica
// to the max seqno of this commit? Because (in rare corner cases) this commit might not be a safe commit here on
// the primary, and in these cases the max seqno would be too high to be valid as a global checkpoint.
cleanFiles(store, recoverySourceMetadata, translogOps, lastKnownGlobalCheckpoint, cleanFilesStep);
}, listener::onFailure);
final long totalSize = totalSizeInBytes;
final long existingTotalSize = existingTotalSizeInBytes;
cleanFilesStep.whenComplete(r -> {
final TimeValue took = stopWatch.totalTime();
logger.trace("recovery [phase1]: took [{}]", took);
listener.onResponse(new SendFileResult(phase1FileNames, phase1FileSizes, totalSize, phase1ExistingFileNames, phase1ExistingFileSizes, existingTotalSize, took));
}, listener::onFailure);
} else {
logger.trace("skipping [phase1] since source and target have identical sync id [{}]", recoverySourceMetadata.getSyncId());
// but we must still create a retention lease
final StepListener<RetentionLease> createRetentionLeaseStep = new StepListener<>();
createRetentionLease(startingSeqNo, createRetentionLeaseStep);
createRetentionLeaseStep.whenComplete(retentionLease -> {
final TimeValue took = stopWatch.totalTime();
logger.trace("recovery [phase1]: took [{}]", took);
listener.onResponse(new SendFileResult(Collections.emptyList(), Collections.emptyList(), 0L, Collections.emptyList(), Collections.emptyList(), 0L, took));
}, listener::onFailure);
}
} catch (Exception e) {
throw new RecoverFilesRecoveryException(request.shardId(), 0, new ByteSizeValue(0L), e);
}
}
use of org.opensearch.action.StepListener in project OpenSearch by opensearch-project.
the class SnapshotResiliencyTests method testConcurrentSnapshotDeleteAndDeleteIndex.
public void testConcurrentSnapshotDeleteAndDeleteIndex() throws IOException {
setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
String repoName = "repo";
String snapshotName = "snapshot";
final String index = "test";
TestClusterNodes.TestClusterNode masterNode = testClusterNodes.currentMaster(testClusterNodes.nodes.values().iterator().next().clusterService.state());
final StepListener<Collection<CreateIndexResponse>> createIndicesListener = new StepListener<>();
final int indices = randomIntBetween(5, 20);
final SetOnce<Index> firstIndex = new SetOnce<>();
continueOrDie(createRepoAndIndex(repoName, index, 1), createIndexResponse -> {
firstIndex.set(masterNode.clusterService.state().metadata().index(index).getIndex());
// create a few more indices to make it more likely that the subsequent index delete operation happens before snapshot
// finalization
final GroupedActionListener<CreateIndexResponse> listener = new GroupedActionListener<>(createIndicesListener, indices);
for (int i = 0; i < indices; ++i) {
client().admin().indices().create(new CreateIndexRequest("index-" + i), listener);
}
});
final StepListener<CreateSnapshotResponse> createSnapshotResponseStepListener = new StepListener<>();
final boolean partialSnapshot = randomBoolean();
continueOrDie(createIndicesListener, createIndexResponses -> client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(false).setPartial(partialSnapshot).setIncludeGlobalState(randomBoolean()).execute(createSnapshotResponseStepListener));
continueOrDie(createSnapshotResponseStepListener, createSnapshotResponse -> client().admin().indices().delete(new DeleteIndexRequest(index), new ActionListener<AcknowledgedResponse>() {
@Override
public void onResponse(AcknowledgedResponse acknowledgedResponse) {
if (partialSnapshot) {
// Recreate index by the same name to test that we don't snapshot conflicting metadata in this scenario
client().admin().indices().create(new CreateIndexRequest(index), noopListener());
}
}
@Override
public void onFailure(Exception e) {
if (partialSnapshot) {
throw new AssertionError("Delete index should always work during partial snapshots", e);
}
}
}));
deterministicTaskQueue.runAllRunnableTasks();
SnapshotsInProgress finalSnapshotsInProgress = masterNode.clusterService.state().custom(SnapshotsInProgress.TYPE);
assertFalse(finalSnapshotsInProgress.entries().stream().anyMatch(entry -> entry.state().completed() == false));
final Repository repository = masterNode.repositoriesService.repository(repoName);
final RepositoryData repositoryData = getRepositoryData(repository);
Collection<SnapshotId> snapshotIds = repositoryData.getSnapshotIds();
assertThat(snapshotIds, hasSize(1));
final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotIds.iterator().next());
if (partialSnapshot) {
assertThat(snapshotInfo.state(), either(is(SnapshotState.SUCCESS)).or(is(SnapshotState.PARTIAL)));
// Single shard for each index so we either get all indices or all except for the deleted index
assertThat(snapshotInfo.successfulShards(), either(is(indices + 1)).or(is(indices)));
if (snapshotInfo.successfulShards() == indices + 1) {
final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(repositoryData, snapshotInfo.snapshotId(), repositoryData.resolveIndexId(index));
// Make sure we snapshotted the metadata of this index and not the recreated version
assertEquals(indexMetadata.getIndex(), firstIndex.get());
}
} else {
assertEquals(snapshotInfo.state(), SnapshotState.SUCCESS);
// Index delete must be blocked for non-partial snapshots and we get a snapshot for every index
assertEquals(snapshotInfo.successfulShards(), indices + 1);
}
assertEquals(0, snapshotInfo.failedShards());
}
Aggregations