Use of org.opensearch.index.snapshots.IndexShardRestoreFailedException in the OpenSearch project (opensearch-project).
Class FileRestoreContext, method restore.
/**
 * Performs restore operation.
 * <p>
 * The snapshot's file list is diffed against the metadata currently readable from {@code store}.
 * Every snapshot file is registered in the recovery state (flagged as reused when identical to a
 * local file), local files that are about to be overwritten are deleted, and the actual copy is
 * delegated to {@code restoreFiles}. When the copy completes, {@code afterRestore} runs and then
 * {@code listener} is notified.
 *
 * @param snapshotFiles the files that make up the snapshot being restored
 * @param store         the store to restore into; a reference is held while this method runs and
 *                      again while the post-restore cleanup runs
 * @param listener      completed with {@code null} on success; any exception raised while preparing
 *                      the restore, or reported by {@code restoreFiles}, is passed to {@code onFailure}
 */
public void restore(SnapshotFiles snapshotFiles, Store store, ActionListener<Void> listener) {
    store.incRef();
    try {
        logger.debug("[{}] [{}] restoring to [{}] ...", snapshotId, repositoryName, shardId);
        Store.MetadataSnapshot recoveryTargetMetadata;
        try {
            // this will throw an IOException if the store has no segments infos file. The
            // store can still have existing files but they will be deleted just before being
            // restored.
            recoveryTargetMetadata = store.getMetadata(null, true);
        } catch (org.apache.lucene.index.IndexNotFoundException e) {
            // happens when restore to an empty shard, not a big deal
            logger.trace("[{}] [{}] restoring from to an empty shard", shardId, snapshotId);
            recoveryTargetMetadata = Store.MetadataSnapshot.EMPTY;
        } catch (IOException e) {
            logger.warn(new ParameterizedMessage("[{}] [{}] Can't read metadata from store, will not reuse local files during restore", shardId, snapshotId), e);
            recoveryTargetMetadata = Store.MetadataSnapshot.EMPTY;
        }

        // index the snapshot files by physical file name, both as store metadata and as FileInfo
        final List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover = new ArrayList<>();
        final Map<String, StoreFileMetadata> snapshotMetadata = new HashMap<>();
        final Map<String, BlobStoreIndexShardSnapshot.FileInfo> fileInfos = new HashMap<>();
        for (final BlobStoreIndexShardSnapshot.FileInfo fileInfo : snapshotFiles.indexFiles()) {
            snapshotMetadata.put(fileInfo.metadata().name(), fileInfo.metadata());
            fileInfos.put(fileInfo.metadata().name(), fileInfo);
        }
        final Store.MetadataSnapshot sourceMetadata = new Store.MetadataSnapshot(unmodifiableMap(snapshotMetadata), emptyMap(), 0);
        final StoreFileMetadata restoredSegmentsFile = sourceMetadata.getSegmentsFile();
        if (restoredSegmentsFile == null) {
            throw new IndexShardRestoreFailedException(shardId, "Snapshot has no segments file");
        }

        final Store.RecoveryDiff diff = sourceMetadata.recoveryDiff(recoveryTargetMetadata);
        // files reported as identical are only recorded in the recovery state as reused
        for (StoreFileMetadata md : diff.identical) {
            BlobStoreIndexShardSnapshot.FileInfo fileInfo = fileInfos.get(md.name());
            recoveryState.getIndex().addFileDetail(fileInfo.physicalName(), fileInfo.length(), true);
            if (logger.isTraceEnabled()) {
                logger.trace("[{}] [{}] not_recovering file [{}] from [{}], exists in local store and is same", shardId, snapshotId, fileInfo.physicalName(), fileInfo.name());
            }
        }
        // every other file returned by concat(diff) has to be copied from the snapshot
        for (StoreFileMetadata md : concat(diff)) {
            BlobStoreIndexShardSnapshot.FileInfo fileInfo = fileInfos.get(md.name());
            filesToRecover.add(fileInfo);
            recoveryState.getIndex().addFileDetail(fileInfo.physicalName(), fileInfo.length(), false);
            if (logger.isTraceEnabled()) {
                logger.trace("[{}] [{}] recovering [{}] from [{}]", shardId, snapshotId, fileInfo.physicalName(), fileInfo.name());
            }
        }
        recoveryState.getIndex().setFileDetailsComplete();
        if (filesToRecover.isEmpty()) {
            logger.trace("[{}] [{}] no files to recover, all exist within the local store", shardId, snapshotId);
        }
        try {
            // names of all existing store files, kept in a hash set so that the per-file membership
            // check below does not rescan the whole directory listing for every file to recover
            final java.util.Set<String> deleteIfExistFiles = new java.util.HashSet<>(Arrays.asList(store.directory().listAll()));
            for (final BlobStoreIndexShardSnapshot.FileInfo fileToRecover : filesToRecover) {
                // if a file with a same physical name already exist in the store we need to delete it
                // before restoring it from the snapshot. We could be lenient and try to reuse the existing
                // store files (and compare their names/length/checksum again with the snapshot files) but to
                // avoid extra complexity we simply delete them and restore them again like StoreRecovery
                // does with dangling indices. Any existing store file that is not restored from the snapshot
                // will be clean up by RecoveryTarget.cleanFiles().
                final String physicalName = fileToRecover.physicalName();
                if (deleteIfExistFiles.contains(physicalName)) {
                    logger.trace("[{}] [{}] deleting pre-existing file [{}]", shardId, snapshotId, physicalName);
                    store.directory().deleteFile(physicalName);
                }
            }
            restoreFiles(filesToRecover, store, ActionListener.wrap(v -> {
                // the copy may complete after this method has returned and released its own
                // reference, so the cleanup takes a reference of its own
                store.incRef();
                try {
                    afterRestore(snapshotFiles, store, restoredSegmentsFile);
                    listener.onResponse(null);
                } finally {
                    store.decRef();
                }
            }, listener::onFailure));
        } catch (IOException ex) {
            throw new IndexShardRestoreFailedException(shardId, "Failed to recover index", ex);
        }
    } catch (Exception e) {
        listener.onFailure(e);
    } finally {
        store.decRef();
    }
}
Use of org.opensearch.index.snapshots.IndexShardRestoreFailedException in the OpenSearch project (opensearch-project).
Class StoreRecovery, method restore.
/**
 * Recovers a shard from the snapshot described by the {@link SnapshotRecoverySource} found in its
 * routing entry.
 * <p>
 * The shard files are first restored through {@code repository.restoreShard}; once that succeeds the
 * store is bootstrapped, the engine is opened and the recovery is finalized before {@code listener}
 * receives {@code true}. Any failure reaches {@code listener} wrapped in an
 * {@link IndexShardRestoreFailedException}.
 *
 * @param indexShard    the shard being recovered
 * @param repository    the repository that holds the snapshot
 * @param restoreSource the snapshot recovery source; {@code null} fails the listener immediately
 * @param listener      completed with {@code true} on success
 */
private void restore(IndexShard indexShard, Repository repository, SnapshotRecoverySource restoreSource, ActionListener<Boolean> listener) {
    logger.debug("restoring from {} ...", indexShard.recoveryState().getRecoverySource());
    indexShard.preRecovery();
    final RecoveryState.Translog translogStats = indexShard.recoveryState().getTranslog();
    if (restoreSource == null) {
        listener.onFailure(new IndexShardRestoreFailedException(shardId, "empty restore source"));
        return;
    }
    if (logger.isTraceEnabled()) {
        logger.trace("[{}] restoring shard [{}]", restoreSource.snapshot(), shardId);
    }

    // runs once the shard files have been copied from the repository
    final ActionListener<Void> onShardFilesRestored = ActionListener.wrap(ignored -> {
        bootstrap(indexShard, indexShard.store());
        assert indexShard.shardRouting.primary() : "only primary shards can recover from store";
        writeEmptyRetentionLeasesFile(indexShard);
        indexShard.openEngineAndRecoverFromTranslog();
        indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm());
        indexShard.finalizeRecovery();
        indexShard.postRecovery("restore done");
        listener.onResponse(true);
    }, failure -> listener.onFailure(new IndexShardRestoreFailedException(shardId, "restore failed", failure)));

    try {
        translogStats.totalOperations(0);
        translogStats.totalOperationsOnStart(0);
        indexShard.prepareForIndexRecovery();

        final IndexId sourceIndexId = restoreSource.index();
        // the index name in the snapshot may differ from the name of the index being restored into
        final ShardId snapshotShardId = shardId.getIndexName().equals(sourceIndexId.getName())
            ? shardId
            : new ShardId(sourceIndexId.getName(), IndexMetadata.INDEX_UUID_NA_VALUE, shardId.id());

        final StepListener<IndexId> resolvedIndexId = new StepListener<>();
        if (sourceIndexId.getId().equals(IndexMetadata.INDEX_UUID_NA_VALUE) == false) {
            resolvedIndexId.onResponse(sourceIndexId);
        } else {
            // If the index UUID was not found in the recovery source we will have to load RepositoryData and resolve
            // it by index name.
            // BwC path, running against an old version master that did not add the IndexId to the recovery source
            repository.getRepositoryData(
                ActionListener.map(resolvedIndexId, repositoryData -> repositoryData.resolveIndexId(sourceIndexId.getName()))
            );
        }
        assert indexShard.getEngineOrNull() == null;
        resolvedIndexId.whenComplete(
            idx -> repository.restoreShard(
                indexShard.store(),
                restoreSource.snapshot().getSnapshotId(),
                idx,
                snapshotShardId,
                indexShard.recoveryState(),
                onShardFilesRestored
            ),
            onShardFilesRestored::onFailure
        );
    } catch (Exception e) {
        onShardFilesRestored.onFailure(e);
    }
}
Use of org.opensearch.index.snapshots.IndexShardRestoreFailedException in the OpenSearch project (opensearch-project).
Class FileRestoreContext, method afterRestore.
/**
 * Cleans up the store after the snapshot files have been copied into it.
 * <p>
 * First asks Lucene to prune files relative to the restored segments file, then deletes every
 * remaining store file that is neither autogenerated nor part of the snapshot. Failures of the
 * second step are best-effort: they are logged (with their cause) and do not fail the restore.
 *
 * @param snapshotFiles        the files that make up the snapshot that was restored
 * @param store                the store that was restored into
 * @param restoredSegmentsFile metadata of the segments file that was restored from the snapshot
 * @throws IndexShardRestoreFailedException if pruning unreferenced files fails with an {@link IOException}
 */
private void afterRestore(SnapshotFiles snapshotFiles, Store store, StoreFileMetadata restoredSegmentsFile) {
    // prune store files based on the restored segments file
    try {
        Lucene.pruneUnreferencedFiles(restoredSegmentsFile.name(), store.directory());
    } catch (IOException e) {
        throw new IndexShardRestoreFailedException(shardId, "Failed to fetch index version after copying it over", e);
    }
    // now, go over and clean files that are in the store, but were not in the snapshot
    try {
        for (String storeFile : store.directory().listAll()) {
            if (Store.isAutogenerated(storeFile) || snapshotFiles.containPhysicalIndexFile(storeFile)) {
                // skip write.lock, checksum files and files that exist in the snapshot
                continue;
            }
            try {
                // NOTE(review): deleteQuiet is called elsewhere with file names only, so "restore" may be
                // treated as a file name here and the explicit deleteFile below may be redundant - confirm
                // against Store#deleteQuiet before changing
                store.deleteQuiet("restore", storeFile);
                store.directory().deleteFile(storeFile);
            } catch (IOException e) {
                // best effort: keep going, but keep the cause in the log so the failure can be diagnosed
                logger.warn(new ParameterizedMessage("[{}] [{}] failed to delete file [{}] during snapshot cleanup", shardId, snapshotId, storeFile), e);
            }
        }
    } catch (IOException e) {
        // best effort: the restore itself succeeded, only the cleanup is incomplete
        logger.warn(new ParameterizedMessage("[{}] [{}] failed to list directory - some of files might not be deleted", shardId, snapshotId), e);
    }
}
Use of org.opensearch.index.snapshots.IndexShardRestoreFailedException in the OpenSearch project (opensearch-project).
Class BlobStoreRepository, method restoreShard.
/**
 * Restores the files of one shard from a snapshot in this repository into the given store.
 * <p>
 * The work is submitted to the {@code SNAPSHOT} thread pool executor: the shard snapshot is loaded from
 * the shard's blob container and an anonymous {@link FileRestoreContext} drives the restore, copying the
 * required files with one or more workers running on the same executor.
 *
 * @param store           the store to restore into; its shard id is used in failure reporting
 * @param snapshotId      the snapshot to restore from
 * @param indexId         the index in the repository, used to locate the shard container
 * @param snapshotShardId the shard id as stored in the snapshot, used to locate the shard container
 * @param recoveryState   updated with the number of bytes recovered for each file
 * @param listener        completed when the restore finishes; failures are delivered wrapped in an
 *                        {@link IndexShardRestoreFailedException}
 */
@Override
public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState, ActionListener<Void> listener) {
final ShardId shardId = store.shardId();
// wrap any failure so the caller always receives an IndexShardRestoreFailedException naming the snapshot
final ActionListener<Void> restoreListener = ActionListener.delegateResponse(listener, (l, e) -> l.onFailure(new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId + "]", e)));
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
final BlobContainer container = shardContainer(indexId, snapshotShardId);
executor.execute(ActionRunnable.wrap(restoreListener, l -> {
final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), null);
new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) {
// Copies the given files into the store; completes the listener immediately when there is nothing to copy.
@Override
protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store, ActionListener<Void> listener) {
if (filesToRecover.isEmpty()) {
listener.onResponse(null);
} else {
// Start as many workers as fit into the snapshot pool at once at the most
// (the upper bound used here is the number of files in the snapshot, not the number of files to recover)
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), snapshotFiles.indexFiles().size());
final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> files = new LinkedBlockingQueue<>(filesToRecover);
// NOTE(review): fileQueueListener is defined outside this block; presumably it completes the wrapped
// listener once all `workers` workers have reported - confirm
final ActionListener<Void> allFilesListener = fileQueueListener(files, workers, ActionListener.map(listener, v -> null));
// restore the files from the snapshot to the Lucene store
for (int i = 0; i < workers; ++i) {
try {
executeOneFileRestore(files, allFilesListener);
} catch (Exception e) {
allFilesListener.onFailure(e);
}
}
}
}
// One step of a worker: takes the next file from the queue, restores it on the executor and then
// schedules itself again; reports completion to the listener once the queue is empty.
private void executeOneFileRestore(BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> files, ActionListener<Void> allFilesListener) throws InterruptedException {
// zero timeout: do not wait for a file, an empty queue means this worker is done
final BlobStoreIndexShardSnapshot.FileInfo fileToRecover = files.poll(0L, TimeUnit.MILLISECONDS);
if (fileToRecover == null) {
allFilesListener.onResponse(null);
} else {
executor.execute(ActionRunnable.wrap(allFilesListener, filesListener -> {
// hold a store reference while the file is being written
store.incRef();
try {
restoreFile(fileToRecover, store);
} finally {
store.decRef();
}
executeOneFileRestore(files, filesListener);
}));
}
}
// Writes a single snapshot file into the store through a verifying output, syncs it, and deletes the
// partially written file again if anything goes wrong.
private void restoreFile(BlobStoreIndexShardSnapshot.FileInfo fileInfo, Store store) throws IOException {
ensureNotClosing(store);
logger.trace(() -> new ParameterizedMessage("[{}] restoring [{}] to [{}]", metadata.name(), fileInfo, store));
boolean success = false;
try (IndexOutput indexOutput = store.createVerifyingOutput(fileInfo.physicalName(), fileInfo.metadata(), IOContext.DEFAULT)) {
if (fileInfo.name().startsWith(VIRTUAL_DATA_BLOB_PREFIX)) {
// files with this name prefix are not read from the container: their content is taken from the
// hash bytes stored in the file metadata
final BytesRef hash = fileInfo.metadata().hash();
indexOutput.writeBytes(hash.bytes, hash.offset, hash.length);
recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.physicalName(), hash.length);
} else {
// the file may be stored as several parts; each part is opened from the container on demand
// NOTE(review): maybeRateLimitRestores is defined outside this block; presumably it throttles reads - confirm
try (InputStream stream = maybeRateLimitRestores(new SlicedInputStream(fileInfo.numberOfParts()) {
@Override
protected InputStream openSlice(int slice) throws IOException {
ensureNotClosing(store);
return container.readBlob(fileInfo.partName(slice));
}
})) {
// never allocate a buffer larger than the file itself
final byte[] buffer = new byte[Math.toIntExact(Math.min(bufferSize, fileInfo.length()))];
int length;
while ((length = stream.read(buffer)) > 0) {
// abort between chunks if the store started closing
ensureNotClosing(store);
indexOutput.writeBytes(buffer, 0, length);
recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.physicalName(), length);
}
}
}
Store.verify(indexOutput);
// close explicitly before syncing the file; the enclosing try-with-resources will close it again
indexOutput.close();
store.directory().sync(Collections.singleton(fileInfo.physicalName()));
success = true;
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
// record the corruption on the store (best effort), then propagate the original exception
try {
store.markStoreCorrupted(ex);
} catch (IOException e) {
logger.warn("store cannot be marked as corrupted", e);
}
throw ex;
} finally {
if (success == false) {
// do not leave a partially written or unverified file behind
store.deleteQuiet(fileInfo.physicalName());
}
}
}
// Throws AlreadyClosedException when the store reports that it is closing; callers must hold a store reference.
void ensureNotClosing(final Store store) throws AlreadyClosedException {
assert store.refCount() > 0;
if (store.isClosing()) {
throw new AlreadyClosedException("store is closing");
}
}
}.restore(snapshotFiles, store, l);
}));
}
Aggregations