Example use of org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot in the project OpenSearch by opensearch-project.
From the class CloneSnapshotIT, method testShardClone.
/**
 * Verifies that a single shard snapshot can be cloned directly through the repository API.
 *
 * The test randomly runs either against a repository initialised with
 * {@code SnapshotsService.OLD_SNAPSHOT_FORMAT} (in which case no shard generation is passed to the clone
 * and the returned generation is parsed as a number) or against a repository whose shard generation is
 * read from the repository data. After cloning it checks that the clone reports no incremental files,
 * that source and target reference the same files, that the new shard-level metadata lists both
 * snapshots, and that repeating the clone returns the same generation.
 */
public void testShardClone() throws Exception {
    internalCluster().startMasterOnlyNode();
    internalCluster().startDataOnlyNode();
    final String repoName = "repo-name";
    final Path repoPath = randomRepoPath();
    createRepository(repoName, "fs", repoPath);
    final boolean useBwCFormat = randomBoolean();
    if (useBwCFormat) {
        initWithSnapshotVersion(repoName, repoPath, SnapshotsService.OLD_SNAPSHOT_FORMAT);
        // Re-create repo to clear repository data cache
        assertAcked(clusterAdmin().prepareDeleteRepository(repoName).get());
        createRepository(repoName, "fs", repoPath);
    }

    final String indexName = "test-index";
    createIndexWithRandomDocs(indexName, randomIntBetween(5, 10));
    final String sourceSnapshot = "source-snapshot";
    final SnapshotInfo sourceSnapshotInfo = createFullSnapshot(repoName, sourceSnapshot);

    final BlobStoreRepository repository = (BlobStoreRepository) internalCluster().getCurrentMasterNodeInstance(RepositoriesService.class).repository(repoName);
    final RepositoryData repositoryData = getRepositoryData(repoName);
    final IndexId indexId = repositoryData.resolveIndexId(indexName);
    final int shardId = 0;
    final RepositoryShardId repositoryShardId = new RepositoryShardId(indexId, shardId);
    final SnapshotId targetSnapshotId = new SnapshotId("target-snapshot", UUIDs.randomBase64UUID(random()));

    // In the old format no shard generation is handed to the clone; otherwise use the one tracked in the repository data
    final String currentShardGen;
    if (useBwCFormat) {
        currentShardGen = null;
    } else {
        currentShardGen = repositoryData.shardGenerations().getShardGen(indexId, shardId);
    }
    final String newShardGeneration = PlainActionFuture.get(f -> repository.cloneShardSnapshot(sourceSnapshotInfo.snapshotId(), targetSnapshotId, repositoryShardId, currentShardGen, f));

    if (useBwCFormat) {
        final long gen = Long.parseLong(newShardGeneration);
        // Initial snapshot brought it to 0, clone increments it to 1
        // expected value goes first so that a failure message reports the values the right way round
        assertEquals(1L, gen);
    }

    final BlobStoreIndexShardSnapshot targetShardSnapshot = readShardSnapshot(repository, repositoryShardId, targetSnapshotId);
    final BlobStoreIndexShardSnapshot sourceShardSnapshot = readShardSnapshot(repository, repositoryShardId, sourceSnapshotInfo.snapshotId());
    assertThat(targetShardSnapshot.incrementalFileCount(), is(0));
    final List<BlobStoreIndexShardSnapshot.FileInfo> sourceFiles = sourceShardSnapshot.indexFiles();
    final List<BlobStoreIndexShardSnapshot.FileInfo> targetFiles = targetShardSnapshot.indexFiles();
    final int fileCount = sourceFiles.size();
    assertEquals(fileCount, targetFiles.size());
    for (int i = 0; i < fileCount; i++) {
        assertTrue("file [" + i + "] of the clone differs from the source", sourceFiles.get(i).isSame(targetFiles.get(i)));
    }

    final BlobStoreIndexShardSnapshots shardMetadata = readShardGeneration(repository, repositoryShardId, newShardGeneration);
    final List<SnapshotFiles> snapshotFiles = shardMetadata.snapshots();
    assertThat(snapshotFiles, hasSize(2));
    assertTrue(snapshotFiles.get(0).isSame(snapshotFiles.get(1)));

    // verify that repeated cloning is idempotent
    final String newShardGeneration2 = PlainActionFuture.get(f -> repository.cloneShardSnapshot(sourceSnapshotInfo.snapshotId(), targetSnapshotId, repositoryShardId, newShardGeneration, f));
    assertEquals(newShardGeneration, newShardGeneration2);
}
Example use of org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot in the project OpenSearch by opensearch-project.
From the class BlobStoreRepository, method snapshotShard.
/**
 * Creates a snapshot of a single shard in this repository and reports the resulting shard generation.
 *
 * Outline of what the method does:
 * 1. Fails the listener with a RepositoryException if the repository is read-only.
 * 2. Loads the existing shard-level snapshot metadata and rejects a duplicate snapshot name.
 * 3. Determines the files of this snapshot: either by reusing the file list of an existing snapshot with the
 *    same shard state identifier, or by comparing the files of the index commit with files already known.
 * 4. Builds the updated shard-level metadata. With shard generations it is written right away under a new
 *    UUID; otherwise the write of the numeric index-N blob (and the deletion of older ones) is deferred.
 * 5. Uploads the files that need writing on the SNAPSHOT thread pool, then writes the snapshot blob, runs the
 *    deferred step, marks the status as done and passes the new generation to the listener.
 *
 * Any exception thrown synchronously inside the method body is passed to {@code listener.onFailure}.
 *
 * @param store                 store of the shard being snapshotted; provides the shard id and file metadata
 * @param mapperService         not referenced by this implementation
 * @param snapshotId            id of the snapshot being created; its name must not already exist for this shard
 * @param indexId               repository index id, used to resolve the shard's blob container
 * @param snapshotIndexCommit   index commit whose files are snapshotted
 * @param shardStateIdentifier  identifier matched against existing snapshots to reuse their file list; may be null
 * @param snapshotStatus        status object that supplies the current generation and abort flag and is moved
 *                              through started / finalize / done
 * @param repositoryMetaVersion version passed to SnapshotsService.useShardGenerations to pick the generation scheme
 * @param userMetadata          not referenced by this implementation
 * @param listener              receives the new shard generation on success, or the failure
 */
@Override
public void snapshotShard(Store store, MapperService mapperService, SnapshotId snapshotId, IndexId indexId, IndexCommit snapshotIndexCommit, String shardStateIdentifier, IndexShardSnapshotStatus snapshotStatus, Version repositoryMetaVersion, Map<String, Object> userMetadata, ActionListener<String> listener) {
if (isReadOnly()) {
listener.onFailure(new RepositoryException(metadata.name(), "cannot snapshot shard on a readonly repository"));
return;
}
final ShardId shardId = store.shardId();
final long startTime = threadPool.absoluteTimeInMillis();
try {
final String generation = snapshotStatus.generation();
logger.debug("[{}] [{}] snapshot to [{}] [{}] ...", shardId, snapshotId, metadata.name(), generation);
final BlobContainer shardContainer = shardContainer(indexId, shardId);
// Without a known generation all blobs with the index-file prefix are listed; with one, only that generation's blob is considered
final Set<String> blobs;
if (generation == null) {
try {
blobs = shardContainer.listBlobsByPrefix(INDEX_FILE_PREFIX).keySet();
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "failed to list blobs", e);
}
} else {
blobs = Collections.singleton(INDEX_FILE_PREFIX + generation);
}
// v1 is the existing shard-level metadata, v2 the generation it was read from
Tuple<BlobStoreIndexShardSnapshots, String> tuple = buildBlobStoreIndexShardSnapshots(blobs, shardContainer, generation);
BlobStoreIndexShardSnapshots snapshots = tuple.v1();
String fileListGeneration = tuple.v2();
if (snapshots.snapshots().stream().anyMatch(sf -> sf.snapshot().equals(snapshotId.getName()))) {
throw new IndexShardSnapshotFailedException(shardId, "Duplicate snapshot name [" + snapshotId.getName() + "] detected, aborting");
}
// First inspect all known SegmentInfos instances to see if we already have an equivalent commit in the repository
// (null when no shardStateIdentifier was given or no existing snapshot carries the same identifier)
final List<BlobStoreIndexShardSnapshot.FileInfo> filesFromSegmentInfos = Optional.ofNullable(shardStateIdentifier).map(id -> {
for (SnapshotFiles snapshotFileSet : snapshots.snapshots()) {
if (id.equals(snapshotFileSet.shardStateIdentifier())) {
return snapshotFileSet.indexFiles();
}
}
return null;
}).orElse(null);
final List<BlobStoreIndexShardSnapshot.FileInfo> indexCommitPointFiles;
// Statistics reported through snapshotStatus.moveToStarted below
int indexIncrementalFileCount = 0;
int indexTotalNumberOfFiles = 0;
long indexIncrementalSize = 0;
long indexTotalFileSize = 0;
// Files that actually have to be uploaded by the worker tasks started at the end of this method
final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> filesToSnapshot = new LinkedBlockingQueue<>();
// If no existing snapshot with the same shard state identifier was found, determine the files to upload by comparing the files
// in the commit with files already in the repository
if (filesFromSegmentInfos == null) {
indexCommitPointFiles = new ArrayList<>();
final Collection<String> fileNames;
final Store.MetadataSnapshot metadataFromStore;
// The Releasable returned by incrementStoreRef is released as soon as the metadata and the file names have been read
try (Releasable ignored = incrementStoreRef(store, snapshotStatus, shardId)) {
// TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
try {
logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
metadataFromStore = store.getMetadata(snapshotIndexCommit);
fileNames = snapshotIndexCommit.getFileNames();
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to get store file metadata", e);
}
}
for (String fileName : fileNames) {
// The abort flag is checked once per file
if (snapshotStatus.isAborted()) {
logger.debug("[{}] [{}] Aborted on the file [{}], exiting", shardId, snapshotId, fileName);
throw new AbortedSnapshotException();
}
logger.trace("[{}] [{}] Processing [{}]", shardId, snapshotId, fileName);
final StoreFileMetadata md = metadataFromStore.get(fileName);
BlobStoreIndexShardSnapshot.FileInfo existingFileInfo = null;
List<BlobStoreIndexShardSnapshot.FileInfo> filesInfo = snapshots.findPhysicalIndexFiles(fileName);
if (filesInfo != null) {
for (BlobStoreIndexShardSnapshot.FileInfo fileInfo : filesInfo) {
if (fileInfo.isSame(md)) {
// a commit point file with the same name, size and checksum was already copied to repository
// we will reuse it for this snapshot
existingFileInfo = fileInfo;
break;
}
}
}
// We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
// directly in the shard level metadata in this case
final boolean needsWrite = md.hashEqualsContents() == false;
indexTotalFileSize += md.length();
indexTotalNumberOfFiles++;
if (existingFileInfo == null) {
// Counted as incremental even when needsWrite is false, so this count can exceed filesToSnapshot.size()
indexIncrementalFileCount++;
indexIncrementalSize += md.length();
// create a new FileInfo
BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo = new BlobStoreIndexShardSnapshot.FileInfo((needsWrite ? UPLOADED_DATA_BLOB_PREFIX : VIRTUAL_DATA_BLOB_PREFIX) + UUIDs.randomBase64UUID(), md, chunkSize());
indexCommitPointFiles.add(snapshotFileInfo);
if (needsWrite) {
filesToSnapshot.add(snapshotFileInfo);
}
assert needsWrite || assertFileContentsMatchHash(snapshotFileInfo, store);
} else {
indexCommitPointFiles.add(existingFileInfo);
}
}
} else {
// An existing snapshot has the same shard state identifier: reuse its file list as is.
// Only the totals are updated; the incremental counters stay at zero and nothing is queued for upload.
for (BlobStoreIndexShardSnapshot.FileInfo fileInfo : filesFromSegmentInfos) {
indexTotalNumberOfFiles++;
indexTotalFileSize += fileInfo.length();
}
indexCommitPointFiles = filesFromSegmentInfos;
}
snapshotStatus.moveToStarted(startTime, indexIncrementalFileCount, indexTotalNumberOfFiles, indexIncrementalSize, indexTotalFileSize);
final String indexGeneration;
final boolean writeShardGens = SnapshotsService.useShardGenerations(repositoryMetaVersion);
// build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
// (the new snapshot is placed first, followed by the existing entries in their current order)
List<SnapshotFiles> newSnapshotsList = new ArrayList<>();
newSnapshotsList.add(new SnapshotFiles(snapshotId.getName(), indexCommitPointFiles, shardStateIdentifier));
for (SnapshotFiles point : snapshots) {
newSnapshotsList.add(point);
}
final BlobStoreIndexShardSnapshots updatedBlobStoreIndexShardSnapshots = new BlobStoreIndexShardSnapshots(newSnapshotsList);
// Step executed after the snapshot blob has been written (see the completion handler below)
final Runnable afterWriteSnapBlob;
if (writeShardGens) {
// When using shard generations we can safely write the index-${uuid} blob before writing out any of the actual data
// for this shard since the uuid named blob will simply not be referenced in case of error and thus we will never
// reference a generation that has not had all its files fully upload.
indexGeneration = UUIDs.randomBase64UUID();
try {
INDEX_SHARD_SNAPSHOTS_FORMAT.write(updatedBlobStoreIndexShardSnapshots, shardContainer, indexGeneration, compress);
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to write shard level snapshot metadata for [" + snapshotId + "] to [" + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration) + "]", e);
}
// Nothing left to do afterwards: the shard-level metadata has already been written above
afterWriteSnapBlob = () -> {
};
} else {
// When not using shard generations we can only write the index-${N} blob after all other work for this shard has
// completed.
// Also, in case of numeric shard generations the data node has to take care of deleting old shard generations.
final long newGen = Long.parseLong(fileListGeneration) + 1;
indexGeneration = Long.toString(newGen);
// Delete all previous index-N blobs
final List<String> blobsToDelete = blobs.stream().filter(blob -> blob.startsWith(SNAPSHOT_INDEX_PREFIX)).collect(Collectors.toList());
assert blobsToDelete.stream().mapToLong(b -> Long.parseLong(b.replaceFirst(SNAPSHOT_INDEX_PREFIX, ""))).max().orElse(-1L) < Long.parseLong(indexGeneration) : "Tried to delete an index-N blob newer than the current generation [" + indexGeneration + "] when deleting index-N blobs " + blobsToDelete;
afterWriteSnapBlob = () -> {
try {
writeShardIndexBlobAtomic(shardContainer, newGen, updatedBlobStoreIndexShardSnapshots);
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to finalize snapshot creation [" + snapshotId + "] with shard index [" + INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(indexGeneration) + "]", e);
}
// Failing to delete old index-N blobs is only logged; it does not fail the snapshot
try {
deleteFromContainer(shardContainer, blobsToDelete);
} catch (IOException e) {
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to delete old index-N blobs during finalization", snapshotId, shardId), e);
}
};
}
// Completion handler: runs once all file uploads have finished; a failure is forwarded to the caller's listener
final StepListener<Collection<Void>> allFilesUploadedListener = new StepListener<>();
allFilesUploadedListener.whenComplete(v -> {
final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.moveToFinalize(snapshotIndexCommit.getGeneration());
// now create and write the commit point
logger.trace("[{}] [{}] writing shard snapshot file", shardId, snapshotId);
try {
INDEX_SHARD_SNAPSHOT_FORMAT.write(new BlobStoreIndexShardSnapshot(snapshotId.getName(), lastSnapshotStatus.getIndexVersion(), indexCommitPointFiles, lastSnapshotStatus.getStartTime(), threadPool.absoluteTimeInMillis() - lastSnapshotStatus.getStartTime(), lastSnapshotStatus.getIncrementalFileCount(), lastSnapshotStatus.getIncrementalSize()), shardContainer, snapshotId.getUUID(), compress);
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to write commit point", e);
}
afterWriteSnapBlob.run();
snapshotStatus.moveToDone(threadPool.absoluteTimeInMillis(), indexGeneration);
listener.onResponse(indexGeneration);
}, listener::onFailure);
// No new files in this snapshot: complete right away without starting any upload tasks
if (indexIncrementalFileCount == 0) {
allFilesUploadedListener.onResponse(Collections.emptyList());
return;
}
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// Start as many workers as fit into the snapshot pool at once at the most
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), indexIncrementalFileCount);
final ActionListener<Void> filesListener = fileQueueListener(filesToSnapshot, workers, allFilesUploadedListener);
for (int i = 0; i < workers; ++i) {
executeOneFileSnapshot(store, snapshotId, indexId, snapshotStatus, filesToSnapshot, executor, filesListener);
}
} catch (Exception e) {
// Any failure raised synchronously above is reported through the listener instead of being thrown to the caller
listener.onFailure(e);
}
}
Example use of org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot in the project OpenSearch by opensearch-project.
From the class BlobStoreRepository, method cloneShardSnapshot.
/**
 * Clones the shard-level data of snapshot {@code source} into a new snapshot {@code target} for one shard.
 *
 * The work runs on the SNAPSHOT thread pool. The listener receives the shard generation that contains the
 * clone. If the shard metadata already lists {@code target} with the same files as {@code source}, the
 * existing generation is returned and nothing is written. A read-only repository, a missing source or a
 * conflicting target is reported as a {@link RepositoryException}.
 *
 * @param source          snapshot to clone from
 * @param target          snapshot to create
 * @param shardId         repository shard to operate on
 * @param shardGeneration current shard generation, or {@code null} to determine it by listing the shard's index blobs
 * @param listener        receives the resulting shard generation or the failure
 */
@Override
public void cloneShardSnapshot(SnapshotId source, SnapshotId target, RepositoryShardId shardId, @Nullable String shardGeneration, ActionListener<String> listener) {
    if (isReadOnly()) {
        listener.onFailure(new RepositoryException(metadata.name(), "cannot clone shard snapshot on a readonly repository"));
        return;
    }
    final IndexId index = shardId.index();
    final int shardNum = shardId.shardId();
    threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> {
        final long cloneStartMillis = threadPool.absoluteTimeInMillis();
        final BlobContainer container = shardContainer(index, shardNum);

        // Resolve the current shard metadata together with the generation to read from and the one to write to
        final BlobStoreIndexShardSnapshots currentSnapshots;
        final String currentGen;
        final String nextGen;
        if (shardGeneration == null) {
            // No generation supplied: find the latest numeric one by listing the blobs and bump it by one
            final Tuple<BlobStoreIndexShardSnapshots, Long> latest = buildBlobStoreIndexShardSnapshots(
                container.listBlobsByPrefix(INDEX_FILE_PREFIX).keySet(),
                container
            );
            final Long latestNumericGen = latest.v2();
            currentGen = String.valueOf(latestNumericGen);
            nextGen = String.valueOf(latestNumericGen + 1);
            currentSnapshots = latest.v1();
        } else {
            nextGen = UUIDs.randomBase64UUID();
            currentSnapshots = buildBlobStoreIndexShardSnapshots(Collections.emptySet(), container, shardGeneration).v1();
            currentGen = shardGeneration;
        }

        // Look up the entries for the target and the source snapshot; a name matching the target wins over the source
        SnapshotFiles targetEntry = null;
        SnapshotFiles sourceEntry = null;
        for (SnapshotFiles candidate : currentSnapshots) {
            final String candidateName = candidate.snapshot();
            if (candidateName.equals(target.getName())) {
                targetEntry = candidate;
            } else if (candidateName.equals(source.getName())) {
                sourceEntry = candidate;
            }
            if (sourceEntry != null && targetEntry != null) {
                break;
            }
        }

        if (sourceEntry == null) {
            throw new RepositoryException(metadata.name(), "Can't create clone of [" + shardId + "] for snapshot [" + target + "]. The source snapshot [" + source + "] was not found in the shard metadata.");
        }
        if (targetEntry != null) {
            if (targetEntry.isSame(sourceEntry) == false) {
                throw new RepositoryException(metadata.name(), "Can't create clone of [" + shardId + "] for snapshot [" + target + "]. A snapshot by that name already exists for this shard.");
            }
            // The clone already exists with identical files: report the generation it lives in
            return currentGen;
        }

        final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(container, source);
        logger.trace("[{}] [{}] writing shard snapshot file for clone", shardId, target);
        final BlobStoreIndexShardSnapshot cloneMeta = sourceMeta.asClone(
            target.getName(),
            cloneStartMillis,
            threadPool.absoluteTimeInMillis() - cloneStartMillis
        );
        INDEX_SHARD_SNAPSHOT_FORMAT.write(cloneMeta, container, target.getUUID(), compress);
        final BlobStoreIndexShardSnapshots withClone = currentSnapshots.withClone(source.getName(), target.getName());
        INDEX_SHARD_SNAPSHOTS_FORMAT.write(withClone, container, nextGen, compress);
        return nextGen;
    }));
}
Example use of org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot in the project OpenSearch by opensearch-project.
From the class BlobStoreRepository, method restoreShard.
/**
 * Restores the files of the given shard snapshot from the repository into {@code store}.
 *
 * The snapshot metadata is loaded on the SNAPSHOT thread pool and handed to a {@link FileRestoreContext}.
 * The files that context asks for are pulled from a shared queue by worker tasks on the same pool. Any
 * failure is passed to the listener wrapped in an {@link IndexShardRestoreFailedException}.
 *
 * @param store           target store
 * @param snapshotId      snapshot to restore from
 * @param indexId         repository index id used to resolve the shard's blob container
 * @param snapshotShardId shard within the snapshot to read from
 * @param recoveryState   recovery state that is updated with the number of bytes recovered per file
 * @param listener        notified when the restore has completed or failed
 */
@Override
public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, ShardId snapshotShardId, RecoveryState recoveryState, ActionListener<Void> listener) {
    final ShardId shardId = store.shardId();
    final ActionListener<Void> restoreListener = ActionListener.delegateResponse(
        listener,
        (delegate, cause) -> delegate.onFailure(new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId + "]", cause))
    );
    final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
    final BlobContainer container = shardContainer(indexId, snapshotShardId);
    executor.execute(ActionRunnable.wrap(restoreListener, wrappedListener -> {
        final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
        final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), null);
        new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) {

            @Override
            protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store, ActionListener<Void> listener) {
                if (filesToRecover.isEmpty()) {
                    listener.onResponse(null);
                    return;
                }
                // Start as many workers as fit into the snapshot pool at once at the most
                final int workerCount = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), snapshotFiles.indexFiles().size());
                final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> pending = new LinkedBlockingQueue<>(filesToRecover);
                final ActionListener<Void> groupedListener = fileQueueListener(pending, workerCount, ActionListener.map(listener, v -> null));
                // restore the files from the snapshot to the Lucene store
                for (int worker = 0; worker < workerCount; ++worker) {
                    try {
                        executeOneFileRestore(pending, groupedListener);
                    } catch (Exception e) {
                        groupedListener.onFailure(e);
                    }
                }
            }

            // Takes the next file off the queue and restores it on the executor; completes the listener once the queue is empty
            private void executeOneFileRestore(BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> queue, ActionListener<Void> doneListener) throws InterruptedException {
                final BlobStoreIndexShardSnapshot.FileInfo next = queue.poll(0L, TimeUnit.MILLISECONDS);
                if (next == null) {
                    doneListener.onResponse(null);
                    return;
                }
                executor.execute(ActionRunnable.wrap(doneListener, chained -> {
                    store.incRef();
                    try {
                        restoreFile(next, store);
                    } finally {
                        store.decRef();
                    }
                    executeOneFileRestore(queue, chained);
                }));
            }

            // Writes one file into the store, verifies and syncs it; on failure the partially written file is deleted
            private void restoreFile(BlobStoreIndexShardSnapshot.FileInfo fileInfo, Store store) throws IOException {
                ensureNotClosing(store);
                logger.trace(() -> new ParameterizedMessage("[{}] restoring [{}] to [{}]", metadata.name(), fileInfo, store));
                boolean restored = false;
                try (IndexOutput indexOutput = store.createVerifyingOutput(fileInfo.physicalName(), fileInfo.metadata(), IOContext.DEFAULT)) {
                    if (fileInfo.name().startsWith(VIRTUAL_DATA_BLOB_PREFIX)) {
                        writeHashAsContents(fileInfo, indexOutput);
                    } else {
                        copyPartsFromRepository(fileInfo, store, indexOutput);
                    }
                    Store.verify(indexOutput);
                    indexOutput.close();
                    store.directory().sync(Collections.singleton(fileInfo.physicalName()));
                    restored = true;
                } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
                    try {
                        store.markStoreCorrupted(ex);
                    } catch (IOException markFailure) {
                        logger.warn("store cannot be marked as corrupted", markFailure);
                    }
                    throw ex;
                } finally {
                    if (restored == false) {
                        store.deleteQuiet(fileInfo.physicalName());
                    }
                }
            }

            // For blobs with the virtual prefix the bytes come from the hash stored in the file metadata
            private void writeHashAsContents(BlobStoreIndexShardSnapshot.FileInfo fileInfo, IndexOutput indexOutput) throws IOException {
                final BytesRef contents = fileInfo.metadata().hash();
                indexOutput.writeBytes(contents.bytes, contents.offset, contents.length);
                recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.physicalName(), contents.length);
            }

            // Streams all parts of the blob from the container into the output, reporting the bytes recovered as it goes
            private void copyPartsFromRepository(BlobStoreIndexShardSnapshot.FileInfo fileInfo, Store store, IndexOutput indexOutput) throws IOException {
                try (InputStream stream = maybeRateLimitRestores(new SlicedInputStream(fileInfo.numberOfParts()) {
                    @Override
                    protected InputStream openSlice(int slice) throws IOException {
                        ensureNotClosing(store);
                        return container.readBlob(fileInfo.partName(slice));
                    }
                })) {
                    final byte[] chunk = new byte[Math.toIntExact(Math.min(bufferSize, fileInfo.length()))];
                    int bytesRead;
                    while ((bytesRead = stream.read(chunk)) > 0) {
                        ensureNotClosing(store);
                        indexOutput.writeBytes(chunk, 0, bytesRead);
                        recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.physicalName(), bytesRead);
                    }
                }
            }

            // Throws if the store has started closing
            void ensureNotClosing(final Store store) throws AlreadyClosedException {
                assert store.refCount() > 0;
                if (store.isClosing()) {
                    throw new AlreadyClosedException("store is closing");
                }
            }
        }.restore(snapshotFiles, store, wrappedListener);
    }));
}
Aggregations