Use of org.opensearch.common.blobstore.BlobMetadata in project OpenSearch by opensearch-project.
The class HdfsBlobContainer, method listBlobsByPrefix.
@Override
public Map<String, BlobMetadata> listBlobsByPrefix(@Nullable final String prefix) throws IOException {
    FileStatus[] files = store.execute(
        fileContext -> fileContext.util().listStatus(path, path -> prefix == null || path.getName().startsWith(prefix))
    );
    Map<String, BlobMetadata> map = new LinkedHashMap<>();
    for (FileStatus file : files) {
        if (file.isFile()) {
            map.put(file.getPath().getName(), new PlainBlobMetadata(file.getPath().getName(), file.getLen()));
        }
    }
    return Collections.unmodifiableMap(map);
}
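For orientation, here is a minimal sketch of how a caller might consume the returned map through the BlobContainer interface. The container variable and the "index-" prefix are illustrative, not taken from the snippet above; BlobMetadata exposes name() and length(), as the repository cleanup code further down also relies on.

    // Hypothetical caller; any BlobContainer implementation (HDFS, FS, Azure) behaves the same way.
    Map<String, BlobMetadata> blobs = container.listBlobsByPrefix("index-");
    for (BlobMetadata blob : blobs.values()) {
        System.out.println(blob.name() + " -> " + blob.length() + " bytes");
    }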
Use of org.opensearch.common.blobstore.BlobMetadata in project OpenSearch by opensearch-project.
The class BlobStoreRepository, method cleanupStaleBlobs.
/**
 * Cleans up stale blobs directly under the repository root as well as all indices paths that aren't referenced by any existing
 * snapshots. This method is only to be called directly after a new {@link RepositoryData} was written to the repository, with
 * {@code foundIndices} and {@code rootBlobs} reflecting the repository state from just before that write.
 *
 * @param deletedSnapshots if this method is called as part of a delete operation, the snapshot ids just deleted, or empty if called
 *                         as part of a repository cleanup
 * @param foundIndices     all indices blob containers found in the repository before {@code newRepoData} was written
 * @param rootBlobs        all blobs found directly under the repository root
 * @param newRepoData      new repository data that was just written
 * @param listener         listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation
 */
private void cleanupStaleBlobs(
    Collection<SnapshotId> deletedSnapshots,
    Map<String, BlobContainer> foundIndices,
    Map<String, BlobMetadata> rootBlobs,
    RepositoryData newRepoData,
    ActionListener<DeleteResult> listener
) {
    final GroupedActionListener<DeleteResult> groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> {
        DeleteResult deleteResult = DeleteResult.ZERO;
        for (DeleteResult result : deleteResults) {
            deleteResult = deleteResult.add(result);
        }
        listener.onResponse(deleteResult);
    }, listener::onFailure), 2);

    final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
    final List<String> staleRootBlobs = staleRootBlobs(newRepoData, rootBlobs.keySet());
    if (staleRootBlobs.isEmpty()) {
        groupedListener.onResponse(DeleteResult.ZERO);
    } else {
        executor.execute(ActionRunnable.supply(groupedListener, () -> {
            List<String> deletedBlobs = cleanupStaleRootFiles(newRepoData.getGenId() - 1, deletedSnapshots, staleRootBlobs);
            return new DeleteResult(deletedBlobs.size(), deletedBlobs.stream().mapToLong(name -> rootBlobs.get(name).length()).sum());
        }));
    }

    final Set<String> survivingIndexIds = newRepoData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toSet());
    if (foundIndices.keySet().equals(survivingIndexIds)) {
        groupedListener.onResponse(DeleteResult.ZERO);
    } else {
        cleanupStaleIndices(foundIndices, survivingIndexIds, groupedListener);
    }
}
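The fan-in around GroupedActionListener is the load-bearing piece here: it waits for a fixed number of onResponse calls (two, one per cleanup branch) before handing the collected results to the wrapped listener. Below is a minimal sketch of that pattern in isolation; it assumes the DeleteResult accessors blobsDeleted() and bytesDeleted(), and the logger calls are illustrative.

    // Illustrative only: two independent branches report into one grouped listener.
    GroupedActionListener<DeleteResult> grouped = new GroupedActionListener<>(ActionListener.wrap(results -> {
        DeleteResult combined = DeleteResult.ZERO;
        for (DeleteResult result : results) {
            combined = combined.add(result);
        }
        // Runs only after both branches have responded.
        logger.info("deleted [{}] blobs totalling [{}] bytes", combined.blobsDeleted(), combined.bytesDeleted());
    }, e -> logger.warn("cleanup failed", e)), 2);

    grouped.onResponse(DeleteResult.ZERO); // branch 1: nothing stale at the root
    grouped.onResponse(DeleteResult.ZERO); // branch 2: no stale indices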
Use of org.opensearch.common.blobstore.BlobMetadata in project OpenSearch by opensearch-project.
The class BlobStoreTestUtil, method assertSnapshotUUIDs.
private static void assertSnapshotUUIDs(BlobStoreRepository repository, RepositoryData repositoryData) throws IOException {
    final BlobContainer repoRoot = repository.blobContainer();
    final Collection<SnapshotId> snapshotIds = repositoryData.getSnapshotIds();
    final List<String> expectedSnapshotUUIDs = snapshotIds.stream().map(SnapshotId::getUUID).collect(Collectors.toList());
    for (String prefix : new String[] { BlobStoreRepository.SNAPSHOT_PREFIX, BlobStoreRepository.METADATA_PREFIX }) {
        final Collection<String> foundSnapshotUUIDs = repoRoot.listBlobs()
            .keySet()
            .stream()
            .filter(p -> p.startsWith(prefix))
            .map(p -> p.replace(prefix, "").replace(".dat", ""))
            .collect(Collectors.toSet());
        assertThat(foundSnapshotUUIDs, containsInAnyOrder(expectedSnapshotUUIDs.toArray(Strings.EMPTY_ARRAY)));
    }
    final BlobContainer indicesContainer = repository.getBlobContainer().children().get("indices");
    final Map<String, BlobContainer> indices;
    if (indicesContainer == null) {
        indices = Collections.emptyMap();
    } else {
        indices = indicesContainer.children();
    }
    final Map<IndexId, Integer> maxShardCountsExpected = new HashMap<>();
    final Map<IndexId, Integer> maxShardCountsSeen = new HashMap<>();
    // Assert that for each snapshot, the relevant metadata was written to the index and shard folders
    for (SnapshotId snapshotId : snapshotIds) {
        final SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId);
        for (String index : snapshotInfo.indices()) {
            final IndexId indexId = repositoryData.resolveIndexId(index);
            assertThat(indices, hasKey(indexId.getId()));
            final BlobContainer indexContainer = indices.get(indexId.getId());
            assertThat(indexContainer.listBlobs(), hasKey(String.format(Locale.ROOT, BlobStoreRepository.METADATA_NAME_FORMAT,
                repositoryData.indexMetaDataGenerations().indexMetaBlobId(snapshotId, indexId))));
            final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(repositoryData, snapshotId, indexId);
            for (Map.Entry<String, BlobContainer> entry : indexContainer.children().entrySet()) {
                // Skip Lucene MockFS extraN directories
                if (entry.getKey().startsWith("extra")) {
                    continue;
                }
                final int shardId = Integer.parseInt(entry.getKey());
                final int shardCount = indexMetadata.getNumberOfShards();
                maxShardCountsExpected.compute(indexId, (i, existing) -> existing == null || existing < shardCount ? shardCount : existing);
                final BlobContainer shardContainer = entry.getValue();
                // TODO: we shouldn't be leaking empty shard directories when a shard (but not all of the index it belongs to)
                //       becomes unreferenced. We should fix that and remove this conditional once it's fixed.
                if (shardContainer.listBlobs().keySet().stream().anyMatch(blob -> blob.startsWith("extra") == false)) {
                    // Shard ids are zero-based, so a populated shard directory with id N implies at least N + 1 shards
                    final int impliedCount = shardId + 1;
                    maxShardCountsSeen.compute(indexId, (i, existing) -> existing == null || existing < impliedCount ? impliedCount : existing);
                }
                if (shardId < shardCount && snapshotInfo.shardFailures().stream()
                    .noneMatch(shardFailure -> shardFailure.index().equals(index) && shardFailure.shardId() == shardId)) {
                    final Map<String, BlobMetadata> shardPathContents = shardContainer.listBlobs();
                    assertThat(shardPathContents, hasKey(String.format(Locale.ROOT, BlobStoreRepository.SNAPSHOT_NAME_FORMAT, snapshotId.getUUID())));
                    assertThat(shardPathContents.keySet().stream().filter(name -> name.startsWith(BlobStoreRepository.INDEX_FILE_PREFIX)).count(),
                        lessThanOrEqualTo(2L));
                }
            }
        }
    }
    maxShardCountsSeen.forEach((indexId, count) -> assertThat("Found unreferenced shard paths for index [" + indexId + "]", count,
        lessThanOrEqualTo(maxShardCountsExpected.get(indexId))));
}
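The shard-count bookkeeping above leans on Map.compute to maintain a running maximum per index. The same idiom in isolation, with String keys for brevity (the map contents are illustrative):

    Map<String, Integer> maxSeen = new HashMap<>();
    int implied = 3; // e.g. shard id 2 implies at least 3 shards, since ids are zero-based
    maxSeen.compute("index-a", (key, existing) -> existing == null || existing < implied ? implied : existing);
    // Map.merge expresses the same running maximum more compactly:
    maxSeen.merge("index-a", implied, Math::max);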
Use of org.opensearch.common.blobstore.BlobMetadata in project OpenSearch by opensearch-project.
The class AzureBlobStore, method listBlobsByPrefix.
public Map<String, BlobMetadata> listBlobsByPrefix(String keyPath, String prefix) throws URISyntaxException, BlobStorageException {
    final Map<String, BlobMetadata> blobsBuilder = new HashMap<String, BlobMetadata>();
    final Tuple<BlobServiceClient, Supplier<Context>> client = client();
    final BlobContainerClient blobContainer = client.v1().getBlobContainerClient(container);
    logger.trace(() -> new ParameterizedMessage("listing container [{}], keyPath [{}], prefix [{}]", container, keyPath, prefix));
    // NOTE: this should be here: if (prefix == null) prefix = "";
    // however, this is really inefficient since deleteBlobsByPrefix enumerates everything and
    // then does a prefix match on the result; it should just call listBlobsByPrefix with the prefix!
    final ListBlobsOptions listBlobsOptions = new ListBlobsOptions()
        .setDetails(new BlobListDetails().setRetrieveMetadata(true))
        .setPrefix(keyPath + (prefix == null ? "" : prefix));
    SocketAccess.doPrivilegedVoidException(() -> {
        String continuationToken = null;
        do {
            // Fetch one page at a time; the rest are fetched via the continuation token
            // TODO: reconsider reverting to the simplified approach once https://github.com/Azure/azure-sdk-for-java/issues/26064
            // gets addressed
            final Optional<PagedResponse<BlobItem>> pageOpt = blobContainer.listBlobsByHierarchy("/", listBlobsOptions, timeout())
                .streamByPage(continuationToken)
                .findFirst();
            if (!pageOpt.isPresent()) {
                // No more pages; should never happen
                break;
            }
            final PagedResponse<BlobItem> page = pageOpt.get();
            for (final BlobItem blobItem : page.getValue()) {
                // Skip over the prefixes; only look at the blobs
                if (blobItem.isPrefix() != null && blobItem.isPrefix()) {
                    continue;
                }
                final String name = getBlobName(blobItem.getName(), container, keyPath);
                logger.trace(() -> new ParameterizedMessage("blob name [{}]", name));
                final BlobItemProperties properties = blobItem.getProperties();
                logger.trace(() -> new ParameterizedMessage("blob name [{}], size [{}]", name, properties.getContentLength()));
                blobsBuilder.put(name, new PlainBlobMetadata(name, properties.getContentLength()));
            }
            // Fetch the next continuation token
            continuationToken = page.getContinuationToken();
        } while (StringUtils.isNotBlank(continuationToken));
    });
    return MapBuilder.newMapBuilder(blobsBuilder).immutableMap();
}
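The do/while here drives pagination manually through continuation tokens instead of letting the SDK's paged iterable run to completion, as a workaround for the linked azure-sdk-for-java issue. Below is a minimal, self-contained model of that loop, with a hypothetical fetchPage function standing in for the streamByPage(token).findFirst() call and a simplified Page record in place of PagedResponse:

    import java.util.List;
    import java.util.Optional;
    import java.util.function.Function;

    final class ManualPagingSketch {
        // Hypothetical page shape: a batch of items plus the token for the next batch.
        record Page(List<String> items, String continuationToken) {}

        static void listAll(Function<String, Optional<Page>> fetchPage) {
            String token = null;
            do {
                Optional<Page> pageOpt = fetchPage.apply(token); // the first call passes a null token
                if (pageOpt.isEmpty()) {
                    break; // defensive: a page is normally always returned
                }
                Page page = pageOpt.get();
                page.items().forEach(System.out::println);
                token = page.continuationToken(); // null or blank means there are no more pages
            } while (token != null && !token.isBlank());
        }
    }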
Use of org.opensearch.common.blobstore.BlobMetadata in project OpenSearch by opensearch-project.
The class FsBlobContainer, method listBlobsByPrefix.
@Override
public Map<String, BlobMetadata> listBlobsByPrefix(String blobNamePrefix) throws IOException {
    Map<String, BlobMetadata> builder = new HashMap<>();
    blobNamePrefix = blobNamePrefix == null ? "" : blobNamePrefix;
    try (DirectoryStream<Path> stream = Files.newDirectoryStream(path, blobNamePrefix + "*")) {
        for (Path file : stream) {
            final BasicFileAttributes attrs;
            try {
                attrs = Files.readAttributes(file, BasicFileAttributes.class);
            } catch (FileNotFoundException | NoSuchFileException e) {
                // The file was concurrently deleted between listing files and reading its attributes, so we skip it here
                continue;
            }
            if (attrs.isRegularFile()) {
                builder.put(file.getFileName().toString(), new PlainBlobMetadata(file.getFileName().toString(), attrs.size()));
            }
        }
    }
    return unmodifiableMap(builder);
}
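The subtle part of this implementation is the race between listing and reading attributes: a file observed by the directory stream can be deleted before Files.readAttributes runs, which is why FileNotFoundException and NoSuchFileException are swallowed. A standalone sketch of the same idiom, outside of OpenSearch and with hypothetical class and method names:

    import java.io.IOException;
    import java.nio.file.DirectoryStream;
    import java.nio.file.Files;
    import java.nio.file.NoSuchFileException;
    import java.nio.file.Path;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.HashMap;
    import java.util.Map;

    final class PrefixListingSketch {
        // Lists regular files under dir whose names start with prefix, tolerating concurrent deletes.
        static Map<String, Long> list(Path dir, String prefix) throws IOException {
            Map<String, Long> sizes = new HashMap<>();
            try (DirectoryStream<Path> stream = Files.newDirectoryStream(dir, prefix + "*")) {
                for (Path file : stream) {
                    try {
                        BasicFileAttributes attrs = Files.readAttributes(file, BasicFileAttributes.class);
                        if (attrs.isRegularFile()) {
                            sizes.put(file.getFileName().toString(), attrs.size());
                        }
                    } catch (NoSuchFileException e) {
                        // Deleted between listing and reading attributes; skip it, as above.
                    }
                }
            }
            return sizes;
        }
    }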