use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.
the class BlobStoreUtil method removeTTL.
/**
 * Recursively marks all the blobs associated with the {@link DirIndex} to never expire (removes TTL).
 * Sub-directories listed by {@link DirIndex#getSubDirsPresent()} are processed recursively, and one
 * TTL removal request is issued for every {@link FileBlob} of every file listed by
 * {@link DirIndex#getFilesPresent()}.
 *
 * @param dirIndex the {@link DirIndex} whose contents' TTL needs to be removed
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and store name
 *                 are copied into the per-file request metadata
 * @return A future that completes when all the blobs associated with this dirIndex are marked to
 * never expire.
 */
private CompletableFuture<Void> removeTTL(DirIndex dirIndex, Metadata metadata) {
  String dirName = dirIndex.getDirName();
  if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for root dir.");
  } else {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for dir: {}", dirName);
  }

  List<CompletableFuture<Void>> updateTTLsFuture = new ArrayList<>();

  // Recurse into the sub-directories that are present in this dir.
  for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
    updateTTLsFuture.add(removeTTL(subDir, metadata));
  }

  for (FileIndex file : dirIndex.getFilesPresent()) {
    // Per-file request metadata: file name and size, plus the job/task/store info of the caller's metadata.
    Metadata requestMetadata = new Metadata(
        file.getFileName(),
        Optional.of(file.getFileMetadata().getSize()),
        metadata.getJobName(),
        metadata.getJobId(),
        metadata.getTaskName(),
        metadata.getStoreName());

    for (FileBlob fileBlob : file.getBlobs()) {
      // The op name is a plain concatenated string (not an SLF4J template), so it must not contain "{}".
      String opName = "removeTTL for fileBlob: " + file.getFileName() + " with blobId: " + fileBlob.getBlobId();
      Supplier<CompletionStage<Void>> ttlRemovalAction =
          () -> blobStoreManager.removeTTL(fileBlob.getBlobId(), requestMetadata).toCompletableFuture();
      CompletableFuture<Void> ttlRemovalFuture =
          FutureUtil.executeAsyncWithRetries(opName, ttlRemovalAction, isCauseNonRetriable(), executor);
      updateTTLsFuture.add(ttlRemovalFuture);
    }
  }

  return CompletableFuture.allOf(updateTTLsFuture.toArray(new CompletableFuture[0]));
}
use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.
the class BlobStoreUtil method restoreDir.
/**
 * Non-blocking restore of the contents described by a {@link DirIndex} into a local directory.
 * The local directory (and any missing parents) is created first; then a download is started for every
 * file present in the index, and every sub-directory present in the index is restored recursively.
 *
 * Note that the directory creation happens synchronously: if it fails, a {@link SamzaException} is thrown
 * from this method instead of being reported through the returned future.
 *
 * @param baseDir local directory to restore into
 * @param dirIndex index of the remote directory whose files and sub-dirs are to be restored
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and store name
 *                 are copied into the per-file request metadata
 * @return A future that completes when all the async downloads complete
 */
public CompletableFuture<Void> restoreDir(File baseDir, DirIndex dirIndex, Metadata metadata) {
  LOG.debug("Restoring contents of directory: {} from remote snapshot.", baseDir);

  // Make sure the target directory, including any missing parents, exists before downloading into it.
  try {
    Files.createDirectories(baseDir.toPath());
  } catch (IOException exception) {
    String failedDirPath = baseDir.getAbsolutePath();
    LOG.error("Error creating directory: {} for restore", failedDirPath, exception);
    throw new SamzaException(String.format("Error creating directory: %s for restore", failedDirPath), exception);
  }

  String basePath = baseDir.getAbsolutePath();
  List<CompletableFuture<Void>> pendingRestores = new ArrayList<>();

  // Files first: one download per file present in the remote directory.
  for (FileIndex remoteFile : dirIndex.getFilesPresent()) {
    File localTarget = Paths.get(basePath, remoteFile.getFileName()).toFile();
    String localTargetPath = localTarget.getAbsolutePath();
    Metadata fileRequestMetadata = new Metadata(
        localTargetPath,
        Optional.of(remoteFile.getFileMetadata().getSize()),
        metadata.getJobName(),
        metadata.getJobId(),
        metadata.getTaskName(),
        metadata.getStoreName());
    List<FileBlob> blobsOfFile = remoteFile.getBlobs();
    pendingRestores.add(FutureUtil.executeAsyncWithRetries(
        "restoreFile: " + localTargetPath,
        () -> getFile(blobsOfFile, localTarget, fileRequestMetadata),
        isCauseNonRetriable(),
        executor));
  }

  // Then sub-directories: each one is restored recursively under the base directory.
  for (DirIndex remoteSubDir : dirIndex.getSubDirsPresent()) {
    File localSubDir = Paths.get(basePath, remoteSubDir.getDirName()).toFile();
    pendingRestores.add(restoreDir(localSubDir, remoteSubDir, metadata));
  }

  return FutureUtil.allOf(pendingRestores);
}
use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.
the class BlobStoreUtil method cleanUpDir.
/**
 * Recursively issues delete requests for files and dirs marked to be removed in a previously created remote
 * snapshot.
 * Note: We do not immediately delete files/dirs to be removed when uploading a snapshot to the remote
 * store. We just track them for deletion during the upload, and delete them AFTER the snapshot is uploaded, and the
 * blob IDs have been persisted as part of the checkpoint. This is to prevent data loss if a failure happens
 * part way through the commit. We issue the deletes for these files/subdirs in the cleanUp() phase of the commit
 * lifecycle.
 *
 * Three groups are handled: files in {@link DirIndex#getFilesRemoved()} are deleted, sub-dirs in
 * {@link DirIndex#getSubDirsRemoved()} are deleted completely, and sub-dirs in
 * {@link DirIndex#getSubDirsPresent()} are cleaned up recursively.
 *
 * @param dirIndex the dir in the remote snapshot to clean up.
 * @param metadata Metadata related to the request
 * @return a future that completes when all the files and subdirs marked for deletion are cleaned up.
 */
public CompletionStage<Void> cleanUpDir(DirIndex dirIndex, Metadata metadata) {
  String dirName = dirIndex.getDirName();
  if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
    LOG.debug("Cleaning up root dir in blob store.");
  } else {
    LOG.debug("Cleaning up dir: {} in blob store.", dirName);
  }

  // Collected as CompletableFutures (via toCompletableFuture()) so that the array handed to
  // CompletableFuture.allOf() is type-safe even if a stage is not itself a CompletableFuture;
  // copying arbitrary CompletionStages into a CompletableFuture[] would throw ArrayStoreException.
  List<CompletableFuture<Void>> cleanUpFutures = new ArrayList<>();

  for (FileIndex file : dirIndex.getFilesRemoved()) {
    Metadata requestMetadata = new Metadata(
        file.getFileName(),
        Optional.of(file.getFileMetadata().getSize()),
        metadata.getJobName(),
        metadata.getJobId(),
        metadata.getTaskName(),
        metadata.getStoreName());
    cleanUpFutures.add(deleteFile(file, requestMetadata).toCompletableFuture());
  }

  for (DirIndex subDirToDelete : dirIndex.getSubDirsRemoved()) {
    // recursively delete ALL contents of the subDirToDelete.
    cleanUpFutures.add(deleteDir(subDirToDelete, metadata).toCompletableFuture());
  }

  for (DirIndex subDirToRetain : dirIndex.getSubDirsPresent()) {
    // recursively clean up the subDir, only deleting files and subdirs marked for deletion.
    cleanUpFutures.add(cleanUpDir(subDirToRetain, metadata).toCompletableFuture());
  }

  return CompletableFuture.allOf(cleanUpFutures.toArray(new CompletableFuture[0]));
}
use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.
the class BlobStoreUtil method deleteDir.
/**
 * WARNING: Recursively deletes **ALL** the associated files and subdirs within the provided {@link DirIndex}.
 * A delete is issued for every file in {@link DirIndex#getFilesPresent()}, and every sub-dir in
 * {@link DirIndex#getSubDirsPresent()} is deleted recursively.
 *
 * @param dirIndex {@link DirIndex} whose entire contents are to be deleted.
 * @param metadata {@link Metadata} related to the request
 * @return a future that completes when ALL the files and subdirs associated with the dirIndex have been
 * marked for deletion in the remote blob store.
 */
public CompletionStage<Void> deleteDir(DirIndex dirIndex, Metadata metadata) {
  LOG.debug("Completely deleting dir: {} in blob store", dirIndex.getDirName());

  // Collected as CompletableFutures (via toCompletableFuture()) so that the array handed to
  // CompletableFuture.allOf() is type-safe even if a stage is not itself a CompletableFuture;
  // copying arbitrary CompletionStages into a CompletableFuture[] would throw ArrayStoreException.
  List<CompletableFuture<Void>> deleteFutures = new ArrayList<>();

  // Delete all files present in subDir
  for (FileIndex file : dirIndex.getFilesPresent()) {
    Metadata requestMetadata = new Metadata(
        file.getFileName(),
        Optional.of(file.getFileMetadata().getSize()),
        metadata.getJobName(),
        metadata.getJobId(),
        metadata.getTaskName(),
        metadata.getStoreName());
    deleteFutures.add(deleteFile(file, requestMetadata).toCompletableFuture());
  }

  // Delete all subDirs present recursively
  for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
    deleteFutures.add(deleteDir(subDir, metadata).toCompletableFuture());
  }

  return CompletableFuture.allOf(deleteFutures.toArray(new CompletableFuture[0]));
}
use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.
the class DirDiffUtil method getDirDiff.
/**
 * Recursively computes the {@link DirDiff} between a local snapshot directory and the {@link DirIndex} of the
 * corresponding remote snapshot directory.
 *
 * Files are split into those to upload, retain and remove by the getNewFilesToUpload / getFilesToRetain /
 * getFilesToRemove helpers. Sub-directories are matched by name: local-only sub-dirs are added in full,
 * sub-dirs present on both sides are diffed recursively, and remote-only sub-dirs are marked for removal.
 *
 * @param localSnapshotDir local directory to compare; must be non-null and an existing directory
 * @param remoteSnapshotDir index of the remote directory to compare against; must be non-null
 * @param areSameFile predicate passed to the file categorization helpers and to the recursive calls
 * @param isRootDir if true the resulting diff is named {@link DirIndex#ROOT_DIR_NAME}, otherwise it is named
 *                  after the local directory
 * @return the diff between the local and the remote directory
 */
private static DirDiff getDirDiff(File localSnapshotDir, DirIndex remoteSnapshotDir,
    BiPredicate<File, FileIndex> areSameFile, boolean isRootDir) {
  Preconditions.checkState(localSnapshotDir != null && localSnapshotDir.isDirectory());
  Preconditions.checkNotNull(remoteSnapshotDir);

  LOG.debug("Creating DirDiff between local dir: {} and remote dir: {}",
      localSnapshotDir.getPath(), remoteSnapshotDir.getDirName());
  List<DirDiff> subDirsAdded = new ArrayList<>();
  List<DirDiff> subDirsRetained = new ArrayList<>();
  List<DirIndex> subDirsRemoved = new ArrayList<>();

  // list files returns empty list if local snapshot directory is empty
  List<File> localSnapshotFiles = Arrays.asList(Objects.requireNonNull(localSnapshotDir.listFiles(File::isFile)));
  List<FileIndex> remoteSnapshotFiles = remoteSnapshotDir.getFilesPresent();

  // list files returns empty list if local snapshot directory is empty
  List<File> localSnapshotSubDirs = Arrays.asList(Objects.requireNonNull(localSnapshotDir.listFiles(File::isDirectory)));
  Set<String> localSnapshotSubDirNames = localSnapshotSubDirs.stream()
      .map(File::getName)
      .collect(Collectors.toCollection(HashSet::new));

  List<DirIndex> remoteSnapshotSubDirs = remoteSnapshotDir.getSubDirsPresent();
  Set<String> remoteSnapshotSubDirNames = remoteSnapshotSubDirs.stream()
      .map(DirIndex::getDirName)
      .collect(Collectors.toCollection(HashSet::new));

  // TODO MED shesharm: this compares each file in directory 3 times. Categorize files in one traversal instead.
  List<File> filesToUpload = getNewFilesToUpload(remoteSnapshotFiles, localSnapshotFiles, areSameFile);
  List<FileIndex> filesToRetain = getFilesToRetain(remoteSnapshotFiles, localSnapshotFiles, areSameFile);
  List<FileIndex> filesToRemove = getFilesToRemove(remoteSnapshotFiles, localSnapshotFiles, areSameFile);

  for (File localSnapshotSubDir : localSnapshotSubDirs) {
    if (!remoteSnapshotSubDirNames.contains(localSnapshotSubDir.getName())) {
      // 1. Subdir in local snapshot but not in remote snapshot
      LOG.debug("Subdir {} present in local snapshot but not in remote snapshot. "
          + "Recursively adding subdir contents.", localSnapshotSubDir.getPath());
      subDirsAdded.add(getDiffForNewDir(localSnapshotSubDir));
    } else {
      // 2. Subdir in both local and remote snapshots
      LOG.debug("Subdir {} present in local snapshot and in remote snapshot. "
          + "Recursively comparing local and remote subdirs.", localSnapshotSubDir.getPath());
      // A match is guaranteed here because the name is contained in remoteSnapshotSubDirNames.
      DirIndex remoteSubDirIndex = remoteSnapshotSubDirs.stream()
          .filter(indexBlob -> indexBlob.getDirName().equals(localSnapshotSubDir.getName()))
          .findFirst()
          .get();
      subDirsRetained.add(getDirDiff(localSnapshotSubDir, remoteSubDirIndex, areSameFile, false));
    }
  }

  // 3. Subdir in remote snapshot but not in local snapshot
  for (DirIndex remoteSnapshotSubDir : remoteSnapshotSubDirs) {
    if (!localSnapshotSubDirNames.contains(remoteSnapshotSubDir.getDirName())) {
      // Log the name of the subdir being removed, not the name of its parent dir.
      LOG.debug("Subdir {} present in remote snapshot but not in local snapshot. "
          + "Marking for removal from remote snapshot. ", remoteSnapshotSubDir.getDirName());
      subDirsRemoved.add(remoteSnapshotSubDir);
    }
  }

  String dirName = isRootDir ? DirIndex.ROOT_DIR_NAME : localSnapshotDir.getName();
  return new DirDiff(dirName, filesToUpload, filesToRetain, filesToRemove, subDirsAdded, subDirsRetained, subDirsRemoved);
}
Aggregations