Example use of org.apache.samza.storage.blobstore.index.FileIndex in the Apache Samza project:
the BlobStoreUtil class, method removeTTL.
/**
 * Recursively mark all the blobs associated with the {@link DirIndex} to never expire (remove TTL).
 * @param dirIndex the {@link DirIndex} whose contents' TTL needs to be removed
 * @param metadata {@link Metadata} related to the request
 * @return A future that completes when all the blobs associated with this dirIndex are marked to
 * never expire.
 */
private CompletableFuture<Void> removeTTL(DirIndex dirIndex, Metadata metadata) {
  String dirName = dirIndex.getDirName();
  if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for root dir.");
  } else {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for dir: {}", dirName);
  }
  List<CompletableFuture<Void>> updateTTLsFuture = new ArrayList<>();
  // Recurse into every present sub-directory so their blobs are covered as well.
  for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
    updateTTLsFuture.add(removeTTL(subDir, metadata));
  }
  for (FileIndex file : dirIndex.getFilesPresent()) {
    Metadata requestMetadata =
        new Metadata(file.getFileName(), Optional.of(file.getFileMetadata().getSize()),
            metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    for (FileBlob fileBlob : file.getBlobs()) {
      // Fix: the operation name previously contained a stray "{}" literal concatenated into the
      // string ("... with blobId: {}<id>"); it is plain concatenation, not an SLF4J template.
      // Also renamed "opname" -> "opName" for consistency with the rest of the class.
      String opName = "removeTTL for fileBlob: " + file.getFileName() + " with blobId: " + fileBlob.getBlobId();
      Supplier<CompletionStage<Void>> ttlRemovalAction =
          () -> blobStoreManager.removeTTL(fileBlob.getBlobId(), requestMetadata).toCompletableFuture();
      updateTTLsFuture.add(
          FutureUtil.executeAsyncWithRetries(opName, ttlRemovalAction, isCauseNonRetriable(), executor));
    }
  }
  return CompletableFuture.allOf(updateTTLsFuture.toArray(new CompletableFuture[0]));
}
Example use of org.apache.samza.storage.blobstore.index.FileIndex in the Apache Samza project:
the BlobStoreUtil class, method putFile.
/**
 * Upload a File to blob store.
 * The upload is performed asynchronously with retries; each retry re-invokes the supplier below,
 * which opens a fresh input stream for the file. Upload metrics (if enabled) are updated when the
 * returned future completes, regardless of success or failure.
 * @param file File to upload to blob store. Must be a non-null regular file (not a dir or symlink).
 * @return A future containing the {@link FileIndex} for the uploaded file.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
  // Reject nulls and anything that is not a regular file (directories, symlinks, missing files).
  if (file == null || !file.isFile()) {
    String message = file != null ? "Dir or Symbolic link" : "null";
    throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
  }
  long putFileStartTime = System.nanoTime();
  String opName = "putFile: " + file.getAbsolutePath();
  // Supplier so that each retry attempt re-opens the file and recomputes the checksum from scratch.
  Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
    LOG.debug("Putting file: {} to blob store.", file.getPath());
    CompletableFuture<FileIndex> fileBlobFuture;
    CheckedInputStream inputStream = null;
    try {
      // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
      // CheckedInputStream accumulates a CRC32 of the bytes as the blob store manager reads them.
      inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
      // Effectively-final alias so the stream can be captured by the async continuation below.
      CheckedInputStream finalInputStream = inputStream;
      FileMetadata fileMetadata = FileMetadata.fromFile(file);
      if (backupMetrics != null) {
        backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
      }
      Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()), snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
      // The stream is intentionally NOT closed here: it must stay open until the async put
      // completes; the continuation below is responsible for closing it.
      fileBlobFuture = blobStoreManager.put(inputStream, metadata).thenApplyAsync(id -> {
        LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
        try {
          finalInputStream.close();
        } catch (Exception e) {
          throw new SamzaException(String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
        }
        LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
        // Offset 0: the whole file is uploaded as a single blob. Checksum read AFTER the full
        // stream has been consumed by put(), so it covers the entire file contents.
        return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata, finalInputStream.getChecksum().getValue());
      }, executor).toCompletableFuture();
    } catch (Exception e) {
      // Synchronous failure before/while initiating the put: close the stream here since the
      // async continuation (normal close path) will never run.
      try {
        if (inputStream != null) {
          inputStream.close();
        }
      } catch (Exception err) {
        LOG.error("Error closing input stream for file: {}", file.getName(), err);
      }
      LOG.error("Error putting file: {}", file.getName(), e);
      throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
    }
    return fileBlobFuture;
  };
  return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor).whenComplete((res, ex) -> {
    // Metrics updated on completion (success or failure); remaining counters are decremented
    // since this file is no longer pending.
    if (backupMetrics != null) {
      backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
      long fileSize = file.length();
      backupMetrics.uploadRate.inc(fileSize);
      backupMetrics.filesUploaded.getValue().addAndGet(1);
      backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
      backupMetrics.filesRemaining.getValue().addAndGet(-1);
      backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
    }
  });
}
Example use of org.apache.samza.storage.blobstore.index.FileIndex in the Apache Samza project:
the BlobStoreUtil class, method restoreDir.
/**
 * Non-blocking restore of a {@link SnapshotIndex} to local store by downloading all the files and sub-dirs associated
 * with this remote snapshot.
 * @param baseDir local directory to restore into (created, along with parents, if absent)
 * @param dirIndex remote snapshot directory index to restore from
 * @param metadata {@link Metadata} template for the download requests
 * @return A future that completes when all the async downloads completes
 */
public CompletableFuture<Void> restoreDir(File baseDir, DirIndex dirIndex, Metadata metadata) {
  LOG.debug("Restoring contents of directory: {} from remote snapshot.", baseDir);
  // Ensure the target directory (and any missing parents) exists before scheduling downloads.
  try {
    Files.createDirectories(baseDir.toPath());
  } catch (IOException exception) {
    LOG.error("Error creating directory: {} for restore", baseDir.getAbsolutePath(), exception);
    throw new SamzaException(String.format("Error creating directory: %s for restore", baseDir.getAbsolutePath()), exception);
  }
  List<CompletableFuture<Void>> pendingDownloads = new ArrayList<>();
  // Schedule one retried async download per file in this directory.
  for (FileIndex indexedFile : dirIndex.getFilesPresent()) {
    File localFile = Paths.get(baseDir.getAbsolutePath(), indexedFile.getFileName()).toFile();
    Metadata requestMetadata =
        new Metadata(localFile.getAbsolutePath(), Optional.of(indexedFile.getFileMetadata().getSize()),
            metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    List<FileBlob> blobs = indexedFile.getBlobs();
    String opName = "restoreFile: " + localFile.getAbsolutePath();
    pendingDownloads.add(
        FutureUtil.executeAsyncWithRetries(opName, () -> getFile(blobs, localFile, requestMetadata),
            isCauseNonRetriable(), executor));
  }
  // Recurse into each sub-directory of the snapshot.
  for (DirIndex childDir : dirIndex.getSubDirsPresent()) {
    File childTarget = Paths.get(baseDir.getAbsolutePath(), childDir.getDirName()).toFile();
    pendingDownloads.add(restoreDir(childTarget, childDir, metadata));
  }
  return FutureUtil.allOf(pendingDownloads);
}
Example use of org.apache.samza.storage.blobstore.index.FileIndex in the Apache Samza project:
the BlobStoreUtil class, method cleanUpDir.
/**
 * Recursively issue delete requests for files and dirs marked to be removed in a previously created remote snapshot.
 * Note: We do not immediately delete files/dirs to be removed when uploading a snapshot to the remote
 * store. We just track them for deletion during the upload, and delete them AFTER the snapshot is uploaded, and the
 * blob IDs have been persisted as part of the checkpoint. This is to prevent data loss if a failure happens
 * part way through the commit. We issue delete these file/subdirs in cleanUp() phase of commit lifecycle.
 * @param dirIndex the dir in the remote snapshot to clean up.
 * @param metadata Metadata related to the request
 * @return a future that completes when all the files and subdirs marked for deletion are cleaned up.
 */
public CompletionStage<Void> cleanUpDir(DirIndex dirIndex, Metadata metadata) {
  String currentDirName = dirIndex.getDirName();
  if (DirIndex.ROOT_DIR_NAME.equals(currentDirName)) {
    LOG.debug("Cleaning up root dir in blob store.");
  } else {
    LOG.debug("Cleaning up dir: {} in blob store.", dirIndex.getDirName());
  }
  List<CompletionStage<Void>> pendingCleanUps = new ArrayList<>();
  // Delete only the files explicitly marked as removed in this directory.
  for (FileIndex removedFile : dirIndex.getFilesRemoved()) {
    Metadata requestMetadata =
        new Metadata(removedFile.getFileName(), Optional.of(removedFile.getFileMetadata().getSize()),
            metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    pendingCleanUps.add(deleteFile(removedFile, requestMetadata));
  }
  // Sub-dirs marked as removed: recursively delete ALL of their contents.
  for (DirIndex removedSubDir : dirIndex.getSubDirsRemoved()) {
    pendingCleanUps.add(deleteDir(removedSubDir, metadata));
  }
  // Sub-dirs still present: recurse, deleting only their removed files/subdirs.
  for (DirIndex retainedSubDir : dirIndex.getSubDirsPresent()) {
    pendingCleanUps.add(cleanUpDir(retainedSubDir, metadata));
  }
  return CompletableFuture.allOf(pendingCleanUps.toArray(new CompletableFuture[0]));
}
Example use of org.apache.samza.storage.blobstore.index.FileIndex in the Apache Samza project:
the BlobStoreUtil class, method deleteDir.
/**
 * WARNING: Recursively delete **ALL** the associated files and subdirs within the provided {@link DirIndex}.
 * @param dirIndex {@link DirIndex} whose entire contents are to be deleted.
 * @param metadata {@link Metadata} related to the request
 * @return a future that completes when ALL the files and subdirs associated with the dirIndex have been
 * marked for deleted in the remote blob store.
 */
public CompletionStage<Void> deleteDir(DirIndex dirIndex, Metadata metadata) {
  LOG.debug("Completely deleting dir: {} in blob store", dirIndex.getDirName());
  List<CompletionStage<Void>> pendingDeletes = new ArrayList<>();
  // Issue a delete for every file present in this directory.
  for (FileIndex presentFile : dirIndex.getFilesPresent()) {
    Metadata requestMetadata =
        new Metadata(presentFile.getFileName(), Optional.of(presentFile.getFileMetadata().getSize()),
            metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    pendingDeletes.add(deleteFile(presentFile, requestMetadata));
  }
  // Recursively delete every present sub-directory in full.
  for (DirIndex presentSubDir : dirIndex.getSubDirsPresent()) {
    pendingDeletes.add(deleteDir(presentSubDir, metadata));
  }
  return CompletableFuture.allOf(pendingDeletes.toArray(new CompletableFuture[0]));
}
Aggregations