Use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
Class BlobStoreUtil, method removeTTL.
/**
 * Recursively mark all the blobs associated with the {@link DirIndex} to never expire (remove TTL).
 * Sub-directories are handled by recursive calls; for every file in this directory one TTL-removal
 * request is issued per {@link FileBlob} through {@code FutureUtil.executeAsyncWithRetries}.
 * @param dirIndex the {@link DirIndex} whose contents' TTL needs to be removed
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and
 *                 store name are copied into the per-file request metadata
 * @return A future that completes when all the blobs associated with this dirIndex are marked to
 * never expire.
 */
private CompletableFuture<Void> removeTTL(DirIndex dirIndex, Metadata metadata) {
  String dirName = dirIndex.getDirName();
  if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for root dir.");
  } else {
    LOG.debug("Removing TTL for files and dirs present in DirIndex for dir: {}", dirName);
  }

  List<CompletableFuture<Void>> updateTTLsFuture = new ArrayList<>();

  // Recurse into sub-directories first; each call yields one future covering that whole subtree.
  for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
    updateTTLsFuture.add(removeTTL(subDir, metadata));
  }

  for (FileIndex file : dirIndex.getFilesPresent()) {
    // Request metadata is per file: it carries the file name and size alongside the job/task/store ids.
    Metadata requestMetadata = new Metadata(file.getFileName(), Optional.of(file.getFileMetadata().getSize()),
        metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    List<FileBlob> fileBlobs = file.getBlobs();
    for (FileBlob fileBlob : fileBlobs) {
      // The name is built by plain concatenation, so it must not contain a "{}" placeholder.
      String opName = "removeTTL for fileBlob: " + file.getFileName() + " with blobId: " + fileBlob.getBlobId();
      Supplier<CompletionStage<Void>> ttlRemovalAction =
          () -> blobStoreManager.removeTTL(fileBlob.getBlobId(), requestMetadata).toCompletableFuture();
      CompletableFuture<Void> ttlRemovalFuture =
          FutureUtil.executeAsyncWithRetries(opName, ttlRemovalAction, isCauseNonRetriable(), executor);
      updateTTLsFuture.add(ttlRemovalFuture);
    }
  }

  return CompletableFuture.allOf(updateTTLsFuture.toArray(new CompletableFuture[0]));
}
Use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
Class BlobStoreUtil, method putFile.
/**
 * Upload a File to blob store.
 * The upload is submitted through {@code FutureUtil.executeAsyncWithRetries}; each attempt opens a
 * fresh CRC32-checked input stream over the file and closes it once that attempt finishes, whether
 * it succeeded or failed.
 * @param file File to upload to blob store. Must be non-null and a regular file.
 * @param snapshotMetadata supplies the job name, job id, task name and store name placed in the
 *                         request {@link Metadata} sent with the upload.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 * @throws SamzaException if {@code file} is null or is not a regular file.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
  if (file == null || !file.isFile()) {
    String message = file != null ? "Dir or Symbolic link" : "null";
    throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
  }

  long putFileStartTime = System.nanoTime();
  String opName = "putFile: " + file.getAbsolutePath();

  Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
    LOG.debug("Putting file: {} to blob store.", file.getPath());
    CompletableFuture<FileIndex> fileBlobFuture;
    CheckedInputStream inputStream = null;
    try {
      // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
      inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
      // Effectively-final alias so the stream can be referenced from the lambdas below.
      CheckedInputStream finalInputStream = inputStream;
      FileMetadata fileMetadata = FileMetadata.fromFile(file);
      if (backupMetrics != null) {
        backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
      }

      Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()),
          snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(),
          snapshotMetadata.getStoreName());

      fileBlobFuture = blobStoreManager.put(inputStream, metadata)
          .whenComplete((id, error) -> {
            // The success callback below is skipped when the upload completes exceptionally, so the
            // stream has to be released here or it leaks once per failed attempt.
            if (error != null) {
              try {
                finalInputStream.close();
              } catch (Exception closeError) {
                LOG.error("Error closing input stream for file: {}", file.getName(), closeError);
              }
            }
          })
          .thenApplyAsync(id -> {
            LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
            try {
              finalInputStream.close();
            } catch (Exception e) {
              throw new SamzaException(String.format("Error closing input stream for file: %s",
                  file.getAbsolutePath()), e);
            }

            LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
            // The checksum is read from the stream wrapper after the upload has consumed it.
            return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata,
                finalInputStream.getChecksum().getValue());
          }, executor).toCompletableFuture();
    } catch (Exception e) {
      // Synchronous failure while setting up or submitting the upload: close the stream (best effort)
      // and surface the original error.
      try {
        if (inputStream != null) {
          inputStream.close();
        }
      } catch (Exception err) {
        LOG.error("Error closing input stream for file: {}", file.getName(), err);
      }
      LOG.error("Error putting file: {}", file.getName(), e);
      throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
    }
    return fileBlobFuture;
  };

  return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor)
      .whenComplete((res, ex) -> {
        // Metrics are updated on completion regardless of whether the upload succeeded or failed.
        if (backupMetrics != null) {
          backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
          long fileSize = file.length();
          backupMetrics.uploadRate.inc(fileSize);
          backupMetrics.filesUploaded.getValue().addAndGet(1);
          backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
          backupMetrics.filesRemaining.getValue().addAndGet(-1);
          backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
      });
}
Use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
Class BlobStoreUtil, method restoreDir.
/**
 * Non-blocking restore of a remote snapshot directory into the local store. Every file listed in the
 * {@link DirIndex} is fetched through {@code FutureUtil.executeAsyncWithRetries}, and every
 * sub-directory is restored by a recursive call.
 * @param baseDir local directory to restore into; it is created (including parents) if missing
 * @param dirIndex {@link DirIndex} describing the files and sub-directories to restore
 * @param metadata {@link Metadata} whose job name, job id, task name and store name are copied into
 *                 each per-file request
 * @return A future that completes when all the async downloads complete
 * @throws SamzaException if {@code baseDir} cannot be created
 */
public CompletableFuture<Void> restoreDir(File baseDir, DirIndex dirIndex, Metadata metadata) {
  LOG.debug("Restoring contents of directory: {} from remote snapshot.", baseDir);

  // Make sure the target directory (and any missing parents) exists before scheduling downloads.
  try {
    Files.createDirectories(baseDir.toPath());
  } catch (IOException exception) {
    LOG.error("Error creating directory: {} for restore", baseDir.getAbsolutePath(), exception);
    throw new SamzaException(String.format("Error creating directory: %s for restore", baseDir.getAbsolutePath()),
        exception);
  }

  String basePath = baseDir.getAbsolutePath();
  List<CompletableFuture<Void>> pendingRestores = new ArrayList<>();

  // Files directly under this directory.
  for (FileIndex remoteFile : dirIndex.getFilesPresent()) {
    File fileToRestore = Paths.get(basePath, remoteFile.getFileName()).toFile();
    String localPath = fileToRestore.getAbsolutePath();
    Metadata requestMetadata = new Metadata(localPath, Optional.of(remoteFile.getFileMetadata().getSize()),
        metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
    List<FileBlob> fileBlobs = remoteFile.getBlobs();

    pendingRestores.add(FutureUtil.executeAsyncWithRetries("restoreFile: " + localPath,
        () -> getFile(fileBlobs, fileToRestore, requestMetadata), isCauseNonRetriable(), executor));
  }

  // Sub-directories, each restored recursively beneath basePath.
  for (DirIndex childIndex : dirIndex.getSubDirsPresent()) {
    File childDir = Paths.get(basePath, childIndex.getDirName()).toFile();
    pendingRestores.add(restoreDir(childDir, childIndex, metadata));
  }

  return FutureUtil.allOf(pendingRestores);
}
Use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
Class BlobStoreUtil, method deleteFile.
/**
 * Delete a {@link FileIndex} from the remote store by deleting all {@link FileBlob}s associated with it.
 * One delete request per blob is issued through {@code FutureUtil.executeAsyncWithRetries}.
 * @param fileIndex FileIndex of the file to delete from the remote store.
 * @param metadata {@link Metadata} passed to the blob store with every delete request.
 * @return a future that completes when the FileIndex has been marked for deletion in the remote blob store.
 */
private CompletionStage<Void> deleteFile(FileIndex fileIndex, Metadata metadata) {
  // Typed as CompletableFuture so the toArray(new CompletableFuture[0]) below is type-safe rather
  // than relying on every element happening to be a CompletableFuture at runtime.
  List<CompletableFuture<Void>> deleteFutures = new ArrayList<>();
  List<FileBlob> fileBlobs = fileIndex.getBlobs();
  for (FileBlob fileBlob : fileBlobs) {
    LOG.debug("Deleting file: {} blobId: {} from blob store.", fileIndex.getFileName(), fileBlob.getBlobId());
    String opName = "deleteFile: " + fileIndex.getFileName() + " blobId: " + fileBlob.getBlobId();
    Supplier<CompletionStage<Void>> fileDeletionAction =
        () -> blobStoreManager.delete(fileBlob.getBlobId(), metadata).toCompletableFuture();
    CompletableFuture<Void> fileDeletionFuture =
        FutureUtil.executeAsyncWithRetries(opName, fileDeletionAction, isCauseNonRetriable(), executor);
    deleteFutures.add(fileDeletionFuture);
  }

  return CompletableFuture.allOf(deleteFutures.toArray(new CompletableFuture[0]));
}
Use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
Class BlobStoreTestUtil, method createFileIndex.
/**
 * Builds a {@link FileIndex} for the file at {@code filePath}: reads the whole file to compute its
 * CRC32 checksum, derives its {@link FileMetadata}, and uses {@code node.fileName} as both the file
 * name and the id of the single {@link FileBlob} (offset 0).
 * @param filePath path of the local file to index
 * @param node tree node supplying the file name
 * @return the {@link FileIndex} describing the file
 * @throws RuntimeException wrapping any failure while reading the file or its metadata
 */
private static FileIndex createFileIndex(String filePath, DirTreeNode node) {
  final long crcValue;
  final FileMetadata metadataOfFile;
  try {
    Path localPath = Paths.get(filePath);
    byte[] contents = Files.readAllBytes(localPath);
    Checksum crc = new CRC32();
    crc.update(contents, 0, contents.length);
    crcValue = crc.getValue();
    metadataOfFile = FileMetadata.fromFile(localPath.toFile());
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  FileBlob onlyBlob = new FileBlob(node.fileName, 0);
  return new FileIndex(node.fileName, ImmutableList.of(onlyBlob), metadataOfFile, crcValue);
}
Aggregations