Search in sources :

Example 1 with FileBlob

use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.

the class BlobStoreUtil method removeTTL.

/**
 * Recursively mark all the blobs associated with the {@link DirIndex} to never expire (remove TTL).
 * Sub-directories are processed first (recursively), followed by every {@link FileBlob} of every
 * file present in this directory. Each blob's TTL removal is submitted through
 * {@code FutureUtil.executeAsyncWithRetries}.
 * @param dirIndex the {@link DirIndex} whose contents' TTL needs to be removed
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and
 *                 store name are copied into the per-file request metadata
 * @return A future that completes when all the blobs associated with this dirIndex are marked to
 * never expire.
 */
private CompletableFuture<Void> removeTTL(DirIndex dirIndex, Metadata metadata) {
    String dirName = dirIndex.getDirName();
    if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
        LOG.debug("Removing TTL for files and dirs present in DirIndex for root dir.");
    } else {
        LOG.debug("Removing TTL for files and dirs present in DirIndex for dir: {}", dirName);
    }
    List<CompletableFuture<Void>> updateTTLsFuture = new ArrayList<>();
    for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
        updateTTLsFuture.add(removeTTL(subDir, metadata));
    }
    for (FileIndex file : dirIndex.getFilesPresent()) {
        // Per-file request metadata: payload path/size come from the file, job/task/store from the caller.
        Metadata requestMetadata = new Metadata(file.getFileName(), Optional.of(file.getFileMetadata().getSize()), metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
        List<FileBlob> fileBlobs = file.getBlobs();
        for (FileBlob fileBlob : fileBlobs) {
            // The operation name is built by plain concatenation, so it must not contain a "{}" placeholder.
            String opname = "removeTTL for fileBlob: " + file.getFileName() + " with blobId: " + fileBlob.getBlobId();
            Supplier<CompletionStage<Void>> ttlRemovalAction = () -> blobStoreManager.removeTTL(fileBlob.getBlobId(), requestMetadata).toCompletableFuture();
            CompletableFuture<Void> ttlRemovalFuture = FutureUtil.executeAsyncWithRetries(opname, ttlRemovalAction, isCauseNonRetriable(), executor);
            updateTTLsFuture.add(ttlRemovalFuture);
        }
    }
    return CompletableFuture.allOf(updateTTLsFuture.toArray(new CompletableFuture[0]));
}
Also used : FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage)

Example 2 with FileBlob

use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.

the class BlobStoreUtil method putFile.

/**
 * Upload a File to blob store.
 * The file is streamed through a CRC32 {@link CheckedInputStream}; the resulting checksum is
 * recorded in the returned {@link FileIndex}. The upload is submitted through
 * {@code FutureUtil.executeAsyncWithRetries}, and each attempt opens (and closes) its own stream.
 * @param file File to upload to blob store. Must be non-null and satisfy {@link File#isFile()}.
 * @param snapshotMetadata supplies the job name, job id, task name and store name used to build the
 *                         request {@link Metadata} for the upload.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 * @throws SamzaException if {@code file} is null or is not a regular file.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
    if (file == null || !file.isFile()) {
        String message = file != null ? "Dir or Symbolic link" : "null";
        throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
    }
    long putFileStartTime = System.nanoTime();
    String opName = "putFile: " + file.getAbsolutePath();
    Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
        LOG.debug("Putting file: {} to blob store.", file.getPath());
        CompletableFuture<FileIndex> fileBlobFuture;
        CheckedInputStream inputStream = null;
        try {
            // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
            inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
            // Effectively-final alias so the stream can be captured by the completion callback below.
            CheckedInputStream finalInputStream = inputStream;
            FileMetadata fileMetadata = FileMetadata.fromFile(file);
            if (backupMetrics != null) {
                backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
            }
            Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()), snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
            // handleAsync (rather than thenApplyAsync) so the stream is closed on BOTH outcomes of the
            // put; otherwise an asynchronously failed upload would leak the open file handle.
            fileBlobFuture = blobStoreManager.put(inputStream, metadata).handleAsync((id, putError) -> {
                if (putError != null) {
                    try {
                        finalInputStream.close();
                    } catch (Exception closeError) {
                        // Log only: the upload failure is the error the caller needs to see.
                        LOG.error("Error closing input stream for file: {}", file.getName(), closeError);
                    }
                    // Propagate the original failure the same way a skipped thenApply stage would:
                    // as-is if already a CompletionException, wrapped in one otherwise.
                    throw putError instanceof CompletionException ? (CompletionException) putError : new CompletionException(putError);
                }
                LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
                try {
                    finalInputStream.close();
                } catch (Exception e) {
                    throw new SamzaException(String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
                }
                LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
                return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata, finalInputStream.getChecksum().getValue());
            }, executor).toCompletableFuture();
        } catch (Exception e) {
            // Synchronous failure (e.g. opening the file or submitting the put): release the stream here.
            try {
                if (inputStream != null) {
                    inputStream.close();
                }
            } catch (Exception err) {
                LOG.error("Error closing input stream for file: {}", file.getName(), err);
            }
            LOG.error("Error putting file: {}", file.getName(), e);
            throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
        }
        return fileBlobFuture;
    };
    return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor).whenComplete((res, ex) -> {
        // NOTE(review): these metrics are updated whether or not the upload succeeded (ex is not
        // inspected) — confirm that counting failed uploads is intended.
        if (backupMetrics != null) {
            backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
            long fileSize = file.length();
            backupMetrics.uploadRate.inc(fileSize);
            backupMetrics.filesUploaded.getValue().addAndGet(1);
            backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
            backupMetrics.filesRemaining.getValue().addAndGet(-1);
            backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
    });
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) 
Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CRC32(java.util.zip.CRC32) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) SamzaException(org.apache.samza.SamzaException) CheckedInputStream(java.util.zip.CheckedInputStream) FileInputStream(java.io.FileInputStream) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) CompletionStage(java.util.concurrent.CompletionStage) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with FileBlob

use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.

the class BlobStoreUtil method restoreDir.

/**
 * Non-blocking restore of the contents described by a {@link DirIndex} into a local directory.
 * The target directory (and any missing parents) is created first; a download is then submitted
 * for every file in the index, after which each sub-directory is restored recursively.
 * @param baseDir local directory to restore into
 * @param dirIndex index of the remote directory whose files and sub-dirs are to be restored
 * @param metadata request {@link Metadata}; its job name, job id, task name and store name are
 *                 copied into the per-file request metadata
 * @return A future that completes when all the async downloads complete
 * @throws SamzaException if the target directory cannot be created
 */
public CompletableFuture<Void> restoreDir(File baseDir, DirIndex dirIndex, Metadata metadata) {
    LOG.debug("Restoring contents of directory: {} from remote snapshot.", baseDir);
    List<CompletableFuture<Void>> pendingRestores = new ArrayList<>();

    // Make sure the target directory, including any missing parents, exists before downloading.
    try {
        Files.createDirectories(baseDir.toPath());
    } catch (IOException exception) {
        LOG.error("Error creating directory: {} for restore", baseDir.getAbsolutePath(), exception);
        String failure = String.format("Error creating directory: %s for restore", baseDir.getAbsolutePath());
        throw new SamzaException(failure, exception);
    }

    // Files directly inside this directory: one retried download per file.
    for (FileIndex remoteFile : dirIndex.getFilesPresent()) {
        File localFile = Paths.get(baseDir.getAbsolutePath(), remoteFile.getFileName()).toFile();
        String localPath = localFile.getAbsolutePath();
        Metadata fileRequestMetadata = new Metadata(
            localPath,
            Optional.of(remoteFile.getFileMetadata().getSize()),
            metadata.getJobName(),
            metadata.getJobId(),
            metadata.getTaskName(),
            metadata.getStoreName());
        List<FileBlob> blobs = remoteFile.getBlobs();
        pendingRestores.add(
            FutureUtil.executeAsyncWithRetries(
                "restoreFile: " + localFile.getAbsolutePath(),
                () -> getFile(blobs, localFile, fileRequestMetadata),
                isCauseNonRetriable(),
                executor));
    }

    // Sub-directories: recurse with the caller's original metadata.
    for (DirIndex childIndex : dirIndex.getSubDirsPresent()) {
        File childDir = Paths.get(baseDir.getAbsolutePath(), childIndex.getDirName()).toFile();
        pendingRestores.add(restoreDir(childDir, childIndex, metadata));
    }

    return FutureUtil.allOf(pendingRestores);
}
Also used : FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File)

Example 4 with FileBlob

use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.

the class BlobStoreUtil method deleteFile.

/**
 * Delete a {@link FileIndex} from the remote store by deleting all {@link FileBlob}s associated with it.
 * One delete request per blob is submitted through {@code FutureUtil.executeAsyncWithRetries}.
 * @param fileIndex FileIndex of the file to delete from the remote store.
 * @param metadata request {@link Metadata} passed unchanged to every blob delete call.
 * @return a future that completes when the FileIndex has been marked for deletion in the remote blob store.
 */
private CompletionStage<Void> deleteFile(FileIndex fileIndex, Metadata metadata) {
    String fileName = fileIndex.getFileName();
    CompletableFuture<?>[] pendingDeletes = fileIndex.getBlobs().stream()
        .map(blob -> {
            String blobId = blob.getBlobId();
            LOG.debug("Deleting file: {} blobId: {} from blob store.", fileName, blobId);
            Supplier<CompletionStage<Void>> deleteBlob =
                () -> blobStoreManager.delete(blobId, metadata).toCompletableFuture();
            return FutureUtil.executeAsyncWithRetries(
                "deleteFile: " + fileName + " blobId: " + blobId,
                deleteBlob,
                isCauseNonRetriable(),
                executor);
        })
        .toArray(CompletableFuture[]::new);
    // Completes once every per-blob delete has completed.
    return CompletableFuture.allOf(pendingDeletes);
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) ArrayList(java.util.ArrayList) CompletionStage(java.util.concurrent.CompletionStage)

Example 5 with FileBlob

use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.

the class BlobStoreTestUtil method createFileIndex.

/**
 * Builds a {@link FileIndex} for the local file at {@code filePath}.
 * The CRC32 checksum is computed over the full file contents and the {@link FileMetadata} is read
 * from the file itself. The index holds a single {@link FileBlob} whose blob id is the node's file
 * name, created with 0 as its second constructor argument.
 * @param filePath path of the local file to index
 * @param node tree node whose {@code fileName} is used as both the index file name and the blob id
 * @return the FileIndex describing the file
 * @throws RuntimeException wrapping any exception raised while reading the file or its metadata
 */
private static FileIndex createFileIndex(String filePath, DirTreeNode node) {
    final long crcValue;
    final FileMetadata metadataOfFile;
    try {
        Path localPath = Paths.get(filePath);
        Checksum crc = new CRC32();
        byte[] contents = Files.readAllBytes(localPath);
        crc.update(contents, 0, contents.length);
        crcValue = crc.getValue();
        metadataOfFile = FileMetadata.fromFile(localPath.toFile());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    FileBlob onlyBlob = new FileBlob(node.fileName, 0);
    return new FileIndex(node.fileName, ImmutableList.of(onlyBlob), metadataOfFile, crcValue);
}
Also used : Path(java.nio.file.Path) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CRC32(java.util.zip.CRC32) Checksum(java.util.zip.Checksum) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) IOException(java.io.IOException)

Aggregations

FileBlob (org.apache.samza.storage.blobstore.index.FileBlob)9 ArrayList (java.util.ArrayList)8 CompletableFuture (java.util.concurrent.CompletableFuture)8 FileIndex (org.apache.samza.storage.blobstore.index.FileIndex)8 FileMetadata (org.apache.samza.storage.blobstore.index.FileMetadata)8 IOException (java.io.IOException)7 CompletionStage (java.util.concurrent.CompletionStage)7 Metadata (org.apache.samza.storage.blobstore.Metadata)7 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)7 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)7 File (java.io.File)6 CRC32 (java.util.zip.CRC32)6 SamzaException (org.apache.samza.SamzaException)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 FileOutputStream (java.io.FileOutputStream)5 InputStream (java.io.InputStream)5 Files (java.nio.file.Files)5 Paths (java.nio.file.Paths)5 Collections (java.util.Collections)5 HashMap (java.util.HashMap)5