Search in sources :

Example 1 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method removeTTL.

/**
 * Recursively mark all the blobs associated with the {@link DirIndex} to never expire (remove TTL).
 * TTL removal is first requested (recursively) for every sub-directory listed as present, then one
 * request, executed with retries, is issued per {@link FileBlob} of every file listed as present.
 * @param dirIndex the {@link DirIndex} whose contents' TTL needs to be removed
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and store name
 *                 are copied into the per-file request metadata
 * @return A future that completes when all the blobs associated with this dirIndex are marked to
 * never expire.
 */
private CompletableFuture<Void> removeTTL(DirIndex dirIndex, Metadata metadata) {
    String dirName = dirIndex.getDirName();
    if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
        LOG.debug("Removing TTL for files and dirs present in DirIndex for root dir.");
    } else {
        LOG.debug("Removing TTL for files and dirs present in DirIndex for dir: {}", dirName);
    }
    List<CompletableFuture<Void>> updateTTLsFuture = new ArrayList<>();
    for (DirIndex subDir : dirIndex.getSubDirsPresent()) {
        updateTTLsFuture.add(removeTTL(subDir, metadata));
    }
    for (FileIndex file : dirIndex.getFilesPresent()) {
        // Request metadata is per file: the payload path is the file name and the payload size is the file size.
        Metadata requestMetadata = new Metadata(file.getFileName(), Optional.of(file.getFileMetadata().getSize()), metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
        for (FileBlob fileBlob : file.getBlobs()) {
            // The op name is assembled by plain concatenation, so it must not contain a "{}" logging
            // placeholder; one would show up verbatim in front of the blob id.
            String opName = "removeTTL for fileBlob: " + file.getFileName() + " with blobId: " + fileBlob.getBlobId();
            Supplier<CompletionStage<Void>> ttlRemovalAction = () -> blobStoreManager.removeTTL(fileBlob.getBlobId(), requestMetadata).toCompletableFuture();
            CompletableFuture<Void> ttlRemovalFuture = FutureUtil.executeAsyncWithRetries(opName, ttlRemovalAction, isCauseNonRetriable(), executor);
            updateTTLsFuture.add(ttlRemovalFuture);
        }
    }
    return CompletableFuture.allOf(updateTTLsFuture.toArray(new CompletableFuture[0]));
}
Also used : FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage)

Example 2 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method putFile.

/**
 * Upload a File to blob store.
 * The upload is executed with retries; every attempt opens a fresh checksummed input stream over the file
 * and closes it once the attempt finishes, whether the attempt succeeded or failed.
 * @param file File to upload to blob store. Must be non-null and a regular file.
 * @param snapshotMetadata source of the job name, job id, task name and store name attached to the put request.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 * @throws SamzaException if {@code file} is null or {@link File#isFile()} returns false for it.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
    if (file == null || !file.isFile()) {
        String message = file != null ? "Dir or Symbolic link" : "null";
        throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
    }
    long putFileStartTime = System.nanoTime();
    String opName = "putFile: " + file.getAbsolutePath();
    Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
        LOG.debug("Putting file: {} to blob store.", file.getPath());
        CompletableFuture<FileIndex> fileBlobFuture;
        CheckedInputStream inputStream = null;
        try {
            // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
            inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
            // effectively-final alias so the async callbacks below can capture the stream
            CheckedInputStream finalInputStream = inputStream;
            FileMetadata fileMetadata = FileMetadata.fromFile(file);
            if (backupMetrics != null) {
                backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
            }
            Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()), snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
            fileBlobFuture = blobStoreManager.put(inputStream, metadata).thenApplyAsync(id -> {
                LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
                try {
                    finalInputStream.close();
                } catch (Exception e) {
                    throw new SamzaException(String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
                }
                LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
                return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata, finalInputStream.getChecksum().getValue());
            }, executor).whenComplete((fileIndex, error) -> {
                // The success path above already closed the stream. A failed put never reaches that
                // callback, so close here to avoid leaking a file handle on every failed attempt.
                // whenComplete passes the original result/exception through unchanged, and closing an
                // already-closed stream has no effect.
                if (error != null) {
                    try {
                        finalInputStream.close();
                    } catch (Exception closeError) {
                        LOG.error("Error closing input stream for file: {}", file.getName(), closeError);
                    }
                }
            }).toCompletableFuture();
        } catch (Exception e) {
            // Failure while setting up or issuing the put synchronously: release the stream, then rethrow.
            try {
                if (inputStream != null) {
                    inputStream.close();
                }
            } catch (Exception err) {
                LOG.error("Error closing input stream for file: {}", file.getName(), err);
            }
            LOG.error("Error putting file: {}", file.getName(), e);
            throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
        }
        return fileBlobFuture;
    };
    return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor).whenComplete((res, ex) -> {
        // NOTE(review): this callback does not inspect ex, so the upload counters below are also updated
        // when the upload ultimately failed — confirm that is intended.
        if (backupMetrics != null) {
            backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
            long fileSize = file.length();
            backupMetrics.uploadRate.inc(fileSize);
            backupMetrics.filesUploaded.getValue().addAndGet(1);
            backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
            backupMetrics.filesRemaining.getValue().addAndGet(-1);
            backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
    });
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) 
Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CRC32(java.util.zip.CRC32) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) SamzaException(org.apache.samza.SamzaException) CheckedInputStream(java.util.zip.CheckedInputStream) FileInputStream(java.io.FileInputStream) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) CompletionStage(java.util.concurrent.CompletionStage) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 3 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method restoreDir.

/**
 * Restores the files and sub-directories described by a {@link DirIndex} into a local directory without
 * waiting for the downloads to finish.
 * The target directory (including missing parents) is created synchronously. Each file listed as present is
 * then downloaded with retries, and each sub-directory listed as present is restored recursively.
 * @param baseDir local directory to restore the contents into
 * @param dirIndex index listing the files and sub-directories to download
 * @param metadata {@link Metadata} related to the request; its job name, job id, task name and store name
 *                 are copied into the per-file request metadata
 * @return A future that completes when all the async downloads complete
 * @throws SamzaException if the target directory cannot be created
 */
public CompletableFuture<Void> restoreDir(File baseDir, DirIndex dirIndex, Metadata metadata) {
    LOG.debug("Restoring contents of directory: {} from remote snapshot.", baseDir);
    try {
        // make sure the target directory and all of its parents exist before scheduling downloads
        Files.createDirectories(baseDir.toPath());
    } catch (IOException ioe) {
        LOG.error("Error creating directory: {} for restore", baseDir.getAbsolutePath(), ioe);
        String errorMessage = String.format("Error creating directory: %s for restore", baseDir.getAbsolutePath());
        throw new SamzaException(errorMessage, ioe);
    }

    String basePath = baseDir.getAbsolutePath();
    List<CompletableFuture<Void>> pendingRestores = new ArrayList<>();

    // schedule a download (with retries) for every file in this directory
    for (FileIndex presentFile : dirIndex.getFilesPresent()) {
        File localTarget = Paths.get(basePath, presentFile.getFileName()).toFile();
        String localTargetPath = localTarget.getAbsolutePath();
        Metadata fileRequestMetadata = new Metadata(
            localTargetPath,
            Optional.of(presentFile.getFileMetadata().getSize()),
            metadata.getJobName(),
            metadata.getJobId(),
            metadata.getTaskName(),
            metadata.getStoreName());
        List<FileBlob> blobsToFetch = presentFile.getBlobs();
        pendingRestores.add(FutureUtil.executeAsyncWithRetries(
            "restoreFile: " + localTargetPath,
            () -> getFile(blobsToFetch, localTarget, fileRequestMetadata),
            isCauseNonRetriable(),
            executor));
    }

    // then recurse into every sub-directory, passing the caller's metadata through unchanged
    for (DirIndex presentSubDir : dirIndex.getSubDirsPresent()) {
        File localSubDir = Paths.get(basePath, presentSubDir.getDirName()).toFile();
        pendingRestores.add(restoreDir(localSubDir, presentSubDir, metadata));
    }
    return FutureUtil.allOf(pendingRestores);
}
Also used : FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File)

Example 4 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method putSnapshotIndex.

/**
 * Serializes the given {@link SnapshotIndex} and PUTs it to the blob store, with retries.
 * The index is serialized once up front; each attempt wraps the same bytes in a new stream.
 * @param snapshotIndex SnapshotIndex to put.
 * @return a Future containing the blob ID of the {@link SnapshotIndex}.
 */
public CompletableFuture<String> putSnapshotIndex(SnapshotIndex snapshotIndex) {
    byte[] serializedIndex = snapshotIndexSerde.toBytes(snapshotIndex);
    String opName = "putSnapshotIndex for checkpointId: " + snapshotIndex.getSnapshotMetadata().getCheckpointId();
    Supplier<CompletionStage<String>> indexUploadAction = () -> {
        SnapshotMetadata indexMetadata = snapshotIndex.getSnapshotMetadata();
        Metadata requestMetadata = new Metadata(
            Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH,
            Optional.of((long) serializedIndex.length),
            indexMetadata.getJobName(),
            indexMetadata.getJobId(),
            indexMetadata.getTaskName(),
            indexMetadata.getStoreName());
        // a ByteArrayInputStream holds no external resource, so it is not closed explicitly
        InputStream payload = new ByteArrayInputStream(serializedIndex);
        return blobStoreManager.put(payload, requestMetadata).toCompletableFuture();
    };
    return FutureUtil.executeAsyncWithRetries(opName, indexUploadAction, isCauseNonRetriable(), executor);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckedInputStream(java.util.zip.CheckedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata)

Example 5 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method cleanUpDir.

/**
 * Recursively issue delete requests for files and dirs marked to be removed in a previously created remote snapshot.
 * Note: We do not immediately delete files/dirs to be removed when uploading a snapshot to the remote
 * store. We just track them for deletion during the upload, and delete them AFTER the snapshot is uploaded, and the
 * blob IDs have been persisted as part of the checkpoint. This is to prevent data loss if a failure happens
 * part way through the commit. We issue the deletes for these files/subdirs in the cleanUp() phase of the
 * commit lifecycle.
 * @param dirIndex the dir in the remote snapshot to clean up.
 * @param metadata Metadata related to the request; its job name, job id, task name and store name are copied
 *                 into the per-file request metadata
 * @return a future that completes when all the files and subdirs marked for deletion are cleaned up.
 */
public CompletionStage<Void> cleanUpDir(DirIndex dirIndex, Metadata metadata) {
    String dirName = dirIndex.getDirName();
    if (DirIndex.ROOT_DIR_NAME.equals(dirName)) {
        LOG.debug("Cleaning up root dir in blob store.");
    } else {
        LOG.debug("Cleaning up dir: {} in blob store.", dirName);
    }
    // Every stage is converted with toCompletableFuture() before being collected, so the array handed to
    // CompletableFuture.allOf is well-typed no matter which CompletionStage implementation a callee returns
    // (copying arbitrary CompletionStages into a CompletableFuture[] can fail with ArrayStoreException).
    List<CompletableFuture<Void>> cleanUpFutures = new ArrayList<>();
    for (FileIndex file : dirIndex.getFilesRemoved()) {
        Metadata requestMetadata = new Metadata(file.getFileName(), Optional.of(file.getFileMetadata().getSize()), metadata.getJobName(), metadata.getJobId(), metadata.getTaskName(), metadata.getStoreName());
        cleanUpFutures.add(deleteFile(file, requestMetadata).toCompletableFuture());
    }
    for (DirIndex subDirToDelete : dirIndex.getSubDirsRemoved()) {
        // recursively delete ALL contents of the subDirToDelete.
        cleanUpFutures.add(deleteDir(subDirToDelete, metadata).toCompletableFuture());
    }
    for (DirIndex subDirToRetain : dirIndex.getSubDirsPresent()) {
        // recursively clean up the subDir, only deleting files and subdirs marked for deletion.
        cleanUpFutures.add(cleanUpDir(subDirToRetain, metadata).toCompletableFuture());
    }
    return CompletableFuture.allOf(cleanUpFutures.toArray(new CompletableFuture[0]));
}
Also used : FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) CompletableFuture(java.util.concurrent.CompletableFuture) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage)

Aggregations

Metadata (org.apache.samza.storage.blobstore.Metadata)20 FileMetadata (org.apache.samza.storage.blobstore.index.FileMetadata)20 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)20 ArrayList (java.util.ArrayList)19 CompletableFuture (java.util.concurrent.CompletableFuture)19 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)19 FileIndex (org.apache.samza.storage.blobstore.index.FileIndex)19 CompletionStage (java.util.concurrent.CompletionStage)18 FileBlob (org.apache.samza.storage.blobstore.index.FileBlob)17 File (java.io.File)16 IOException (java.io.IOException)16 InputStream (java.io.InputStream)16 SamzaException (org.apache.samza.SamzaException)16 ImmutableMap (com.google.common.collect.ImmutableMap)15 FileOutputStream (java.io.FileOutputStream)15 Files (java.nio.file.Files)15 Paths (java.nio.file.Paths)15 Collections (java.util.Collections)15 HashMap (java.util.HashMap)15 List (java.util.List)15