Search in sources :

Example 1 with SnapshotMetadata

use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.

the class BlobStoreUtil method putFile.

/**
 * Upload a File to blob store.
 * @param file File to upload to blob store.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
    if (file == null || !file.isFile()) {
        String message = file != null ? "Dir or Symbolic link" : "null";
        throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
    }
    long putFileStartTime = System.nanoTime();
    String opName = "putFile: " + file.getAbsolutePath();
    Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
        LOG.debug("Putting file: {} to blob store.", file.getPath());
        CompletableFuture<FileIndex> fileBlobFuture;
        CheckedInputStream inputStream = null;
        try {
            // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
            inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
            CheckedInputStream finalInputStream = inputStream;
            FileMetadata fileMetadata = FileMetadata.fromFile(file);
            if (backupMetrics != null) {
                backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
            }
            Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()), snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
            fileBlobFuture = blobStoreManager.put(inputStream, metadata).thenApplyAsync(id -> {
                LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
                try {
                    finalInputStream.close();
                } catch (Exception e) {
                    throw new SamzaException(String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
                }
                LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
                return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata, finalInputStream.getChecksum().getValue());
            }, executor).toCompletableFuture();
        } catch (Exception e) {
            try {
                if (inputStream != null) {
                    inputStream.close();
                }
            } catch (Exception err) {
                LOG.error("Error closing input stream for file: {}", file.getName(), err);
            }
            LOG.error("Error putting file: {}", file.getName(), e);
            throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
        }
        return fileBlobFuture;
    };
    return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor).whenComplete((res, ex) -> {
        if (backupMetrics != null) {
            backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
            long fileSize = file.length();
            backupMetrics.uploadRate.inc(fileSize);
            backupMetrics.filesUploaded.getValue().addAndGet(1);
            backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
            backupMetrics.filesRemaining.getValue().addAndGet(-1);
            backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
    });
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CRC32(java.util.zip.CRC32) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) SamzaException(org.apache.samza.SamzaException) CheckedInputStream(java.util.zip.CheckedInputStream) FileInputStream(java.io.FileInputStream) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) CompletableFuture(java.util.concurrent.CompletableFuture) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) CompletionStage(java.util.concurrent.CompletionStage) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with SnapshotMetadata

use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.

the class BlobStoreUtil method putSnapshotIndex.

/**
 * PUTs the {@link SnapshotIndex} to the blob store.
 * @param snapshotIndex SnapshotIndex to put.
 * @return a Future containing the blob ID of the {@link SnapshotIndex}.
 */
public CompletableFuture<String> putSnapshotIndex(SnapshotIndex snapshotIndex) {
    byte[] bytes = snapshotIndexSerde.toBytes(snapshotIndex);
    String opName = "putSnapshotIndex for checkpointId: " + snapshotIndex.getSnapshotMetadata().getCheckpointId();
    return FutureUtil.executeAsyncWithRetries(opName, () -> {
        // no need to close ByteArrayInputStream
        InputStream inputStream = new ByteArrayInputStream(bytes);
        SnapshotMetadata snapshotMetadata = snapshotIndex.getSnapshotMetadata();
        Metadata metadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.of((long) bytes.length), snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
        return blobStoreManager.put(inputStream, metadata).toCompletableFuture();
    }, isCauseNonRetriable(), executor);
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckedInputStream(java.util.zip.CheckedInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata)

Example 3 with SnapshotMetadata

use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.

the class BlobStoreUtil method removeTTL.

/**
 * Marks all the blobs associated with an {@link SnapshotIndex} to never expire.
 * @param snapshotIndex {@link SnapshotIndex} of the remote snapshot
 * @param metadata {@link Metadata} related to the request
 * @return A future that completes when all the files and subdirs associated with this remote snapshot are marked to
 * never expire.
 */
public CompletionStage<Void> removeTTL(String indexBlobId, SnapshotIndex snapshotIndex, Metadata metadata) {
    SnapshotMetadata snapshotMetadata = snapshotIndex.getSnapshotMetadata();
    LOG.debug("Marking contents of SnapshotIndex: {} to never expire", snapshotMetadata.toString());
    String opName = "removeTTL for SnapshotIndex for checkpointId: " + snapshotMetadata.getCheckpointId();
    Supplier<CompletionStage<Void>> removeDirIndexTTLAction = () -> removeTTL(snapshotIndex.getDirIndex(), metadata).toCompletableFuture();
    CompletableFuture<Void> dirIndexTTLRemovalFuture = FutureUtil.executeAsyncWithRetries(opName, removeDirIndexTTLAction, isCauseNonRetriable(), executor);
    return dirIndexTTLRemovalFuture.thenComposeAsync(aVoid -> {
        String op2Name = "removeTTL for indexBlobId: " + indexBlobId;
        Supplier<CompletionStage<Void>> removeIndexBlobTTLAction = () -> blobStoreManager.removeTTL(indexBlobId, metadata).toCompletableFuture();
        return FutureUtil.executeAsyncWithRetries(op2Name, removeIndexBlobTTLAction, isCauseNonRetriable(), executor);
    }, executor);
}
Also used : SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CompletionStage(java.util.concurrent.CompletionStage)

Example 4 with SnapshotMetadata

use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.

the class TestBlobStoreRestoreManager method testRestoreDeletesCheckpointDirsIfRestoring.

@Test
public void testRestoreDeletesCheckpointDirsIfRestoring() throws IOException {
    String jobName = "testJobName";
    String jobId = "testJobId";
    TaskName taskName = mock(TaskName.class);
    BlobStoreRestoreManagerMetrics metrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
    metrics.initStoreMetrics(ImmutableList.of("storeName"));
    Set<String> storesToRestore = ImmutableSet.of("storeName");
    SnapshotIndex snapshotIndex = mock(SnapshotIndex.class);
    Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = ImmutableMap.of("storeName", Pair.of("blobId", snapshotIndex));
    DirIndex dirIndex = BlobStoreTestUtil.createDirIndex("[a]");
    when(snapshotIndex.getDirIndex()).thenReturn(dirIndex);
    CheckpointId checkpointId = CheckpointId.create();
    when(snapshotIndex.getSnapshotMetadata()).thenReturn(new SnapshotMetadata(checkpointId, "jobName", "jobId", "taskName", "storeName"));
    Path loggedBaseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
    // create store dir to be deleted during restore
    Path storeDir = Files.createTempDirectory(loggedBaseDir, "storeDir");
    Path storeCheckpointDir1 = Files.createTempDirectory(loggedBaseDir, "storeDir-" + checkpointId);
    CheckpointId olderCheckpoint = CheckpointId.create();
    Path storeCheckpointDir2 = Files.createTempDirectory(loggedBaseDir, "storeDir-" + olderCheckpoint);
    StorageConfig storageConfig = mock(StorageConfig.class);
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    when(storageManagerUtil.getTaskStoreDir(eq(loggedBaseDir.toFile()), eq("storeName"), eq(taskName), eq(TaskMode.Active))).thenReturn(storeDir.toFile());
    when(storageManagerUtil.getStoreCheckpointDir(eq(storeDir.toFile()), eq(checkpointId))).thenReturn(Paths.get(storeDir.toString(), checkpointId.toString()).toString());
    when(storageManagerUtil.getTaskStoreCheckpointDirs(any(File.class), anyString(), any(TaskName.class), any(TaskMode.class))).thenReturn(ImmutableList.of(storeCheckpointDir1.toFile(), storeCheckpointDir2.toFile()));
    BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
    DirDiffUtil dirDiffUtil = mock(DirDiffUtil.class);
    when(dirDiffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((arg1, arg2) -> true);
    // return immediately without restoring.
    when(blobStoreUtil.restoreDir(eq(storeDir.toFile()), eq(dirIndex), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(null));
    BlobStoreRestoreManager.restoreStores(jobName, jobId, taskName, storesToRestore, prevStoreSnapshotIndexes, loggedBaseDir.toFile(), storageConfig, metrics, storageManagerUtil, blobStoreUtil, dirDiffUtil, EXECUTOR);
    // verify that the store directory restore was called and skipped (i.e. shouldRestore == true)
    verify(blobStoreUtil, times(1)).restoreDir(eq(storeDir.toFile()), eq(dirIndex), any(Metadata.class));
    // verify that the checkpoint directories were deleted prior to restore (should not exist at the end)
    assertFalse(storeCheckpointDir1.toFile().exists());
    assertFalse(storeCheckpointDir2.toFile().exists());
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 5 with SnapshotMetadata

use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.

the class TestBlobStoreRestoreManager method testRestoreDeletesStoreDir.

@Test
public void testRestoreDeletesStoreDir() throws IOException {
    String jobName = "testJobName";
    String jobId = "testJobId";
    TaskName taskName = mock(TaskName.class);
    BlobStoreRestoreManagerMetrics metrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
    metrics.initStoreMetrics(ImmutableList.of("storeName"));
    Set<String> storesToRestore = ImmutableSet.of("storeName");
    SnapshotIndex snapshotIndex = mock(SnapshotIndex.class);
    Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = ImmutableMap.of("storeName", Pair.of("blobId", snapshotIndex));
    DirIndex dirIndex = BlobStoreTestUtil.createDirIndex("[a]");
    when(snapshotIndex.getDirIndex()).thenReturn(dirIndex);
    when(snapshotIndex.getSnapshotMetadata()).thenReturn(new SnapshotMetadata(CheckpointId.create(), "jobName", "jobId", "taskName", "storeName"));
    Path loggedBaseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
    // create store dir to be deleted during restore
    Path storeDir = Files.createTempDirectory(loggedBaseDir, "storeDir");
    StorageConfig storageConfig = mock(StorageConfig.class);
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenReturn(Paths.get(storeDir.toString(), "checkpointId").toString());
    when(storageManagerUtil.getTaskStoreDir(eq(loggedBaseDir.toFile()), eq("storeName"), eq(taskName), eq(TaskMode.Active))).thenReturn(storeDir.toFile());
    BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
    DirDiffUtil dirDiffUtil = mock(DirDiffUtil.class);
    // return immediately without restoring.
    when(blobStoreUtil.restoreDir(eq(storeDir.toFile()), eq(dirIndex), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(null));
    when(dirDiffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((arg1, arg2) -> true);
    BlobStoreRestoreManager.restoreStores(jobName, jobId, taskName, storesToRestore, prevStoreSnapshotIndexes, loggedBaseDir.toFile(), storageConfig, metrics, storageManagerUtil, blobStoreUtil, dirDiffUtil, EXECUTOR);
    // verify that the store directory restore was called and skipped (i.e. shouldRestore == true)
    verify(blobStoreUtil, times(1)).restoreDir(eq(storeDir.toFile()), eq(dirIndex), any(Metadata.class));
    // verify that the store directory was deleted prior to restore
    // (should still not exist at the end since restore is no-op)
    assertFalse(storeDir.toFile().exists());
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Aggregations

SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)21 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)19 SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex)19 Pair (org.apache.commons.lang3.tuple.Pair)18 File (java.io.File)17 Path (java.nio.file.Path)16 Test (org.junit.Test)15 HashMap (java.util.HashMap)14 CheckpointId (org.apache.samza.checkpoint.CheckpointId)14 ArrayList (java.util.ArrayList)13 CompletableFuture (java.util.concurrent.CompletableFuture)13 CompletionStage (java.util.concurrent.CompletionStage)13 SamzaException (org.apache.samza.SamzaException)13 Checkpoint (org.apache.samza.checkpoint.Checkpoint)13 DirDiff (org.apache.samza.storage.blobstore.diff.DirDiff)13 ImmutableMap (com.google.common.collect.ImmutableMap)12 IOException (java.io.IOException)12 Files (java.nio.file.Files)12 Collections (java.util.Collections)12 List (java.util.List)12