Search in sources :

Example 1 with BlobStoreUtil

use of org.apache.samza.storage.blobstore.util.BlobStoreUtil in project samza by apache.

the class TestBlobStoreRestoreManager method testRestoreDeletesCheckpointDirsIfRestoring.

/**
 * Checks that checkpoint directories already present on disk for a store are
 * gone after {@code BlobStoreRestoreManager.restoreStores} has run for that store.
 */
@Test
public void testRestoreDeletesCheckpointDirsIfRestoring() throws IOException {
    final String storeName = "storeName";
    TaskName task = mock(TaskName.class);
    BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
    restoreMetrics.initStoreMetrics(ImmutableList.of(storeName));

    // Previous checkpoint contents: a single store whose snapshot index is mocked.
    SnapshotIndex index = mock(SnapshotIndex.class);
    Map<String, Pair<String, SnapshotIndex>> previousIndexes = ImmutableMap.of(storeName, Pair.of("blobId", index));
    DirIndex remoteDirIndex = BlobStoreTestUtil.createDirIndex("[a]");
    when(index.getDirIndex()).thenReturn(remoteDirIndex);
    CheckpointId checkpointId = CheckpointId.create();
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, "jobName", "jobId", "taskName", storeName);
    when(index.getSnapshotMetadata()).thenReturn(snapshotMetadata);

    // On-disk layout: a store dir plus two checkpoint dirs under a fresh temp base dir.
    Path baseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
    Path storeDir = Files.createTempDirectory(baseDir, "storeDir");
    Path currentCheckpointDir = Files.createTempDirectory(baseDir, "storeDir-" + checkpointId);
    CheckpointId otherCheckpointId = CheckpointId.create();
    Path otherCheckpointDir = Files.createTempDirectory(baseDir, "storeDir-" + otherCheckpointId);
    File baseDirFile = baseDir.toFile();
    File storeDirFile = storeDir.toFile();
    String checkpointDirPath = Paths.get(storeDir.toString(), checkpointId.toString()).toString();

    // Storage util mock resolves the store dir, its checkpoint dir path and the existing checkpoint dirs.
    StorageConfig config = mock(StorageConfig.class);
    StorageManagerUtil storageUtil = mock(StorageManagerUtil.class);
    when(storageUtil.getTaskStoreDir(eq(baseDirFile), eq(storeName), eq(task), eq(TaskMode.Active)))
        .thenReturn(storeDirFile);
    when(storageUtil.getStoreCheckpointDir(eq(storeDirFile), eq(checkpointId)))
        .thenReturn(checkpointDirPath);
    when(storageUtil.getTaskStoreCheckpointDirs(any(File.class), anyString(), any(TaskName.class), any(TaskMode.class)))
        .thenReturn(ImmutableList.of(currentCheckpointDir.toFile(), otherCheckpointDir.toFile()));

    BlobStoreUtil blobUtil = mock(BlobStoreUtil.class);
    DirDiffUtil diffUtil = mock(DirDiffUtil.class);
    when(diffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((left, right) -> true);
    // The stubbed restore completes immediately, so nothing is actually written back to disk.
    when(blobUtil.restoreDir(eq(storeDirFile), eq(remoteDirIndex), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));

    BlobStoreRestoreManager.restoreStores("testJobName", "testJobId", task, ImmutableSet.of(storeName), previousIndexes,
        baseDirFile, config, restoreMetrics, storageUtil, blobUtil, diffUtil, EXECUTOR);

    // The restore must have been requested exactly once for the store directory.
    verify(blobUtil, times(1)).restoreDir(eq(storeDirFile), eq(remoteDirIndex), any(Metadata.class));
    // Neither pre-existing checkpoint directory may survive the restore.
    assertFalse(currentCheckpointDir.toFile().exists());
    assertFalse(otherCheckpointDir.toFile().exists());
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) TaskMode(org.apache.samza.job.model.TaskMode) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 2 with BlobStoreUtil

use of org.apache.samza.storage.blobstore.util.BlobStoreUtil in project samza by apache.

the class TestBlobStoreRestoreManager method testRestoreDeletesStoreDir.

/**
 * Checks that an existing store directory is gone after
 * {@code BlobStoreRestoreManager.restoreStores} has run with a no-op restore stub.
 */
@Test
public void testRestoreDeletesStoreDir() throws IOException {
    final String storeName = "storeName";
    TaskName task = mock(TaskName.class);
    BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
    restoreMetrics.initStoreMetrics(ImmutableList.of(storeName));

    // Previous checkpoint contents: a single store whose snapshot index is mocked.
    SnapshotIndex index = mock(SnapshotIndex.class);
    Map<String, Pair<String, SnapshotIndex>> previousIndexes = ImmutableMap.of(storeName, Pair.of("blobId", index));
    DirIndex remoteDirIndex = BlobStoreTestUtil.createDirIndex("[a]");
    when(index.getDirIndex()).thenReturn(remoteDirIndex);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(CheckpointId.create(), "jobName", "jobId", "taskName", storeName);
    when(index.getSnapshotMetadata()).thenReturn(snapshotMetadata);

    // Real store directory on disk; the test expects it to be removed by the restore call.
    Path baseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
    Path storeDir = Files.createTempDirectory(baseDir, "storeDir");
    File baseDirFile = baseDir.toFile();
    File storeDirFile = storeDir.toFile();
    String checkpointDirPath = Paths.get(storeDir.toString(), "checkpointId").toString();

    StorageConfig config = mock(StorageConfig.class);
    StorageManagerUtil storageUtil = mock(StorageManagerUtil.class);
    when(storageUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class)))
        .thenReturn(checkpointDirPath);
    when(storageUtil.getTaskStoreDir(eq(baseDirFile), eq(storeName), eq(task), eq(TaskMode.Active)))
        .thenReturn(storeDirFile);

    BlobStoreUtil blobUtil = mock(BlobStoreUtil.class);
    DirDiffUtil diffUtil = mock(DirDiffUtil.class);
    // The stubbed restore completes immediately, so nothing is actually written back to disk.
    when(blobUtil.restoreDir(eq(storeDirFile), eq(remoteDirIndex), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));
    when(diffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((left, right) -> true);

    BlobStoreRestoreManager.restoreStores("testJobName", "testJobId", task, ImmutableSet.of(storeName), previousIndexes,
        baseDirFile, config, restoreMetrics, storageUtil, blobUtil, diffUtil, EXECUTOR);

    // The restore must have been requested exactly once for the store directory.
    verify(blobUtil, times(1)).restoreDir(eq(storeDirFile), eq(remoteDirIndex), any(Metadata.class));
    // Because the stubbed restore recreates nothing, the store directory must still be absent here.
    assertFalse(storeDir.toFile().exists());
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 3 with BlobStoreUtil

use of org.apache.samza.storage.blobstore.util.BlobStoreUtil in project samza by apache.

the class TestBlobStoreRestoreManager method testRestoreSkipsStoresWithMissingCheckpointSCM.

/**
 * Scenario: a store was renamed from oldStoreName to newStoreName, so the previous
 * checkpoint holds no SCM for newStoreName. Checks that no directory restore is
 * attempted for such a store.
 */
@Test
public void testRestoreSkipsStoresWithMissingCheckpointSCM() {
    final String renamedStore = "newStoreName";
    TaskName task = mock(TaskName.class);
    BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
    restoreMetrics.initStoreMetrics(ImmutableList.of(renamedStore));
    // Only the new store name is configured for restore.
    Set<String> configuredStores = ImmutableSet.of(renamedStore);

    // The previous-checkpoint map is a mock so the containsKey lookup can be verified afterwards.
    Map<String, Pair<String, SnapshotIndex>> previousIndexes = mock(Map.class);
    when(previousIndexes.containsKey(renamedStore)).thenReturn(false);

    // This snapshot index is stubbed but is not reachable through anything passed to restoreStores below.
    SnapshotIndex index = mock(SnapshotIndex.class);
    DirIndex remoteDirIndex = mock(DirIndex.class);
    when(index.getDirIndex()).thenReturn(remoteDirIndex);
    CheckpointId checkpointId = CheckpointId.create();
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, "jobName", "jobId", "taskName", "storeName");
    when(index.getSnapshotMetadata()).thenReturn(snapshotMetadata);

    // No real directories are created here; the base dir is a mock whose toFile() is left unstubbed.
    Path baseDir = mock(Path.class);
    StorageConfig config = mock(StorageConfig.class);
    StorageManagerUtil storageUtil = mock(StorageManagerUtil.class);
    BlobStoreUtil blobUtil = mock(BlobStoreUtil.class);
    DirDiffUtil diffUtil = mock(DirDiffUtil.class);

    BlobStoreRestoreManager.restoreStores("testJobName", "testJobId", task, configuredStores, previousIndexes,
        baseDir.toFile(), config, restoreMetrics, storageUtil, blobUtil, diffUtil, EXECUTOR);

    // The previously checkpointed SCMs must have been consulted for the new store name...
    verify(previousIndexes, times(1)).containsKey(eq(renamedStore));
    // ...and no directory restore may have been issued at all.
    verify(blobUtil, times(0)).restoreDir(any(File.class), any(DirIndex.class), any(Metadata.class));
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) File(java.io.File) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 4 with BlobStoreUtil

use of org.apache.samza.storage.blobstore.util.BlobStoreUtil in project samza by apache.

the class TestBlobStoreRestoreManager method testDeleteUnusedStoresRemovesStoresDeletedFromConfig.

/**
 * Checks that a store present in the initial snapshot indexes but absent from both the
 * backup and restore store lists in config has its blob store contents deleted.
 */
@Test
public void testDeleteUnusedStoresRemovesStoresDeletedFromConfig() {
    final String snapshotBlobId = "blobId";
    final String factoryName = BlobStoreStateBackendFactory.class.getName();

    // Config only knows "newStoreName", for both backup and restore.
    StorageConfig config = mock(StorageConfig.class);
    BlobStoreConfig blobConfig = mock(BlobStoreConfig.class);
    when(config.getStoresWithBackupFactory(eq(factoryName))).thenReturn(ImmutableList.of("newStoreName"));
    when(config.getStoresWithRestoreFactory(eq(factoryName))).thenReturn(ImmutableList.of("newStoreName"));

    // The last checkpoint still carries "oldStoreName".
    SnapshotIndex staleIndex = mock(SnapshotIndex.class);
    DirIndex staleDirIndex = mock(DirIndex.class);
    when(staleIndex.getDirIndex()).thenReturn(staleDirIndex);
    Map<String, Pair<String, SnapshotIndex>> checkpointedIndexes =
        ImmutableMap.of("oldStoreName", Pair.of(snapshotBlobId, staleIndex));

    // Every blob store operation completes immediately.
    BlobStoreUtil blobUtil = mock(BlobStoreUtil.class);
    when(blobUtil.cleanUpDir(any(DirIndex.class), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));
    when(blobUtil.deleteDir(any(DirIndex.class), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));
    when(blobUtil.deleteSnapshotIndexBlob(anyString(), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));

    BlobStoreRestoreManager.deleteUnusedStoresFromBlobStore("testJobName", "testJobId", "taskName", config, blobConfig,
        checkpointedIndexes, blobUtil, EXECUTOR);

    // Each deletion step must have been issued exactly once for the stale store.
    verify(blobUtil, times(1)).cleanUpDir(eq(staleDirIndex), any(Metadata.class));
    verify(blobUtil, times(1)).deleteDir(eq(staleDirIndex), any(Metadata.class));
    verify(blobUtil, times(1)).deleteSnapshotIndexBlob(eq(snapshotBlobId), any(Metadata.class));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 5 with BlobStoreUtil

use of org.apache.samza.storage.blobstore.util.BlobStoreUtil in project samza by apache.

the class BlobStoreRestoreManager method deleteUnusedStoresFromBlobStore.

/**
 * Deletes blob store contents for every store that appears in the last checkpoint but is
 * configured neither for backup nor for restore with {@code BlobStoreStateBackendFactory}
 * (for example because the user removed the store, or switched it away from blob store backup).
 *
 * For each such store the following steps are chained: clean up the directory index, delete
 * the directory index, then delete the snapshot index blob. The method joins on all of the
 * resulting futures before returning.
 *
 * @param jobName job name recorded in the request metadata
 * @param jobId job id recorded in the request metadata
 * @param taskName task name recorded in the request metadata and in the debug log
 * @param storageConfig source of the store names configured for blob store backup and restore
 * @param blobStoreConfig blob store configuration; not referenced in this method's body
 * @param initialStoreSnapshotIndexes store name to (snapshot index blob id, snapshot index) from the last checkpoint
 * @param blobStoreUtil performs the blob store clean-up and deletion calls
 * @param executor executor on which the chained deletion stages are composed
 */
@VisibleForTesting
static void deleteUnusedStoresFromBlobStore(String jobName, String jobId, String taskName, StorageConfig storageConfig, BlobStoreConfig blobStoreConfig, Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes, BlobStoreUtil blobStoreUtil, ExecutorService executor) {
    String blobStoreFactoryName = BlobStoreStateBackendFactory.class.getName();
    List<String> backedUpStores = storageConfig.getStoresWithBackupFactory(blobStoreFactoryName);
    List<String> restoredStores = storageConfig.getStoresWithRestoreFactory(blobStoreFactoryName);
    List<CompletionStage<Void>> pendingDeletions = new ArrayList<>();

    initialStoreSnapshotIndexes.forEach((storeName, blobIdAndIndex) -> {
        // A store still configured for backup or restore keeps its blob store contents.
        if (backedUpStores.contains(storeName) || restoredStores.contains(storeName)) {
            return;
        }

        LOG.debug("Removing task: {} store: {} from blob store. It is either no longer used, "
            + "or is no longer configured to be backed up or restored with blob store.", taskName, storeName);
        DirIndex dirIndex = blobIdAndIndex.getRight().getDirIndex();
        Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);

        CompletionStage<Void> deletion = blobStoreUtil
            // first delete files and sub-dirs previously marked for removal
            .cleanUpDir(dirIndex, requestMetadata)
            // then delete the files and dirs that are still present
            .thenComposeAsync(v -> blobStoreUtil.deleteDir(dirIndex, requestMetadata), executor)
            // finally delete the snapshot index blob itself
            .thenComposeAsync(v -> blobStoreUtil.deleteSnapshotIndexBlob(blobIdAndIndex.getLeft(), requestMetadata), executor);
        pendingDeletions.add(deletion);
    });

    // Wait for every per-store deletion chain before returning.
    FutureUtil.allOf(pendingDeletions).join();
}
Also used : BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) ArrayList(java.util.ArrayList) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) ExecutorService(java.util.concurrent.ExecutorService) FutureUtil(org.apache.samza.util.FutureUtil) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Set(java.util.Set) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Checkpoint(org.apache.samza.checkpoint.Checkpoint) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) CompletionStage(java.util.concurrent.CompletionStage) TaskRestoreManager(org.apache.samza.storage.TaskRestoreManager) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Paths(java.nio.file.Paths) Optional(java.util.Optional) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage) 
VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

Pair (org.apache.commons.lang3.tuple.Pair)6 StorageConfig (org.apache.samza.config.StorageConfig)6 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)6 SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex)6 BlobStoreUtil (org.apache.samza.storage.blobstore.util.BlobStoreUtil)6 File (java.io.File)5 Path (java.nio.file.Path)5 CheckpointId (org.apache.samza.checkpoint.CheckpointId)5 TaskName (org.apache.samza.container.TaskName)5 StorageManagerUtil (org.apache.samza.storage.StorageManagerUtil)5 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)5 BlobStoreRestoreManagerMetrics (org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics)5 DirDiffUtil (org.apache.samza.storage.blobstore.util.DirDiffUtil)5 Test (org.junit.Test)5 Mockito.anyString (org.mockito.Mockito.anyString)5 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)4 TaskMode (org.apache.samza.job.model.TaskMode)3 BlobStoreConfig (org.apache.samza.config.BlobStoreConfig)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableSet (com.google.common.collect.ImmutableSet)1