Search in sources :

Example 11 with DirIndex

use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.

the class TestBlobStoreRestoreManager method testShouldRestoreIfNoCheckpointDir.

/**
 * Checks that {@code BlobStoreRestoreManager.shouldRestore} reports that a restore is needed, and never
 * consults the {@code DirDiffUtil}, when it is handed a checkpoint directory path that is presumed not to
 * exist on disk and the storage config is stubbed to not clean logged store dirs on start.
 */
@Test
public void testShouldRestoreIfNoCheckpointDir() throws IOException {
    // Collaborators: only the storage config needs stubbing; everything else is a bare mock.
    DirIndex remoteDirIndex = mock(DirIndex.class);
    DirDiffUtil diffUtil = mock(DirDiffUtil.class);
    StorageConfig config = mock(StorageConfig.class);
    when(config.cleanLoggedStoreDirsOnStart(anyString())).thenReturn(false);

    // NOTE(review): the test relies on this path being absent from the local file system.
    Path missingCheckpointDir = Paths.get("/tmp/non-existent-checkpoint-dir");

    boolean restoreNeeded = BlobStoreRestoreManager.shouldRestore(
        "taskName", "storeName", remoteDirIndex, missingCheckpointDir, config, diffUtil);

    // No directory comparison should have been attempted, and a restore must be requested.
    verifyZeroInteractions(diffUtil);
    assertTrue(restoreNeeded);
}
Also used : Path(java.nio.file.Path) StorageConfig(org.apache.samza.config.StorageConfig) Mockito.anyString(org.mockito.Mockito.anyString) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Test(org.junit.Test)

Example 12 with DirIndex

use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.

the class TestBlobStoreRestoreManager method testDeleteUnusedStoresRemovesStoresDeletedFromConfig.

/**
 * Verifies that a store which appears in the initial snapshot indexes ("oldStoreName") but is not among the
 * stores configured for blob store backup or restore (only "newStoreName" is) gets its directory cleaned up,
 * its directory deleted and its snapshot index blob deleted, each exactly once.
 */
@Test
public void testDeleteUnusedStoresRemovesStoresDeletedFromConfig() {
    String snapshotIndexBlobId = "blobId";
    String blobStoreFactoryName = BlobStoreStateBackendFactory.class.getName();

    // Config side: only "newStoreName" is configured for backup and restore with the blob store.
    StorageConfig storageConfig = mock(StorageConfig.class);
    BlobStoreConfig blobStoreConfig = mock(BlobStoreConfig.class);
    when(storageConfig.getStoresWithBackupFactory(eq(blobStoreFactoryName))).thenReturn(ImmutableList.of("newStoreName"));
    when(storageConfig.getStoresWithRestoreFactory(eq(blobStoreFactoryName))).thenReturn(ImmutableList.of("newStoreName"));

    // Checkpoint side: the last checkpoint still references "oldStoreName".
    DirIndex oldStoreDirIndex = mock(DirIndex.class);
    SnapshotIndex oldStoreSnapshotIndex = mock(SnapshotIndex.class);
    when(oldStoreSnapshotIndex.getDirIndex()).thenReturn(oldStoreDirIndex);
    Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes =
        ImmutableMap.of("oldStoreName", Pair.of(snapshotIndexBlobId, oldStoreSnapshotIndex));

    // Blob store side: every deletion step completes immediately.
    BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
    when(blobStoreUtil.cleanUpDir(any(DirIndex.class), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));
    when(blobStoreUtil.deleteDir(any(DirIndex.class), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));
    when(blobStoreUtil.deleteSnapshotIndexBlob(anyString(), any(Metadata.class)))
        .thenReturn(CompletableFuture.completedFuture(null));

    BlobStoreRestoreManager.deleteUnusedStoresFromBlobStore(
        "testJobName", "testJobId", "taskName", storageConfig, blobStoreConfig,
        initialStoreSnapshotIndexes, blobStoreUtil, EXECUTOR);

    // Each of the three deletion steps must have run once against the old store's index and blob ID.
    verify(blobStoreUtil, times(1)).cleanUpDir(eq(oldStoreDirIndex), any(Metadata.class));
    verify(blobStoreUtil, times(1)).deleteDir(eq(oldStoreDirIndex), any(Metadata.class));
    verify(blobStoreUtil, times(1)).deleteSnapshotIndexBlob(eq(snapshotIndexBlobId), any(Metadata.class));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) StorageConfig(org.apache.samza.config.StorageConfig) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Mockito.anyString(org.mockito.Mockito.anyString) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 13 with DirIndex

use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.

the class TestBlobStoreBackupManager method setupRemoteAndLocalSnapshots.

/**
 * Resets the test blob store and fills it with one remote snapshot index per local snapshot fixture.
 *
 * For every fixture a local directory and a matching dir index are created from the same listing, and a
 * snapshot index built from them is stored under the blob ID {@code "blobId-" + i}.
 *
 * @param addPrevCheckpoints when true, a previous snapshot index (built from the previous-remote listing at
 *                           the same position) is also stored under {@code prevSnapshotIndexBlobId + "-" + i},
 *                           and the new snapshot index is created with that ID as its previous-snapshot
 *                           argument; when false, that argument is an empty Optional
 * @return map from index blob ID to a pair of (absolute path of the local snapshot dir, remote snapshot index)
 * @throws IOException declared by the fixture-creating helpers
 */
private Map<String, Pair<String, SnapshotIndex>> setupRemoteAndLocalSnapshots(boolean addPrevCheckpoints) throws IOException {
    // reset blob store
    testBlobStore = new HashMap<>();

    // Listings are matched by position: localListings[i] pairs with prevRemoteListings[i].
    String[] localListings = {
        "[a, c, z/1, y/2, p/m/3, q/n/4]",
        "[a, c, z/1, y/1, p/m/1, q/n/1]",
        "[z/i/1, y/j/1]"
    };
    String[] prevRemoteListings = {
        "[a, b, z/1, x/5, p/m/3, r/o/6]",
        "[a, z/1, p/m/1]",
        "[z/i/1, x/k/1]"
    };

    Map<String, Pair<String, SnapshotIndex>> snapshotsByIndexBlobId = new HashMap<>();
    // setup local and corresponding remote snapshots
    for (int i = 0; i < localListings.length; i++) {
        String listing = localListings[i];
        String indexBlobId = "blobId-" + i;

        String localSnapshotPath = BlobStoreTestUtil.createLocalDir(listing).toAbsolutePath().toString();
        DirIndex localDirIndex = BlobStoreTestUtil.createDirIndex(listing);
        SnapshotMetadata metadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, localSnapshotPath);

        Optional<String> prevIndexBlobId;
        if (addPrevCheckpoints) {
            // Register the previous snapshot first so the new index can point back at it.
            String prevId = prevSnapshotIndexBlobId + "-" + i;
            DirIndex prevDirIndex = BlobStoreTestUtil.createDirIndex(prevRemoteListings[i]);
            testBlobStore.put(prevId, new SnapshotIndex(clock.currentTimeMillis(), metadata, prevDirIndex, Optional.empty()));
            prevIndexBlobId = Optional.of(prevId);
        } else {
            prevIndexBlobId = Optional.empty();
        }

        SnapshotIndex remoteSnapshot = new SnapshotIndex(clock.currentTimeMillis(), metadata, localDirIndex, prevIndexBlobId);
        snapshotsByIndexBlobId.put(indexBlobId, Pair.of(localSnapshotPath, remoteSnapshot));
        testBlobStore.put(indexBlobId, remoteSnapshot);
    }
    return snapshotsByIndexBlobId;
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Checkpoint(org.apache.samza.checkpoint.Checkpoint) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Pair(org.apache.commons.lang3.tuple.Pair)

Example 14 with DirIndex

use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.

the class BlobStoreTestUtil method createDirIndex.

/**
 * Builds a {@link DirIndex} from a bracketed, comma-separated listing of paths.
 *
 * The literal listing {@code "[]"} yields an index for the root dir name with four empty lists. Any other
 * listing has its first and last character stripped, is split on commas and sorted; the directory is then
 * materialized through {@code createLocalDir} and indexed from the resulting tree.
 *
 * @param files listing of paths, delimited by a leading and trailing character (e.g. square brackets)
 * @return the dir index describing the listed contents
 * @throws IOException declared by the helpers that create and index the local directory
 */
public static DirIndex createDirIndex(String files) throws IOException {
    boolean isEmptyDir = "[]".equals(files);
    if (isEmptyDir) {
        // empty dir
        return new DirIndex(
            DirIndex.ROOT_DIR_NAME,
            Collections.emptyList(),
            Collections.emptyList(),
            Collections.emptyList(),
            Collections.emptyList());
    }

    // Drop the surrounding delimiters, then split into individual (sorted) entries.
    String listingBody = files.substring(1, files.length() - 1);
    String[] sortedPaths = listingBody.split(",");
    Arrays.sort(sortedPaths);

    // actually create the directory structure in a temp dir so that file properties and checksums can be computed
    Path materializedDir = createLocalDir(files);
    String materializedDirPath = materializedDir.toAbsolutePath().toString();
    DirTreeNode tree = createDirTree(materializedDirPath, sortedPaths);
    return createDirIndex(materializedDirPath, tree);
}
Also used : Path(java.nio.file.Path) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex)

Example 15 with DirIndex

use of org.apache.samza.storage.blobstore.index.DirIndex in project samza by apache.

the class BlobStoreRestoreManager method deleteUnusedStoresFromBlobStore.

/**
 * Removes blob store contents for every store found in the last checkpoint's snapshot indexes that is
 * configured for neither backup nor restore through the blob store state backend factory (for example
 * because the store was removed from the job configs, or moved to a different backend).
 *
 * For each such store the deletion runs as a three-step chain: clean up files and sub-dirs previously
 * marked for removal, delete the files and dirs still present, then delete the snapshot index blob.
 *
 * This method blocks until the deletion chains of all affected stores have completed, i.e. until all the
 * necessary store contents and snapshot index blobs have been marked for deletion.
 *
 * @param initialStoreSnapshotIndexes store name to pair of (snapshot index blob ID, snapshot index)
 * @param executor executor on which the second and third step of each chain are composed
 */
@VisibleForTesting
static void deleteUnusedStoresFromBlobStore(String jobName, String jobId, String taskName, StorageConfig storageConfig, BlobStoreConfig blobStoreConfig, Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes, BlobStoreUtil blobStoreUtil, ExecutorService executor) {
    String blobStoreFactoryName = BlobStoreStateBackendFactory.class.getName();
    List<String> storesToBackup = storageConfig.getStoresWithBackupFactory(blobStoreFactoryName);
    List<String> storesToRestore = storageConfig.getStoresWithRestoreFactory(blobStoreFactoryName);

    List<CompletionStage<Void>> storeDeletionFutures = new ArrayList<>();
    for (Map.Entry<String, Pair<String, SnapshotIndex>> entry : initialStoreSnapshotIndexes.entrySet()) {
        String storeName = entry.getKey();
        // Stores still backed up or restored through the blob store must be left untouched.
        if (storesToBackup.contains(storeName) || storesToRestore.contains(storeName)) {
            continue;
        }

        LOG.debug("Removing task: {} store: {} from blob store. It is either no longer used, "
            + "or is no longer configured to be backed up or restored with blob store.", taskName, storeName);

        Pair<String, SnapshotIndex> blobIdAndSnapshotIndex = entry.getValue();
        DirIndex dirIndex = blobIdAndSnapshotIndex.getRight().getDirIndex();
        Metadata requestMetadata =
            new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);

        // delete files and sub-dirs previously marked for removal
        CompletionStage<Void> cleanedUp = blobStoreUtil.cleanUpDir(dirIndex, requestMetadata);
        // deleted files and dirs still present
        CompletionStage<Void> dirDeleted =
            cleanedUp.thenComposeAsync(v -> blobStoreUtil.deleteDir(dirIndex, requestMetadata), executor);
        // delete the snapshot index blob
        CompletionStage<Void> indexBlobDeleted = dirDeleted.thenComposeAsync(
            v -> blobStoreUtil.deleteSnapshotIndexBlob(blobIdAndSnapshotIndex.getLeft(), requestMetadata), executor);

        storeDeletionFutures.add(indexBlobDeleted);
    }

    FutureUtil.allOf(storeDeletionFutures).join();
}
Also used : BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) ArrayList(java.util.ArrayList) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) ExecutorService(java.util.concurrent.ExecutorService) FutureUtil(org.apache.samza.util.FutureUtil) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Set(java.util.Set) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Checkpoint(org.apache.samza.checkpoint.Checkpoint) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) CompletionStage(java.util.concurrent.CompletionStage) TaskRestoreManager(org.apache.samza.storage.TaskRestoreManager) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Paths(java.nio.file.Paths) Optional(java.util.Optional) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) Config(org.apache.samza.config.Config)

Aggregations

DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)39 Path (java.nio.file.Path)29 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)27 Test (org.junit.Test)26 File (java.io.File)25 SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex)25 ArrayList (java.util.ArrayList)23 Pair (org.apache.commons.lang3.tuple.Pair)23 CompletableFuture (java.util.concurrent.CompletableFuture)21 CompletionStage (java.util.concurrent.CompletionStage)20 CheckpointId (org.apache.samza.checkpoint.CheckpointId)20 SamzaException (org.apache.samza.SamzaException)19 DirDiff (org.apache.samza.storage.blobstore.diff.DirDiff)19 IOException (java.io.IOException)18 HashMap (java.util.HashMap)18 Checkpoint (org.apache.samza.checkpoint.Checkpoint)17 Files (java.nio.file.Files)16 List (java.util.List)16 Map (java.util.Map)16 Optional (java.util.Optional)16