Search in sources :

Example 11 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreUtil method testRemoveTTL.

@Test
public void testRemoveTTL() throws IOException, ExecutionException, InterruptedException {
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    // File, dir and recursive dir added, retained and removed in local
    // Using unique file names since test setup returns it as the blob id
    String local = "[a, c, z/1, y/2, p/m/3, q/n/4]";
    String remote = "[a, b, z/1, x/5, p/m/3, r/o/6]";
    String expectedAdded = "[c, y/2, q/n/4]";
    String expectedRetained = "[a, z/1, p/m/3]";
    SortedSet<String> expectedAddedFiles = BlobStoreTestUtil.getExpected(expectedAdded);
    SortedSet<String> expectedRetainedFiles = BlobStoreTestUtil.getExpected(expectedRetained);
    SortedSet<String> expectedPresentFiles = new TreeSet<>(expectedAddedFiles);
    expectedPresentFiles.addAll(expectedRetainedFiles);
    // Set up environment
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    String basePath = localSnapshotDir.toAbsolutePath().toString();
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        Metadata metadata = invocation.getArgumentAt(1, Metadata.class);
        String path = metadata.getPayloadPath();
        String fileName = path.substring(path.length() - 1);
        return CompletableFuture.completedFuture(fileName);
    });
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
    DirIndex dirIndex = null;
    try {
        // should be already complete. if not, future composition in putDir is broken.
        dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from putDir should be already complete.");
    }
    SnapshotIndex mockSnapshotIndex = mock(SnapshotIndex.class);
    when(mockSnapshotIndex.getSnapshotMetadata()).thenReturn(snapshotMetadata);
    when(mockSnapshotIndex.getDirIndex()).thenReturn(dirIndex);
    SortedSet<String> allTTLRemoved = new TreeSet<>();
    when(blobStoreManager.removeTTL(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        String blobId = invocation.getArgumentAt(0, String.class);
        allTTLRemoved.add(blobId);
        return CompletableFuture.completedFuture(null);
    });
    // Execute
    blobStoreUtil.removeTTL("snapshotIndexBlobId", mockSnapshotIndex, metadata);
    // Assert
    SortedSet<String> expectedBlobIds = new TreeSet<>();
    // test uses unique file name (last char) as the blob ID.
    expectedPresentFiles.forEach(f -> expectedBlobIds.add(f.substring(f.length() - 1)));
    expectedBlobIds.add("snapshotIndexBlobId");
    assertEquals(expectedBlobIds, allTTLRemoved);
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) TreeSet(java.util.TreeSet) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 12 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreUtil method testCleanUpFailsIfAnySubDirFileDeleteFails.

@Test
public void testCleanUpFailsIfAnySubDirFileDeleteFails() throws IOException, TimeoutException, InterruptedException, ExecutionException {
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    // File, dir and recursive dir added, retained and removed in local
    // Using unique file names since test util uses only the file name (leaf node)
    // as the mock blob id, not the full file path.
    String local = "[a/1, b/2]";
    String remote = "[c/3, d/4]";
    // Set up environment
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    String basePath = localSnapshotDir.toAbsolutePath().toString();
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture("blobId"));
    CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
    DirIndex dirIndex = null;
    try {
        // should be already complete. if not, future composition in putDir is broken.
        dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from putDir should be already complete.");
    }
    // Set up mocks
    SamzaException exception = new SamzaException("Error deleting file");
    CompletableFuture<Void> failedFuture = new CompletableFuture<>();
    failedFuture.completeExceptionally(exception);
    when(blobStoreManager.delete(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<Void>>) invocation -> {
        String blobId = invocation.getArgumentAt(0, String.class);
        if (blobId.equals("3")) {
            return CompletableFuture.completedFuture(null);
        } else {
            return failedFuture;
        }
    });
    // Execute
    CompletionStage<Void> cleanUpFuture = blobStoreUtil.cleanUpDir(dirIndex, metadata);
    try {
        // should be already complete. if not, future composition in putDir is broken.
        cleanUpFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        // Assert that the result future fails and that the cause is propagated correctly
        assertEquals(exception, cause);
        return;
    }
    fail("Clean up future should have been completed with an exception");
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 13 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreBackupManager method testUploadWithPreviousCheckpoints.

@Test
public void testUploadWithPreviousCheckpoints() throws IOException {
    // Track directory for post cleanup
    List<String> checkpointDirsToClean = new ArrayList<>();
    // Setup: init back manager with previous checkpoints
    // indexBlobIdAndLocalRemoteSnapshotsPair = setupRemoteAndLocalSnapshots(true);
    Map<String, String> previousCheckpoints = // map store name, previous snapshot index blob id
    indexBlobIdAndLocalRemoteSnapshotsPair.entrySet().stream().collect(Collectors.toMap(e -> e.getValue().getLeft(), e -> e.getValue().getRight().getPrevSnapshotIndexBlobId().get()));
    Checkpoint checkpoint = new CheckpointV2(checkpointId, new HashMap<>(), ImmutableMap.of(BlobStoreStateBackendFactory.class.getName(), previousCheckpoints));
    when(blobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    blobStoreBackupManager.init(checkpoint);
    // mock: set task store dir to return corresponding test local store and create checkpoint dir
    ArgumentCaptor<String> stringCaptor = ArgumentCaptor.forClass(String.class);
    when(storageManagerUtil.getTaskStoreDir(any(File.class), stringCaptor.capture(), any(TaskName.class), any(TaskMode.class))).then((Answer<File>) invocation -> {
        String storeName = invocation.getArgumentAt(1, String.class);
        String snapshotIndexBlobId = testStoreNameAndSCMMap.get(storeName);
        String storeDir = indexBlobIdAndLocalRemoteSnapshotsPair.get(snapshotIndexBlobId).getLeft();
        try {
            BlobStoreTestUtil.createTestCheckpointDirectory(storeDir, checkpointId.serialize());
            checkpointDirsToClean.add(storeDir + "-" + checkpointId.serialize());
        } catch (IOException e) {
            Assert.fail("Couldn't create checkpoint directory. Test failed.");
        }
        return new File(storeDir);
    });
    ArgumentCaptor<File> storeDirCaptor = ArgumentCaptor.forClass(File.class);
    when(storageManagerUtil.getStoreCheckpointDir(storeDirCaptor.capture(), eq(checkpointId))).thenAnswer(new Answer<String>() {

        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            File storeDir = invocation.getArgumentAt(0, File.class);
            return storeDir.getAbsolutePath() + "-" + checkpointId.serialize();
        }
    });
    // mock: mock putDir and capture DirDiff
    SortedSet<DirDiff> actualDirDiffs = new TreeSet<>(Comparator.comparing(DirDiff::getDirName));
    ArgumentCaptor<DirDiff> dirDiffCaptor = ArgumentCaptor.forClass(DirDiff.class);
    ArgumentCaptor<SnapshotMetadata> snapshotMetadataCaptor = ArgumentCaptor.forClass(SnapshotMetadata.class);
    when(blobStoreUtil.putDir(dirDiffCaptor.capture(), snapshotMetadataCaptor.capture())).then((Answer<CompletableFuture<DirIndex>>) invocation -> {
        DirDiff dirDiff = invocation.getArgumentAt(0, DirDiff.class);
        SnapshotMetadata snapshotMetadata = invocation.getArgumentAt(1, SnapshotMetadata.class);
        actualDirDiffs.add(dirDiff);
        SnapshotIndex snapshotIndex = testBlobStore.get(testStoreNameAndSCMMap.get(snapshotMetadata.getStoreName()));
        return CompletableFuture.completedFuture(snapshotIndex.getDirIndex());
    });
    // mock: mock putSnapshotIndex and capture previous snapshot index
    SortedSet<SnapshotIndex> expectedSnapshotIndexesUploaded = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream().map(Pair::getRight).collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis))));
    SortedSet<SnapshotIndex> actualSnapshotIndexesUploaded = new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis));
    SortedSet<String> actualPreviousSnapshotIndexBlobIds = new TreeSet<>();
    SortedSet<String> expectedPreviousSnapshotIndexBlobIds = new TreeSet<>(previousCheckpoints.values());
    ArgumentCaptor<SnapshotIndex> snapshotIndexCaptor = ArgumentCaptor.forClass(SnapshotIndex.class);
    when(blobStoreUtil.putSnapshotIndex(snapshotIndexCaptor.capture())).then((Answer<CompletableFuture<String>>) invocation -> {
        SnapshotIndex snapshotIndex = invocation.getArgumentAt(0, SnapshotIndex.class);
        actualSnapshotIndexesUploaded.add(snapshotIndex);
        if (snapshotIndex.getPrevSnapshotIndexBlobId().isPresent()) {
            actualPreviousSnapshotIndexBlobIds.add(snapshotIndex.getPrevSnapshotIndexBlobId().get());
        }
        return CompletableFuture.completedFuture("random-blob-id");
    });
    // execute
    blobStoreBackupManager.upload(checkpointId, ImmutableMap.of());
    TreeSet<DirDiff> expectedDirDiffs = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream().map(localRemoteSnapshotPair -> DirDiffUtil.getDirDiff(new File(localRemoteSnapshotPair.getLeft() + "-" + checkpointId.serialize()), localRemoteSnapshotPair.getRight().getDirIndex(), DirDiffUtil.areSameFile(false))).collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(DirDiff::getDirName))));
    // assert - asset all DirDiff are put to blob store
    Assert.assertEquals(actualDirDiffs, expectedDirDiffs);
    // assert - assert no previous snapshot indexes were found
    Assert.assertEquals(actualPreviousSnapshotIndexBlobIds, expectedPreviousSnapshotIndexBlobIds);
    // assert - assert all snapshot indexes are uploaded
    Assert.assertEquals(actualSnapshotIndexesUploaded, expectedSnapshotIndexesUploaded);
    // cleanup
    checkpointDirsToClean.forEach(path -> {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException exception) {
            Assert.fail("Failed to cleanup temporary checkpoint dirs.");
        }
    });
}
Also used : SortedSet(java.util.SortedSet) TaskModel(org.apache.samza.job.model.TaskModel) Counter(org.apache.samza.metrics.Counter) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) StorageEngine(org.apache.samza.storage.StorageEngine) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) Config(org.apache.samza.config.Config) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Mockito.eq(org.mockito.Mockito.eq) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Gauge(org.apache.samza.metrics.Gauge) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ArgumentCaptor(org.mockito.ArgumentCaptor) BlobStoreTestUtil(org.apache.samza.storage.blobstore.util.BlobStoreTestUtil) Mockito.anyLong(org.mockito.Mockito.anyLong) ExecutorService(java.util.concurrent.ExecutorService) JobModel(org.apache.samza.job.model.JobModel) Before(org.junit.Before) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) Timer(org.apache.samza.metrics.Timer) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) File(java.io.File) SamzaException(org.apache.samza.SamzaException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Comparator(java.util.Comparator) Assert(org.junit.Assert) Collections(java.util.Collections) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) InvocationOnMock(org.mockito.invocation.InvocationOnMock) File(java.io.File) Test(org.junit.Test)

Example 14 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreBackupManager method testUploadWithNoPreviousCheckpoints.

@Test
public void testUploadWithNoPreviousCheckpoints() throws IOException {
    // Track directory for post cleanup
    List<String> checkpointDirsToClean = new ArrayList<>();
    // Setup: init local/remote snapshots and back manager with no previous checkpoints
    indexBlobIdAndLocalRemoteSnapshotsPair = setupRemoteAndLocalSnapshots(false);
    Checkpoint checkpoint = new CheckpointV2(checkpointId, new HashMap<>(), ImmutableMap.of(BlobStoreStateBackendFactory.class.getName(), new HashMap<>()));
    blobStoreBackupManager.init(checkpoint);
    // mock: set task store dir to return corresponding test local store and create checkpoint dir
    ArgumentCaptor<String> stringCaptor = ArgumentCaptor.forClass(String.class);
    when(storageManagerUtil.getTaskStoreDir(any(File.class), stringCaptor.capture(), any(TaskName.class), any(TaskMode.class))).then((Answer<File>) invocation -> {
        String storeName = invocation.getArgumentAt(1, String.class);
        String snapshotIndexBlobId = testStoreNameAndSCMMap.get(storeName);
        String storeDir = indexBlobIdAndLocalRemoteSnapshotsPair.get(snapshotIndexBlobId).getLeft();
        try {
            BlobStoreTestUtil.createTestCheckpointDirectory(storeDir, checkpointId.serialize());
            checkpointDirsToClean.add(storeDir + "-" + checkpointId.serialize());
        } catch (IOException e) {
            Assert.fail("Couldn't create checkpoint directory. Test failed.");
        }
        return new File(storeDir);
    });
    ArgumentCaptor<File> storeDirCaptor = ArgumentCaptor.forClass(File.class);
    when(storageManagerUtil.getStoreCheckpointDir(storeDirCaptor.capture(), eq(checkpointId))).thenAnswer(new Answer<String>() {

        @Override
        public String answer(InvocationOnMock invocation) throws Throwable {
            File storeDir = invocation.getArgumentAt(0, File.class);
            return storeDir.getAbsolutePath() + "-" + checkpointId.serialize();
        }
    });
    SortedSet<DirDiff> actualDirDiffs = new TreeSet<>(Comparator.comparing(DirDiff::getDirName));
    // mock: mock putDir and capture DirDiff
    ArgumentCaptor<DirDiff> dirDiffCaptor = ArgumentCaptor.forClass(DirDiff.class);
    ArgumentCaptor<SnapshotMetadata> snapshotMetadataCaptor = ArgumentCaptor.forClass(SnapshotMetadata.class);
    when(blobStoreUtil.putDir(dirDiffCaptor.capture(), snapshotMetadataCaptor.capture())).then((Answer<CompletableFuture<DirIndex>>) invocation -> {
        DirDiff dirDiff = invocation.getArgumentAt(0, DirDiff.class);
        SnapshotMetadata snapshotMetadata = invocation.getArgumentAt(1, SnapshotMetadata.class);
        actualDirDiffs.add(dirDiff);
        SnapshotIndex snapshotIndex = testBlobStore.get(testStoreNameAndSCMMap.get(snapshotMetadata.getStoreName()));
        return CompletableFuture.completedFuture(snapshotIndex.getDirIndex());
    });
    SortedSet<SnapshotIndex> expectedSnapshotIndexesUploaded = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream().map(Pair::getRight).collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis))));
    String expectedPreviousSnapshotIndexBlobId = "empty";
    // mock: mock putSnapshotIndex and capture previous snapshot index
    SortedSet<SnapshotIndex> actualSnapshotIndexesUploaded = new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis));
    final String[] actualPreviousSnapshotIndexBlobId = { "empty" };
    ArgumentCaptor<SnapshotIndex> snapshotIndexCaptor = ArgumentCaptor.forClass(SnapshotIndex.class);
    when(blobStoreUtil.putSnapshotIndex(snapshotIndexCaptor.capture())).then((Answer<CompletableFuture<String>>) invocation -> {
        SnapshotIndex snapshotIndex = invocation.getArgumentAt(0, SnapshotIndex.class);
        actualSnapshotIndexesUploaded.add(snapshotIndex);
        if (!snapshotIndex.getPrevSnapshotIndexBlobId().equals(Optional.empty())) {
            actualPreviousSnapshotIndexBlobId[0] = "not-empty";
        }
        return CompletableFuture.completedFuture("random-blob-id");
    });
    // execute
    blobStoreBackupManager.upload(checkpointId, testStoreNameAndSCMMap);
    // setup expected dir diffs after execute: needs checkpoint dirs created in upload()
    TreeSet<DirDiff> expectedDirDiffs = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream().map(localRemoteSnapshotPair -> {
        File localCheckpointDir = new File(localRemoteSnapshotPair.getLeft() + "-" + checkpointId.serialize());
        DirIndex dirIndex = new DirIndex(localCheckpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
        return DirDiffUtil.getDirDiff(localCheckpointDir, dirIndex, DirDiffUtil.areSameFile(false));
    }).collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(DirDiff::getDirName))));
    // assert - asset all DirDiff are put to blob store
    Assert.assertEquals(actualDirDiffs, expectedDirDiffs);
    // assert - assert no previous snapshot indexes were found
    Assert.assertEquals(actualPreviousSnapshotIndexBlobId[0], expectedPreviousSnapshotIndexBlobId);
    // assert - assert all snapshot indexes are uploaded
    Assert.assertEquals(actualSnapshotIndexesUploaded, expectedSnapshotIndexesUploaded);
    // cleanup
    checkpointDirsToClean.forEach(path -> {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException exception) {
            Assert.fail("Failed to cleanup temporary checkpoint dirs.");
        }
    });
}
Also used : SortedSet(java.util.SortedSet) TaskModel(org.apache.samza.job.model.TaskModel) Counter(org.apache.samza.metrics.Counter) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) StorageEngine(org.apache.samza.storage.StorageEngine) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) Config(org.apache.samza.config.Config) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Mockito.eq(org.mockito.Mockito.eq) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Gauge(org.apache.samza.metrics.Gauge) Answer(org.mockito.stubbing.Answer) InvocationOnMock(org.mockito.invocation.InvocationOnMock) ArgumentCaptor(org.mockito.ArgumentCaptor) BlobStoreTestUtil(org.apache.samza.storage.blobstore.util.BlobStoreTestUtil) Mockito.anyLong(org.mockito.Mockito.anyLong) ExecutorService(java.util.concurrent.ExecutorService) JobModel(org.apache.samza.job.model.JobModel) Before(org.junit.Before) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) Timer(org.apache.samza.metrics.Timer) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) File(java.io.File) SamzaException(org.apache.samza.SamzaException) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) TaskMode(org.apache.samza.job.model.TaskMode) ContainerModel(org.apache.samza.job.model.ContainerModel) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) Comparator(java.util.Comparator) Assert(org.junit.Assert) Collections(java.util.Collections) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TaskMode(org.apache.samza.job.model.TaskMode) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) TaskName(org.apache.samza.container.TaskName) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) InvocationOnMock(org.mockito.invocation.InvocationOnMock) File(java.io.File) Test(org.junit.Test)

Example 15 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestDirDiffUtil method testGetDirDiff.

@Test
public void testGetDirDiff() throws IOException {
    // Setup
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(this.local);
    String basePath = localSnapshotDir.toAbsolutePath().toString();
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(this.remote);
    // Execute
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    SortedSet<String> allAdded = new TreeSet<>();
    SortedSet<String> allRemoved = new TreeSet<>();
    SortedSet<String> allRetained = new TreeSet<>();
    BlobStoreTestUtil.getAllAddedInDiff(basePath, dirDiff, allAdded);
    BlobStoreTestUtil.getAllRemovedInDiff("", dirDiff, allRemoved);
    BlobStoreTestUtil.getAllRetainedInDiff("", dirDiff, allRetained);
    // Assert
    SortedSet<String> expectedAddedFiles = BlobStoreTestUtil.getExpected(this.expectedAdded);
    SortedSet<String> expectedRetainedFiles = BlobStoreTestUtil.getExpected(this.expectedRetained);
    SortedSet<String> expectedRemovedFiles = BlobStoreTestUtil.getExpected(this.expectedRemoved);
    assertEquals(expectedAddedFiles, allAdded);
    assertEquals(expectedRetainedFiles, allRetained);
    assertEquals(expectedRemovedFiles, allRemoved);
}
Also used : Path(java.nio.file.Path) TreeSet(java.util.TreeSet) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Test(org.junit.Test)

Aggregations

DirDiff (org.apache.samza.storage.blobstore.diff.DirDiff)15 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)14 File (java.io.File)13 ArrayList (java.util.ArrayList)12 Path (java.nio.file.Path)11 HashMap (java.util.HashMap)11 TreeSet (java.util.TreeSet)11 CompletableFuture (java.util.concurrent.CompletableFuture)11 Pair (org.apache.commons.lang3.tuple.Pair)11 SamzaException (org.apache.samza.SamzaException)11 SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex)11 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)11 Test (org.junit.Test)11 ImmutableMap (com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)10 Files (java.nio.file.Files)10 Collections (java.util.Collections)10 List (java.util.List)10 Map (java.util.Map)10 Optional (java.util.Optional)10