Search in sources :

Example 1 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class DirDiffUtil method getDirDiff.

/**
 * Recursively builds a {@link DirDiff} describing how a local snapshot directory differs from
 * the remote snapshot directory index.
 *
 * <p>Regular files directly under {@code localSnapshotDir} are categorized by the
 * {@code getNewFilesToUpload}, {@code getFilesToRetain} and {@code getFilesToRemove} helpers using
 * {@code areSameFile}. Sub-directories are matched by name only:
 * <ol>
 *   <li>present locally but not remotely: added via {@code getDiffForNewDir};</li>
 *   <li>present in both: compared recursively with this method;</li>
 *   <li>present remotely but not locally: marked for removal.</li>
 * </ol>
 *
 * @param localSnapshotDir local directory to compare; must be non-null and an existing directory
 * @param remoteSnapshotDir index of the remote directory to compare against; must be non-null
 * @param areSameFile predicate deciding whether a local file and a remote file index are the same file
 * @param isRootDir if true the resulting diff is named {@code DirIndex.ROOT_DIR_NAME}, otherwise it
 *                  is named after {@code localSnapshotDir}
 * @return the diff for this directory, including nested diffs for its sub-directories
 * @throws NullPointerException if the contents of {@code localSnapshotDir} cannot be listed
 */
private static DirDiff getDirDiff(File localSnapshotDir, DirIndex remoteSnapshotDir, BiPredicate<File, FileIndex> areSameFile, boolean isRootDir) {
    Preconditions.checkState(localSnapshotDir != null && localSnapshotDir.isDirectory());
    Preconditions.checkNotNull(remoteSnapshotDir);
    LOG.debug("Creating DirDiff between local dir: {} and remote dir: {}", localSnapshotDir.getPath(), remoteSnapshotDir.getDirName());
    List<DirDiff> subDirsAdded = new ArrayList<>();
    List<DirDiff> subDirsRetained = new ArrayList<>();
    List<DirIndex> subDirsRemoved = new ArrayList<>();
    // listFiles returns an empty array for an empty directory, but null if the directory
    // cannot be read (I/O error); fail with a descriptive message in the latter case.
    List<File> localSnapshotFiles = Arrays.asList(Objects.requireNonNull(localSnapshotDir.listFiles(File::isFile), "Unable to list files in local snapshot dir: " + localSnapshotDir.getPath()));
    List<FileIndex> remoteSnapshotFiles = remoteSnapshotDir.getFilesPresent();
    // same null-vs-empty contract as above, this time for sub-directories
    List<File> localSnapshotSubDirs = Arrays.asList(Objects.requireNonNull(localSnapshotDir.listFiles(File::isDirectory), "Unable to list sub-directories in local snapshot dir: " + localSnapshotDir.getPath()));
    Set<String> localSnapshotSubDirNames = localSnapshotSubDirs.stream().map(File::getName).collect(Collectors.toCollection(HashSet::new));
    List<DirIndex> remoteSnapshotSubDirs = remoteSnapshotDir.getSubDirsPresent();
    Set<String> remoteSnapshotSubDirNames = remoteSnapshotSubDirs.stream().map(DirIndex::getDirName).collect(Collectors.toCollection(HashSet::new));
    // TODO MED shesharm: this compares each file in directory 3 times. Categorize files in one traversal instead.
    List<File> filesToUpload = getNewFilesToUpload(remoteSnapshotFiles, localSnapshotFiles, areSameFile);
    List<FileIndex> filesToRetain = getFilesToRetain(remoteSnapshotFiles, localSnapshotFiles, areSameFile);
    List<FileIndex> filesToRemove = getFilesToRemove(remoteSnapshotFiles, localSnapshotFiles, areSameFile);
    for (File localSnapshotSubDir : localSnapshotSubDirs) {
        String localSubDirName = localSnapshotSubDir.getName();
        if (!remoteSnapshotSubDirNames.contains(localSubDirName)) {
            // 1. Subdir in local snapshot but not in remote snapshot
            LOG.debug("Subdir {} present in local snapshot but not in remote snapshot. " + "Recursively adding subdir contents.", localSnapshotSubDir.getPath());
            subDirsAdded.add(getDiffForNewDir(localSnapshotSubDir));
        } else {
            // 2. Subdir in both local and remote snapshots
            LOG.debug("Subdir {} present in local snapshot and in remote snapshot. " + "Recursively comparing local and remote subdirs.", localSnapshotSubDir.getPath());
            // The name was found in remoteSnapshotSubDirNames, so a match must exist; fail loudly
            // with context instead of a bare NoSuchElementException if that invariant is ever broken.
            DirIndex remoteSubDirIndex = remoteSnapshotSubDirs.stream()
                .filter(indexBlob -> indexBlob.getDirName().equals(localSubDirName))
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("No remote subdir index found for subdir: " + localSubDirName));
            subDirsRetained.add(getDirDiff(localSnapshotSubDir, remoteSubDirIndex, areSameFile, false));
        }
    }
    // 3. Subdir in remote snapshot but not in local snapshot
    for (DirIndex remoteSnapshotSubDir : remoteSnapshotSubDirs) {
        if (!localSnapshotSubDirNames.contains(remoteSnapshotSubDir.getDirName())) {
            // log the subdir being removed, not its parent directory
            LOG.debug("Subdir {} present in remote snapshot but not in local snapshot. " + "Marking for removal from remote snapshot. ", remoteSnapshotSubDir.getDirName());
            subDirsRemoved.add(remoteSnapshotSubDir);
        }
    }
    String dirName = isRootDir ? DirIndex.ROOT_DIR_NAME : localSnapshotDir.getName();
    return new DirDiff(dirName, filesToUpload, filesToRetain, filesToRemove, subDirsAdded, subDirsRetained, subDirsRemoved);
}
Also used : FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) ArrayList(java.util.ArrayList) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File)

Example 2 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class BlobStoreBackupManager method upload.

/**
 * Starts an asynchronous upload of the checkpoint directory of every store in
 * {@code storesToBackup} to the blob store.
 *
 * <p>For each store this computes the {@link DirDiff} between the local checkpoint directory and
 * the previously uploaded directory index, uploads the diff, builds a new {@link SnapshotIndex}
 * and uploads it. Before returning, {@code prevStoreSnapshotIndexesFuture} is replaced with a
 * future of the new store name to (snapshot index blob id, snapshot index) mapping.
 *
 * @param checkpointId id of the checkpoint whose store directories are uploaded
 * @param storeSCMs not read by this method body
 * @return a future of store name to snapshot index blob id (the serialized state checkpoint
 *         marker); {@code metrics.uploadNs} is updated when it completes
 * @throws SamzaException if setting up the upload for any store fails, including when the
 *         previous snapshot indexes future is not already complete
 */
@Override
public CompletableFuture<Map<String, String>> upload(CheckpointId checkpointId, Map<String, String> storeSCMs) {
    long uploadStartTime = System.nanoTime();
    // reset gauges for each upload
    metrics.filesToUpload.getValue().set(0L);
    metrics.bytesToUpload.getValue().set(0L);
    metrics.filesUploaded.getValue().set(0L);
    metrics.bytesUploaded.getValue().set(0L);
    metrics.filesRemaining.getValue().set(0L);
    metrics.bytesRemaining.getValue().set(0L);
    metrics.filesToRetain.getValue().set(0L);
    metrics.bytesToRetain.getValue().set(0L);
    // This map is used to atomically replace the prevStoreSnapshotIndexesFuture map at the end of the task commit
    Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> storeToSCMAndSnapshotIndexPairFutures = new HashMap<>();
    // This map is used to return serialized State Checkpoint Markers to the caller
    Map<String, CompletableFuture<String>> storeToSerializedSCMFuture = new HashMap<>();
    storesToBackup.forEach((storeName) -> {
        long storeUploadStartTime = System.nanoTime();
        try {
            // metadata for the current store snapshot to upload
            SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
            // get the local store dir corresponding to the current checkpointId
            File storeDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, storeName, taskModel.getTaskName(), taskModel.getTaskMode());
            String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId);
            File checkpointDir = new File(checkpointDirPath);
            LOG.debug("Got task: {} store: {} storeDir: {} and checkpointDir: {}", taskName, storeName, storeDir, checkpointDir);
            // guaranteed to be available since a new task commit may not start until the previous one is complete
            // (zero timeout: if the future is not complete, the resulting exception is wrapped in a SamzaException below)
            Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = prevStoreSnapshotIndexesFuture.get(0, TimeUnit.MILLISECONDS);
            // get the previous store directory contents
            DirIndex prevDirIndex;
            if (prevStoreSnapshotIndexes.containsKey(storeName)) {
                prevDirIndex = prevStoreSnapshotIndexes.get(storeName).getRight().getDirIndex();
            } else {
                // no previous SnapshotIndex means that this is the first commit for this store. Create an empty DirIndex.
                prevDirIndex = new DirIndex(checkpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
            }
            long dirDiffStartTime = System.nanoTime();
            // get the diff between previous and current store directories
            DirDiff dirDiff = DirDiffUtil.getDirDiff(checkpointDir, prevDirIndex, DirDiffUtil.areSameFile(false));
            metrics.storeDirDiffNs.get(storeName).update(System.nanoTime() - dirDiffStartTime);
            DirDiff.Stats stats = DirDiff.getStats(dirDiff);
            updateStoreDiffMetrics(storeName, stats);
            // the gauges were reset to 0 above, so these accumulate totals across all stores in this upload
            metrics.filesToUpload.getValue().addAndGet(stats.filesAdded);
            metrics.bytesToUpload.getValue().addAndGet(stats.bytesAdded);
            // Note: FilesRemaining metric is set to FilesAdded in the beginning of the current upload and then counted down
            // for each upload.
            metrics.filesRemaining.getValue().addAndGet(stats.filesAdded);
            metrics.bytesRemaining.getValue().addAndGet(stats.bytesAdded);
            metrics.filesToRetain.getValue().addAndGet(stats.filesRetained);
            metrics.bytesToRetain.getValue().addAndGet(stats.bytesRetained);
            // upload the diff to the blob store and get the new directory index
            CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
            // once the directory upload completes, build the new SnapshotIndex; it links to the
            // previous snapshot index blob id when one exists for this store
            CompletionStage<SnapshotIndex> snapshotIndexFuture = dirIndexFuture.thenApplyAsync(dirIndex -> {
                LOG.trace("Dir upload complete. Returning new SnapshotIndex for task: {} store: {}.", taskName, storeName);
                Optional<String> prevSnapshotIndexBlobId = Optional.ofNullable(prevStoreSnapshotIndexes.get(storeName)).map(Pair::getLeft);
                return new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevSnapshotIndexBlobId);
            }, executor);
            // upload the new snapshot index to the blob store and get its blob id
            CompletionStage<String> snapshotIndexBlobIdFuture = snapshotIndexFuture.thenComposeAsync(si -> {
                LOG.trace("Uploading Snapshot index for task: {} store: {}", taskName, storeName);
                return blobStoreUtil.putSnapshotIndex(si);
            }, executor);
            // save store name and its SnapshotIndex blob id and SnapshotIndex pair. At the end of the upload, atomically
            // update previous snapshot index map with this.
            CompletableFuture<Pair<String, SnapshotIndex>> scmAndSnapshotIndexPairFuture = FutureUtil.toFutureOfPair(Pair.of(snapshotIndexBlobIdFuture.toCompletableFuture(), snapshotIndexFuture.toCompletableFuture()));
            // record the per-store upload duration whether the upload succeeded or failed
            scmAndSnapshotIndexPairFuture.whenComplete((res, ex) -> {
                long uploadTimeNs = System.nanoTime() - storeUploadStartTime;
                metrics.storeUploadNs.get(storeName).update(uploadTimeNs);
            });
            storeToSCMAndSnapshotIndexPairFutures.put(storeName, scmAndSnapshotIndexPairFuture);
            storeToSerializedSCMFuture.put(storeName, snapshotIndexBlobIdFuture.toCompletableFuture());
        } catch (Exception e) {
            throw new SamzaException(String.format("Error uploading store snapshot to blob store for task: %s, store: %s, checkpointId: %s", taskName, storeName, checkpointId), e);
        }
    });
    // replace the previous storeName to snapshot index mapping with the new mapping.
    this.prevStoreSnapshotIndexesFuture = FutureUtil.toFutureOfMap(storeToSCMAndSnapshotIndexPairFutures);
    // record the overall upload duration when all per-store blob id futures have completed
    return FutureUtil.toFutureOfMap(storeToSerializedSCMFuture).whenComplete((res, ex) -> metrics.uploadNs.update(System.nanoTime() - uploadStartTime));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)

Example 3 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreUtil method testCleanup.

/**
 * Verifies that {@code cleanUpDir} deletes exactly the blobs of the files that exist in the
 * remote snapshot but no longer exist locally, including files inside removed sub-directories.
 */
@Test
public void testCleanup() throws IOException, ExecutionException, InterruptedException {
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    // File, dir and recursive dir added, retained and removed in local
    // Using unique file names since test util uses only the file name (leaf node)
    // as the mock blob id, not the full file path.
    String local = "[a, c, z/1, y/2, p/m/3, q/n/4]";
    String remote = "[a, b, z/1, x/5, p/m/3, r/o/6]";
    String expectedRemoved = "[b, 5, 6]";
    // keep only the last character (the file name).
    SortedSet<String> expectedRemovedFiles = BlobStoreTestUtil.getExpected(expectedRemoved);
    // Set up environment
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture("blobId"));
    CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
    DirIndex dirIndex = null;
    try {
        // should be already complete. if not, future composition in putDir is broken.
        dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from putDir should be already complete.");
    }
    // Set up mocks: record every blob id that cleanUpDir asks the blob store to delete
    SortedSet<String> allDeleted = new TreeSet<>();
    when(blobStoreManager.delete(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<Void>>) invocation -> {
        String blobId = invocation.getArgumentAt(0, String.class);
        allDeleted.add(blobId);
        return CompletableFuture.completedFuture(null);
    });
    // Execute
    CompletionStage<Void> cleanUpFuture = blobStoreUtil.cleanUpDir(dirIndex, metadata);
    try {
        // should be already complete. if not, future composition in cleanUpDir is broken.
        cleanUpFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from cleanUpDir should be already complete.");
    }
    // Assert
    assertEquals(expectedRemovedFiles, allDeleted);
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) TreeSet(java.util.TreeSet) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 4 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreUtil method testPutDirFailsIfAnyFileUploadFails.

/**
 * Verifies that the future returned by {@code putDir} completes exceptionally, with the original
 * exception as the cause, when the upload of any single file fails.
 */
@Test
public void testPutDirFailsIfAnyFileUploadFails() throws IOException, TimeoutException, InterruptedException {
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    // Two new local files and an empty remote snapshot, so both files need to be uploaded
    String local = "[a, b]";
    String remote = "[]";
    // Set up environment
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    // Set up mocks: the upload of file "a" succeeds, every other upload fails
    SamzaException exception = new SamzaException("Error uploading file");
    CompletableFuture<String> failedFuture = new CompletableFuture<>();
    failedFuture.completeExceptionally(exception);
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        Metadata metadata = invocation.getArgumentAt(1, Metadata.class);
        String path = metadata.getPayloadPath();
        if (path.endsWith("a")) {
            return CompletableFuture.completedFuture("aBlobId");
        } else {
            return failedFuture;
        }
    });
    // Execute
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
    try {
        // should be already complete. if not, future composition in putDir is broken.
        dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        // Assert that the result future fails and that the cause is propagated correctly
        assertEquals(exception, cause);
        return;
    }
    fail("DirIndex future should have been completed with an exception");
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 5 with DirDiff

use of org.apache.samza.storage.blobstore.diff.DirDiff in project samza by apache.

the class TestBlobStoreUtil method testCleanUpFailsIfAnyFileDeleteFails.

/**
 * Verifies that the future returned by {@code cleanUpDir} completes exceptionally, with the
 * original exception as the cause, when the deletion of any single blob fails.
 */
@Test
public void testCleanUpFailsIfAnyFileDeleteFails() throws IOException, TimeoutException, InterruptedException, ExecutionException {
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    // Local and remote snapshots share no file names, so both remote files (c, d) must be removed.
    // Using unique file names since test util uses only the file name (leaf node)
    // as the mock blob id, not the full file path.
    String local = "[a, b]";
    String remote = "[c, d]";
    // Set up environment
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture("blobId"));
    CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
    DirIndex dirIndex = null;
    try {
        // should be already complete. if not, future composition in putDir is broken.
        dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from putDir should be already complete.");
    }
    // Set up mocks: deleting blob "c" succeeds, every other delete fails
    SamzaException exception = new SamzaException("Error deleting file");
    CompletableFuture<Void> failedFuture = new CompletableFuture<>();
    failedFuture.completeExceptionally(exception);
    when(blobStoreManager.delete(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<Void>>) invocation -> {
        String blobId = invocation.getArgumentAt(0, String.class);
        if (blobId.equals("c")) {
            return CompletableFuture.completedFuture(null);
        } else {
            return failedFuture;
        }
    });
    // Execute
    CompletionStage<Void> cleanUpFuture = blobStoreUtil.cleanUpDir(dirIndex, metadata);
    try {
        // should be already complete. if not, future composition in cleanUpDir is broken.
        cleanUpFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (ExecutionException e) {
        Throwable cause = e.getCause();
        // Assert that the result future fails and that the cause is propagated correctly
        assertEquals(exception, cause);
        return;
    }
    fail("Clean up future should have been completed with an exception");
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Aggregations

DirDiff (org.apache.samza.storage.blobstore.diff.DirDiff): 15; DirIndex (org.apache.samza.storage.blobstore.index.DirIndex): 14; File (java.io.File): 13; ArrayList (java.util.ArrayList): 12; Path (java.nio.file.Path): 11; HashMap (java.util.HashMap): 11; TreeSet (java.util.TreeSet): 11; CompletableFuture (java.util.concurrent.CompletableFuture): 11; Pair (org.apache.commons.lang3.tuple.Pair): 11; SamzaException (org.apache.samza.SamzaException): 11; SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex): 11; SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata): 11; Test (org.junit.Test): 11; ImmutableMap (com.google.common.collect.ImmutableMap): 10; IOException (java.io.IOException): 10; Files (java.nio.file.Files): 10; Collections (java.util.Collections): 10; List (java.util.List): 10; Map (java.util.Map): 10; Optional (java.util.Optional): 10