use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testCleanUpFailsIfAnyFileDeleteFails.
/**
 * Verifies that the future returned by {@code cleanUpDir} completes exceptionally, with the original
 * exception as its cause, when the blob store fails to delete at least one blob.
 */
@Test
public void testCleanUpFailsIfAnyFileDeleteFails() throws IOException, TimeoutException, InterruptedException, ExecutionException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  // Files a and b exist only locally; files c and d exist only in the remote index.
  // Using unique file names since test util uses only the file name (leaf node)
  // as the mock blob id, not the full file path.
  String local = "[a, b]";
  String remote = "[c, d]";
  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  // Every upload succeeds so that putDir yields a DirIndex to clean up afterwards.
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture("blobId"));
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  DirIndex dirIndex = null;
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (TimeoutException e) {
    fail("Future returned from putDir should be already complete.");
  }
  // Set up mocks: deleting blob "c" succeeds, deleting any other blob fails with the exception below.
  SamzaException exception = new SamzaException("Error deleting file");
  CompletableFuture<Void> failedFuture = new CompletableFuture<>();
  failedFuture.completeExceptionally(exception);
  when(blobStoreManager.delete(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<Void>>) invocation -> {
    String blobId = invocation.getArgumentAt(0, String.class);
    if (blobId.equals("c")) {
      return CompletableFuture.completedFuture(null);
    } else {
      return failedFuture;
    }
  });
  // Execute
  CompletionStage<Void> cleanUpFuture = blobStoreUtil.cleanUpDir(dirIndex, metadata);
  try {
    // should be already complete. if not, future composition in cleanUpDir is broken.
    cleanUpFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (ExecutionException e) {
    Throwable cause = e.getCause();
    // Assert that the result future fails and that the cause is propagated correctly
    assertEquals(exception, cause);
    return;
  }
  // Reaching this point means the clean up future completed normally, which is the failure case.
  fail("Clean up future should have been completed with an exception");
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testPutFileChecksumAndMetadata.
/**
 * Verifies that {@code putFile} passes the file's absolute path and size to the blob store in the
 * {@link Metadata}, and that the returned {@link FileIndex} carries the CRC32 checksum of the file.
 */
@Test
public void testPutFileChecksumAndMetadata() throws IOException, ExecutionException, InterruptedException {
  // Setup
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  Path path = Files.createTempFile("samza-testPutFileChecksum-", ".tmp");
  try {
    FileUtil fileUtil = new FileUtil();
    fileUtil.writeToTextFile(path.toFile(), RandomStringUtils.random(1000), false);
    // Computed from the file on disk after the write, so it is independent of how the text was encoded.
    long expectedChecksum = FileUtils.checksumCRC32(path.toFile());
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    ArgumentCaptor<Metadata> argumentCaptor = ArgumentCaptor.forClass(Metadata.class);
    when(blobStoreManager.put(any(InputStream.class), argumentCaptor.capture())).thenAnswer((Answer<CompletionStage<String>>) invocation -> {
      // Drain the stream handed to the blob store before reporting the upload as complete.
      InputStream inputStream = invocation.getArgumentAt(0, InputStream.class);
      IOUtils.copy(inputStream, NullOutputStream.NULL_OUTPUT_STREAM);
      return CompletableFuture.completedFuture("blobId");
    });
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    CompletionStage<FileIndex> fileIndexFuture = blobStoreUtil.putFile(path.toFile(), snapshotMetadata);
    FileIndex fileIndex = null;
    try {
      // should be already complete. if not, future composition in putFile is broken.
      fileIndex = fileIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
      fail("Future returned from putFile should be already complete.");
    }
    // Assert
    Metadata metadata = argumentCaptor.getValue();
    assertEquals(path.toAbsolutePath().toString(), metadata.getPayloadPath());
    assertEquals(path.toFile().length(), Long.valueOf(metadata.getPayloadSize()).longValue());
    assertEquals(expectedChecksum, fileIndex.getChecksum());
  } finally {
    // Do not leave the temp file behind, whether the test passed or failed.
    Files.deleteIfExists(path);
  }
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testPutDirFailsIfAnySubDirFileUploadFails.
/**
 * Verifies that the future returned by {@code putDir} completes exceptionally, with the original
 * exception as its cause, when the upload of a file inside a sub-directory fails.
 */
@Test
public void testPutDirFailsIfAnySubDirFileUploadFails() throws IOException, TimeoutException, InterruptedException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  // Two sub-dirs, each holding one file, exist locally; the remote index is empty.
  String local = "[a/1, b/2]";
  String remote = "[]";
  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
  // Set up mocks: uploads whose payload path ends with "1" succeed, all other uploads fail.
  SamzaException exception = new SamzaException("Error uploading file");
  CompletableFuture<String> failedFuture = new CompletableFuture<>();
  failedFuture.completeExceptionally(exception);
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
    Metadata metadata = invocation.getArgumentAt(1, Metadata.class);
    String path = metadata.getPayloadPath();
    if (path.endsWith("1")) {
      return CompletableFuture.completedFuture("a1BlobId");
    } else {
      return failedFuture;
    }
  });
  // Execute
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (ExecutionException e) {
    Throwable cause = e.getCause();
    // Assert that the result future fails and that the cause is propagated correctly
    assertEquals(exception, cause);
    return;
  }
  // Reaching this point means the DirIndex future completed normally, which is the failure case.
  fail("DirIndex future should have been completed with an exception");
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class BlobStoreUtil method putDir.
/**
 * Recursively upload all new files and upload or update contents of all subdirs in the {@link DirDiff} and return a
 * Future containing the {@link DirIndex} associated with the directory.
 * @param dirDiff diff for the contents of this directory
 * @param snapshotMetadata metadata handed to every file upload and every recursive subdir upload; its task name
 *                         and store name are also used in the log messages
 * @return A future with the {@link DirIndex} if the upload completed successfully.
 */
public CompletionStage<DirIndex> putDir(DirDiff dirDiff, SnapshotMetadata snapshotMetadata) {
  // Upload all new files in the dir. Each stage is converted with toCompletableFuture() up front so the array
  // handed to allOf() is guaranteed to hold CompletableFutures; copying arbitrary CompletionStages into a
  // CompletableFuture[] would fail with an ArrayStoreException.
  List<CompletableFuture<FileIndex>> fileFutures = dirDiff.getFilesAdded().stream()
      .map(file -> putFile(file, snapshotMetadata).toCompletableFuture())
      .collect(Collectors.toList());
  CompletableFuture<Void> allFilesFuture = CompletableFuture.allOf(fileFutures.toArray(new CompletableFuture[0]));

  List<CompletableFuture<DirIndex>> subDirFutures = new ArrayList<>();
  // recursively upload all new subdirs of this dir
  for (DirDiff subDirAdded : dirDiff.getSubDirsAdded()) {
    subDirFutures.add(putDir(subDirAdded, snapshotMetadata).toCompletableFuture());
  }
  // recursively update contents of all subdirs that are retained but might have been modified
  for (DirDiff subDirRetained : dirDiff.getSubDirsRetained()) {
    subDirFutures.add(putDir(subDirRetained, snapshotMetadata).toCompletableFuture());
  }
  CompletableFuture<Void> allDirBlobsFuture = CompletableFuture.allOf(subDirFutures.toArray(new CompletableFuture[0]));

  return CompletableFuture.allOf(allDirBlobsFuture, allFilesFuture).thenApplyAsync(ignored -> {
    LOG.trace("All file and dir uploads complete for task: {} store: {}", snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
    // All futures have completed successfully by now, so join() returns immediately.
    // Collect into an explicit ArrayList: the list is appended to below, and Collectors.toList()
    // makes no guarantee that the list it returns is mutable.
    List<FileIndex> filesPresent = fileFutures.stream()
        .map(CompletableFuture::join)
        .collect(Collectors.toCollection(ArrayList::new));
    filesPresent.addAll(dirDiff.getFilesRetained());
    List<DirIndex> subDirsPresent = subDirFutures.stream()
        .map(CompletableFuture::join)
        .collect(Collectors.toList());
    LOG.debug("Uploaded diff for task: {} store: {} with statistics: {}", snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName(), DirDiff.getStats(dirDiff));
    LOG.trace("Returning new DirIndex for task: {} store: {}", snapshotMetadata.getTaskName(), snapshotMetadata.getStoreName());
    return new DirIndex(dirDiff.getDirName(), filesPresent, dirDiff.getFilesRemoved(), subDirsPresent, dirDiff.getSubDirsRemoved());
  }, executor);
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testPutDir.
/**
 * Verifies that {@code putDir} uploads exactly the files that were added locally, and that the returned
 * {@link DirIndex} reports the expected present (added plus retained) and removed files.
 */
// TODO HIGH shesharm test with empty (0 byte) files
@Test
public void testPutDir() throws IOException, InterruptedException, ExecutionException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  // File, dir and recursive dir added, retained and removed in local
  String local = "[a, c, z/1, y/1, p/m/1, q/n/1]";
  String remote = "[a, b, z/1, x/1, p/m/1, p/m/2, r/o/1]";
  String expectedAdded = "[c, y/1, q/n/1]";
  String expectedRetained = "[a, z/1, p/m/1]";
  String expectedRemoved = "[b, x/1, r/o/1, p/m/2]";
  SortedSet<String> expectedAddedFiles = BlobStoreTestUtil.getExpected(expectedAdded);
  SortedSet<String> expectedRetainedFiles = BlobStoreTestUtil.getExpected(expectedRetained);
  // Files expected to be present after the upload: everything added plus everything retained.
  SortedSet<String> expectedPresentFiles = new TreeSet<>(expectedAddedFiles);
  expectedPresentFiles.addAll(expectedRetainedFiles);
  SortedSet<String> expectedRemovedFiles = BlobStoreTestUtil.getExpected(expectedRemoved);
  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  String basePath = localSnapshotDir.toAbsolutePath().toString();
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir, (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
  SortedSet<String> allUploaded = new TreeSet<>();
  // Set up mocks
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
    Metadata metadata = invocation.getArgumentAt(1, Metadata.class);
    String path = metadata.getPayloadPath();
    // Record the uploaded path with the snapshot dir prefix and the following separator stripped.
    allUploaded.add(path.substring(basePath.length() + 1));
    return CompletableFuture.completedFuture(path);
  });
  // Execute
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  DirIndex dirIndex = null;
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (TimeoutException e) {
    fail("Future returned from putDir should be already complete.");
  }
  SortedSet<String> allPresent = new TreeSet<>();
  SortedSet<String> allRemoved = new TreeSet<>();
  BlobStoreTestUtil.getAllPresentInIndex("", dirIndex, allPresent);
  BlobStoreTestUtil.getAllRemovedInIndex("", dirIndex, allRemoved);
  // Assert
  assertEquals(expectedAddedFiles, allUploaded);
  assertEquals(expectedPresentFiles, allPresent);
  assertEquals(expectedRemovedFiles, allRemoved);
}
Aggregations