use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirRestoresMultiPartFilesCorrectly.
/**
 * Restores a single file ("1.sst") that is split across 26 blobs and checks the restored directory
 * against the directory index. Blob ids are the letters a..z, every blob's payload is its own id,
 * and the blob offsets run from 0 to 25.
 */
@Test
public void testRestoreDirRestoresMultiPartFilesCorrectly() throws IOException {
  Path restoreRoot = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);

  // Create a short-lived probe file to learn the current user / group / permissions, so that the
  // mocked file attributes match those of the restored files. It is removed again right away so
  // that it doesn't show up in the restored dir contents.
  Path probePath = restoreRoot.resolve("tempfile-" + new Random().nextInt());
  probePath.toFile().createNewFile();
  PosixFileAttributes probeAttrs = Files.readAttributes(probePath, PosixFileAttributes.class);
  String ownerName = probeAttrs.owner().getName();
  String groupName = probeAttrs.group().getName();
  String permissions = PosixFilePermissions.toString(probeAttrs.permissions());
  Files.delete(probePath);

  // One mock blob per letter: blob id == blob contents == the letter, offset == position in alphabet.
  List<FileBlob> blobs = new ArrayList<>();
  StringBuilder expectedContents = new StringBuilder();
  for (char letter = 'a'; letter <= 'z'; letter++) {
    String id = String.valueOf(letter);
    FileBlob blob = mock(FileBlob.class);
    when(blob.getBlobId()).thenReturn(id);
    when(blob.getOffset()).thenReturn(letter - 'a');
    blobs.add(blob);
    expectedContents.append(letter);
  }

  CRC32 crc = new CRC32();
  crc.update(expectedContents.toString().getBytes());
  long expectedChecksum = crc.getValue();

  // ctime and mtime do not matter. size == 26.
  FileMetadata expectedMetadata = new FileMetadata(1234L, 1243L, 26, ownerName, groupName, permissions);

  FileIndex fileIndex = mock(FileIndex.class);
  when(fileIndex.getFileName()).thenReturn("1.sst");
  when(fileIndex.getFileMetadata()).thenReturn(expectedMetadata);
  when(fileIndex.getBlobs()).thenReturn(blobs);
  when(fileIndex.getChecksum()).thenReturn(expectedChecksum);

  DirIndex dirIndex = mock(DirIndex.class);
  when(dirIndex.getDirName()).thenReturn(DirIndex.ROOT_DIR_NAME);
  when(dirIndex.getFilesPresent()).thenReturn(ImmutableList.of(fileIndex));

  // The fake blob store writes the requested blob id into the supplied stream and syncs it to disk.
  BlobStoreManager blobStore = mock(BlobStoreManager.class);
  when(blobStore.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      .thenAnswer((Answer<CompletionStage<Void>>) invocation -> {
        String requestedId = invocation.getArgumentAt(0, String.class);
        OutputStream sink = invocation.getArgumentAt(1, OutputStream.class);
        sink.write(requestedId.getBytes());
        ((FileOutputStream) sink).getFD().sync();
        return CompletableFuture.completedFuture(null);
      });

  BlobStoreUtil util = new BlobStoreUtil(blobStore, EXECUTOR, null, null);
  util.restoreDir(restoreRoot.toFile(), dirIndex, metadata).join();

  boolean restoredDirMatchesIndex =
      new DirDiffUtil().areSameDir(Collections.emptySet(), false).test(restoreRoot.toFile(), dirIndex);
  assertTrue(restoredDirMatchesIndex);
}
use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
the class BlobStoreUtil method getFile.
/**
 * Gets a file from the blob store.
 *
 * <p>Any file already present at {@code fileToRestore} is deleted first. The blobs are then fetched one at a
 * time, in ascending offset order, and written to a single output stream for the file. The stream is closed
 * once the chain of downloads finishes, whether it finished normally or exceptionally.
 *
 * @param fileBlobs List of {@link FileBlob}s that constitute this file.
 * @param fileToRestore File pointing to the local path where the file will be restored.
 * @param requestMetadata {@link Metadata} associated with this request
 * @return a future that completes when the file is downloaded and written or if an exception occurs.
 * @throws SamzaException if the restore could not be set up, e.g. the existing file could not be deleted or
 *                        the output stream could not be opened.
 */
@VisibleForTesting
CompletableFuture<Void> getFile(List<FileBlob> fileBlobs, File fileToRestore, Metadata requestMetadata) {
  FileOutputStream outputStream = null;
  try {
    long restoreFileStartTime = System.nanoTime();
    if (fileToRestore.exists()) {
      // delete the file if it already exists, e.g. from a previous retry.
      Files.delete(fileToRestore.toPath());
    }
    outputStream = new FileOutputStream(fileToRestore);
    final FileOutputStream finalOutputStream = outputStream;
    // TODO HIGH shesharm add integration tests to ensure empty files and directories are handled correctly E2E.
    // create file for 0 byte files (fileIndex entry but no fileBlobs).
    fileToRestore.createNewFile();
    // create a copy to ensure list being sorted is mutable.
    List<FileBlob> fileBlobsCopy = new ArrayList<>(fileBlobs);
    // sort by offset.
    fileBlobsCopy.sort(Comparator.comparingInt(FileBlob::getOffset));
    // chain the futures such that write to file for blobs is sequential.
    // can be optimized to write concurrently to the file later.
    CompletableFuture<Void> resultFuture = CompletableFuture.completedFuture(null);
    for (FileBlob fileBlob : fileBlobsCopy) {
      resultFuture = resultFuture.thenComposeAsync(v -> {
        LOG.debug("Starting restore for file: {} with blob id: {} at offset: {}", fileToRestore, fileBlob.getBlobId(), fileBlob.getOffset());
        return blobStoreManager.get(fileBlob.getBlobId(), finalOutputStream, requestMetadata);
      }, executor);
    }
    // Close the stream on BOTH outcomes. A success-only stage would be skipped when a download fails,
    // leaving the file descriptor open (and a retry opens yet another stream for the same file).
    resultFuture = resultFuture.whenCompleteAsync((res, ex) -> {
      if (ex != null) {
        // The restore already failed: close on a best-effort basis so that the original failure is what
        // the returned future reports.
        try {
          finalOutputStream.close();
        } catch (Exception closeError) {
          LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), closeError);
        }
        return;
      }
      LOG.debug("Finished restore for file: {}. Closing output stream.", fileToRestore);
      // try-with-resources guarantees the stream is closed even if the sync fails.
      try (FileOutputStream stream = finalOutputStream) {
        // flush the file contents to disk
        stream.getFD().sync();
      } catch (Exception e) {
        throw new SamzaException(String.format("Error closing output stream for file: %s", fileToRestore.getAbsolutePath()), e);
      }
    }, executor);
    // NOTE(review): the metrics below are updated on both success and failure of the restore, since the
    // callback does not inspect the exception. Confirm this is intended.
    resultFuture.whenComplete((res, ex) -> {
      if (restoreMetrics != null) {
        restoreMetrics.avgFileRestoreNs.update(System.nanoTime() - restoreFileStartTime);
        long fileSize = requestMetadata.getPayloadSize();
        restoreMetrics.restoreRate.inc(fileSize);
        restoreMetrics.filesRestored.getValue().addAndGet(1);
        restoreMetrics.bytesRestored.getValue().addAndGet(fileSize);
        restoreMetrics.filesRemaining.getValue().addAndGet(-1);
        restoreMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
      }
    });
    return resultFuture;
  } catch (Exception exception) {
    // Setting up the restore failed synchronously: release the stream (if it was opened) before rethrowing.
    try {
      if (outputStream != null) {
        outputStream.close();
      }
    } catch (Exception err) {
      LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), err);
    }
    throw new SamzaException(String.format("Error restoring file: %s in path: %s", fileToRestore.getName(), requestMetadata.getPayloadPath()), exception);
  }
}
use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirFailsRestoreOnNonRetriableExceptions.
/**
 * Checks that restoring a directory fails when the blob store reports a non-retriable error.
 *
 * <p>The first {@code get} call on the mocked blob store returns a future failed with an
 * {@link IllegalArgumentException}; any later call would write the expected contents and succeed.
 * The test expects {@code restoreDir(...).join()} to throw a {@link CompletionException} whose cause is
 * that {@link IllegalArgumentException}.
 */
@Test
public void testRestoreDirFailsRestoreOnNonRetriableExceptions() throws IOException {
  Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  DirIndex mockDirIndex = mock(DirIndex.class);
  when(mockDirIndex.getDirName()).thenReturn(DirIndex.ROOT_DIR_NAME);
  FileIndex mockFileIndex = mock(FileIndex.class);
  when(mockFileIndex.getFileName()).thenReturn("1.sst");
  // setup mock file attributes. create a temp file to get current user/group/permissions so that they
  // match with restored files.
  File tmpFile = Paths.get(restoreDirBasePath.toString(), "tempfile-" + new Random().nextInt()).toFile();
  tmpFile.createNewFile();
  byte[] fileContents = "fileContents".getBytes();
  PosixFileAttributes attrs = Files.readAttributes(tmpFile.toPath(), PosixFileAttributes.class);
  // ctime and mtime do not matter. size == length of the file contents.
  FileMetadata fileMetadata = new FileMetadata(1234L, 1243L, fileContents.length,
      attrs.owner().getName(), attrs.group().getName(), PosixFilePermissions.toString(attrs.permissions()));
  when(mockFileIndex.getFileMetadata()).thenReturn(fileMetadata);
  // delete so that it doesn't show up in restored dir contents.
  Files.delete(tmpFile.toPath());
  List<FileBlob> mockFileBlobs = new ArrayList<>();
  FileBlob mockFileBlob = mock(FileBlob.class);
  when(mockFileBlob.getBlobId()).thenReturn("fileBlobId");
  when(mockFileBlob.getOffset()).thenReturn(0);
  mockFileBlobs.add(mockFileBlob);
  when(mockFileIndex.getBlobs()).thenReturn(mockFileBlobs);
  CRC32 checksum = new CRC32();
  checksum.update(fileContents);
  when(mockFileIndex.getChecksum()).thenReturn(checksum.getValue());
  when(mockDirIndex.getFilesPresent()).thenReturn(ImmutableList.of(mockFileIndex));
  BlobStoreManager mockBlobStoreManager = mock(BlobStoreManager.class);
  when(mockBlobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      // first call: non retriable error
      .thenReturn(FutureUtil.failedFuture(new IllegalArgumentException()))
      // any further call would succeed, so an unexpected retry makes the restore pass and trips fail() below.
      .thenAnswer((Answer<CompletionStage<Void>>) invocationOnMock -> {
        OutputStream outputStream = invocationOnMock.getArgumentAt(1, OutputStream.class);
        outputStream.write(fileContents);
        ((FileOutputStream) outputStream).getFD().sync();
        return CompletableFuture.completedFuture(null);
      });
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(mockBlobStoreManager, EXECUTOR, null, null);
  try {
    blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), mockDirIndex, metadata).join();
    fail("Should have failed on non-retriable errors during file restore");
  } catch (CompletionException e) {
    assertTrue(e.getCause() instanceof IllegalArgumentException);
  }
}
use of org.apache.samza.storage.blobstore.index.FileBlob in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirRetriesFileRestoreOnRetriableExceptions.
/**
 * Checks that restoring a directory recovers from a retriable error during a file restore.
 *
 * <p>The first {@code get} call on the mocked blob store writes bad data to the output stream and returns
 * a future failed with a {@link RetriableException}; the second call writes the expected contents and
 * succeeds. The test expects {@code restoreDir(...).join()} to complete and the restored directory to
 * match the directory index.
 */
@Test
public void testRestoreDirRetriesFileRestoreOnRetriableExceptions() throws IOException {
  Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  DirIndex mockDirIndex = mock(DirIndex.class);
  when(mockDirIndex.getDirName()).thenReturn(DirIndex.ROOT_DIR_NAME);
  FileIndex mockFileIndex = mock(FileIndex.class);
  when(mockFileIndex.getFileName()).thenReturn("1.sst");
  // setup mock file attributes. create a temp file to get current user/group/permissions so that they
  // match with restored files.
  File tmpFile = Paths.get(restoreDirBasePath.toString(), "tempfile-" + new Random().nextInt()).toFile();
  tmpFile.createNewFile();
  byte[] fileContents = "fileContents".getBytes();
  PosixFileAttributes attrs = Files.readAttributes(tmpFile.toPath(), PosixFileAttributes.class);
  // ctime and mtime do not matter. size == length of the file contents.
  FileMetadata fileMetadata = new FileMetadata(1234L, 1243L, fileContents.length,
      attrs.owner().getName(), attrs.group().getName(), PosixFilePermissions.toString(attrs.permissions()));
  when(mockFileIndex.getFileMetadata()).thenReturn(fileMetadata);
  // delete so that it doesn't show up in restored dir contents.
  Files.delete(tmpFile.toPath());
  List<FileBlob> mockFileBlobs = new ArrayList<>();
  FileBlob mockFileBlob = mock(FileBlob.class);
  when(mockFileBlob.getBlobId()).thenReturn("fileBlobId");
  when(mockFileBlob.getOffset()).thenReturn(0);
  mockFileBlobs.add(mockFileBlob);
  when(mockFileIndex.getBlobs()).thenReturn(mockFileBlobs);
  CRC32 checksum = new CRC32();
  checksum.update(fileContents);
  when(mockFileIndex.getChecksum()).thenReturn(checksum.getValue());
  when(mockDirIndex.getFilesPresent()).thenReturn(ImmutableList.of(mockFileIndex));
  BlobStoreManager mockBlobStoreManager = mock(BlobStoreManager.class);
  when(mockBlobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      // first try: leave bad data in the file and fail with a retriable error
      .thenAnswer((Answer<CompletionStage<Void>>) invocationOnMock -> {
        OutputStream outputStream = invocationOnMock.getArgumentAt(1, OutputStream.class);
        outputStream.write("bad-data".getBytes());
        ((FileOutputStream) outputStream).getFD().sync();
        return FutureUtil.failedFuture(new RetriableException());
      })
      // 2nd try: write the expected contents and succeed
      .thenAnswer((Answer<CompletionStage<Void>>) invocationOnMock -> {
        OutputStream outputStream = invocationOnMock.getArgumentAt(1, OutputStream.class);
        outputStream.write(fileContents);
        ((FileOutputStream) outputStream).getFD().sync();
        return CompletableFuture.completedFuture(null);
      });
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(mockBlobStoreManager, EXECUTOR, null, null);
  blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), mockDirIndex, metadata).join();
  assertTrue(new DirDiffUtil().areSameDir(Collections.emptySet(), false).test(restoreDirBasePath.toFile(), mockDirIndex));
}
Aggregations