use of org.apache.samza.storage.blobstore.index.FileMetadata in project samza by apache.
the class BlobStoreUtil method putFile.
/**
 * Upload a File to blob store.
 * <p>
 * The file is streamed to the blob store through a CRC32-checked input stream; the resulting checksum is
 * recorded in the returned {@link FileIndex}. Each invocation of the upload action opens its own input
 * stream, and that stream is closed once the put completes, whether it succeeded or failed.
 *
 * @param file File to upload to blob store. Must be non-null and satisfy {@link File#isFile()}.
 * @param snapshotMetadata supplies the job name, job id, task name and store name that are recorded in
 *                         the blob {@link Metadata} sent along with the upload.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 * @throws SamzaException if {@code file} is null or {@link File#isFile()} returns false for it.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
  if (file == null || !file.isFile()) {
    String message = file != null ? "Dir or Symbolic link" : "null";
    throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
  }
  long putFileStartTime = System.nanoTime();

  String opName = "putFile: " + file.getAbsolutePath();
  Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
    LOG.debug("Putting file: {} to blob store.", file.getPath());
    CompletableFuture<FileIndex> fileBlobFuture;
    CheckedInputStream inputStream = null;
    try {
      // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
      inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
      // Effectively-final alias so that the async callbacks below can capture the stream.
      CheckedInputStream finalInputStream = inputStream;
      FileMetadata fileMetadata = FileMetadata.fromFile(file);
      if (backupMetrics != null) {
        backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
      }

      Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()),
          snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(),
          snapshotMetadata.getStoreName());

      fileBlobFuture = blobStoreManager.put(inputStream, metadata)
          .thenApplyAsync(id -> {
            LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
            try {
              finalInputStream.close();
            } catch (Exception e) {
              throw new SamzaException(
                  String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
            }

            LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
            // The checksum accumulated while the blob store consumed the stream is still readable after close.
            return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata,
                finalInputStream.getChecksum().getValue());
          }, executor)
          .whenComplete((fileIndex, error) -> {
            // The success path above closes the stream itself. When the put (or the stage above) fails,
            // that close may never have run, so release the file handle here. Closing twice is harmless.
            if (error != null) {
              try {
                finalInputStream.close();
              } catch (Exception closeError) {
                LOG.error("Error closing input stream for file: {}", file.getName(), closeError);
              }
            }
          })
          .toCompletableFuture();
    } catch (Exception e) {
      // Synchronous failure while opening the file or initiating the put: best-effort close, then rethrow.
      try {
        if (inputStream != null) {
          inputStream.close();
        }
      } catch (Exception err) {
        LOG.error("Error closing input stream for file: {}", file.getName(), err);
      }
      LOG.error("Error putting file: {}", file.getName(), e);
      throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
    }
    return fileBlobFuture;
  };

  return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor)
      .whenComplete((res, ex) -> {
        // NOTE: these metrics are updated on completion regardless of whether the upload succeeded.
        if (backupMetrics != null) {
          backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);

          long fileSize = file.length();
          backupMetrics.uploadRate.inc(fileSize);
          backupMetrics.filesUploaded.getValue().addAndGet(1);
          backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
          backupMetrics.filesRemaining.getValue().addAndGet(-1);
          backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
      });
}
use of org.apache.samza.storage.blobstore.index.FileMetadata in project samza by apache.
the class BlobStoreTestUtil method createFileIndex.
/**
 * Builds a {@link FileIndex} for the local file at {@code filePath}.
 * <p>
 * The index carries the CRC32 checksum of the file's full contents and the {@link FileMetadata} read
 * from the file. It contains a single {@link FileBlob} at offset 0 whose blob id is {@code node.fileName}.
 *
 * @param filePath path of the local file to read.
 * @param node directory tree node whose {@code fileName} is used as both the file name and the blob id.
 * @return the file index describing the local file.
 * @throws RuntimeException wrapping any exception raised while resolving or reading the file.
 */
private static FileIndex createFileIndex(String filePath, DirTreeNode node) {
  final long crcValue;
  final FileMetadata localMetadata;
  try {
    Path localFile = Paths.get(filePath);
    byte[] contents = Files.readAllBytes(localFile);

    CRC32 crc = new CRC32();
    crc.update(contents, 0, contents.length);
    crcValue = crc.getValue();

    localMetadata = FileMetadata.fromFile(localFile.toFile());
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  return new FileIndex(
      node.fileName,
      ImmutableList.of(new FileBlob(node.fileName, 0)),
      localMetadata,
      crcValue);
}
use of org.apache.samza.storage.blobstore.index.FileMetadata in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirRestoresMultiPartFilesCorrectly.
/**
 * Restores a directory whose single file ("1.sst") is split across 26 blobs and checks that the restored
 * directory is reported as the same as the mocked directory index.
 */
@Test
public void testRestoreDirRestoresMultiPartFilesCorrectly() throws IOException {
  final int blobCount = 26;
  Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);

  // Remote file == 26 blobs: blob ids 'a'..'z', blob contents 'a'..'z', offsets 0..25.
  DirIndex dirIndex = mock(DirIndex.class);
  when(dirIndex.getDirName()).thenReturn(DirIndex.ROOT_DIR_NAME);
  FileIndex fileIndex = mock(FileIndex.class);
  when(fileIndex.getFileName()).thenReturn("1.sst");

  // Create a throwaway file only to learn the current user/group/permissions, so that the mocked
  // file attributes match those of the restored files.
  File attrProbe = Paths.get(restoreDirBasePath.toString(), "tempfile-" + new Random().nextInt()).toFile();
  attrProbe.createNewFile();
  PosixFileAttributes probeAttrs = Files.readAttributes(attrProbe.toPath(), PosixFileAttributes.class);
  String ownerName = probeAttrs.owner().getName();
  String groupName = probeAttrs.group().getName();
  String permissions = PosixFilePermissions.toString(probeAttrs.permissions());
  // ctime / mtime values do not matter here. size == 26.
  FileMetadata fileMetadata = new FileMetadata(1234L, 1243L, blobCount, ownerName, groupName, permissions);
  when(fileIndex.getFileMetadata()).thenReturn(fileMetadata);
  // Delete the probe so that it doesn't show up in the restored dir contents.
  Files.delete(attrProbe.toPath());

  List<FileBlob> blobs = new ArrayList<>();
  StringBuilder expectedContents = new StringBuilder();
  for (int offset = 0; offset < blobCount; offset++) {
    // blob contents == blob id
    String letter = String.valueOf((char) ('a' + offset));
    expectedContents.append(letter);

    FileBlob blob = mock(FileBlob.class);
    when(blob.getBlobId()).thenReturn(letter);
    when(blob.getOffset()).thenReturn(offset);
    blobs.add(blob);
  }
  when(fileIndex.getBlobs()).thenReturn(blobs);

  CRC32 expectedCrc = new CRC32();
  expectedCrc.update(expectedContents.toString().getBytes());
  when(fileIndex.getChecksum()).thenReturn(expectedCrc.getValue());
  when(dirIndex.getFilesPresent()).thenReturn(ImmutableList.of(fileIndex));

  // Stubbed blob store: every get() writes the requested blob id into the supplied stream and syncs it.
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  when(blobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      .thenAnswer((Answer<CompletionStage<Void>>) invocation -> {
        String requestedBlobId = invocation.getArgumentAt(0, String.class);
        OutputStream target = invocation.getArgumentAt(1, OutputStream.class);
        target.write(requestedBlobId.getBytes());
        ((FileOutputStream) target).getFD().sync();
        return CompletableFuture.completedFuture(null);
      });

  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), dirIndex, metadata).join();

  assertTrue(
      new DirDiffUtil().areSameDir(Collections.emptySet(), false).test(restoreDirBasePath.toFile(), dirIndex));
}
use of org.apache.samza.storage.blobstore.index.FileMetadata in project samza by apache.
the class TestBlobStoreUtil method testAreSameFile.
/**
 * Exercises {@code DirDiffUtil.areSameFile(false)} against local temp files and hand-built
 * {@link FileIndex} instances, for both ".sst" and non-sst files.
 */
@Test
public void testAreSameFile() throws IOException {
  FileUtil fileUtil = new FileUtil();

  // 1. sst file whose index carries the same attributes as the local file.
  Path sstPath = Files.createTempFile("samza-testAreSameFiles-", ".sst");
  PosixFileAttributes sstAttrs = Files.readAttributes(sstPath, PosixFileAttributes.class);
  FileMetadata sstMetadata = new FileMetadata(
      sstAttrs.creationTime().toMillis(),
      sstAttrs.lastModifiedTime().toMillis(),
      sstAttrs.size(),
      sstAttrs.owner().toString(),
      sstAttrs.group().toString(),
      PosixFilePermissions.toString(sstAttrs.permissions()));
  // The checksum should be ignored for sst files, so any dummy value will do.
  FileIndex sstIndex = new FileIndex(sstPath.getFileName().toString(), Collections.emptyList(), sstMetadata, 0L);
  assertTrue(DirDiffUtil.areSameFile(false).test(sstPath.toFile(), sstIndex));

  // 2. sst file with a different timestamp: bump the last modified time, still expected to match.
  Files.setLastModifiedTime(sstPath, FileTime.fromMillis(System.currentTimeMillis() + 1000L));
  assertTrue(DirDiffUtil.areSameFile(false).test(sstPath.toFile(), sstIndex));

  // 3. non-sst file with the same metadata and content.
  Path plainPath = Files.createTempFile("samza-testAreSameFiles-", ".tmp");
  fileUtil.writeToTextFile(plainPath.toFile(), RandomStringUtils.random(1000), false);
  PosixFileAttributes plainAttrs = Files.readAttributes(plainPath, PosixFileAttributes.class);
  FileMetadata plainMetadata = new FileMetadata(
      plainAttrs.creationTime().toMillis(),
      plainAttrs.lastModifiedTime().toMillis(),
      plainAttrs.size(),
      plainAttrs.owner().toString(),
      plainAttrs.group().toString(),
      PosixFilePermissions.toString(plainAttrs.permissions()));
  FileIndex plainIndex = new FileIndex(
      plainPath.getFileName().toString(),
      Collections.emptyList(),
      plainMetadata,
      FileUtils.checksumCRC32(plainPath.toFile()));
  assertTrue(DirDiffUtil.areSameFile(false).test(plainPath.toFile(), plainIndex));

  // 4. non-sst file with different attributes.
  // 4a. Change only the last modified time of the local file: still expected to match.
  FileTime originalLastModified = plainAttrs.lastModifiedTime();
  Files.setLastModifiedTime(plainPath, FileTime.fromMillis(System.currentTimeMillis() + 1000L));
  assertTrue(DirDiffUtil.areSameFile(false).test(plainPath.toFile(), plainIndex));

  // 4b. Change the content (and hence checksum) of the local file.
  // First reset the timestamp so that it matches the remote file again, then write new content.
  Files.setLastModifiedTime(plainPath, originalLastModified);
  fileUtil.writeToTextFile(plainPath.toFile(), RandomStringUtils.random(1000), false);
  assertFalse(DirDiffUtil.areSameFile(false).test(plainPath.toFile(), plainIndex));
}
use of org.apache.samza.storage.blobstore.index.FileMetadata in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirFailsRestoreOnNonRetriableExceptions.
/**
 * Checks that {@code restoreDir} fails with the original cause when the blob store's first {@code get}
 * completes exceptionally with an {@link IllegalArgumentException}, which this test treats as a
 * non-retriable error.
 */
@Test
public void testRestoreDirFailsRestoreOnNonRetriableExceptions() throws IOException {
  Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);

  DirIndex dirIndex = mock(DirIndex.class);
  when(dirIndex.getDirName()).thenReturn(DirIndex.ROOT_DIR_NAME);
  FileIndex fileIndex = mock(FileIndex.class);
  when(fileIndex.getFileName()).thenReturn("1.sst");

  // Create a throwaway file only to learn the current user/group/permissions, so that the mocked
  // file attributes match those of the restored files.
  File attrProbe = Paths.get(restoreDirBasePath.toString(), "tempfile-" + new Random().nextInt()).toFile();
  attrProbe.createNewFile();
  byte[] fileContents = "fileContents".getBytes();
  PosixFileAttributes probeAttrs = Files.readAttributes(attrProbe.toPath(), PosixFileAttributes.class);
  String ownerName = probeAttrs.owner().getName();
  String groupName = probeAttrs.group().getName();
  String permissions = PosixFilePermissions.toString(probeAttrs.permissions());
  // ctime / mtime values do not matter here. size == length of fileContents.
  FileMetadata fileMetadata =
      new FileMetadata(1234L, 1243L, fileContents.length, ownerName, groupName, permissions);
  when(fileIndex.getFileMetadata()).thenReturn(fileMetadata);
  // Delete the probe so that it doesn't show up in the restored dir contents.
  Files.delete(attrProbe.toPath());

  // The file is backed by a single blob at offset 0.
  FileBlob blob = mock(FileBlob.class);
  when(blob.getBlobId()).thenReturn("fileBlobId");
  when(blob.getOffset()).thenReturn(0);
  List<FileBlob> blobs = new ArrayList<>();
  blobs.add(blob);
  when(fileIndex.getBlobs()).thenReturn(blobs);

  CRC32 expectedCrc = new CRC32();
  expectedCrc.update(fileContents);
  when(fileIndex.getChecksum()).thenReturn(expectedCrc.getValue());
  when(dirIndex.getFilesPresent()).thenReturn(ImmutableList.of(fileIndex));

  // Stubbed blob store: the first get() fails with a non retriable error; any later get() would
  // write the file contents into the supplied stream and succeed.
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  when(blobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      .thenReturn(FutureUtil.failedFuture(new IllegalArgumentException()))
      .thenAnswer((Answer<CompletionStage<Void>>) invocation -> {
        // The blob id is read but not otherwise used by this stub.
        String blobId = invocation.getArgumentAt(0, String.class);
        OutputStream target = invocation.getArgumentAt(1, OutputStream.class);
        target.write(fileContents);
        ((FileOutputStream) target).getFD().sync();
        return CompletableFuture.completedFuture(null);
      });

  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  try {
    blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), dirIndex, metadata).join();
    fail("Should have failed on non-retriable errors during file restore");
  } catch (CompletionException e) {
    assertTrue(e.getCause() instanceof IllegalArgumentException);
  }
}
Aggregations