use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.
the class TestBlobStoreUtil method testPutFileChecksumAndMetadata.
/**
 * Verifies that {@code putFile} passes the file's absolute path and size to the blob store in the request
 * {@link Metadata}, and that the returned {@link FileIndex} carries the CRC32 checksum of the file contents.
 */
@Test
public void testPutFileChecksumAndMetadata() throws IOException, ExecutionException, InterruptedException {
  // Arrange: a temp file with random contents, and the checksum we expect to be reported for it.
  Path tempFile = Files.createTempFile("samza-testPutFileChecksum-", ".tmp");
  new FileUtil().writeToTextFile(tempFile.toFile(), RandomStringUtils.random(1000), false);
  long expectedChecksum = FileUtils.checksumCRC32(tempFile.toFile());
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);

  // Arrange: a blob store that drains the uploaded stream and records the metadata it was called with.
  ArgumentCaptor<Metadata> metadataCaptor = ArgumentCaptor.forClass(Metadata.class);
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  when(blobStoreManager.put(any(InputStream.class), metadataCaptor.capture()))
      .thenAnswer((Answer<CompletionStage<String>>) invocation -> {
        // consume the whole stream; presumably required for putFile to see the full contents -- TODO confirm
        IOUtils.copy(invocation.getArgumentAt(0, InputStream.class), NullOutputStream.NULL_OUTPUT_STREAM);
        return CompletableFuture.completedFuture("blobId");
      });

  // Act
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletableFuture<FileIndex> uploadFuture =
      blobStoreUtil.putFile(tempFile.toFile(), snapshotMetadata).toCompletableFuture();

  // The mock completes synchronously, so the composed future must already be done at this point.
  // If it is not, the future composition in putFile is broken.
  if (!uploadFuture.isDone()) {
    fail("Future returned from putFile should be already complete.");
  }
  FileIndex fileIndex = uploadFuture.get();

  // Assert
  Metadata capturedMetadata = metadataCaptor.getValue();
  assertEquals(tempFile.toAbsolutePath().toString(), capturedMetadata.getPayloadPath());
  assertEquals(tempFile.toFile().length(), Long.valueOf(capturedMetadata.getPayloadSize()).longValue());
  assertEquals(expectedChecksum, fileIndex.getChecksum());
}
use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.
the class TestBlobStoreUtil method testPutDirFailsIfAnySubDirFileUploadFails.
/**
 * Verifies that the future returned by {@code putDir} fails, with the original exception as its cause,
 * when the upload of any file in a sub-directory fails.
 */
@Test
public void testPutDirFailsIfAnySubDirFileUploadFails() throws IOException, TimeoutException, InterruptedException {
  // Local snapshot described as two sub-dir entries (a/1 and b/2); the remote snapshot is empty.
  String local = "[a/1, b/2]";
  String remote = "[]";

  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  String basePath = localSnapshotDir.toAbsolutePath().toString();
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir,
      (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));

  // Set up mocks: uploads whose payload path ends with "1" succeed, every other upload fails.
  SamzaException exception = new SamzaException("Error uploading file");
  CompletableFuture<String> failedFuture = new CompletableFuture<>();
  failedFuture.completeExceptionally(exception);
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class)))
      .thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        String uploadedPath = invocation.getArgumentAt(1, Metadata.class).getPayloadPath();
        if (!uploadedPath.endsWith("1")) {
          return failedFuture;
        }
        return CompletableFuture.completedFuture("a1BlobId");
      });

  // Execute
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);

  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
    // reaching this line means the future completed normally, which is wrong for this scenario.
    fail("DirIndex future should have been completed with an exception");
  } catch (ExecutionException e) {
    // Assert that the result future fails and that the cause is propagated correctly
    assertEquals(exception, e.getCause());
  }
}
use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.
the class TestBlobStoreUtil method testRestoreDirCreatesCorrectDirectoryStructure.
/**
 * Verifies that {@code restoreDir} recreates, under a fresh local directory, the files and nested
 * sub-directories described by a {@link DirIndex}.
 */
@Test
public void testRestoreDirCreatesCorrectDirectoryStructure() throws IOException {
  // Snapshot to restore: top-level files plus files nested one and two directory levels deep.
  String prevSnapshotFiles = "[a, b, z/1, y/1, p/m/1, q/n/1]";
  DirIndex snapshotDirIndex = BlobStoreTestUtil.createDirIndex(prevSnapshotFiles);

  // Blob store stub: every get() writes the requested blob id into the target stream as the blob's contents.
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  when(blobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
      .thenAnswer((Answer<CompletionStage<Void>>) invocation -> {
        String requestedBlobId = invocation.getArgumentAt(0, String.class);
        OutputStream target = invocation.getArgumentAt(1, OutputStream.class);
        target.write(requestedBlobId.getBytes());
        return CompletableFuture.completedFuture(null);
      });

  // Restore into an empty temp directory and wait for the restore to finish.
  Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), snapshotDirIndex, metadata).join();

  // The restored directory must match the index it was restored from.
  assertTrue(
      new DirDiffUtil().areSameDir(Collections.emptySet(), false).test(restoreDirBasePath.toFile(), snapshotDirIndex));
}
use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.
the class BlobStoreUtil method getStoreSnapshotIndexes.
/**
 * Gets, for each requested store of the provided {@code taskName}, the blob id of its {@link SnapshotIndex}
 * recorded in the provided {@code checkpoint} together with the {@link SnapshotIndex} fetched for that blob id.
 *
 * <p>Returns an empty map if {@code checkpoint} is null or is a version 1 checkpoint. All fetches are started
 * first and the method then blocks once until they are all done. Failures whose unwrapped cause is a
 * {@link DeletedException} are logged and reported as ignorable to {@code FutureUtil.toFutureOfMap}.
 *
 * @param jobName job name is used to build request metadata
 * @param jobId job id is used to build request metadata
 * @param taskName task name to get the store state checkpoint markers and snapshot indexes for
 * @param checkpoint {@link Checkpoint} instance to get the store state checkpoint markers from. Only
 *                   {@link CheckpointV2} and newer are supported for blob stores.
 * @param storesToBackupOrRestore set of store names to be backed up or restored
 * @return Map of store name to a pair of (snapshot index blob id, snapshot index) for the task.
 * @throws SamzaException if a snapshot index request cannot be created, or if waiting for the results fails
 */
public Map<String, Pair<String, SnapshotIndex>> getStoreSnapshotIndexes(String jobName, String jobId, String taskName, Checkpoint checkpoint, Set<String> storesToBackupOrRestore) {
  // TODO MED shesharma document error handling (checkpoint ver, blob not found, getBlob)
  if (checkpoint == null) {
    LOG.debug("No previous checkpoint found for taskName: {}", taskName);
    return ImmutableMap.of();
  }

  if (checkpoint.getVersion() == 1) {
    LOG.warn("Checkpoint version 1 is not supported for blob store backup and restore.");
    return ImmutableMap.of();
  }

  CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
  // state checkpoint markers are keyed by state backend factory class name; pick the ones for this backend.
  Map<String, String> indexBlobIdsByStore =
      checkpointV2.getStateCheckpointMarkers().get(BlobStoreStateBackendFactory.class.getName());

  Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> pendingIndexes = new HashMap<>();
  if (indexBlobIdsByStore == null) {
    LOG.debug("No store SCMs found for blob store state backend in for taskName: {} in checkpoint {}", taskName, checkpointV2.getCheckpointId());
  } else {
    for (Map.Entry<String, String> entry : indexBlobIdsByStore.entrySet()) {
      String storeName = entry.getKey();
      String snapshotIndexBlobId = entry.getValue();

      if (!storesToBackupOrRestore.contains(storeName)) {
        LOG.debug("SnapshotIndex blob id {} for store {} is not present in the set of stores to be backed up/restores: {}", snapshotIndexBlobId, storeName, storesToBackupOrRestore);
        continue;
      }

      try {
        LOG.debug("Getting snapshot index for taskName: {} store: {} blobId: {}", taskName, storeName, snapshotIndexBlobId);
        Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
        CompletableFuture<SnapshotIndex> indexFuture = getSnapshotIndex(snapshotIndexBlobId, requestMetadata).toCompletableFuture();
        // pair the (already known) blob id with the pending index so both come back together.
        Pair<CompletableFuture<String>, CompletableFuture<SnapshotIndex>> blobIdAndIndex =
            Pair.of(CompletableFuture.completedFuture(snapshotIndexBlobId), indexFuture);
        // only record the future here; we block once for all stores below rather than per request.
        pendingIndexes.put(storeName, FutureUtil.toFutureOfPair(blobIdAndIndex));
      } catch (Exception e) {
        throw new SamzaException(String.format("Error getting SnapshotIndex for blobId: %s for taskName: %s store: %s", snapshotIndexBlobId, taskName, storeName), e);
      }
    }
  }

  try {
    return FutureUtil.toFutureOfMap(t -> {
      boolean alreadyDeleted = FutureUtil.unwrapExceptions(CompletionException.class, t) instanceof DeletedException;
      if (alreadyDeleted) {
        LOG.warn("Ignoring already deleted snapshot index for taskName: {}", taskName, t);
      }
      return alreadyDeleted;
    }, pendingIndexes).join();
  } catch (Exception e) {
    throw new SamzaException(String.format("Error while waiting to get store snapshot indexes for task %s", taskName), e);
  }
}
use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.
the class BlobStoreUtil method getFile.
/**
 * Gets a file from the blob store.
 *
 * <p>Any existing file at {@code fileToRestore} is deleted first. The blobs are then fetched one after the
 * other, in ascending offset order, and written to a single output stream for the file. On success the
 * contents are synced to disk and the stream is closed. If any stage fails, the stream is closed as well so
 * that the file handle is not leaked.
 *
 * @param fileBlobs List of {@link FileBlob}s that constitute this file.
 * @param fileToRestore File pointing to the local path where the file will be restored.
 * @param requestMetadata {@link Metadata} associated with this request
 * @return a future that completes when the file is downloaded and written or if an exception occurs.
 * @throws SamzaException if the local file cannot be prepared or the download chain cannot be set up.
 *                        Failures during the download itself complete the returned future exceptionally.
 */
@VisibleForTesting
CompletableFuture<Void> getFile(List<FileBlob> fileBlobs, File fileToRestore, Metadata requestMetadata) {
  FileOutputStream outputStream = null;
  try {
    long restoreFileStartTime = System.nanoTime();
    if (fileToRestore.exists()) {
      // delete the file if it already exists, e.g. from a previous retry.
      Files.delete(fileToRestore.toPath());
    }

    outputStream = new FileOutputStream(fileToRestore);
    final FileOutputStream finalOutputStream = outputStream;
    // TODO HIGH shesharm add integration tests to ensure empty files and directories are handled correctly E2E.
    // create file for 0 byte files (fileIndex entry but no fileBlobs).
    fileToRestore.createNewFile();

    // create a copy to ensure list being sorted is mutable.
    List<FileBlob> fileBlobsCopy = new ArrayList<>(fileBlobs);
    // sort by offset.
    fileBlobsCopy.sort(Comparator.comparingInt(FileBlob::getOffset));

    // chain the futures such that write to file for blobs is sequential.
    // can be optimized to write concurrently to the file later.
    CompletableFuture<Void> resultFuture = CompletableFuture.completedFuture(null);
    for (FileBlob fileBlob : fileBlobsCopy) {
      resultFuture = resultFuture.thenComposeAsync(v -> {
        LOG.debug("Starting restore for file: {} with blob id: {} at offset: {}", fileToRestore, fileBlob.getBlobId(), fileBlob.getOffset());
        return blobStoreManager.get(fileBlob.getBlobId(), finalOutputStream, requestMetadata);
      }, executor);
    }

    resultFuture = resultFuture.thenRunAsync(() -> {
      LOG.debug("Finished restore for file: {}. Closing output stream.", fileToRestore);
      try {
        // flush the file contents to disk
        finalOutputStream.getFD().sync();
        finalOutputStream.close();
      } catch (Exception e) {
        throw new SamzaException(String.format("Error closing output stream for file: %s", fileToRestore.getAbsolutePath()), e);
      }
    }, executor);

    // Side-effect callback only: the future returned to the caller is the one above, not the one
    // produced by whenComplete, so callers observe exactly the same completion as before.
    resultFuture.whenComplete((res, ex) -> {
      if (ex != null) {
        // The sync-and-close stage above is skipped when an earlier stage fails, and it does not reach
        // close() if sync() throws. Close here so a failed restore does not leak the file handle.
        // Closing an already closed FileOutputStream is harmless.
        try {
          finalOutputStream.close();
        } catch (Exception err) {
          LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), err);
        }
      }

      // NOTE(review): metrics are updated on both success and failure of the restore -- confirm this is intended.
      if (restoreMetrics != null) {
        restoreMetrics.avgFileRestoreNs.update(System.nanoTime() - restoreFileStartTime);

        long fileSize = requestMetadata.getPayloadSize();
        restoreMetrics.restoreRate.inc(fileSize);
        restoreMetrics.filesRestored.getValue().addAndGet(1);
        restoreMetrics.bytesRestored.getValue().addAndGet(fileSize);
        restoreMetrics.filesRemaining.getValue().addAndGet(-1);
        restoreMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
      }
    });

    return resultFuture;
  } catch (Exception exception) {
    // setting up the restore failed synchronously; release the stream (if it was opened) before rethrowing.
    try {
      if (outputStream != null) {
        outputStream.close();
      }
    } catch (Exception err) {
      LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), err);
    }

    throw new SamzaException(String.format("Error restoring file: %s in path: %s", fileToRestore.getName(), requestMetadata.getPayloadPath()), exception);
  }
}
Aggregations