use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreRestoreManager method testRestoreSkipsStoresWithMissingCheckpointSCM.
/**
 * Restore must skip a store that has no state checkpoint marker (SCM) in the previous checkpoint.
 * Scenario: a store was renamed from an old name to "newStoreName", so the previous checkpoint has
 * no entry for "newStoreName".
 */
@Test
public void testRestoreSkipsStoresWithMissingCheckpointSCM() {
  // job / task identity
  String jobName = "testJobName";
  String jobId = "testJobId";
  TaskName taskName = mock(TaskName.class);

  // only the new store name is present in config and in the metrics
  Set<String> storesToRestore = ImmutableSet.of("newStoreName");
  BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
  restoreMetrics.initStoreMetrics(ImmutableList.of("newStoreName"));

  // the previous checkpoint reports no snapshot index for the new store name
  SnapshotIndex snapshotIndex = mock(SnapshotIndex.class);
  Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = mock(Map.class);
  when(prevStoreSnapshotIndexes.containsKey("newStoreName")).thenReturn(false);

  // snapshot index stubbed for a different store name ("storeName")
  DirIndex dirIndex = mock(DirIndex.class);
  when(snapshotIndex.getDirIndex()).thenReturn(dirIndex);
  CheckpointId checkpointId = CheckpointId.create();
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, "jobName", "jobId", "taskName", "storeName");
  when(snapshotIndex.getSnapshotMetadata()).thenReturn(snapshotMetadata);

  // collaborators are plain mocks; none of them should be asked to restore anything
  Path loggedBaseDir = mock(Path.class);
  StorageConfig storageConfig = mock(StorageConfig.class);
  StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
  BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
  DirDiffUtil dirDiffUtil = mock(DirDiffUtil.class);
  File loggedBaseDirFile = loggedBaseDir.toFile();

  BlobStoreRestoreManager.restoreStores(jobName, jobId, taskName, storesToRestore, prevStoreSnapshotIndexes,
      loggedBaseDirFile, storageConfig, restoreMetrics, storageManagerUtil, blobStoreUtil, dirDiffUtil, EXECUTOR);

  // the previously checkpointed SCMs were consulted exactly once for the new store
  verify(prevStoreSnapshotIndexes, times(1)).containsKey(eq("newStoreName"));
  // and no directory restore was attempted
  verify(blobStoreUtil, times(0)).restoreDir(any(File.class), any(DirIndex.class), any(Metadata.class));
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreBackupManager method setupRemoteAndLocalSnapshots.
/**
 * Resets the in-memory test blob store and fills it with three snapshot fixtures.
 *
 * <p>For each fixture a local directory is created from its layout string, and a remote
 * {@link SnapshotIndex} with the same layout is stored under the key {@code "blobId-<i>"}.
 * When {@code addPrevCheckpoints} is true, an additional previous snapshot index (built from the
 * matching "previous remote" layout) is stored and linked from the new index.
 *
 * @param addPrevCheckpoints whether to create and link a previous snapshot index for each fixture
 * @return map from index blob id to a pair of (absolute local snapshot path, remote snapshot index)
 * @throws IOException declared for the local directory setup done via the test util
 */
private Map<String, Pair<String, SnapshotIndex>> setupRemoteAndLocalSnapshots(boolean addPrevCheckpoints) throws IOException {
  // start every setup from an empty blob store
  testBlobStore = new HashMap<>();
  Map<String, Pair<String, SnapshotIndex>> snapshotsByIndexBlobId = new HashMap<>();

  // layouts are paired by position: localLayouts.get(i) <-> prevRemoteLayouts.get(i)
  List<String> localLayouts = new ArrayList<>();
  List<String> prevRemoteLayouts = new ArrayList<>();
  localLayouts.add("[a, c, z/1, y/2, p/m/3, q/n/4]");
  prevRemoteLayouts.add("[a, b, z/1, x/5, p/m/3, r/o/6]");
  localLayouts.add("[a, c, z/1, y/1, p/m/1, q/n/1]");
  prevRemoteLayouts.add("[a, z/1, p/m/1]");
  localLayouts.add("[z/i/1, y/j/1]");
  prevRemoteLayouts.add("[z/i/1, x/k/1]");

  int idx = 0;
  while (idx < localLayouts.size()) {
    String localLayout = localLayouts.get(idx);
    Path localDir = BlobStoreTestUtil.createLocalDir(localLayout);
    String localDirPath = localDir.toAbsolutePath().toString();
    DirIndex currentDirIndex = BlobStoreTestUtil.createDirIndex(localLayout);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, localDirPath);

    // optionally register a previous snapshot index and remember its blob id
    Optional<String> prevIndexBlobId = Optional.empty();
    if (addPrevCheckpoints) {
      String prevBlobId = prevSnapshotIndexBlobId + "-" + idx;
      DirIndex prevDirIndex = BlobStoreTestUtil.createDirIndex(prevRemoteLayouts.get(idx));
      testBlobStore.put(prevBlobId, new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, prevDirIndex, Optional.empty()));
      prevIndexBlobId = Optional.of(prevBlobId);
    }

    // register the current snapshot index in both the result map and the blob store
    String indexBlobId = "blobId-" + idx;
    SnapshotIndex remoteSnapshotIndex = new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, currentDirIndex, prevIndexBlobId);
    snapshotsByIndexBlobId.put(indexBlobId, Pair.of(localDirPath, remoteSnapshotIndex));
    testBlobStore.put(indexBlobId, remoteSnapshotIndex);
    idx++;
  }
  return snapshotsByIndexBlobId;
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class BlobStoreBackupManager method upload.
/**
 * Starts an asynchronous upload of the checkpoint directory of every store in {@code storesToBackup}
 * to the blob store and returns the resulting per-store snapshot index blob ids.
 *
 * <p>For each store this method diffs the local checkpoint directory against the previous
 * {@link DirIndex} (or an empty one if there is no previous snapshot index for the store), uploads the
 * diff, then builds and uploads a new {@link SnapshotIndex} that points back to the previous snapshot
 * index blob id, if any. Upload gauges are reset at the start of every call and accumulated across
 * stores. Finally {@code prevStoreSnapshotIndexesFuture} is replaced with a future of the new
 * store name to (snapshot index blob id, snapshot index) mapping.
 *
 * @param checkpointId id of the checkpoint whose store checkpoint directories are uploaded
 * @param storeSCMs not read by this method
 * @return a future of a map from store name to the blob id returned by uploading its snapshot index
 * @throws SamzaException if setting up the upload for any store fails, including when the previous
 *     snapshot indexes future is not yet complete
 */
@Override
public CompletableFuture<Map<String, String>> upload(CheckpointId checkpointId, Map<String, String> storeSCMs) {
long uploadStartTime = System.nanoTime();
// reset gauges for each upload
metrics.filesToUpload.getValue().set(0L);
metrics.bytesToUpload.getValue().set(0L);
metrics.filesUploaded.getValue().set(0L);
metrics.bytesUploaded.getValue().set(0L);
metrics.filesRemaining.getValue().set(0L);
metrics.bytesRemaining.getValue().set(0L);
metrics.filesToRetain.getValue().set(0L);
metrics.bytesToRetain.getValue().set(0L);
// This map is used to atomically replace the prevStoreSnapshotIndexesFuture map at the end of the task commit
Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> storeToSCMAndSnapshotIndexPairFutures = new HashMap<>();
// This map is used to return serialized State Checkpoint Markers to the caller
Map<String, CompletableFuture<String>> storeToSerializedSCMFuture = new HashMap<>();
storesToBackup.forEach((storeName) -> {
long storeUploadStartTime = System.nanoTime();
try {
// metadata for the current store snapshot to upload
SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
// get the local store dir corresponding to the current checkpointId
File storeDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, storeName, taskModel.getTaskName(), taskModel.getTaskMode());
String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId);
File checkpointDir = new File(checkpointDirPath);
LOG.debug("Got task: {} store: {} storeDir: {} and checkpointDir: {}", taskName, storeName, storeDir, checkpointDir);
// guaranteed to be available since a new task commit may not start until the previous one is complete
// The zero timeout means this never blocks: if the future is not complete, get() throws and the
// exception is wrapped in the SamzaException below.
Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = prevStoreSnapshotIndexesFuture.get(0, TimeUnit.MILLISECONDS);
// get the previous store directory contents
DirIndex prevDirIndex;
if (prevStoreSnapshotIndexes.containsKey(storeName)) {
prevDirIndex = prevStoreSnapshotIndexes.get(storeName).getRight().getDirIndex();
} else {
// no previous SnapshotIndex means that this is the first commit for this store. Create an empty DirIndex.
prevDirIndex = new DirIndex(checkpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
}
long dirDiffStartTime = System.nanoTime();
// get the diff between previous and current store directories
DirDiff dirDiff = DirDiffUtil.getDirDiff(checkpointDir, prevDirIndex, DirDiffUtil.areSameFile(false));
metrics.storeDirDiffNs.get(storeName).update(System.nanoTime() - dirDiffStartTime);
DirDiff.Stats stats = DirDiff.getStats(dirDiff);
updateStoreDiffMetrics(storeName, stats);
// gauges were reset to 0 above, so these accumulate totals across all stores of this upload
metrics.filesToUpload.getValue().addAndGet(stats.filesAdded);
metrics.bytesToUpload.getValue().addAndGet(stats.bytesAdded);
// Note: FilesRemaining metric is set to FilesAdded in the beginning of the current upload and then counted down
// for each upload.
metrics.filesRemaining.getValue().addAndGet(stats.filesAdded);
metrics.bytesRemaining.getValue().addAndGet(stats.bytesAdded);
metrics.filesToRetain.getValue().addAndGet(stats.filesRetained);
metrics.bytesToRetain.getValue().addAndGet(stats.bytesRetained);
// upload the diff to the blob store and get the new directory index
CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
// once the directory upload completes, build the new SnapshotIndex, linking the previous index blob id if any
CompletionStage<SnapshotIndex> snapshotIndexFuture = dirIndexFuture.thenApplyAsync(dirIndex -> {
LOG.trace("Dir upload complete. Returning new SnapshotIndex for task: {} store: {}.", taskName, storeName);
Optional<String> prevSnapshotIndexBlobId = Optional.ofNullable(prevStoreSnapshotIndexes.get(storeName)).map(Pair::getLeft);
return new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevSnapshotIndexBlobId);
}, executor);
// upload the new snapshot index to the blob store and get its blob id
CompletionStage<String> snapshotIndexBlobIdFuture = snapshotIndexFuture.thenComposeAsync(si -> {
LOG.trace("Uploading Snapshot index for task: {} store: {}", taskName, storeName);
return blobStoreUtil.putSnapshotIndex(si);
}, executor);
// save store name and its SnapshotIndex blob id and SnapshotIndex pair. At the end of the upload, atomically
// update previous snapshot index map with this.
CompletableFuture<Pair<String, SnapshotIndex>> scmAndSnapshotIndexPairFuture = FutureUtil.toFutureOfPair(Pair.of(snapshotIndexBlobIdFuture.toCompletableFuture(), snapshotIndexFuture.toCompletableFuture()));
// record the per-store upload time when the pair future completes, whether normally or exceptionally
scmAndSnapshotIndexPairFuture.whenComplete((res, ex) -> {
long uploadTimeNs = System.nanoTime() - storeUploadStartTime;
metrics.storeUploadNs.get(storeName).update(uploadTimeNs);
});
storeToSCMAndSnapshotIndexPairFutures.put(storeName, scmAndSnapshotIndexPairFuture);
storeToSerializedSCMFuture.put(storeName, snapshotIndexBlobIdFuture.toCompletableFuture());
} catch (Exception e) {
throw new SamzaException(String.format("Error uploading store snapshot to blob store for task: %s, store: %s, checkpointId: %s", taskName, storeName, checkpointId), e);
}
});
// replace the previous storeName to snapshot index mapping with the new mapping.
this.prevStoreSnapshotIndexesFuture = FutureUtil.toFutureOfMap(storeToSCMAndSnapshotIndexPairFutures);
// record the overall upload time when the combined future completes, whether normally or exceptionally
return FutureUtil.toFutureOfMap(storeToSerializedSCMFuture).whenComplete((res, ex) -> metrics.uploadNs.update(System.nanoTime() - uploadStartTime));
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testCleanup.
/**
 * Verifies that {@code BlobStoreUtil.cleanUpDir} issues deletes for exactly the blobs of the files
 * that are present in the remote snapshot but removed locally (files, dirs and nested dirs).
 *
 * <p>The directory is first uploaded with {@code putDir} against a mocked {@link BlobStoreManager},
 * then {@code cleanUpDir} is invoked on the resulting {@link DirIndex} and the blob ids passed to
 * {@code BlobStoreManager.delete} are collected and compared with the expected set.
 */
@Test
public void testCleanup() throws IOException, ExecutionException, InterruptedException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  // File, dir and recursive dir added, retained and removed in local
  // Using unique file names since test util uses only the file name (leaf node)
  // as the mock blob id, not the full file path.
  String local = "[a, c, z/1, y/2, p/m/3, q/n/4]";
  String remote = "[a, b, z/1, x/5, p/m/3, r/o/6]";
  String expectedRemoved = "[b, 5, 6]";
  // keep only the last character (the file name).
  SortedSet<String> expectedRemovedFiles = BlobStoreTestUtil.getExpected(expectedRemoved);

  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir,
      (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture("blobId"));
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  DirIndex dirIndex = null;
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (TimeoutException e) {
    fail("Future returned from putDir should be already complete.");
  }

  // Set up mocks: record every blob id that cleanup asks the blob store to delete
  SortedSet<String> allDeleted = new TreeSet<>();
  when(blobStoreManager.delete(anyString(), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<Void>>) invocation -> {
    String blobId = invocation.getArgumentAt(0, String.class);
    allDeleted.add(blobId);
    return CompletableFuture.completedFuture(null);
  });

  // Execute
  CompletionStage<Void> cleanUpFuture = blobStoreUtil.cleanUpDir(dirIndex, metadata);
  try {
    // should be already complete. if not, future composition in cleanUpDir is broken.
    cleanUpFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (TimeoutException e) {
    // name the method actually under test so a failure points at cleanUpDir, not putDir
    fail("Future returned from cleanUpDir should be already complete.");
  }

  // Assert
  assertEquals(expectedRemovedFiles, allDeleted);
}
use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in project samza by apache.
the class TestBlobStoreUtil method testPutDirFailsIfAnyFileUploadFails.
/**
 * Verifies that the future returned by {@code BlobStoreUtil.putDir} completes exceptionally, with
 * the original exception as the cause, when the upload of any single file fails.
 *
 * <p>The mocked {@link BlobStoreManager} succeeds for the file whose payload path ends with "a" and
 * returns an already-failed future for every other file.
 */
@Test
public void testPutDirFailsIfAnyFileUploadFails() throws IOException, TimeoutException, InterruptedException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
  // Two files added locally, nothing in remote
  String local = "[a, b]";
  String remote = "[]";

  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir,
      (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));

  // Set up mocks: upload of "a" succeeds, every other upload fails with the same exception
  SamzaException exception = new SamzaException("Error uploading file");
  CompletableFuture<String> failedFuture = new CompletableFuture<>();
  failedFuture.completeExceptionally(exception);
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
    Metadata metadata = invocation.getArgumentAt(1, Metadata.class);
    String path = metadata.getPayloadPath();
    if (path.endsWith("a")) {
      return CompletableFuture.completedFuture("aBlobId");
    } else {
      return failedFuture;
    }
  });

  // Execute
  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (ExecutionException e) {
    Throwable cause = e.getCause();
    // Assert that the result future fails and that the cause is propagated correctly
    assertEquals(exception, cause);
    return;
  }
  fail("DirIndex future should have been completed with an exception");
}
Aggregations