use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreUtil method testRemoveTTL.
/**
 * Verifies that {@code blobStoreUtil.removeTTL(...)} requests TTL removal for the blobs of all
 * files present in the snapshot's {@code DirIndex} and for the snapshot index blob itself.
 *
 * <p>The mocked {@code BlobStoreManager.put} answers with the last character of the payload path
 * as the blob id of an uploaded file. The mocked {@code BlobStoreManager.removeTTL} records every
 * blob id it is invoked with. The test expects the recorded ids to equal the last character of
 * every added and retained file name, plus {@code "snapshotIndexBlobId"}.
 *
 * @throws IOException declared for the local test directory setup
 * @throws ExecutionException if the future returned by {@code putDir} completed exceptionally
 * @throws InterruptedException if interrupted while reading the {@code putDir} result
 */
@Test
public void testRemoveTTL() throws IOException, ExecutionException, InterruptedException {
  BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);

  // File, dir and recursive dir added, retained and removed in local
  // Using unique file names since test setup returns it as the blob id
  String local = "[a, c, z/1, y/2, p/m/3, q/n/4]";
  String remote = "[a, b, z/1, x/5, p/m/3, r/o/6]";
  String expectedAdded = "[c, y/2, q/n/4]";
  String expectedRetained = "[a, z/1, p/m/3]";

  SortedSet<String> expectedAddedFiles = BlobStoreTestUtil.getExpected(expectedAdded);
  SortedSet<String> expectedRetainedFiles = BlobStoreTestUtil.getExpected(expectedRetained);
  // files expected to be present in the uploaded dir index: added + retained
  SortedSet<String> expectedPresentFiles = new TreeSet<>(expectedAddedFiles);
  expectedPresentFiles.addAll(expectedRetainedFiles);

  // Set up environment
  Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
  DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
  // files are considered the same when their names match
  DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir,
      (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));

  // mock: put() returns the last character of the payload path as the blob id
  when(blobStoreManager.put(any(InputStream.class), any(Metadata.class)))
      .thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        Metadata putMetadata = invocation.getArgumentAt(1, Metadata.class);
        String path = putMetadata.getPayloadPath();
        String fileName = path.substring(path.length() - 1);
        return CompletableFuture.completedFuture(fileName);
      });

  BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
  CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
  DirIndex dirIndex = null;
  try {
    // should be already complete. if not, future composition in putDir is broken.
    dirIndex = dirIndexFuture.toCompletableFuture().get(0, TimeUnit.MILLISECONDS);
  } catch (TimeoutException e) {
    fail("Future returned from putDir should be already complete.");
  }

  SnapshotIndex mockSnapshotIndex = mock(SnapshotIndex.class);
  when(mockSnapshotIndex.getSnapshotMetadata()).thenReturn(snapshotMetadata);
  when(mockSnapshotIndex.getDirIndex()).thenReturn(dirIndex);

  // mock: removeTTL() records each blob id it is asked to remove the TTL for
  SortedSet<String> allTTLRemoved = new TreeSet<>();
  when(blobStoreManager.removeTTL(anyString(), any(Metadata.class)))
      .thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        String blobId = invocation.getArgumentAt(0, String.class);
        allTTLRemoved.add(blobId);
        return CompletableFuture.completedFuture(null);
      });

  // Execute
  blobStoreUtil.removeTTL("snapshotIndexBlobId", mockSnapshotIndex, metadata);

  // Assert
  SortedSet<String> expectedBlobIds = new TreeSet<>();
  // test uses unique file name (last char) as the blob ID.
  expectedPresentFiles.forEach(f -> expectedBlobIds.add(f.substring(f.length() - 1)));
  expectedBlobIds.add("snapshotIndexBlobId");
  assertEquals(expectedBlobIds, allTTLRemoved);
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreBackupManager method testUploadWithPreviousCheckpoints.
/**
 * Verifies {@code blobStoreBackupManager.upload(...)} after the backup manager has been
 * initialized with a checkpoint that carries previous snapshot index blob ids.
 *
 * <p>{@code putDir} and {@code putSnapshotIndex} on the mocked {@code blobStoreUtil} are stubbed
 * to record their arguments. After the upload the test checks that the recorded dir diffs, the
 * previous snapshot index blob ids found in the uploaded snapshot indexes, and the uploaded
 * snapshot indexes all match the values derived from
 * {@code indexBlobIdAndLocalRemoteSnapshotsPair}.
 *
 * <p>Temporary checkpoint directories created while stubbing are deleted in a {@code finally}
 * block so that they are removed even when an assertion fails.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testUploadWithPreviousCheckpoints() throws IOException {
  // Track directory for post cleanup
  List<String> checkpointDirsToClean = new ArrayList<>();
  try {
    // Setup: init backup manager with previous checkpoints
    // NOTE(review): the explicit setup call below is disabled, so this test presumably relies on
    // indexBlobIdAndLocalRemoteSnapshotsPair being populated by shared test setup - confirm.
    // indexBlobIdAndLocalRemoteSnapshotsPair = setupRemoteAndLocalSnapshots(true);
    // map store name, previous snapshot index blob id
    Map<String, String> previousCheckpoints = indexBlobIdAndLocalRemoteSnapshotsPair.entrySet().stream()
        .collect(Collectors.toMap(
            e -> e.getValue().getLeft(),
            e -> e.getValue().getRight().getPrevSnapshotIndexBlobId().get()));
    Checkpoint checkpoint = new CheckpointV2(checkpointId, new HashMap<>(),
        ImmutableMap.of(BlobStoreStateBackendFactory.class.getName(), previousCheckpoints));
    when(blobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class),
        anySetOf(String.class))).thenCallRealMethod();
    blobStoreBackupManager.init(checkpoint);

    // mock: set task store dir to return corresponding test local store and create checkpoint dir
    ArgumentCaptor<String> stringCaptor = ArgumentCaptor.forClass(String.class);
    when(storageManagerUtil.getTaskStoreDir(any(File.class), stringCaptor.capture(), any(TaskName.class),
        any(TaskMode.class))).then((Answer<File>) invocation -> {
          String storeName = invocation.getArgumentAt(1, String.class);
          String snapshotIndexBlobId = testStoreNameAndSCMMap.get(storeName);
          String storeDir = indexBlobIdAndLocalRemoteSnapshotsPair.get(snapshotIndexBlobId).getLeft();
          try {
            BlobStoreTestUtil.createTestCheckpointDirectory(storeDir, checkpointId.serialize());
            checkpointDirsToClean.add(storeDir + "-" + checkpointId.serialize());
          } catch (IOException e) {
            Assert.fail("Couldn't create checkpoint directory. Test failed.");
          }
          return new File(storeDir);
        });

    // mock: checkpoint dir path is "<store dir absolute path>-<serialized checkpoint id>"
    ArgumentCaptor<File> storeDirCaptor = ArgumentCaptor.forClass(File.class);
    when(storageManagerUtil.getStoreCheckpointDir(storeDirCaptor.capture(), eq(checkpointId)))
        .thenAnswer((Answer<String>) invocation -> {
          File storeDir = invocation.getArgumentAt(0, File.class);
          return storeDir.getAbsolutePath() + "-" + checkpointId.serialize();
        });

    // mock: mock putDir and capture DirDiff
    SortedSet<DirDiff> actualDirDiffs = new TreeSet<>(Comparator.comparing(DirDiff::getDirName));
    ArgumentCaptor<DirDiff> dirDiffCaptor = ArgumentCaptor.forClass(DirDiff.class);
    ArgumentCaptor<SnapshotMetadata> snapshotMetadataCaptor = ArgumentCaptor.forClass(SnapshotMetadata.class);
    when(blobStoreUtil.putDir(dirDiffCaptor.capture(), snapshotMetadataCaptor.capture()))
        .then((Answer<CompletableFuture<DirIndex>>) invocation -> {
          DirDiff dirDiff = invocation.getArgumentAt(0, DirDiff.class);
          SnapshotMetadata snapshotMetadata = invocation.getArgumentAt(1, SnapshotMetadata.class);
          actualDirDiffs.add(dirDiff);
          SnapshotIndex snapshotIndex =
              testBlobStore.get(testStoreNameAndSCMMap.get(snapshotMetadata.getStoreName()));
          return CompletableFuture.completedFuture(snapshotIndex.getDirIndex());
        });

    // mock: mock putSnapshotIndex and capture previous snapshot index
    SortedSet<SnapshotIndex> expectedSnapshotIndexesUploaded =
        indexBlobIdAndLocalRemoteSnapshotsPair.values().stream()
            .map(Pair::getRight)
            .collect(Collectors.toCollection(
                () -> new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis))));
    SortedSet<SnapshotIndex> actualSnapshotIndexesUploaded =
        new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis));
    SortedSet<String> actualPreviousSnapshotIndexBlobIds = new TreeSet<>();
    SortedSet<String> expectedPreviousSnapshotIndexBlobIds = new TreeSet<>(previousCheckpoints.values());
    ArgumentCaptor<SnapshotIndex> snapshotIndexCaptor = ArgumentCaptor.forClass(SnapshotIndex.class);
    when(blobStoreUtil.putSnapshotIndex(snapshotIndexCaptor.capture()))
        .then((Answer<CompletableFuture<String>>) invocation -> {
          SnapshotIndex snapshotIndex = invocation.getArgumentAt(0, SnapshotIndex.class);
          actualSnapshotIndexesUploaded.add(snapshotIndex);
          if (snapshotIndex.getPrevSnapshotIndexBlobId().isPresent()) {
            actualPreviousSnapshotIndexBlobIds.add(snapshotIndex.getPrevSnapshotIndexBlobId().get());
          }
          return CompletableFuture.completedFuture("random-blob-id");
        });

    // execute
    blobStoreBackupManager.upload(checkpointId, ImmutableMap.of());

    // setup expected dir diffs: local checkpoint dir diffed against the remote snapshot's dir index
    TreeSet<DirDiff> expectedDirDiffs = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream()
        .map(localRemoteSnapshotPair -> DirDiffUtil.getDirDiff(
            new File(localRemoteSnapshotPair.getLeft() + "-" + checkpointId.serialize()),
            localRemoteSnapshotPair.getRight().getDirIndex(),
            DirDiffUtil.areSameFile(false)))
        .collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(DirDiff::getDirName))));

    // assert - assert all DirDiff are put to blob store
    Assert.assertEquals(actualDirDiffs, expectedDirDiffs);
    // assert - assert the previous snapshot index blob ids in the uploaded indexes match the checkpoint
    Assert.assertEquals(actualPreviousSnapshotIndexBlobIds, expectedPreviousSnapshotIndexBlobIds);
    // assert - assert all snapshot indexes are uploaded
    Assert.assertEquals(actualSnapshotIndexesUploaded, expectedSnapshotIndexesUploaded);
  } finally {
    // cleanup - in finally so temporary checkpoint dirs are removed even if the test fails
    checkpointDirsToClean.forEach(path -> {
      try {
        FileUtils.deleteDirectory(new File(path));
      } catch (IOException exception) {
        Assert.fail("Failed to cleanup temporary checkpoint dirs.");
      }
    });
  }
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreBackupManager method testUploadWithNoPreviousCheckpoints.
/**
 * Verifies {@code blobStoreBackupManager.upload(...)} when the backup manager is initialized
 * with a checkpoint that contains no previous snapshot index blob ids.
 *
 * <p>{@code putDir} and {@code putSnapshotIndex} on the mocked {@code blobStoreUtil} are stubbed
 * to record their arguments. After the upload the test checks that the recorded dir diffs equal
 * the diffs of each local checkpoint dir against an empty {@code DirIndex}, that no uploaded
 * snapshot index carried a previous snapshot index blob id, and that the uploaded snapshot
 * indexes match the expected ones.
 *
 * <p>Temporary checkpoint directories created while stubbing are deleted in a {@code finally}
 * block so that they are removed even when an assertion fails.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testUploadWithNoPreviousCheckpoints() throws IOException {
  // Track directory for post cleanup
  List<String> checkpointDirsToClean = new ArrayList<>();
  try {
    // Setup: init local/remote snapshots and backup manager with no previous checkpoints
    indexBlobIdAndLocalRemoteSnapshotsPair = setupRemoteAndLocalSnapshots(false);
    Checkpoint checkpoint = new CheckpointV2(checkpointId, new HashMap<>(),
        ImmutableMap.of(BlobStoreStateBackendFactory.class.getName(), new HashMap<>()));
    blobStoreBackupManager.init(checkpoint);

    // mock: set task store dir to return corresponding test local store and create checkpoint dir
    ArgumentCaptor<String> stringCaptor = ArgumentCaptor.forClass(String.class);
    when(storageManagerUtil.getTaskStoreDir(any(File.class), stringCaptor.capture(), any(TaskName.class),
        any(TaskMode.class))).then((Answer<File>) invocation -> {
          String storeName = invocation.getArgumentAt(1, String.class);
          String snapshotIndexBlobId = testStoreNameAndSCMMap.get(storeName);
          String storeDir = indexBlobIdAndLocalRemoteSnapshotsPair.get(snapshotIndexBlobId).getLeft();
          try {
            BlobStoreTestUtil.createTestCheckpointDirectory(storeDir, checkpointId.serialize());
            checkpointDirsToClean.add(storeDir + "-" + checkpointId.serialize());
          } catch (IOException e) {
            Assert.fail("Couldn't create checkpoint directory. Test failed.");
          }
          return new File(storeDir);
        });

    // mock: checkpoint dir path is "<store dir absolute path>-<serialized checkpoint id>"
    ArgumentCaptor<File> storeDirCaptor = ArgumentCaptor.forClass(File.class);
    when(storageManagerUtil.getStoreCheckpointDir(storeDirCaptor.capture(), eq(checkpointId)))
        .thenAnswer((Answer<String>) invocation -> {
          File storeDir = invocation.getArgumentAt(0, File.class);
          return storeDir.getAbsolutePath() + "-" + checkpointId.serialize();
        });

    // mock: mock putDir and capture DirDiff
    SortedSet<DirDiff> actualDirDiffs = new TreeSet<>(Comparator.comparing(DirDiff::getDirName));
    ArgumentCaptor<DirDiff> dirDiffCaptor = ArgumentCaptor.forClass(DirDiff.class);
    ArgumentCaptor<SnapshotMetadata> snapshotMetadataCaptor = ArgumentCaptor.forClass(SnapshotMetadata.class);
    when(blobStoreUtil.putDir(dirDiffCaptor.capture(), snapshotMetadataCaptor.capture()))
        .then((Answer<CompletableFuture<DirIndex>>) invocation -> {
          DirDiff dirDiff = invocation.getArgumentAt(0, DirDiff.class);
          SnapshotMetadata snapshotMetadata = invocation.getArgumentAt(1, SnapshotMetadata.class);
          actualDirDiffs.add(dirDiff);
          SnapshotIndex snapshotIndex =
              testBlobStore.get(testStoreNameAndSCMMap.get(snapshotMetadata.getStoreName()));
          return CompletableFuture.completedFuture(snapshotIndex.getDirIndex());
        });

    SortedSet<SnapshotIndex> expectedSnapshotIndexesUploaded =
        indexBlobIdAndLocalRemoteSnapshotsPair.values().stream()
            .map(Pair::getRight)
            .collect(Collectors.toCollection(
                () -> new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis))));
    String expectedPreviousSnapshotIndexBlobId = "empty";

    // mock: mock putSnapshotIndex and capture previous snapshot index
    SortedSet<SnapshotIndex> actualSnapshotIndexesUploaded =
        new TreeSet<>(Comparator.comparing(SnapshotIndex::getCreationTimeMillis));
    // single-element array so the stub lambda can flip the marker
    final String[] actualPreviousSnapshotIndexBlobId = { "empty" };
    ArgumentCaptor<SnapshotIndex> snapshotIndexCaptor = ArgumentCaptor.forClass(SnapshotIndex.class);
    when(blobStoreUtil.putSnapshotIndex(snapshotIndexCaptor.capture()))
        .then((Answer<CompletableFuture<String>>) invocation -> {
          SnapshotIndex snapshotIndex = invocation.getArgumentAt(0, SnapshotIndex.class);
          actualSnapshotIndexesUploaded.add(snapshotIndex);
          if (snapshotIndex.getPrevSnapshotIndexBlobId().isPresent()) {
            actualPreviousSnapshotIndexBlobId[0] = "not-empty";
          }
          return CompletableFuture.completedFuture("random-blob-id");
        });

    // execute
    blobStoreBackupManager.upload(checkpointId, testStoreNameAndSCMMap);

    // setup expected dir diffs after execute: needs checkpoint dirs created in upload()
    TreeSet<DirDiff> expectedDirDiffs = indexBlobIdAndLocalRemoteSnapshotsPair.values().stream()
        .map(localRemoteSnapshotPair -> {
          File localCheckpointDir =
              new File(localRemoteSnapshotPair.getLeft() + "-" + checkpointId.serialize());
          // with no previous snapshot, the local dir is diffed against an empty dir index
          DirIndex dirIndex = new DirIndex(localCheckpointDir.getName(), Collections.emptyList(),
              Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
          return DirDiffUtil.getDirDiff(localCheckpointDir, dirIndex, DirDiffUtil.areSameFile(false));
        })
        .collect(Collectors.toCollection(() -> new TreeSet<>(Comparator.comparing(DirDiff::getDirName))));

    // assert - assert all DirDiff are put to blob store
    Assert.assertEquals(actualDirDiffs, expectedDirDiffs);
    // assert - assert no previous snapshot indexes were found
    Assert.assertEquals(actualPreviousSnapshotIndexBlobId[0], expectedPreviousSnapshotIndexBlobId);
    // assert - assert all snapshot indexes are uploaded
    Assert.assertEquals(actualSnapshotIndexesUploaded, expectedSnapshotIndexesUploaded);
  } finally {
    // cleanup - in finally so temporary checkpoint dirs are removed even if the test fails
    checkpointDirsToClean.forEach(path -> {
      try {
        FileUtils.deleteDirectory(new File(path));
      } catch (IOException exception) {
        Assert.fail("Failed to cleanup temporary checkpoint dirs.");
      }
    });
  }
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestSnapshotIndexSerde method testSnapshotIndexSerde.
/**
 * Round-trips a {@code SnapshotIndex} through {@code SnapshotIndexSerde} and checks that the
 * deserialized instance is non-null and equal to the one that was serialized.
 *
 * <p>Only a remote {@code DirIndex} is needed for the round trip, so no local snapshot
 * directory is created.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testSnapshotIndexSerde() throws IOException {
  // create the remote snapshot's dir index from the file spec
  String remote = "[a, b, z, c/1/2, e/1]";
  DirIndex dirIndex = BlobStoreTestUtil.createDirIndex(remote);

  // snapshot index with no previous snapshot index blob id
  SnapshotMetadata snapshotMetadata = new SnapshotMetadata(CheckpointId.create(), "job", "123", "task", "store");
  SnapshotIndex testRemoteSnapshot =
      new SnapshotIndex(System.currentTimeMillis(), snapshotMetadata, dirIndex, Optional.empty());

  // serialize, then deserialize
  SnapshotIndexSerde snapshotIndexSerde = new SnapshotIndexSerde();
  byte[] serialized = snapshotIndexSerde.toBytes(testRemoteSnapshot);
  SnapshotIndex deserialized = snapshotIndexSerde.fromBytes(serialized);

  Assert.assertNotNull(deserialized);
  Assert.assertEquals(deserialized, testRemoteSnapshot);
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreRestoreManager method testRestoreRetainsCheckpointDirsIfValid.
/**
 * Exercises {@code BlobStoreRestoreManager.restoreStores} when a local checkpoint directory for
 * the snapshot's checkpoint id already exists and the mocked {@code DirDiffUtil} reports it as
 * identical to the remote dir index.
 *
 * <p>Expected outcome: {@code restoreDir} is never invoked for the store directory, the
 * checkpoint directory no longer exists, and the store directory exists and contains the marker
 * file that was created inside the checkpoint directory.
 *
 * @throws IOException if the temporary directories or the marker file cannot be created
 */
@Test
public void testRestoreRetainsCheckpointDirsIfValid() throws IOException {
  final String store = "storeName";
  final String restoreJobName = "testJobName";
  final String restoreJobId = "testJobId";
  final TaskName mockTaskName = mock(TaskName.class);

  // restore metrics with the store registered
  final BlobStoreRestoreManagerMetrics restoreMetrics =
      new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
  restoreMetrics.initStoreMetrics(ImmutableList.of(store));
  final Set<String> storeNames = ImmutableSet.of(store);

  // previous snapshot: mocked index exposing a one-file dir index and a fresh checkpoint id
  final SnapshotIndex prevSnapshotIndex = mock(SnapshotIndex.class);
  final Map<String, Pair<String, SnapshotIndex>> prevSnapshotIndexes =
      ImmutableMap.of(store, Pair.of("blobId", prevSnapshotIndex));
  final DirIndex remoteDirIndex = BlobStoreTestUtil.createDirIndex("[a]");
  when(prevSnapshotIndex.getDirIndex()).thenReturn(remoteDirIndex);
  final CheckpointId snapshotCheckpointId = CheckpointId.create();
  final SnapshotMetadata prevSnapshotMetadata =
      new SnapshotMetadata(snapshotCheckpointId, "jobName", "jobId", "taskName", store);
  when(prevSnapshotIndex.getSnapshotMetadata()).thenReturn(prevSnapshotMetadata);

  // on-disk layout: base dir containing the store dir (to be deleted during restore) ...
  final Path baseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  final Path currentStoreDir = Files.createTempDirectory(baseDir, "storeDir-");
  // ... a checkpoint dir so that shouldRestore = false (areSameDir == true later) ...
  final Path checkpointDir = Files.createTempDirectory(baseDir, "storeDir-" + snapshotCheckpointId + "-");
  // ... and a dummy file inside the checkpoint dir to verify after dir rename.
  final Path markerFile = Files.createTempFile(checkpointDir, "tempFile-", null);

  final File baseDirFile = baseDir.toFile();
  final File currentStoreDirFile = currentStoreDir.toFile();
  final File checkpointDirFile = checkpointDir.toFile();

  final StorageConfig mockStorageConfig = mock(StorageConfig.class);
  final StorageManagerUtil mockStorageManagerUtil = mock(StorageManagerUtil.class);
  when(mockStorageManagerUtil.getTaskStoreDir(eq(baseDirFile), eq(store), eq(mockTaskName), eq(TaskMode.Active)))
      .thenReturn(currentStoreDirFile);
  when(mockStorageManagerUtil.getStoreCheckpointDir(any(File.class), eq(snapshotCheckpointId)))
      .thenReturn(checkpointDir.toString());
  when(mockStorageManagerUtil.getTaskStoreCheckpointDirs(any(File.class), anyString(), any(TaskName.class),
      any(TaskMode.class))).thenReturn(ImmutableList.of(checkpointDirFile));

  final BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
  final DirDiffUtil mockDirDiffUtil = mock(DirDiffUtil.class);
  // ensures shouldRestore is not called
  when(mockDirDiffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((left, right) -> true);
  // return immediately without restoring.
  when(mockBlobStoreUtil.restoreDir(eq(currentStoreDirFile), eq(remoteDirIndex), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture(null));

  BlobStoreRestoreManager.restoreStores(restoreJobName, restoreJobId, mockTaskName, storeNames,
      prevSnapshotIndexes, baseDirFile, mockStorageConfig, restoreMetrics, mockStorageManagerUtil,
      mockBlobStoreUtil, mockDirDiffUtil, EXECUTOR);

  // verify that the store directory restore was not called (should have restored from checkpoint dir)
  verify(mockBlobStoreUtil, times(0))
      .restoreDir(eq(currentStoreDirFile), eq(remoteDirIndex), any(Metadata.class));

  // verify that the checkpoint dir was renamed to store dir
  assertFalse(checkpointDirFile.exists());
  assertTrue(currentStoreDirFile.exists());
  final String markerFileName = markerFile.getFileName().toString();
  final Path markerInStoreDir = Paths.get(currentStoreDir.toString(), markerFileName);
  assertTrue(Files.exists(markerInStoreDir));
}
Aggregations