use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreUtil method testGetCheckpointIndexIgnoresStoresNotInStoresToBackupRestoreSet.
/**
 * Verifies that {@code getStoreSnapshotIndexes} only requests snapshot indexes for stores contained in
 * the set of stores to back up or restore, even though the checkpoint carries a state checkpoint
 * marker for an additional store, and that the additional store is absent from the returned map.
 */
@Test
public void testGetCheckpointIndexIgnoresStoresNotInStoresToBackupRestoreSet() {
  String store = "storeName1";
  String anotherStore = "storeName2";
  // present in the checkpoint below, but deliberately left out of storesToBackupOrRestore
  String oneMoreStore = "storeName3";
  SnapshotIndex mockStoreSnapshotIndex = mock(SnapshotIndex.class);
  Set<String> storesToBackupOrRestore = ImmutableSet.of(store, anotherStore);
  CheckpointV2 checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(store, "1", anotherStore, "2", oneMoreStore, "3"));
  BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
  when(mockBlobStoreUtil.getSnapshotIndex(any(String.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(mockStoreSnapshotIndex));
  when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();

  Map<String, Pair<String, SnapshotIndex>> snapshotIndexes = mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);

  // one fetch per requested store, none for the store that is not in the set
  verify(mockBlobStoreUtil, times(storesToBackupOrRestore.size())).getSnapshotIndex(anyString(), any(Metadata.class));
  // the returned map must contain exactly the requested stores
  assertEquals(storesToBackupOrRestore, snapshotIndexes.keySet());
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class BlobStoreUtil method getStoreSnapshotIndexes.
/**
 * Looks up the {@link SnapshotIndex} of each store of the given task whose blob id is recorded in the
 * {@code checkpoint} under the {@link BlobStoreStateBackendFactory} state checkpoint markers, and pairs
 * every index with the blob id it was requested with.
 *
 * <p>An empty map is returned when {@code checkpoint} is null or has version 1. Stores listed in the
 * checkpoint but missing from {@code storesToBackupOrRestore} are skipped. All index requests are issued
 * first and are waited on together at the end. A request that failed with a {@link DeletedException}
 * is logged and reported as ignorable to {@code FutureUtil.toFutureOfMap}.
 *
 * @param jobName job name is used to build request metadata
 * @param jobId job id is used to build request metadata
 * @param taskName task name to get the store state checkpoint markers and snapshot indexes for
 * @param checkpoint {@link Checkpoint} instance to get the store state checkpoint markers from. Only
 * {@link CheckpointV2} and newer are supported for blob stores.
 * @param storesToBackupOrRestore set of store names to be backed up or restored
 * @return Map of store name to a pair of (snapshot index blob id, snapshot index) for the task.
 * @throws SamzaException if issuing a snapshot index request throws, or if an exception is thrown while
 * waiting for the issued requests to complete
 */
public Map<String, Pair<String, SnapshotIndex>> getStoreSnapshotIndexes(String jobName, String jobId, String taskName, Checkpoint checkpoint, Set<String> storesToBackupOrRestore) {
  // TODO MED shesharma document error handling (checkpoint ver, blob not found, getBlob)
  if (checkpoint == null) {
    LOG.debug("No previous checkpoint found for taskName: {}", taskName);
    return ImmutableMap.of();
  }
  if (checkpoint.getVersion() == 1) {
    LOG.warn("Checkpoint version 1 is not supported for blob store backup and restore.");
    return ImmutableMap.of();
  }

  Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> pendingIndexes = new HashMap<>();
  CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
  // for the blob store backend the SCM of a store is the blob id of its snapshot index
  Map<String, String> blobIdsByStore =
      checkpointV2.getStateCheckpointMarkers().get(BlobStoreStateBackendFactory.class.getName());

  if (blobIdsByStore == null) {
    LOG.debug("No store SCMs found for blob store state backend in for taskName: {} in checkpoint {}", taskName, checkpointV2.getCheckpointId());
  } else {
    for (Map.Entry<String, String> scm : blobIdsByStore.entrySet()) {
      String storeName = scm.getKey();
      String blobId = scm.getValue();

      if (!storesToBackupOrRestore.contains(storeName)) {
        LOG.debug("SnapshotIndex blob id {} for store {} is not present in the set of stores to be backed up/restores: {}", blobId, storeName, storesToBackupOrRestore);
        continue;
      }

      try {
        LOG.debug("Getting snapshot index for taskName: {} store: {} blobId: {}", taskName, storeName, blobId);
        Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
        CompletableFuture<SnapshotIndex> indexFuture = getSnapshotIndex(blobId, requestMetadata).toCompletableFuture();
        Pair<CompletableFuture<String>, CompletableFuture<SnapshotIndex>> blobIdAndIndex =
            Pair.of(CompletableFuture.completedFuture(blobId), indexFuture);
        // only collect the future here; the single blocking wait happens after the loop
        pendingIndexes.put(storeName, FutureUtil.toFutureOfPair(blobIdAndIndex));
      } catch (Exception e) {
        throw new SamzaException(String.format("Error getting SnapshotIndex for blobId: %s for taskName: %s store: %s", blobId, taskName, storeName), e);
      }
    }
  }

  try {
    return FutureUtil.toFutureOfMap(error -> {
      // look through CompletionException wrappers to find the underlying failure
      Throwable rootCause = FutureUtil.unwrapExceptions(CompletionException.class, error);
      boolean alreadyDeleted = rootCause instanceof DeletedException;
      if (alreadyDeleted) {
        LOG.warn("Ignoring already deleted snapshot index for taskName: {}", taskName, error);
      }
      return alreadyDeleted;
    }, pendingIndexes).join();
  } catch (Exception e) {
    throw new SamzaException(String.format("Error while waiting to get store snapshot indexes for task %s", taskName), e);
  }
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class BlobStoreBackupManager method cleanUp.
/**
 * Clean up would be called at the end of every commit as well as on a container start/restart.
 * For every store in {@code storeSCMs} that is also in {@code storesToBackup}, the snapshot index is
 * requested once and three follow-up stages are chained onto that request:
 * 1. Remove TTL of the snapshot index blob and for any associated files and sub-dirs marked for retention.
 * 2. Delete the files/subdirs marked for deletion in the snapshot index.
 * 3. Delete the remote {@link SnapshotIndex} blob for the previous checkpoint, if the index names one.
 * The elapsed time is recorded in {@code metrics.cleanupNs} when the combined future completes.
 * @param checkpointId the {@link CheckpointId} of the last successfully committed checkpoint.
 * @param storeSCMs store name to state checkpoint markers for the last successfully committed checkpoint
 * @return the future produced by {@code FutureUtil.allOf} over all stages created for all stores
 */
@Override
public CompletableFuture<Void> cleanUp(CheckpointId checkpointId, Map<String, String> storeSCMs) {
  long startTime = System.nanoTime();
  List<CompletionStage<Void>> ttlRemovals = new ArrayList<>();
  List<CompletionStage<Void>> remoteSnapshotCleanups = new ArrayList<>();
  List<CompletionStage<Void>> prevIndexDeletions = new ArrayList<>();

  // SCM, in case of blob store backup and restore, is just the blob id of SnapshotIndex representing the remote snapshot
  for (Map.Entry<String, String> scm : storeSCMs.entrySet()) {
    String storeName = scm.getKey();
    String indexBlobId = scm.getValue();

    // Only perform cleanup for stores configured with BlobStore State Backend Factory
    if (!storesToBackup.contains(storeName)) {
      continue;
    }

    Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
    CompletionStage<SnapshotIndex> snapshotIndexFuture = blobStoreUtil.getSnapshotIndex(indexBlobId, requestMetadata);

    // 1. remove TTL of index blob and all of its files and sub-dirs marked for retention
    CompletionStage<Void> ttlRemoval = snapshotIndexFuture.thenComposeAsync(snapshotIndex -> {
      LOG.debug("Removing TTL for index blob: {} and all of its files and sub-dirs for task: {} store :{}", indexBlobId, taskName, storeName);
      return blobStoreUtil.removeTTL(indexBlobId, snapshotIndex, requestMetadata);
    }, executor);
    ttlRemovals.add(ttlRemoval);

    // 2. delete the files/subdirs marked for deletion in the snapshot index.
    CompletionStage<Void> remoteSnapshotCleanup = snapshotIndexFuture.thenComposeAsync(snapshotIndex -> {
      LOG.debug("Deleting files and dirs to remove for current index blob: {} for task: {} store: {}", indexBlobId, taskName, storeName);
      return blobStoreUtil.cleanUpDir(snapshotIndex.getDirIndex(), requestMetadata);
    }, executor);
    remoteSnapshotCleanups.add(remoteSnapshotCleanup);

    // 3. delete the remote {@link SnapshotIndex} blob for the previous checkpoint.
    CompletionStage<Void> prevIndexDeletion = snapshotIndexFuture.thenComposeAsync(snapshotIndex -> {
      if (!snapshotIndex.getPrevSnapshotIndexBlobId().isPresent()) {
        // complete future immediately. There are no previous snapshots index blobs to delete.
        return CompletableFuture.completedFuture(null);
      }
      String prevBlobId = snapshotIndex.getPrevSnapshotIndexBlobId().get();
      LOG.debug("Removing previous snapshot index blob: {} from blob store for task: {} store: {}.", prevBlobId, taskName, storeName);
      return blobStoreUtil.deleteSnapshotIndexBlob(prevBlobId, requestMetadata);
    }, executor);
    prevIndexDeletions.add(prevIndexDeletion);
  }

  return FutureUtil.allOf(ttlRemovals, remoteSnapshotCleanups, prevIndexDeletions)
      .whenComplete((ignored, error) -> metrics.cleanupNs.update(System.nanoTime() - startTime));
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreUtil method testGetSSIThrowsExceptionIfAnyNonIgnoredAsyncBlobStoreErrors.
/**
 * Verifies that {@code getStoreSnapshotIndexes} fails with the non-ignored error as its root cause when
 * one store's snapshot index request fails with a {@link DeletedException} and another store's request
 * fails with a different exception.
 */
@Test
public void testGetSSIThrowsExceptionIfAnyNonIgnoredAsyncBlobStoreErrors() {
  String store = "storeName1";
  String otherStore = "storeName2";
  Set<String> storesToBackupOrRestore = ImmutableSet.of(store, otherStore);
  Checkpoint checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(store, "snapshotIndexBlobId1", otherStore, "snapshotIndexBlobId2"));
  BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
  when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();

  RuntimeException nonIgnoredException = new RuntimeException();
  CompletableFuture<SnapshotIndex> deletedFuture = FutureUtil.failedFuture(new DeletedException());
  CompletableFuture<SnapshotIndex> failedFuture = FutureUtil.failedFuture(nonIgnoredException);
  // should fail even if some errors are ignored: the first store fails with the DeletedException,
  // the second store fails with an exception that must be propagated
  when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId1"), any(Metadata.class))).thenReturn(deletedFuture);
  when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId2"), any(Metadata.class))).thenReturn(failedFuture);

  try {
    mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);
    fail("Should have thrown an exception");
  } catch (Exception e) {
    // strip the SamzaException and CompletionException wrappers to reach the original failure
    Throwable cause = FutureUtil.unwrapExceptions(CompletionException.class, FutureUtil.unwrapExceptions(SamzaException.class, e));
    assertEquals(nonIgnoredException, cause);
  }
}
use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.
the class TestBlobStoreUtil method testGetSSIReturnsCorrectSCMSnapshotIndexPair.
/**
 * Verifies that {@code getStoreSnapshotIndexes} maps each store name to the pair of its snapshot index
 * blob id and the snapshot index returned for that blob id, issuing one index request per store.
 */
@Test
public void testGetSSIReturnsCorrectSCMSnapshotIndexPair() {
  // arrange: two stores, each with its own blob id and its own mocked snapshot index
  String storeName = "storeName";
  String otherStoreName = "otherStoreName";
  String storeBlobId = "snapshotIndexBlobId";
  String otherStoreBlobId = "otherSnapshotIndexBlobId";
  Set<String> storesToBackupOrRestore = ImmutableSet.of(storeName, otherStoreName);
  SnapshotIndex storeIndex = mock(SnapshotIndex.class);
  SnapshotIndex otherStoreIndex = mock(SnapshotIndex.class);
  CheckpointV2 checkpoint = createCheckpointV2(
      BlobStoreStateBackendFactory.class.getName(),
      ImmutableMap.of(storeName, storeBlobId, otherStoreName, otherStoreBlobId));

  BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
  when(mockBlobStoreUtil.getSnapshotIndex(eq(storeBlobId), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture(storeIndex));
  when(mockBlobStoreUtil.getSnapshotIndex(eq(otherStoreBlobId), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture(otherStoreIndex));
  when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class)))
      .thenCallRealMethod();

  // act
  Map<String, Pair<String, SnapshotIndex>> snapshotIndexes =
      mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);

  // assert: each store maps to its own (blob id, snapshot index) pair
  Pair<String, SnapshotIndex> storeResult = snapshotIndexes.get(storeName);
  assertEquals(storeBlobId, storeResult.getKey());
  assertEquals(storeIndex, storeResult.getValue());

  Pair<String, SnapshotIndex> otherStoreResult = snapshotIndexes.get(otherStoreName);
  assertEquals(otherStoreBlobId, otherStoreResult.getKey());
  assertEquals(otherStoreIndex, otherStoreResult.getValue());

  verify(mockBlobStoreUtil, times(2)).getSnapshotIndex(anyString(), any(Metadata.class));
}
Aggregations