Search in sources :

Example 11 with SnapshotIndex

use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.

the class TestBlobStoreUtil method testGetCheckpointIndexIgnoresStoresNotInStoresToBackupRestoreSet.

/**
 * Verifies that {@code getStoreSnapshotIndexes} only looks up snapshot indexes for stores contained in
 * the {@code storesToBackupOrRestore} set, even when the checkpoint carries markers for additional stores,
 * and that the returned map holds entries for exactly those stores.
 */
@Test
public void testGetCheckpointIndexIgnoresStoresNotInStoresToBackupRestoreSet() {
    String store = "storeName1";
    String anotherStore = "storeName2";
    String oneMoreStore = "storeName3";
    SnapshotIndex mockStoreSnapshotIndex = mock(SnapshotIndex.class);
    // oneMoreStore is deliberately left out of the set: it is present in the checkpoint but must be skipped.
    Set<String> storesToBackupOrRestore = ImmutableSet.of(store, anotherStore);
    CheckpointV2 checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(store, "1", anotherStore, "2", oneMoreStore, "3"));
    BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
    when(mockBlobStoreUtil.getSnapshotIndex(any(String.class), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(mockStoreSnapshotIndex));
    // Run the real method under test on the mock; only getSnapshotIndex stays stubbed.
    when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    Map<String, Pair<String, SnapshotIndex>> snapshotIndexes = mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);
    // One lookup per requested store, none for the store outside the set.
    verify(mockBlobStoreUtil, times(storesToBackupOrRestore.size())).getSnapshotIndex(anyString(), any(Metadata.class));
    // The result must be keyed by exactly the requested stores; the ignored store must not leak into it.
    assertEquals(storesToBackupOrRestore, snapshotIndexes.keySet());
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Checkpoint(org.apache.samza.checkpoint.Checkpoint) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Example 12 with SnapshotIndex

use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.

the class BlobStoreUtil method getStoreSnapshotIndexes.

/**
 * Fetches, for every requested store of the given task, the {@link SnapshotIndex} referenced by the
 * blob store state checkpoint marker in {@code checkpoint}, and pairs it with its blob id.
 *
 * <p>Error handling:
 * <ul>
 *   <li>A {@code null} checkpoint or a version 1 checkpoint yields an empty map.</li>
 *   <li>A checkpoint without markers for {@link BlobStoreStateBackendFactory} yields an empty map.</li>
 *   <li>Any exception raised while issuing a snapshot index request is rethrown as a {@link SamzaException}.</li>
 *   <li>While waiting for the results, a failure whose unwrapped cause is a {@link DeletedException} is
 *       logged and reported to {@code FutureUtil.toFutureOfMap} as ignorable; any other failure surfaces
 *       as a {@link SamzaException}.</li>
 * </ul>
 *
 * @param jobName job name is used to build request metadata
 * @param jobId job id is used to build request metadata
 * @param taskName task name to get the store state checkpoint markers and snapshot indexes for
 * @param checkpoint {@link Checkpoint} instance to get the store state checkpoint markers from. Only
 *                   {@link CheckpointV2} and newer are supported for blob stores.
 * @param storesToBackupOrRestore set of store names to be backed up or restored
 * @return Map of store name to a pair of (snapshot index blob id, snapshot index) for the task.
 * @throws SamzaException if a snapshot index request cannot be issued or a non-ignored error occurs
 *                        while waiting for the snapshot indexes
 */
public Map<String, Pair<String, SnapshotIndex>> getStoreSnapshotIndexes(String jobName, String jobId, String taskName, Checkpoint checkpoint, Set<String> storesToBackupOrRestore) {
    if (checkpoint == null) {
        LOG.debug("No previous checkpoint found for taskName: {}", taskName);
        return ImmutableMap.of();
    }
    if (checkpoint.getVersion() == 1) {
        LOG.warn("Checkpoint version 1 is not supported for blob store backup and restore.");
        return ImmutableMap.of();
    }

    CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
    // For the blob store backend, a store's state checkpoint marker is the blob id of its snapshot index.
    Map<String, String> blobIdsByStore = checkpointV2.getStateCheckpointMarkers().get(BlobStoreStateBackendFactory.class.getName());
    Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> pendingIndexesByStore = new HashMap<>();

    if (blobIdsByStore == null) {
        LOG.debug("No store SCMs found for blob store state backend in for taskName: {} in checkpoint {}", taskName, checkpointV2.getCheckpointId());
    } else {
        for (Map.Entry<String, String> storeMarker : blobIdsByStore.entrySet()) {
            String storeName = storeMarker.getKey();
            String snapshotIndexBlobId = storeMarker.getValue();
            if (!storesToBackupOrRestore.contains(storeName)) {
                LOG.debug("SnapshotIndex blob id {} for store {} is not present in the set of stores to be backed up/restores: {}", snapshotIndexBlobId, storeName, storesToBackupOrRestore);
                continue;
            }
            try {
                LOG.debug("Getting snapshot index for taskName: {} store: {} blobId: {}", taskName, storeName, snapshotIndexBlobId);
                Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
                CompletableFuture<SnapshotIndex> indexFuture = getSnapshotIndex(snapshotIndexBlobId, requestMetadata).toCompletableFuture();
                CompletableFuture<String> blobIdFuture = CompletableFuture.completedFuture(snapshotIndexBlobId);
                // Collect the futures now and block a single time below, rather than once per store.
                pendingIndexesByStore.put(storeName, FutureUtil.toFutureOfPair(Pair.of(blobIdFuture, indexFuture)));
            } catch (Exception e) {
                throw new SamzaException(String.format("Error getting SnapshotIndex for blobId: %s for taskName: %s store: %s", snapshotIndexBlobId, taskName, storeName), e);
            }
        }
    }

    try {
        return FutureUtil.toFutureOfMap(t -> {
            // Only failures caused by an already deleted snapshot index are treated as ignorable.
            boolean alreadyDeleted = FutureUtil.unwrapExceptions(CompletionException.class, t) instanceof DeletedException;
            if (alreadyDeleted) {
                LOG.warn("Ignoring already deleted snapshot index for taskName: {}", taskName, t);
            }
            return alreadyDeleted;
        }, pendingIndexesByStore).join();
    } catch (Exception e) {
        throw new SamzaException(String.format("Error while waiting to get store snapshot indexes for task %s", taskName), e);
    }
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) 
Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) SamzaException(org.apache.samza.SamzaException) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) CompletableFuture(java.util.concurrent.CompletableFuture) CompletionException(java.util.concurrent.CompletionException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 13 with SnapshotIndex

use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.

the class BlobStoreBackupManager method cleanUp.

/**
 * Cleans up remote state after a commit. Per the original contract this is invoked at the end of every
 * commit as well as on a container start/restart.
 *
 * <p>For every store in {@code storeSCMs} that is configured for blob store backup, the store's
 * {@link SnapshotIndex} is fetched once and three independent follow-up actions are chained on it:
 * <ol>
 *   <li>Remove the TTL of the snapshot index blob and of its files and sub-dirs marked for retention.</li>
 *   <li>Delete the files/sub-dirs marked for deletion in the snapshot index.</li>
 *   <li>Delete the remote {@link SnapshotIndex} blob of the previous checkpoint, if one is recorded.</li>
 * </ol>
 * The elapsed time is recorded in {@code metrics.cleanupNs} when the combined future completes,
 * whether it succeeds or fails.
 *
 * @param checkpointId the {@link CheckpointId} of the last successfully committed checkpoint
 *                     (not read by this implementation).
 * @param storeSCMs store name to state checkpoint markers for the last successfully committed checkpoint
 * @return a future that completes once all cleanup actions for all eligible stores have completed
 */
@Override
public CompletableFuture<Void> cleanUp(CheckpointId checkpointId, Map<String, String> storeSCMs) {
    final long cleanUpStartNs = System.nanoTime();
    List<CompletionStage<Void>> ttlRemovals = new ArrayList<>();
    List<CompletionStage<Void>> markedForDeletionCleanups = new ArrayList<>();
    List<CompletionStage<Void>> prevIndexDeletions = new ArrayList<>();

    // For blob store backup and restore, the SCM is simply the blob id of the SnapshotIndex that
    // represents the remote snapshot.
    for (Map.Entry<String, String> storeSCM : storeSCMs.entrySet()) {
        String storeName = storeSCM.getKey();
        String snapshotIndexBlobId = storeSCM.getValue();
        // Stores not configured with the BlobStore State Backend Factory are left untouched.
        if (!storesToBackup.contains(storeName)) {
            continue;
        }

        Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
        CompletionStage<SnapshotIndex> indexStage = blobStoreUtil.getSnapshotIndex(snapshotIndexBlobId, requestMetadata);

        // Step 1: drop the TTL on the index blob and on everything it retains.
        ttlRemovals.add(indexStage.thenComposeAsync(index -> {
            LOG.debug("Removing TTL for index blob: {} and all of its files and sub-dirs for task: {} store :{}", snapshotIndexBlobId, taskName, storeName);
            return blobStoreUtil.removeTTL(snapshotIndexBlobId, index, requestMetadata);
        }, executor));

        // Step 2: remove the files and sub-dirs that the index marks for deletion.
        markedForDeletionCleanups.add(indexStage.thenComposeAsync(index -> {
            LOG.debug("Deleting files and dirs to remove for current index blob: {} for task: {} store: {}", snapshotIndexBlobId, taskName, storeName);
            return blobStoreUtil.cleanUpDir(index.getDirIndex(), requestMetadata);
        }, executor));

        // Step 3: remove the snapshot index blob belonging to the previous checkpoint.
        prevIndexDeletions.add(indexStage.thenComposeAsync(index -> {
            if (!index.getPrevSnapshotIndexBlobId().isPresent()) {
                // Nothing to delete: there is no previous snapshot index blob, so finish right away.
                return CompletableFuture.completedFuture(null);
            }
            String prevBlobId = index.getPrevSnapshotIndexBlobId().get();
            LOG.debug("Removing previous snapshot index blob: {} from blob store for task: {} store: {}.", prevBlobId, taskName, storeName);
            return blobStoreUtil.deleteSnapshotIndexBlob(prevBlobId, requestMetadata);
        }, executor));
    }

    CompletableFuture<Void> allCleanUps = FutureUtil.allOf(ttlRemovals, markedForDeletionCleanups, prevIndexDeletions);
    return allCleanUps.whenComplete((res, ex) -> metrics.cleanupNs.update(System.nanoTime() - cleanUpStartNs));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) ArrayList(java.util.ArrayList) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) CompletionStage(java.util.concurrent.CompletionStage)

Example 14 with SnapshotIndex

use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.

the class TestBlobStoreUtil method testGetSSIThrowsExceptionIfAnyNonIgnoredAsyncBlobStoreErrors.

/**
 * Verifies that {@code getStoreSnapshotIndexes} fails when at least one snapshot index lookup fails with
 * an error that is not ignorable, even if another lookup fails with an ignorable {@link DeletedException},
 * and that the original non-ignored exception is preserved as the root cause.
 */
@Test
public void testGetSSIThrowsExceptionIfAnyNonIgnoredAsyncBlobStoreErrors() {
    String store = "storeName1";
    String otherStore = "storeName2";
    Set<String> storesToBackupOrRestore = new HashSet<>();
    storesToBackupOrRestore.add(store);
    storesToBackupOrRestore.add(otherStore);
    Checkpoint checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(store, "snapshotIndexBlobId1", otherStore, "snapshotIndexBlobId2"));
    BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
    // Run the real method under test on the mock; only getSnapshotIndex stays stubbed.
    when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    RuntimeException nonIgnoredException = new RuntimeException();
    CompletableFuture<SnapshotIndex> failedFuture = FutureUtil.failedFuture(nonIgnoredException);
    // should fail even if some errors are ignored
    when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId1"), any(Metadata.class))).thenReturn(FutureUtil.failedFuture(new DeletedException()));
    when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId2"), any(Metadata.class))).thenReturn(failedFuture);
    try {
        mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);
        // fail() raises an AssertionError, which the catch (Exception) below does not intercept.
        fail("Should have thrown an exception");
    } catch (Exception e) {
        // Peel off the SamzaException and CompletionException wrappers to reach the original failure.
        Throwable cause = FutureUtil.unwrapExceptions(CompletionException.class, FutureUtil.unwrapExceptions(SamzaException.class, e));
        assertEquals(nonIgnoredException, cause);
    }
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) TimeoutException(java.util.concurrent.TimeoutException) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) Checkpoint(org.apache.samza.checkpoint.Checkpoint) CompletionException(java.util.concurrent.CompletionException) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 15 with SnapshotIndex

use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by apache.

the class TestBlobStoreUtil method testGetSSIReturnsCorrectSCMSnapshotIndexPair.

/**
 * Verifies that {@code getStoreSnapshotIndexes} returns, for each requested store, a pair made of the
 * store's snapshot index blob id (its state checkpoint marker) and the snapshot index fetched for that
 * blob id, and that exactly one lookup is issued per store.
 */
@Test
public void testGetSSIReturnsCorrectSCMSnapshotIndexPair() {
    String storeName = "storeName";
    String otherStoreName = "otherStoreName";
    String storeBlobId = "snapshotIndexBlobId";
    String otherStoreBlobId = "otherSnapshotIndexBlobId";
    Set<String> storesToBackupOrRestore = ImmutableSet.of(storeName, otherStoreName);
    CheckpointV2 checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(storeName, storeBlobId, otherStoreName, otherStoreBlobId));

    SnapshotIndex storeIndex = mock(SnapshotIndex.class);
    SnapshotIndex otherStoreIndex = mock(SnapshotIndex.class);
    BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
    // Run the real method under test on the mock; each blob id resolves to its own snapshot index.
    when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    when(mockBlobStoreUtil.getSnapshotIndex(eq(storeBlobId), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(storeIndex));
    when(mockBlobStoreUtil.getSnapshotIndex(eq(otherStoreBlobId), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(otherStoreIndex));

    Map<String, Pair<String, SnapshotIndex>> snapshotIndexes = mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);

    Pair<String, SnapshotIndex> storeResult = snapshotIndexes.get(storeName);
    assertEquals(storeBlobId, storeResult.getKey());
    assertEquals(storeIndex, storeResult.getValue());
    Pair<String, SnapshotIndex> otherStoreResult = snapshotIndexes.get(otherStoreName);
    assertEquals(otherStoreBlobId, otherStoreResult.getKey());
    assertEquals(otherStoreIndex, otherStoreResult.getValue());
    verify(mockBlobStoreUtil, times(2)).getSnapshotIndex(anyString(), any(Metadata.class));
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Checkpoint(org.apache.samza.checkpoint.Checkpoint) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)

Aggregations

SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex)20 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)18 Pair (org.apache.commons.lang3.tuple.Pair)16 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)15 Test (org.junit.Test)14 File (java.io.File)12 Path (java.nio.file.Path)12 Checkpoint (org.apache.samza.checkpoint.Checkpoint)11 CheckpointId (org.apache.samza.checkpoint.CheckpointId)10 ArrayList (java.util.ArrayList)9 SamzaException (org.apache.samza.SamzaException)9 IOException (java.io.IOException)8 HashMap (java.util.HashMap)8 CompletionStage (java.util.concurrent.CompletionStage)8 TaskName (org.apache.samza.container.TaskName)8 StorageManagerUtil (org.apache.samza.storage.StorageManagerUtil)8 CompletableFuture (java.util.concurrent.CompletableFuture)7 CheckpointV2 (org.apache.samza.checkpoint.CheckpointV2)7 Files (java.nio.file.Files)6 List (java.util.List)6