Example 6 with SnapshotIndex

Use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by Apache.

The class TestBlobStoreBackupManager, method setupRemoteAndLocalSnapshots.

private Map<String, Pair<String, SnapshotIndex>> setupRemoteAndLocalSnapshots(boolean addPrevCheckpoints) throws IOException {
    // reset blob store
    testBlobStore = new HashMap<>();
    Map<String, Pair<String, SnapshotIndex>> indexBlobIdAndRemoteAndLocalSnapshotMap = new HashMap<>();
    List<String> localSnapshots = new ArrayList<>();
    List<String> previousRemoteSnapshots = new ArrayList<>();
    localSnapshots.add("[a, c, z/1, y/2, p/m/3, q/n/4]");
    previousRemoteSnapshots.add("[a, b, z/1, x/5, p/m/3, r/o/6]");
    localSnapshots.add("[a, c, z/1, y/1, p/m/1, q/n/1]");
    previousRemoteSnapshots.add("[a, z/1, p/m/1]");
    localSnapshots.add("[z/i/1, y/j/1]");
    previousRemoteSnapshots.add("[z/i/1, x/k/1]");
    // setup local and corresponding remote snapshots
    for (int i = 0; i < localSnapshots.size(); i++) {
        Path localSnapshot = BlobStoreTestUtil.createLocalDir(localSnapshots.get(i));
        String testLocalSnapshot = localSnapshot.toAbsolutePath().toString();
        DirIndex dirIndex = BlobStoreTestUtil.createDirIndex(localSnapshots.get(i));
        SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, testLocalSnapshot);
        Optional<String> prevCheckpointId = Optional.empty();
        if (addPrevCheckpoints) {
            prevCheckpointId = Optional.of(prevSnapshotIndexBlobId + "-" + i);
            DirIndex prevDirIndex = BlobStoreTestUtil.createDirIndex(previousRemoteSnapshots.get(i));
            testBlobStore.put(prevCheckpointId.get(), new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, prevDirIndex, Optional.empty()));
        }
        SnapshotIndex testRemoteSnapshot = new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevCheckpointId);
        indexBlobIdAndRemoteAndLocalSnapshotMap.put("blobId-" + i, Pair.of(testLocalSnapshot, testRemoteSnapshot));
        testBlobStore.put("blobId-" + i, testRemoteSnapshot);
    }
    return indexBlobIdAndRemoteAndLocalSnapshotMap;
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Checkpoint(org.apache.samza.checkpoint.Checkpoint) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) Pair(org.apache.commons.lang3.tuple.Pair)
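
For context, here is a minimal sketch (not part of the Samza test class) of how the returned map could be checked. It assumes it lives inside TestBlobStoreBackupManager next to the helper above, so it can use the fixture field testBlobStore (the in-memory Map<String, SnapshotIndex> standing in for the blob store); the test method name is hypothetical.

@Test
public void remoteAndLocalSnapshotsAreWiredConsistently() throws IOException {
    Map<String, Pair<String, SnapshotIndex>> fixture = setupRemoteAndLocalSnapshots(true);
    for (Map.Entry<String, Pair<String, SnapshotIndex>> entry : fixture.entrySet()) {
        String indexBlobId = entry.getKey();
        String localSnapshotDir = entry.getValue().getLeft();
        SnapshotIndex remoteSnapshotIndex = entry.getValue().getRight();
        // the local snapshot directory created by BlobStoreTestUtil.createLocalDir exists on disk
        Assert.assertTrue(Files.exists(Paths.get(localSnapshotDir)));
        // the same SnapshotIndex instance was registered in the in-memory blob store under its blob id
        Assert.assertSame(remoteSnapshotIndex, testBlobStore.get(indexBlobId));
    }
}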

Example 7 with SnapshotIndex

Use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by Apache.

The class BlobStoreRestoreManager, method deleteUnusedStoresFromBlobStore.

/**
 * Deletes blob store contents for stores that were present in the last checkpoint but are either no longer
 * present in job configs (removed by user since last deployment) or are no longer configured to be backed
 * up using blob stores.
 *
 * This method blocks until all the necessary store contents and snapshot index blobs have been marked for deletion.
 */
@VisibleForTesting
static void deleteUnusedStoresFromBlobStore(String jobName, String jobId, String taskName, StorageConfig storageConfig, BlobStoreConfig blobStoreConfig, Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes, BlobStoreUtil blobStoreUtil, ExecutorService executor) {
    List<String> storesToBackup = storageConfig.getStoresWithBackupFactory(BlobStoreStateBackendFactory.class.getName());
    List<String> storesToRestore = storageConfig.getStoresWithRestoreFactory(BlobStoreStateBackendFactory.class.getName());
    List<CompletionStage<Void>> storeDeletionFutures = new ArrayList<>();
    initialStoreSnapshotIndexes.forEach((storeName, scmAndSnapshotIndex) -> {
        if (!storesToBackup.contains(storeName) && !storesToRestore.contains(storeName)) {
            LOG.debug("Removing task: {} store: {} from blob store. It is either no longer used, " + "or is no longer configured to be backed up or restored with blob store.", taskName, storeName);
            DirIndex dirIndex = scmAndSnapshotIndex.getRight().getDirIndex();
            Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
            CompletionStage<Void> storeDeletionFuture =
                // delete files and sub-dirs previously marked for removal
                blobStoreUtil.cleanUpDir(dirIndex, requestMetadata)
                    // delete files and dirs still present in the remote snapshot
                    .thenComposeAsync(v -> blobStoreUtil.deleteDir(dirIndex, requestMetadata), executor)
                    // finally, delete the snapshot index blob itself
                    .thenComposeAsync(v -> blobStoreUtil.deleteSnapshotIndexBlob(scmAndSnapshotIndex.getLeft(), requestMetadata), executor);
            storeDeletionFutures.add(storeDeletionFuture);
        }
    });
    FutureUtil.allOf(storeDeletionFutures).join();
}
Also used : BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TaskModel(org.apache.samza.job.model.TaskModel) ArrayList(java.util.ArrayList) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) ExecutorService(java.util.concurrent.ExecutorService) FutureUtil(org.apache.samza.util.FutureUtil) StorageConfig(org.apache.samza.config.StorageConfig) ImmutableSet(com.google.common.collect.ImmutableSet) TaskName(org.apache.samza.container.TaskName) Logger(org.slf4j.Logger) BlobStoreUtil(org.apache.samza.storage.blobstore.util.BlobStoreUtil) Files(java.nio.file.Files) StorageManagerUtil(org.apache.samza.storage.StorageManagerUtil) Set(java.util.Set) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Checkpoint(org.apache.samza.checkpoint.Checkpoint) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) List(java.util.List) TaskMode(org.apache.samza.job.model.TaskMode) CompletionStage(java.util.concurrent.CompletionStage) TaskRestoreManager(org.apache.samza.storage.TaskRestoreManager) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Paths(java.nio.file.Paths) Optional(java.util.Optional) DirDiffUtil(org.apache.samza.storage.blobstore.util.DirDiffUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) BlobStoreConfig(org.apache.samza.config.BlobStoreConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) CompletionStage(java.util.concurrent.CompletionStage) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
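
Since the method is static and @VisibleForTesting, it can be driven directly with mocks. The following is a hedged Mockito sketch (not from the Samza repo): the store appears in the checkpointed snapshot indexes but in neither the backup nor the restore config, so all three blob store deletions should be chained. The names "unusedStore" and "indexBlobId" are placeholders.

StorageConfig storageConfig = mock(StorageConfig.class);
when(storageConfig.getStoresWithBackupFactory(BlobStoreStateBackendFactory.class.getName()))
    .thenReturn(Collections.emptyList());
when(storageConfig.getStoresWithRestoreFactory(BlobStoreStateBackendFactory.class.getName()))
    .thenReturn(Collections.emptyList());

SnapshotIndex snapshotIndex = mock(SnapshotIndex.class);
when(snapshotIndex.getDirIndex()).thenReturn(mock(DirIndex.class));
Map<String, Pair<String, SnapshotIndex>> initialStoreSnapshotIndexes =
    ImmutableMap.of("unusedStore", Pair.of("indexBlobId", snapshotIndex));

BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
when(blobStoreUtil.cleanUpDir(any(DirIndex.class), any(Metadata.class)))
    .thenReturn(CompletableFuture.completedFuture(null));
when(blobStoreUtil.deleteDir(any(DirIndex.class), any(Metadata.class)))
    .thenReturn(CompletableFuture.completedFuture(null));
when(blobStoreUtil.deleteSnapshotIndexBlob(anyString(), any(Metadata.class)))
    .thenReturn(CompletableFuture.completedFuture(null));

ExecutorService executor = Executors.newSingleThreadExecutor();
BlobStoreRestoreManager.deleteUnusedStoresFromBlobStore("jobName", "jobId", "taskName",
    storageConfig, mock(BlobStoreConfig.class), initialStoreSnapshotIndexes, blobStoreUtil, executor);

// the method blocks until the deletion chain completes, so the last step can be verified directly
verify(blobStoreUtil).deleteSnapshotIndexBlob(eq("indexBlobId"), any(Metadata.class));
executor.shutdown();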

Example 8 with SnapshotIndex

Use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by Apache.

The class BlobStoreRestoreManager, method restoreStores.

/**
 * Restores all eligible stores in the task.
 */
@VisibleForTesting
static CompletableFuture<Void> restoreStores(String jobName, String jobId, TaskName taskName, Set<String> storesToRestore, Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes, File loggedBaseDir, StorageConfig storageConfig, BlobStoreRestoreManagerMetrics metrics, StorageManagerUtil storageManagerUtil, BlobStoreUtil blobStoreUtil, DirDiffUtil dirDiffUtil, ExecutorService executor) {
    long restoreStartTime = System.nanoTime();
    List<CompletionStage<Void>> restoreFutures = new ArrayList<>();
    LOG.debug("Starting restore for task: {} stores: {}", taskName, storesToRestore);
    storesToRestore.forEach(storeName -> {
        if (!prevStoreSnapshotIndexes.containsKey(storeName)) {
            LOG.info("No checkpointed snapshot index found for task: {} store: {}. Skipping restore.", taskName, storeName);
            // This can happen if the store was not previously configured with blob store
            // based backup and restore, both at the same time.
            return;
        }
        Pair<String, SnapshotIndex> scmAndSnapshotIndex = prevStoreSnapshotIndexes.get(storeName);
        long storeRestoreStartTime = System.nanoTime();
        SnapshotIndex snapshotIndex = scmAndSnapshotIndex.getRight();
        DirIndex dirIndex = snapshotIndex.getDirIndex();
        DirIndex.Stats stats = DirIndex.getStats(dirIndex);
        metrics.filesToRestore.getValue().addAndGet(stats.filesPresent);
        metrics.bytesToRestore.getValue().addAndGet(stats.bytesPresent);
        metrics.filesRemaining.getValue().addAndGet(stats.filesPresent);
        metrics.bytesRemaining.getValue().addAndGet(stats.bytesPresent);
        CheckpointId checkpointId = snapshotIndex.getSnapshotMetadata().getCheckpointId();
        File storeDir = storageManagerUtil.getTaskStoreDir(loggedBaseDir, storeName, taskName, TaskMode.Active);
        Path storeCheckpointDir = Paths.get(storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId));
        LOG.trace("Got task: {} store: {} local store directory: {} and local store checkpoint directory: {}", taskName, storeName, storeDir, storeCheckpointDir);
        // we always delete the store dir to preserve transactional state guarantees.
        try {
            LOG.debug("Deleting local store directory: {}. Will be restored from local store checkpoint directory " + "or remote snapshot.", storeDir);
            FileUtils.deleteDirectory(storeDir);
        } catch (IOException e) {
            throw new SamzaException(String.format("Error deleting store directory: %s", storeDir), e);
        }
        boolean shouldRestore = shouldRestore(taskName.getTaskName(), storeName, dirIndex, storeCheckpointDir, storageConfig, dirDiffUtil);
        if (shouldRestore) {
            // restore the store from the remote blob store
            // delete all store checkpoint directories. if we only delete the store directory and don't
            // delete the checkpoint directories, the store size on disk will grow to 2x after restore
            // until the first commit is completed and older checkpoint dirs are deleted. This is
            // because the hard-linked checkpoint dir files will no longer be de-duped with the
            // now-deleted main store directory contents and will take up additional space of their
            // own during the restore.
            deleteCheckpointDirs(taskName, storeName, loggedBaseDir, storageManagerUtil);
            metrics.storePreRestoreNs.get(storeName).set(System.nanoTime() - storeRestoreStartTime);
            enqueueRestore(jobName, jobId, taskName.toString(), storeName, storeDir, dirIndex, storeRestoreStartTime, restoreFutures, blobStoreUtil, dirDiffUtil, metrics, executor);
        } else {
            LOG.debug("Renaming store checkpoint directory: {} to store directory: {} since its contents are identical " + "to the remote snapshot.", storeCheckpointDir, storeDir);
            // atomically rename the checkpoint dir to the store dir
            new FileUtil().move(storeCheckpointDir.toFile(), storeDir);
            // delete any other checkpoint dirs.
            deleteCheckpointDirs(taskName, storeName, loggedBaseDir, storageManagerUtil);
        }
    });
    // wait for all restores to finish
    return FutureUtil.allOf(restoreFutures).whenComplete((res, ex) -> {
        LOG.info("Restore completed for task: {} stores", taskName);
        metrics.restoreNs.set(System.nanoTime() - restoreStartTime);
    });
}
Also used : Path(java.nio.file.Path) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) ArrayList(java.util.ArrayList) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File) FileUtil(org.apache.samza.util.FileUtil) CompletionStage(java.util.concurrent.CompletionStage) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
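
As a rough illustration of the calling side, the sketch below wires restoreStores together with BlobStoreUtil.getStoreSnapshotIndexes (shown in Example 10) to resolve the checkpointed snapshot indexes first. The helper name driveRestore, the task and store names, and the base directory are placeholders, and the collaborators are assumed to be constructed elsewhere; this is not Samza's actual restore entry point.

static void driveRestore(Checkpoint checkpoint, StorageConfig storageConfig,
    BlobStoreRestoreManagerMetrics metrics, StorageManagerUtil storageManagerUtil,
    BlobStoreUtil blobStoreUtil, DirDiffUtil dirDiffUtil, ExecutorService executor) {
    TaskName taskName = new TaskName("Partition 0");
    Set<String> storesToRestore = ImmutableSet.of("loggedStore1");
    // resolve the snapshot index blob id -> SnapshotIndex pairs recorded in the checkpoint for these stores
    Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes =
        blobStoreUtil.getStoreSnapshotIndexes("jobName", "jobId", taskName.getTaskName(),
            checkpoint, storesToRestore);
    // restore each store from the remote snapshot (or recover it from a matching local
    // checkpoint directory) and block until all restores finish
    BlobStoreRestoreManager.restoreStores("jobName", "jobId", taskName, storesToRestore,
        prevStoreSnapshotIndexes, new File("/tmp/logged-store-base-dir"), storageConfig,
        metrics, storageManagerUtil, blobStoreUtil, dirDiffUtil, executor)
        .join();
}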

Example 9 with SnapshotIndex

Use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by Apache.

The class BlobStoreBackupManager, method upload.

@Override
public CompletableFuture<Map<String, String>> upload(CheckpointId checkpointId, Map<String, String> storeSCMs) {
    long uploadStartTime = System.nanoTime();
    // reset gauges for each upload
    metrics.filesToUpload.getValue().set(0L);
    metrics.bytesToUpload.getValue().set(0L);
    metrics.filesUploaded.getValue().set(0L);
    metrics.bytesUploaded.getValue().set(0L);
    metrics.filesRemaining.getValue().set(0L);
    metrics.bytesRemaining.getValue().set(0L);
    metrics.filesToRetain.getValue().set(0L);
    metrics.bytesToRetain.getValue().set(0L);
    // This map is used to atomically replace the prevStoreSnapshotIndexesFuture map at the end of the task commit
    Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> storeToSCMAndSnapshotIndexPairFutures = new HashMap<>();
    // This map is used to return serialized State Checkpoint Markers to the caller
    Map<String, CompletableFuture<String>> storeToSerializedSCMFuture = new HashMap<>();
    storesToBackup.forEach((storeName) -> {
        long storeUploadStartTime = System.nanoTime();
        try {
            // metadata for the current store snapshot to upload
            SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
            // get the local store dir corresponding to the current checkpointId
            File storeDir = storageManagerUtil.getTaskStoreDir(loggedStoreBaseDir, storeName, taskModel.getTaskName(), taskModel.getTaskMode());
            String checkpointDirPath = storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId);
            File checkpointDir = new File(checkpointDirPath);
            LOG.debug("Got task: {} store: {} storeDir: {} and checkpointDir: {}", taskName, storeName, storeDir, checkpointDir);
            // guaranteed to be available since a new task commit may not start until the previous one is complete
            Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes = prevStoreSnapshotIndexesFuture.get(0, TimeUnit.MILLISECONDS);
            // get the previous store directory contents
            DirIndex prevDirIndex;
            if (prevStoreSnapshotIndexes.containsKey(storeName)) {
                prevDirIndex = prevStoreSnapshotIndexes.get(storeName).getRight().getDirIndex();
            } else {
                // no previous SnapshotIndex means that this is the first commit for this store. Create an empty DirIndex.
                prevDirIndex = new DirIndex(checkpointDir.getName(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), Collections.emptyList());
            }
            long dirDiffStartTime = System.nanoTime();
            // get the diff between previous and current store directories
            DirDiff dirDiff = DirDiffUtil.getDirDiff(checkpointDir, prevDirIndex, DirDiffUtil.areSameFile(false));
            metrics.storeDirDiffNs.get(storeName).update(System.nanoTime() - dirDiffStartTime);
            DirDiff.Stats stats = DirDiff.getStats(dirDiff);
            updateStoreDiffMetrics(storeName, stats);
            metrics.filesToUpload.getValue().addAndGet(stats.filesAdded);
            metrics.bytesToUpload.getValue().addAndGet(stats.bytesAdded);
            // Note: FilesRemaining metric is set to FilesAdded in the beginning of the current upload and then counted down
            // for each upload.
            metrics.filesRemaining.getValue().addAndGet(stats.filesAdded);
            metrics.bytesRemaining.getValue().addAndGet(stats.bytesAdded);
            metrics.filesToRetain.getValue().addAndGet(stats.filesRetained);
            metrics.bytesToRetain.getValue().addAndGet(stats.bytesRetained);
            // upload the diff to the blob store and get the new directory index
            CompletionStage<DirIndex> dirIndexFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata);
            CompletionStage<SnapshotIndex> snapshotIndexFuture = dirIndexFuture.thenApplyAsync(dirIndex -> {
                LOG.trace("Dir upload complete. Returning new SnapshotIndex for task: {} store: {}.", taskName, storeName);
                Optional<String> prevSnapshotIndexBlobId = Optional.ofNullable(prevStoreSnapshotIndexes.get(storeName)).map(Pair::getLeft);
                return new SnapshotIndex(clock.currentTimeMillis(), snapshotMetadata, dirIndex, prevSnapshotIndexBlobId);
            }, executor);
            // upload the new snapshot index to the blob store and get its blob id
            CompletionStage<String> snapshotIndexBlobIdFuture = snapshotIndexFuture.thenComposeAsync(si -> {
                LOG.trace("Uploading Snapshot index for task: {} store: {}", taskName, storeName);
                return blobStoreUtil.putSnapshotIndex(si);
            }, executor);
            // save store name and its SnapshotIndex blob id and SnapshotIndex pair. At the end of the upload, atomically
            // update previous snapshot index map with this.
            CompletableFuture<Pair<String, SnapshotIndex>> scmAndSnapshotIndexPairFuture = FutureUtil.toFutureOfPair(Pair.of(snapshotIndexBlobIdFuture.toCompletableFuture(), snapshotIndexFuture.toCompletableFuture()));
            scmAndSnapshotIndexPairFuture.whenComplete((res, ex) -> {
                long uploadTimeNs = System.nanoTime() - storeUploadStartTime;
                metrics.storeUploadNs.get(storeName).update(uploadTimeNs);
            });
            storeToSCMAndSnapshotIndexPairFutures.put(storeName, scmAndSnapshotIndexPairFuture);
            storeToSerializedSCMFuture.put(storeName, snapshotIndexBlobIdFuture.toCompletableFuture());
        } catch (Exception e) {
            throw new SamzaException(String.format("Error uploading store snapshot to blob store for task: %s, store: %s, checkpointId: %s", taskName, storeName, checkpointId), e);
        }
    });
    // replace the previous storeName to snapshot index mapping with the new mapping.
    this.prevStoreSnapshotIndexesFuture = FutureUtil.toFutureOfMap(storeToSCMAndSnapshotIndexPairFutures);
    return FutureUtil.toFutureOfMap(storeToSerializedSCMFuture).whenComplete((res, ex) -> metrics.uploadNs.update(System.nanoTime() - uploadStartTime));
}
Also used : SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) SamzaException(org.apache.samza.SamzaException) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) File(java.io.File) Pair(org.apache.commons.lang3.tuple.Pair)
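
On the calling side, a task commit would invoke upload and use the joined result as the serialized state checkpoint markers for the blob store backend. The sketch below is illustrative only: blobStoreBackupManager is assumed to be an already-initialized BlobStoreBackupManager, and CheckpointId.create() is assumed as the factory for a fresh checkpoint id.

CheckpointId checkpointId = CheckpointId.create();
// previous serialized SCMs for each store; empty here for illustration
Map<String, String> previousStoreSCMs = Collections.emptyMap();

// store name -> blob id of the newly uploaded SnapshotIndex (the serialized SCM)
Map<String, String> storeToSnapshotIndexBlobId =
    blobStoreBackupManager.upload(checkpointId, previousStoreSCMs).join();

// persisting these blob ids in the checkpoint lets the next commit diff against this snapshot
// and lets a later restore locate it
storeToSnapshotIndexBlobId.forEach((storeName, snapshotIndexBlobId) ->
    System.out.printf("store %s -> snapshot index blob id %s%n", storeName, snapshotIndexBlobId));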

Example 10 with SnapshotIndex

Use of org.apache.samza.storage.blobstore.index.SnapshotIndex in project samza by Apache.

The class TestBlobStoreUtil, method testGetSSISkipsStoresWithSnapshotIndexAlreadyDeleted.

@Test
public void testGetSSISkipsStoresWithSnapshotIndexAlreadyDeleted() {
    String store = "storeName1";
    String otherStore = "storeName2";
    Checkpoint checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(store, "snapshotIndexBlobId1", otherStore, "snapshotIndexBlobId2"));
    Set<String> storesToBackupOrRestore = new HashSet<>();
    storesToBackupOrRestore.add(store);
    storesToBackupOrRestore.add(otherStore);
    SnapshotIndex store1SnapshotIndex = mock(SnapshotIndex.class);
    BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
    CompletableFuture<SnapshotIndex> failedFuture = FutureUtil.failedFuture(new DeletedException());
    when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId1"), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(store1SnapshotIndex));
    when(mockBlobStoreUtil.getSnapshotIndex(eq("snapshotIndexBlobId2"), any(Metadata.class))).thenReturn(failedFuture);
    when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    Map<String, Pair<String, SnapshotIndex>> snapshotIndexes = mockBlobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);
    assertEquals(1, snapshotIndexes.size());
    assertEquals("snapshotIndexBlobId1", snapshotIndexes.get("storeName1").getLeft());
    assertEquals(store1SnapshotIndex, snapshotIndexes.get("storeName1").getRight());
}
Also used : BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) HashSet(java.util.HashSet) Pair(org.apache.commons.lang3.tuple.Pair) Test(org.junit.Test)
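
A hedged companion case (not in the Samza repo) follows from the behavior shown above: if every checkpointed snapshot index blob has already been deleted, getStoreSnapshotIndexes should skip all stores and return an empty map. The setup mirrors the test above and assumes the same createCheckpointV2 helper from TestBlobStoreUtil; the test name is hypothetical.

@Test
public void testGetSSIReturnsEmptyMapWhenAllSnapshotIndexesAreDeleted() {
    Checkpoint checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(),
        ImmutableMap.of("storeName1", "snapshotIndexBlobId1", "storeName2", "snapshotIndexBlobId2"));
    Set<String> storesToBackupOrRestore = new HashSet<>();
    storesToBackupOrRestore.add("storeName1");
    storesToBackupOrRestore.add("storeName2");
    BlobStoreUtil mockBlobStoreUtil = mock(BlobStoreUtil.class);
    // every snapshot index fetch fails with DeletedException, as if the blobs were already removed
    CompletableFuture<SnapshotIndex> failedFuture = FutureUtil.failedFuture(new DeletedException());
    when(mockBlobStoreUtil.getSnapshotIndex(anyString(), any(Metadata.class))).thenReturn(failedFuture);
    when(mockBlobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(),
        any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();
    Map<String, Pair<String, SnapshotIndex>> snapshotIndexes = mockBlobStoreUtil.getStoreSnapshotIndexes(
        "testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);
    assertTrue(snapshotIndexes.isEmpty());
}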

Aggregations

SnapshotIndex (org.apache.samza.storage.blobstore.index.SnapshotIndex): 20 usages
SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata): 18 usages
Pair (org.apache.commons.lang3.tuple.Pair): 16 usages
DirIndex (org.apache.samza.storage.blobstore.index.DirIndex): 15 usages
Test (org.junit.Test): 14 usages
File (java.io.File): 12 usages
Path (java.nio.file.Path): 12 usages
Checkpoint (org.apache.samza.checkpoint.Checkpoint): 11 usages
CheckpointId (org.apache.samza.checkpoint.CheckpointId): 10 usages
ArrayList (java.util.ArrayList): 9 usages
SamzaException (org.apache.samza.SamzaException): 9 usages
IOException (java.io.IOException): 8 usages
HashMap (java.util.HashMap): 8 usages
CompletionStage (java.util.concurrent.CompletionStage): 8 usages
TaskName (org.apache.samza.container.TaskName): 8 usages
StorageManagerUtil (org.apache.samza.storage.StorageManagerUtil): 8 usages
CompletableFuture (java.util.concurrent.CompletableFuture): 7 usages
CheckpointV2 (org.apache.samza.checkpoint.CheckpointV2): 7 usages
Files (java.nio.file.Files): 6 usages
List (java.util.List): 6 usages