Search in sources :

Example 11 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class TestBlobStoreUtil method testPutFileChecksumAndMetadata.

/**
 * Verifies that {@code putFile} hands the blob store a {@link Metadata} carrying the uploaded file's absolute
 * path and size, and that the returned {@link FileIndex} reports the file's CRC32 checksum.
 */
@Test
public void testPutFileChecksumAndMetadata() throws IOException, ExecutionException, InterruptedException {
    // Arrange: a temp file with random text content and the checksum expected for it.
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    Path tempFilePath = Files.createTempFile("samza-testPutFileChecksum-", ".tmp");
    new FileUtil().writeToTextFile(tempFilePath.toFile(), RandomStringUtils.random(1000), false);
    long expectedChecksum = FileUtils.checksumCRC32(tempFilePath.toFile());

    // Arrange: a blob store mock that drains the uploaded stream and records the request metadata.
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    ArgumentCaptor<Metadata> metadataCaptor = ArgumentCaptor.forClass(Metadata.class);
    Answer<CompletionStage<String>> drainAndReturnBlobId = invocation -> {
        InputStream uploaded = invocation.getArgumentAt(0, InputStream.class);
        // the stream is read to the end and its bytes are discarded.
        IOUtils.copy(uploaded, NullOutputStream.NULL_OUTPUT_STREAM);
        return CompletableFuture.completedFuture("blobId");
    };
    when(blobStoreManager.put(any(InputStream.class), metadataCaptor.capture())).thenAnswer(drainAndReturnBlobId);

    // Act
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    CompletableFuture<FileIndex> putFileFuture =
        blobStoreUtil.putFile(tempFilePath.toFile(), snapshotMetadata).toCompletableFuture();
    FileIndex fileIndex = null;
    try {
        // a zero timeout means the future must already be complete; otherwise the
        // future composition inside putFile is broken.
        fileIndex = putFileFuture.get(0, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
        fail("Future returned from putFile should be already complete.");
    }

    // Assert
    Metadata capturedMetadata = metadataCaptor.getValue();
    long reportedPayloadSize = Long.valueOf(capturedMetadata.getPayloadSize()).longValue();
    assertEquals(tempFilePath.toAbsolutePath().toString(), capturedMetadata.getPayloadPath());
    assertEquals(tempFilePath.toFile().length(), reportedPayloadSize);
    assertEquals(expectedChecksum, fileIndex.getChecksum());
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) FileUtil(org.apache.samza.util.FileUtil) CompletionStage(java.util.concurrent.CompletionStage) TimeoutException(java.util.concurrent.TimeoutException) Test(org.junit.Test)

Example 12 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class TestBlobStoreUtil method testPutDirFailsIfAnySubDirFileUploadFails.

/**
 * Verifies that the future returned by {@code putDir} fails, with the original upload exception as its cause,
 * when the upload of a file in one of the sub-directories fails.
 */
@Test
public void testPutDirFailsIfAnySubDirFileUploadFails() throws IOException, TimeoutException, InterruptedException {
    // Local snapshot has two sub-directories with one file each; nothing exists remotely yet.
    String local = "[a/1, b/2]";
    String remote = "[]";

    // Build the local directory, the (empty) remote index and the diff between them.
    Path localSnapshotDir = BlobStoreTestUtil.createLocalDir(local);
    DirIndex remoteSnapshotDir = BlobStoreTestUtil.createDirIndex(remote);
    SnapshotMetadata snapshotMetadata = new SnapshotMetadata(checkpointId, jobName, jobId, taskName, storeName);
    DirDiff dirDiff = DirDiffUtil.getDirDiff(localSnapshotDir.toFile(), remoteSnapshotDir,
        (localFile, remoteFile) -> localFile.getName().equals(remoteFile.getFileName()));

    // Blob store mock: uploads whose payload path ends with "1" succeed, every other upload fails.
    SamzaException exception = new SamzaException("Error uploading file");
    CompletableFuture<String> failedFuture = new CompletableFuture<>();
    failedFuture.completeExceptionally(exception);
    BlobStoreManager blobStoreManager = mock(BlobStoreManager.class);
    when(blobStoreManager.put(any(InputStream.class), any(Metadata.class))).thenAnswer((Answer<CompletableFuture<String>>) invocation -> {
        String payloadPath = invocation.getArgumentAt(1, Metadata.class).getPayloadPath();
        return payloadPath.endsWith("1") ? CompletableFuture.completedFuture("a1BlobId") : failedFuture;
    });

    // Execute
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(blobStoreManager, EXECUTOR, null, null);
    CompletableFuture<DirIndex> putDirFuture = blobStoreUtil.putDir(dirDiff, snapshotMetadata).toCompletableFuture();

    try {
        // a zero timeout means the future must already be complete; otherwise the
        // future composition inside putDir is broken.
        putDirFuture.get(0, TimeUnit.MILLISECONDS);
        // reaching this line means the future completed normally, which is wrong.
        fail("DirIndex future should have been completed with an exception");
    } catch (ExecutionException e) {
        // Assert that the result future fails and that the cause is propagated correctly
        assertEquals(exception, e.getCause());
    }
}
Also used : Path(java.nio.file.Path) SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) InputStream(java.io.InputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) SamzaException(org.apache.samza.SamzaException) CompletableFuture(java.util.concurrent.CompletableFuture) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) ExecutionException(java.util.concurrent.ExecutionException) Test(org.junit.Test)

Example 13 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class TestBlobStoreUtil method testRestoreDirCreatesCorrectDirectoryStructure.

/**
 * Verifies that {@code restoreDir} recreates, under a fresh temp directory, a directory tree that
 * {@link DirDiffUtil} considers the same as the {@link DirIndex} it was restored from.
 */
@Test
public void testRestoreDirCreatesCorrectDirectoryStructure() throws IOException {
    // Snapshot layout to restore: top-level files plus files nested one and two directories deep.
    DirIndex dirIndex = BlobStoreTestUtil.createDirIndex("[a, b, z/1, y/1, p/m/1, q/n/1]");

    // Blob store mock: each requested blob's content is simply its own blob id.
    Answer<CompletionStage<Void>> writeBlobIdAsContent = invocationOnMock -> {
        String requestedBlobId = invocationOnMock.getArgumentAt(0, String.class);
        OutputStream target = invocationOnMock.getArgumentAt(1, OutputStream.class);
        target.write(requestedBlobId.getBytes());
        return CompletableFuture.completedFuture(null);
    };
    BlobStoreManager mockBlobStoreManager = mock(BlobStoreManager.class);
    when(mockBlobStoreManager.get(anyString(), any(OutputStream.class), any(Metadata.class)))
        .thenAnswer(writeBlobIdAsContent);

    // Restore into a new temp directory and block until the restore finishes.
    Path restoreDirBasePath = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
    BlobStoreUtil blobStoreUtil = new BlobStoreUtil(mockBlobStoreManager, EXECUTOR, null, null);
    blobStoreUtil.restoreDir(restoreDirBasePath.toFile(), dirIndex, metadata).join();

    boolean restoredDirMatchesIndex =
        new DirDiffUtil().areSameDir(Collections.emptySet(), false).test(restoreDirBasePath.toFile(), dirIndex);
    assertTrue(restoredDirMatchesIndex);
}
Also used : SortedSet(java.util.SortedSet) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) FileTime(java.nio.file.attribute.FileTime) TimeoutException(java.util.concurrent.TimeoutException) Random(java.util.Random) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) FileUtil(org.apache.samza.util.FileUtil) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) Path(java.nio.file.Path) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableSet(com.google.common.collect.ImmutableSet) PosixFileAttributes(java.nio.file.attribute.PosixFileAttributes) ImmutableMap(com.google.common.collect.ImmutableMap) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) CheckpointId(org.apache.samza.checkpoint.CheckpointId) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) Optional(java.util.Optional) RandomStringUtils(org.apache.commons.lang3.RandomStringUtils) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) Matchers(org.mockito.Matchers) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Answer(org.mockito.stubbing.Answer) PosixFilePermissions(java.nio.file.attribute.PosixFilePermissions) ArgumentCaptor(org.mockito.ArgumentCaptor) ImmutableList(com.google.common.collect.ImmutableList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) 
BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) OutputStream(java.io.OutputStream) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Mockito(org.mockito.Mockito) Ignore(org.junit.Ignore) Paths(java.nio.file.Paths) NullOutputStream(org.apache.commons.io.output.NullOutputStream) CRC32(java.util.zip.CRC32) Assert(org.junit.Assert) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) Path(java.nio.file.Path) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) NullOutputStream(org.apache.commons.io.output.NullOutputStream) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) CompletionStage(java.util.concurrent.CompletionStage) Test(org.junit.Test)

Example 14 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method getStoreSnapshotIndexes.

/**
 * Gets, for the provided {@code taskName}, the {@link SnapshotIndex} blob id and the corresponding
 * {@link SnapshotIndex} of each requested store recorded in the provided {@code checkpoint}.
 * <p>
 * Returns an empty map when {@code checkpoint} is null or reports version 1. All snapshot index requests are
 * issued first and then awaited together. A failure whose unwrapped cause is a {@link DeletedException} is
 * logged and reported as ignorable to {@link FutureUtil#toFutureOfMap}; any other failure surfaces as a
 * {@link SamzaException}.
 *
 * @param jobName job name is used to build request metadata
 * @param jobId job id is used to build request metadata
 * @param taskName task name to get the store state checkpoint markers and snapshot indexes for
 * @param checkpoint {@link Checkpoint} instance to get the store state checkpoint markers from. Only
 *                   {@link CheckpointV2} and newer are supported for blob stores.
 * @param storesToBackupOrRestore set of store names to be backed up or restored
 * @return Map of store name to a pair of its snapshot index blob id and the snapshot index itself.
 */
public Map<String, Pair<String, SnapshotIndex>> getStoreSnapshotIndexes(String jobName, String jobId, String taskName, Checkpoint checkpoint, Set<String> storesToBackupOrRestore) {
    // TODO MED shesharma document error handling (checkpoint ver, blob not found, getBlob)
    if (checkpoint == null) {
        LOG.debug("No previous checkpoint found for taskName: {}", taskName);
        return ImmutableMap.of();
    }
    if (checkpoint.getVersion() == 1) {
        LOG.warn("Checkpoint version 1 is not supported for blob store backup and restore.");
        return ImmutableMap.of();
    }

    Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> pendingIndexesByStore = new HashMap<>();
    CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
    // state checkpoint markers are keyed by state backend factory class name; pick the blob store's entry.
    Map<String, String> blobIdsByStore =
        checkpointV2.getStateCheckpointMarkers().get(BlobStoreStateBackendFactory.class.getName());

    if (blobIdsByStore == null) {
        LOG.debug("No store SCMs found for blob store state backend in for taskName: {} in checkpoint {}", taskName, checkpointV2.getCheckpointId());
    } else {
        for (Map.Entry<String, String> scmEntry : blobIdsByStore.entrySet()) {
            String storeName = scmEntry.getKey();
            String snapshotIndexBlobId = scmEntry.getValue();

            if (!storesToBackupOrRestore.contains(storeName)) {
                LOG.debug("SnapshotIndex blob id {} for store {} is not present in the set of stores to be backed up/restores: {}", snapshotIndexBlobId, storeName, storesToBackupOrRestore);
                continue;
            }

            try {
                LOG.debug("Getting snapshot index for taskName: {} store: {} blobId: {}", taskName, storeName, snapshotIndexBlobId);
                Metadata requestMetadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
                CompletableFuture<String> blobIdFuture = CompletableFuture.completedFuture(snapshotIndexBlobId);
                CompletableFuture<SnapshotIndex> snapshotIndexFuture = getSnapshotIndex(snapshotIndexBlobId, requestMetadata).toCompletableFuture();
                // save the future and block once in the end instead of blocking for each request.
                pendingIndexesByStore.put(storeName, FutureUtil.toFutureOfPair(Pair.of(blobIdFuture, snapshotIndexFuture)));
            } catch (Exception e) {
                throw new SamzaException(String.format("Error getting SnapshotIndex for blobId: %s for taskName: %s store: %s", snapshotIndexBlobId, taskName, storeName), e);
            }
        }
    }

    try {
        return FutureUtil.toFutureOfMap(t -> {
            // a snapshot index that was already deleted is reported as ignorable instead of failing the task.
            boolean alreadyDeleted = FutureUtil.unwrapExceptions(CompletionException.class, t) instanceof DeletedException;
            if (alreadyDeleted) {
                LOG.warn("Ignoring already deleted snapshot index for taskName: {}", taskName, t);
            }
            return alreadyDeleted;
        }, pendingIndexesByStore).join();
    } catch (Exception e) {
        throw new SamzaException(String.format("Error while waiting to get store snapshot indexes for task %s", taskName), e);
    }
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) 
Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) HashMap(java.util.HashMap) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) Metadata(org.apache.samza.storage.blobstore.Metadata) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) SamzaException(org.apache.samza.SamzaException) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) CompletableFuture(java.util.concurrent.CompletableFuture) CompletionException(java.util.concurrent.CompletionException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 15 with Metadata

use of org.apache.samza.storage.blobstore.Metadata in project samza by apache.

the class BlobStoreUtil method getFile.

/**
 * Gets a file from the blob store.
 * <p>
 * Any existing file at {@code fileToRestore} is deleted first. The blobs are then written to the file one after
 * another in ascending offset order, and on success the contents are synced to disk and the output stream is
 * closed. If any stage fails (or the returned future is otherwise completed exceptionally), the output stream
 * is closed as well so that the file descriptor is not leaked.
 *
 * @param fileBlobs List of {@link FileBlob}s that constitute this file.
 * @param fileToRestore File pointing to the local path where the file will be restored.
 * @param requestMetadata {@link Metadata} associated with this request
 * @return a future that completes when the file is downloaded and written or if an exception occurs.
 * @throws SamzaException if the restore could not be set up, e.g. the existing file could not be deleted or the
 *                        output file could not be opened.
 */
@VisibleForTesting
CompletableFuture<Void> getFile(List<FileBlob> fileBlobs, File fileToRestore, Metadata requestMetadata) {
    FileOutputStream outputStream = null;
    try {
        long restoreFileStartTime = System.nanoTime();
        if (fileToRestore.exists()) {
            // delete the file if it already exists, e.g. from a previous retry.
            Files.delete(fileToRestore.toPath());
        }
        outputStream = new FileOutputStream(fileToRestore);
        final FileOutputStream finalOutputStream = outputStream;
        // TODO HIGH shesharm add integration tests to ensure empty files and directories are handled correctly E2E.
        // create file for 0 byte files (fileIndex entry but no fileBlobs).
        fileToRestore.createNewFile();
        // create a copy to ensure list being sorted is mutable.
        List<FileBlob> fileBlobsCopy = new ArrayList<>(fileBlobs);
        // sort by offset.
        fileBlobsCopy.sort(Comparator.comparingInt(FileBlob::getOffset));
        // chain the futures such that write to file for blobs is sequential.
        // can be optimized to write concurrently to the file later.
        CompletableFuture<Void> resultFuture = CompletableFuture.completedFuture(null);
        for (FileBlob fileBlob : fileBlobsCopy) {
            resultFuture = resultFuture.thenComposeAsync(v -> {
                LOG.debug("Starting restore for file: {} with blob id: {} at offset: {}", fileToRestore, fileBlob.getBlobId(), fileBlob.getOffset());
                return blobStoreManager.get(fileBlob.getBlobId(), finalOutputStream, requestMetadata);
            }, executor);
        }
        // success path only: this stage is skipped if any previous stage completed exceptionally.
        resultFuture = resultFuture.thenRunAsync(() -> {
            LOG.debug("Finished restore for file: {}. Closing output stream.", fileToRestore);
            try {
                // flush the file contents to disk
                finalOutputStream.getFD().sync();
                finalOutputStream.close();
            } catch (Exception e) {
                throw new SamzaException(String.format("Error closing output stream for file: %s", fileToRestore.getAbsolutePath()), e);
            }
        }, executor);
        // failure path: the closing stage above did not run (or failed part-way), so release the file
        // descriptor here. This is a side-effect-only callback; the outcome of the returned future is unchanged.
        resultFuture.whenComplete((res, ex) -> {
            if (ex != null) {
                try {
                    // closing an already closed FileOutputStream is a no-op.
                    finalOutputStream.close();
                } catch (Exception err) {
                    LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), err);
                }
            }
        });
        // NOTE(review): metrics are updated on both success and failure of the restore — confirm this is intended.
        resultFuture.whenComplete((res, ex) -> {
            if (restoreMetrics != null) {
                restoreMetrics.avgFileRestoreNs.update(System.nanoTime() - restoreFileStartTime);
                long fileSize = requestMetadata.getPayloadSize();
                restoreMetrics.restoreRate.inc(fileSize);
                restoreMetrics.filesRestored.getValue().addAndGet(1);
                restoreMetrics.bytesRestored.getValue().addAndGet(fileSize);
                restoreMetrics.filesRemaining.getValue().addAndGet(-1);
                restoreMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
            }
        });
        return resultFuture;
    } catch (Exception exception) {
        // setup failed before the async chain took ownership of the stream; close it here.
        try {
            if (outputStream != null) {
                outputStream.close();
            }
        } catch (Exception err) {
            LOG.error("Error closing output stream for file: {}", fileToRestore.getAbsolutePath(), err);
        }
        throw new SamzaException(String.format("Error restoring file: %s in path: %s", fileToRestore.getName(), requestMetadata.getPayloadPath()), exception);
    }
}
Also used : CheckedInputStream(java.util.zip.CheckedInputStream) BlobStoreRestoreManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreRestoreManagerMetrics) FileMetadata(org.apache.samza.storage.blobstore.index.FileMetadata) LoggerFactory(org.slf4j.LoggerFactory) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) StringUtils(org.apache.commons.lang3.StringUtils) SnapshotIndexSerde(org.apache.samza.storage.blobstore.index.serde.SnapshotIndexSerde) ByteArrayInputStream(java.io.ByteArrayInputStream) Pair(org.apache.commons.lang3.tuple.Pair) Map(java.util.Map) FutureUtil(org.apache.samza.util.FutureUtil) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) Set(java.util.Set) CompletionException(java.util.concurrent.CompletionException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Collectors(java.util.stream.Collectors) DirDiff(org.apache.samza.storage.blobstore.diff.DirDiff) List(java.util.List) CompletionStage(java.util.concurrent.CompletionStage) SnapshotIndex(org.apache.samza.storage.blobstore.index.SnapshotIndex) BlobStoreBackupManagerMetrics(org.apache.samza.storage.blobstore.metrics.BlobStoreBackupManagerMetrics) Optional(java.util.Optional) SnapshotMetadata(org.apache.samza.storage.blobstore.index.SnapshotMetadata) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DirIndex(org.apache.samza.storage.blobstore.index.DirIndex) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) BlobStoreManager(org.apache.samza.storage.blobstore.BlobStoreManager) BlobStoreStateBackendFactory(org.apache.samza.storage.blobstore.BlobStoreStateBackendFactory) ExecutorService(java.util.concurrent.ExecutorService) FileIndex(org.apache.samza.storage.blobstore.index.FileIndex) 
Logger(org.slf4j.Logger) Files(java.nio.file.Files) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Metadata(org.apache.samza.storage.blobstore.Metadata) File(java.io.File) SamzaException(org.apache.samza.SamzaException) Paths(java.nio.file.Paths) CRC32(java.util.zip.CRC32) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) Collections(java.util.Collections) InputStream(java.io.InputStream) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) FileBlob(org.apache.samza.storage.blobstore.index.FileBlob) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) SamzaException(org.apache.samza.SamzaException) RetriableException(org.apache.samza.storage.blobstore.exceptions.RetriableException) CompletionException(java.util.concurrent.CompletionException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) DeletedException(org.apache.samza.storage.blobstore.exceptions.DeletedException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

Metadata (org.apache.samza.storage.blobstore.Metadata)20 FileMetadata (org.apache.samza.storage.blobstore.index.FileMetadata)20 SnapshotMetadata (org.apache.samza.storage.blobstore.index.SnapshotMetadata)20 ArrayList (java.util.ArrayList)19 CompletableFuture (java.util.concurrent.CompletableFuture)19 DirIndex (org.apache.samza.storage.blobstore.index.DirIndex)19 FileIndex (org.apache.samza.storage.blobstore.index.FileIndex)19 CompletionStage (java.util.concurrent.CompletionStage)18 FileBlob (org.apache.samza.storage.blobstore.index.FileBlob)17 File (java.io.File)16 IOException (java.io.IOException)16 InputStream (java.io.InputStream)16 SamzaException (org.apache.samza.SamzaException)16 ImmutableMap (com.google.common.collect.ImmutableMap)15 FileOutputStream (java.io.FileOutputStream)15 Files (java.nio.file.Files)15 Paths (java.nio.file.Paths)15 Collections (java.util.Collections)15 HashMap (java.util.HashMap)15 List (java.util.List)15