Example use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in the Apache Samza project.
Class BlobStoreUtil, method putFile.
/**
 * Uploads a single regular file to the blob store.
 *
 * <p>The file content is streamed through a CRC32 {@link CheckedInputStream}; the checksum accumulated while the
 * blob store reads the stream is recorded in the returned {@link FileIndex}. Each invocation of the upload action
 * opens a fresh stream on the file, and that stream is closed whether the put succeeds or fails.
 *
 * @param file File to upload to blob store. Must be non-null and a regular file.
 * @param snapshotMetadata supplies the job name, job id, task name and store name attached to the blob
 *                         {@link Metadata} for this upload.
 * @return A future containing the {@link FileIndex} for the uploaded file.
 * @throws SamzaException if {@code file} is null or {@code file.isFile()} is false.
 */
@VisibleForTesting
public CompletableFuture<FileIndex> putFile(File file, SnapshotMetadata snapshotMetadata) {
  if (file == null || !file.isFile()) {
    String message = file != null ? "Dir or Symbolic link" : "null";
    throw new SamzaException(String.format("Required a non-null parameter of type file, provided: %s", message));
  }
  long putFileStartTime = System.nanoTime();
  String opName = "putFile: " + file.getAbsolutePath();
  Supplier<CompletionStage<FileIndex>> fileUploadAction = () -> {
    LOG.debug("Putting file: {} to blob store.", file.getPath());
    CompletableFuture<FileIndex> fileBlobFuture;
    CheckedInputStream inputStream = null;
    try {
      // TODO HIGH shesharm maybe use the more efficient CRC32C / PureJavaCRC32 impl
      inputStream = new CheckedInputStream(new FileInputStream(file), new CRC32());
      // effectively-final alias so the stream can be referenced from the lambdas below
      CheckedInputStream finalInputStream = inputStream;
      FileMetadata fileMetadata = FileMetadata.fromFile(file);
      if (backupMetrics != null) {
        backupMetrics.avgFileSizeBytes.update(fileMetadata.getSize());
      }
      Metadata metadata = new Metadata(file.getAbsolutePath(), Optional.of(fileMetadata.getSize()),
          snapshotMetadata.getJobName(), snapshotMetadata.getJobId(), snapshotMetadata.getTaskName(),
          snapshotMetadata.getStoreName());
      fileBlobFuture = blobStoreManager.put(inputStream, metadata)
          .whenComplete((id, putError) -> {
            // The success callback below is skipped when the put fails, so without this stage the
            // file handle would leak on every failed attempt. whenComplete passes the original
            // outcome through unchanged, so exception propagation to the caller is unaffected.
            if (putError != null) {
              try {
                finalInputStream.close();
              } catch (Exception closeError) {
                // best effort: the put failure is the error that matters to the caller
                LOG.error("Error closing input stream for file: {}", file.getName(), closeError);
              }
            }
          })
          .thenApplyAsync(id -> {
            LOG.trace("Put complete. Received Blob ID {}. Closing input stream for file: {}.", id, file.getPath());
            try {
              finalInputStream.close();
            } catch (Exception e) {
              throw new SamzaException(String.format("Error closing input stream for file: %s", file.getAbsolutePath()), e);
            }
            LOG.trace("Returning new FileIndex for file: {}.", file.getPath());
            return new FileIndex(file.getName(), Collections.singletonList(new FileBlob(id, 0)), fileMetadata,
                finalInputStream.getChecksum().getValue());
          }, executor).toCompletableFuture();
    } catch (Exception e) {
      // Synchronous failure (e.g. the file could not be opened or put() threw): release the stream before rethrowing.
      try {
        if (inputStream != null) {
          inputStream.close();
        }
      } catch (Exception err) {
        LOG.error("Error closing input stream for file: {}", file.getName(), err);
      }
      LOG.error("Error putting file: {}", file.getName(), e);
      throw new SamzaException(String.format("Error putting file %s", file.getAbsolutePath()), e);
    }
    return fileBlobFuture;
  };
  return FutureUtil.executeAsyncWithRetries(opName, fileUploadAction, isCauseNonRetriable(), executor)
      .whenComplete((res, ex) -> {
        // NOTE(review): these metrics are updated regardless of whether the upload succeeded (ex is not
        // inspected) - confirm that counting failed uploads as uploaded is intended.
        if (backupMetrics != null) {
          backupMetrics.avgFileUploadNs.update(System.nanoTime() - putFileStartTime);
          long fileSize = file.length();
          backupMetrics.uploadRate.inc(fileSize);
          backupMetrics.filesUploaded.getValue().addAndGet(1);
          backupMetrics.bytesUploaded.getValue().addAndGet(fileSize);
          backupMetrics.filesRemaining.getValue().addAndGet(-1);
          backupMetrics.bytesRemaining.getValue().addAndGet(-1 * fileSize);
        }
      });
}
Example use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in the Apache Samza project.
Class BlobStoreUtil, method putSnapshotIndex.
/**
 * Serializes the given {@link SnapshotIndex} and uploads the resulting bytes to the blob store.
 * @param snapshotIndex the index to serialize and upload.
 * @return a future that completes with the blob ID returned by the blob store for the {@link SnapshotIndex}.
 */
public CompletableFuture<String> putSnapshotIndex(SnapshotIndex snapshotIndex) {
  byte[] serializedIndex = snapshotIndexSerde.toBytes(snapshotIndex);
  String opName = "putSnapshotIndex for checkpointId: " + snapshotIndex.getSnapshotMetadata().getCheckpointId();

  // Built as a supplier so that every invocation wraps the serialized bytes in a fresh stream.
  Supplier<CompletionStage<String>> uploadIndexAction = () -> {
    SnapshotMetadata indexSnapshotMetadata = snapshotIndex.getSnapshotMetadata();
    Metadata requestMetadata = new Metadata(
        Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH,
        Optional.of((long) serializedIndex.length),
        indexSnapshotMetadata.getJobName(),
        indexSnapshotMetadata.getJobId(),
        indexSnapshotMetadata.getTaskName(),
        indexSnapshotMetadata.getStoreName());
    // in-memory stream: nothing to release, so it is intentionally never closed
    InputStream payload = new ByteArrayInputStream(serializedIndex);
    return blobStoreManager.put(payload, requestMetadata).toCompletableFuture();
  };

  return FutureUtil.executeAsyncWithRetries(opName, uploadIndexAction, isCauseNonRetriable(), executor);
}
Example use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in the Apache Samza project.
Class BlobStoreUtil, method removeTTL.
/**
 * Marks all the blobs associated with a {@link SnapshotIndex} to never expire.
 *
 * <p>TTL removal happens in two steps: first for the contents of the snapshot's dir index, and only after that
 * completes, for the snapshot index blob itself. Each step is submitted through
 * {@code FutureUtil.executeAsyncWithRetries}.
 *
 * @param indexBlobId blob ID of the snapshot index blob whose TTL is removed in the second step
 * @param snapshotIndex {@link SnapshotIndex} of the remote snapshot
 * @param metadata {@link Metadata} related to the request
 * @return A future that completes when all the files and subdirs associated with this remote snapshot, and the
 * snapshot index blob itself, are marked to never expire.
 */
public CompletionStage<Void> removeTTL(String indexBlobId, SnapshotIndex snapshotIndex, Metadata metadata) {
  SnapshotMetadata snapshotMetadata = snapshotIndex.getSnapshotMetadata();
  // Pass the object itself so the string is only built when debug logging is enabled.
  LOG.debug("Marking contents of SnapshotIndex: {} to never expire", snapshotMetadata);

  // Step 1: remove the TTL from everything reachable from the dir index.
  String opName = "removeTTL for SnapshotIndex for checkpointId: " + snapshotMetadata.getCheckpointId();
  Supplier<CompletionStage<Void>> removeDirIndexTTLAction =
      () -> removeTTL(snapshotIndex.getDirIndex(), metadata).toCompletableFuture();
  CompletableFuture<Void> dirIndexTTLRemovalFuture =
      FutureUtil.executeAsyncWithRetries(opName, removeDirIndexTTLAction, isCauseNonRetriable(), executor);

  // Step 2: once step 1 has succeeded, remove the TTL from the snapshot index blob itself.
  return dirIndexTTLRemovalFuture.thenComposeAsync(aVoid -> {
    String op2Name = "removeTTL for indexBlobId: " + indexBlobId;
    Supplier<CompletionStage<Void>> removeIndexBlobTTLAction =
        () -> blobStoreManager.removeTTL(indexBlobId, metadata).toCompletableFuture();
    return FutureUtil.executeAsyncWithRetries(op2Name, removeIndexBlobTTLAction, isCauseNonRetriable(), executor);
  }, executor);
}
Example use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in the Apache Samza project.
Class TestBlobStoreRestoreManager, method testRestoreDeletesCheckpointDirsIfRestoring.
/**
 * Checks that {@code BlobStoreRestoreManager.restoreStores} removes the existing store checkpoint directories
 * when a restore is carried out.
 */
@Test
public void testRestoreDeletesCheckpointDirsIfRestoring() throws IOException {
  final String storeName = "storeName";
  String jobName = "testJobName";
  String jobId = "testJobId";
  TaskName taskName = mock(TaskName.class);

  BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
  restoreMetrics.initStoreMetrics(ImmutableList.of(storeName));
  Set<String> storesToRestore = ImmutableSet.of(storeName);

  // remote snapshot: a mocked index exposing a real dir index and real snapshot metadata
  SnapshotIndex remoteSnapshotIndex = mock(SnapshotIndex.class);
  Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes =
      ImmutableMap.of(storeName, Pair.of("blobId", remoteSnapshotIndex));
  DirIndex remoteDirIndex = BlobStoreTestUtil.createDirIndex("[a]");
  when(remoteSnapshotIndex.getDirIndex()).thenReturn(remoteDirIndex);
  CheckpointId checkpointId = CheckpointId.create();
  SnapshotMetadata remoteSnapshotMetadata =
      new SnapshotMetadata(checkpointId, "jobName", "jobId", "taskName", "storeName");
  when(remoteSnapshotIndex.getSnapshotMetadata()).thenReturn(remoteSnapshotMetadata);

  // local layout: a store dir plus two checkpoint dirs, all under a temporary logged base dir
  Path loggedBaseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  Path storeDir = Files.createTempDirectory(loggedBaseDir, "storeDir");
  Path firstCheckpointDir = Files.createTempDirectory(loggedBaseDir, "storeDir-" + checkpointId);
  CheckpointId secondCheckpointId = CheckpointId.create();
  Path secondCheckpointDir = Files.createTempDirectory(loggedBaseDir, "storeDir-" + secondCheckpointId);

  StorageConfig storageConfig = mock(StorageConfig.class);

  // storage manager util resolves the store dir, its checkpoint dir and the existing checkpoint dirs
  StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
  when(storageManagerUtil.getTaskStoreDir(
      eq(loggedBaseDir.toFile()), eq(storeName), eq(taskName), eq(TaskMode.Active)))
      .thenReturn(storeDir.toFile());
  String storeCheckpointDirPath = Paths.get(storeDir.toString(), checkpointId.toString()).toString();
  when(storageManagerUtil.getStoreCheckpointDir(eq(storeDir.toFile()), eq(checkpointId)))
      .thenReturn(storeCheckpointDirPath);
  when(storageManagerUtil.getTaskStoreCheckpointDirs(
      any(File.class), anyString(), any(TaskName.class), any(TaskMode.class)))
      .thenReturn(ImmutableList.of(firstCheckpointDir.toFile(), secondCheckpointDir.toFile()));

  BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
  DirDiffUtil dirDiffUtil = mock(DirDiffUtil.class);
  when(dirDiffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((arg1, arg2) -> true);
  // the mocked restore completes immediately and restores nothing
  when(blobStoreUtil.restoreDir(eq(storeDir.toFile()), eq(remoteDirIndex), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture(null));

  BlobStoreRestoreManager.restoreStores(jobName, jobId, taskName, storesToRestore, prevStoreSnapshotIndexes,
      loggedBaseDir.toFile(), storageConfig, restoreMetrics, storageManagerUtil, blobStoreUtil, dirDiffUtil,
      EXECUTOR);

  // the (no-op) restore of the store directory must have been requested exactly once
  verify(blobStoreUtil, times(1)).restoreDir(eq(storeDir.toFile()), eq(remoteDirIndex), any(Metadata.class));
  // both checkpoint directories must be gone once restoreStores returns
  assertFalse(firstCheckpointDir.toFile().exists());
  assertFalse(secondCheckpointDir.toFile().exists());
}
Example use of org.apache.samza.storage.blobstore.index.SnapshotMetadata in the Apache Samza project.
Class TestBlobStoreRestoreManager, method testRestoreDeletesStoreDir.
/**
 * Checks that {@code BlobStoreRestoreManager.restoreStores} removes the existing store directory when a restore
 * is carried out.
 */
@Test
public void testRestoreDeletesStoreDir() throws IOException {
  final String storeName = "storeName";
  String jobName = "testJobName";
  String jobId = "testJobId";
  TaskName taskName = mock(TaskName.class);

  BlobStoreRestoreManagerMetrics restoreMetrics = new BlobStoreRestoreManagerMetrics(new MetricsRegistryMap());
  restoreMetrics.initStoreMetrics(ImmutableList.of(storeName));
  Set<String> storesToRestore = ImmutableSet.of(storeName);

  // remote snapshot: a mocked index exposing a real dir index and real snapshot metadata
  SnapshotIndex remoteSnapshotIndex = mock(SnapshotIndex.class);
  Map<String, Pair<String, SnapshotIndex>> prevStoreSnapshotIndexes =
      ImmutableMap.of(storeName, Pair.of("blobId", remoteSnapshotIndex));
  DirIndex remoteDirIndex = BlobStoreTestUtil.createDirIndex("[a]");
  when(remoteSnapshotIndex.getDirIndex()).thenReturn(remoteDirIndex);
  SnapshotMetadata remoteSnapshotMetadata =
      new SnapshotMetadata(CheckpointId.create(), "jobName", "jobId", "taskName", "storeName");
  when(remoteSnapshotIndex.getSnapshotMetadata()).thenReturn(remoteSnapshotMetadata);

  // local layout: a store dir under a temporary logged base dir; the restore is expected to delete it
  Path loggedBaseDir = Files.createTempDirectory(BlobStoreTestUtil.TEMP_DIR_PREFIX);
  Path storeDir = Files.createTempDirectory(loggedBaseDir, "storeDir");

  StorageConfig storageConfig = mock(StorageConfig.class);

  StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
  String storeCheckpointDirPath = Paths.get(storeDir.toString(), "checkpointId").toString();
  when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class)))
      .thenReturn(storeCheckpointDirPath);
  when(storageManagerUtil.getTaskStoreDir(
      eq(loggedBaseDir.toFile()), eq(storeName), eq(taskName), eq(TaskMode.Active)))
      .thenReturn(storeDir.toFile());

  BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
  DirDiffUtil dirDiffUtil = mock(DirDiffUtil.class);
  // the mocked restore completes immediately and restores nothing
  when(blobStoreUtil.restoreDir(eq(storeDir.toFile()), eq(remoteDirIndex), any(Metadata.class)))
      .thenReturn(CompletableFuture.completedFuture(null));
  when(dirDiffUtil.areSameDir(anySet(), anyBoolean())).thenReturn((arg1, arg2) -> true);

  BlobStoreRestoreManager.restoreStores(jobName, jobId, taskName, storesToRestore, prevStoreSnapshotIndexes,
      loggedBaseDir.toFile(), storageConfig, restoreMetrics, storageManagerUtil, blobStoreUtil, dirDiffUtil,
      EXECUTOR);

  // the (no-op) restore of the store directory must have been requested exactly once
  verify(blobStoreUtil, times(1)).restoreDir(eq(storeDir.toFile()), eq(remoteDirIndex), any(Metadata.class));
  // the store directory is expected to have been deleted; because the mocked restore writes nothing,
  // it must still be absent once restoreStores returns
  assertFalse(storeDir.toFile().exists());
}
Aggregations