use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TransactionalStateTaskRestoreManager method getCheckpointedChangelogOffsets.
/**
 * Builds a map of store name to {@link KafkaStateCheckpointMarker} from the provided checkpoint.
 *
 * For {@link CheckpointV2}, the markers stored under the Kafka state backend factory name are deserialized;
 * markers belonging to any other factory are skipped.
 * For {@link CheckpointV1}, the changelog SSP of each store in {@code storeChangelogs} is looked up in the
 * checkpointed offsets and, when a non-blank offset is present, converted into a marker.
 *
 * @param checkpoint the checkpoint to read from; may be null
 * @return a mutable map of store name to its Kafka checkpoint marker; empty when {@code checkpoint} is null
 * @throws SamzaException if the checkpoint is neither a {@link CheckpointV1} nor a {@link CheckpointV2}
 */
private Map<String, KafkaStateCheckpointMarker> getCheckpointedChangelogOffsets(Checkpoint checkpoint) {
  Map<String, KafkaStateCheckpointMarker> markersByStore = new HashMap<>();
  if (checkpoint == null) {
    return markersByStore;
  }

  if (checkpoint instanceof CheckpointV2) {
    Map<String, Map<String, String>> scmsByFactory = ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
    // Only the Kafka state backend's markers are relevant here; all other factories are skipped.
    if (scmsByFactory.containsKey(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME)) {
      Map<String, String> kafkaScms = scmsByFactory.get(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME);
      for (Map.Entry<String, String> storeToScm : kafkaScms.entrySet()) {
        markersByStore.put(storeToScm.getKey(), KafkaStateCheckpointMarker.deserialize(storeToScm.getValue()));
      }
    }
    return markersByStore;
  }

  if (!(checkpoint instanceof CheckpointV1)) {
    throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
  }

  // With checkpoint v1 the store offsets live alongside the input offsets, keyed by the changelog SSP,
  // so each store's changelog SSP has to be rebuilt and looked up.
  Map<SystemStreamPartition, String> offsetsBySsp = checkpoint.getOffsets();
  storeChangelogs.forEach((storeName, changelogStream) -> {
    Partition partition = taskModel.getChangelogPartition();
    SystemStreamPartition changelogSsp = new SystemStreamPartition(changelogStream, partition);
    String serializedOffset = offsetsBySsp.get(changelogSsp);
    if (StringUtils.isBlank(serializedOffset)) {
      return; // nothing checkpointed for this store's changelog
    }
    KafkaChangelogSSPOffset sspOffset = KafkaChangelogSSPOffset.fromString(serializedOffset);
    markersByStore.put(storeName, new KafkaStateCheckpointMarker(changelogSsp, sspOffset.getChangelogOffset()));
  });
  return markersByStore;
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TaskStorageCommitManager method writeCheckpointToStoreDirectories.
/**
 * Writes the {@link Checkpoint} information returned by {@link #upload(CheckpointId, Map)}
 * in each store directory and store checkpoint directory. Written content depends on the type of {@code checkpoint}.
 * For {@link CheckpointV2}, writes the entire task {@link CheckpointV2}.
 * For {@link CheckpointV1}, only writes the changelog ssp offsets in the OFFSET* files.
 *
 * Note: The assumption is that this method will be invoked once for each {@link Checkpoint} version that the
 * task needs to write as determined by {@link org.apache.samza.config.TaskConfig#getCheckpointWriteVersions()}.
 * This is required for upgrade and rollback compatibility.
 *
 * @param checkpoint the latest checkpoint to be persisted to local file system; must not be null
 * @throws SamzaException if {@code checkpoint} is null, is neither a {@link CheckpointV1} nor a
 *                        {@link CheckpointV2}, or if writing a {@link CheckpointV2} file fails for a store
 */
public void writeCheckpointToStoreDirectories(Checkpoint checkpoint) {
  if (checkpoint == null) {
    // A null checkpoint matches neither instanceof check below; without this guard the fall-through branch
    // would dereference it while building the error message and surface as a bare NullPointerException.
    throw new SamzaException("Cannot write a null checkpoint to store directories for taskName: " + taskName);
  }

  if (checkpoint instanceof CheckpointV1) {
    LOG.debug("Writing CheckpointV1 to store and checkpoint directories for taskName: {} with checkpoint: {}", taskName, checkpoint);
    // Write CheckpointV1 changelog offsets to store and checkpoint directories
    writeChangelogOffsetFiles(checkpoint.getOffsets());
  } else if (checkpoint instanceof CheckpointV2) {
    LOG.debug("Writing CheckpointV2 to store and checkpoint directories for taskName: {} with checkpoint: {}", taskName, checkpoint);
    // Cast once up front instead of once per store (and again in the error path).
    CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
    storageEngines.forEach((storeName, storageEngine) -> {
      // Only write the checkpoint file if the store is durable and persisted to disk
      if (storageEngine.getStoreProperties().isDurableStore() && storageEngine.getStoreProperties().isPersistedToDisk()) {
        try {
          // The same checkpoint file is written twice: into the store's current directory and into the
          // store checkpoint directory that corresponds to this checkpoint id.
          File storeDir = storageManagerUtil.getTaskStoreDir(durableStoreBaseDir, storeName, taskName, TaskMode.Active);
          storageManagerUtil.writeCheckpointV2File(storeDir, checkpointV2);
          CheckpointId checkpointId = checkpointV2.getCheckpointId();
          File checkpointDir = Paths.get(storageManagerUtil.getStoreCheckpointDir(storeDir, checkpointId)).toFile();
          storageManagerUtil.writeCheckpointV2File(checkpointDir, checkpointV2);
        } catch (Exception e) {
          throw new SamzaException(String.format("Write checkpoint file failed for task: %s, storeName: %s, checkpointId: %s", taskName, storeName, checkpointV2.getCheckpointId()), e);
        }
      }
    });
  } else {
    throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
  }
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class BlobStoreUtil method getStoreSnapshotIndexes.
/**
 * Fetches, for the provided {@code taskName}, the {@link SnapshotIndex} of every requested store that has a
 * snapshot index blob id recorded in the provided {@code checkpoint}.
 *
 * All snapshot index requests are issued first and the method blocks only once at the end for all of them.
 * A failure whose unwrapped cause is a {@link DeletedException} is logged and passed to
 * {@link FutureUtil#toFutureOfMap} as an ignorable error; any other failure is rethrown.
 *
 * @param jobName job name is used to build request metadata
 * @param jobId job id is used to build request metadata
 * @param taskName task name to get the store state checkpoint markers and snapshot indexes for
 * @param checkpoint {@link Checkpoint} instance to get the store state checkpoint markers from. Only
 *                   {@link CheckpointV2} and newer are supported for blob stores.
 * @param storesToBackupOrRestore set of store names to be backed up or restored
 * @return Map of store name to a pair of its snapshot index blob id and the corresponding snapshot index.
 *         Empty if {@code checkpoint} is null or is a version 1 checkpoint.
 * @throws SamzaException if requesting a snapshot index fails, or if waiting for the requested
 *                        snapshot indexes fails
 */
public Map<String, Pair<String, SnapshotIndex>> getStoreSnapshotIndexes(String jobName, String jobId, String taskName, Checkpoint checkpoint, Set<String> storesToBackupOrRestore) {
  // TODO MED shesharma document error handling (checkpoint ver, blob not found, getBlob)
  if (checkpoint == null) {
    LOG.debug("No previous checkpoint found for taskName: {}", taskName);
    return ImmutableMap.of();
  }
  if (checkpoint.getVersion() == 1) {
    LOG.warn("Checkpoint version 1 is not supported for blob store backup and restore.");
    return ImmutableMap.of();
  }

  Map<String, CompletableFuture<Pair<String, SnapshotIndex>>> pendingIndexesByStore = new HashMap<>();
  CheckpointV2 checkpointV2 = (CheckpointV2) checkpoint;
  Map<String, Map<String, String>> scmsByFactory = checkpointV2.getStateCheckpointMarkers();
  Map<String, String> blobIdsByStore = scmsByFactory.get(BlobStoreStateBackendFactory.class.getName());

  if (blobIdsByStore == null) {
    LOG.debug("No store SCMs found for blob store state backend in for taskName: {} in checkpoint {}", taskName, checkpointV2.getCheckpointId());
  } else {
    for (Map.Entry<String, String> storeToBlobId : blobIdsByStore.entrySet()) {
      String storeName = storeToBlobId.getKey();
      String snapshotIndexBlobId = storeToBlobId.getValue();

      if (!storesToBackupOrRestore.contains(storeName)) {
        LOG.debug("SnapshotIndex blob id {} for store {} is not present in the set of stores to be backed up/restores: {}", snapshotIndexBlobId, storeName, storesToBackupOrRestore);
        continue;
      }

      try {
        LOG.debug("Getting snapshot index for taskName: {} store: {} blobId: {}", taskName, storeName, snapshotIndexBlobId);
        Metadata metadata = new Metadata(Metadata.SNAPSHOT_INDEX_PAYLOAD_PATH, Optional.empty(), jobName, jobId, taskName, storeName);
        CompletableFuture<SnapshotIndex> indexFuture = getSnapshotIndex(snapshotIndexBlobId, metadata).toCompletableFuture();
        // The blob id is already known, so it is wrapped in a completed future to pair it with the pending index.
        Pair<CompletableFuture<String>, CompletableFuture<SnapshotIndex>> blobIdAndIndex = Pair.of(CompletableFuture.completedFuture(snapshotIndexBlobId), indexFuture);
        // Collect the future now and block a single time below rather than once per request.
        pendingIndexesByStore.put(storeName, FutureUtil.toFutureOfPair(blobIdAndIndex));
      } catch (Exception e) {
        throw new SamzaException(String.format("Error getting SnapshotIndex for blobId: %s for taskName: %s store: %s", snapshotIndexBlobId, taskName, storeName), e);
      }
    }
  }

  try {
    return FutureUtil.toFutureOfMap(error -> {
      // Only errors caused by an already deleted blob are reported as ignorable.
      boolean alreadyDeleted = FutureUtil.unwrapExceptions(CompletionException.class, error) instanceof DeletedException;
      if (alreadyDeleted) {
        LOG.warn("Ignoring already deleted snapshot index for taskName: {}", taskName, error);
      }
      return alreadyDeleted;
    }, pendingIndexesByStore).join();
  } catch (Exception e) {
    throw new SamzaException(String.format("Error while waiting to get store snapshot indexes for task %s", taskName), e);
  }
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestBlobStoreUtil method testGetSSIReturnsCorrectSCMSnapshotIndexPair.
/**
 * Verifies that getStoreSnapshotIndexes returns, for each requested store, the pair of the snapshot index
 * blob id recorded in the checkpoint and the snapshot index fetched for that blob id.
 */
@Test
public void testGetSSIReturnsCorrectSCMSnapshotIndexPair() {
  // Two stores, each with its own snapshot index blob id and mocked snapshot index.
  String firstStore = "storeName";
  String firstBlobId = "snapshotIndexBlobId";
  SnapshotIndex firstIndex = mock(SnapshotIndex.class);

  String secondStore = "otherStoreName";
  String secondBlobId = "otherSnapshotIndexBlobId";
  SnapshotIndex secondIndex = mock(SnapshotIndex.class);

  Set<String> storesToBackupOrRestore = ImmutableSet.of(firstStore, secondStore);
  CheckpointV2 checkpoint = createCheckpointV2(BlobStoreStateBackendFactory.class.getName(), ImmutableMap.of(firstStore, firstBlobId, secondStore, secondBlobId));

  // Stub the blob fetches but run the real getStoreSnapshotIndexes implementation on the mock.
  BlobStoreUtil blobStoreUtil = mock(BlobStoreUtil.class);
  when(blobStoreUtil.getSnapshotIndex(eq(firstBlobId), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(firstIndex));
  when(blobStoreUtil.getSnapshotIndex(eq(secondBlobId), any(Metadata.class))).thenReturn(CompletableFuture.completedFuture(secondIndex));
  when(blobStoreUtil.getStoreSnapshotIndexes(anyString(), anyString(), anyString(), any(Checkpoint.class), anySetOf(String.class))).thenCallRealMethod();

  Map<String, Pair<String, SnapshotIndex>> result = blobStoreUtil.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, storesToBackupOrRestore);

  assertEquals(firstBlobId, result.get(firstStore).getKey());
  assertEquals(firstIndex, result.get(firstStore).getValue());
  assertEquals(secondBlobId, result.get(secondStore).getKey());
  assertEquals(secondIndex, result.get(secondStore).getValue());
  // One snapshot index fetch per requested store.
  verify(blobStoreUtil, times(2)).getSnapshotIndex(anyString(), any(Metadata.class));
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestBlobStoreUtil method testGetSSIReturnsEmptyMapIfNoEntryForBlobStoreBackendFactory.
/**
 * Verifies that getStoreSnapshotIndexes returns an empty map when the checkpoint only carries state
 * checkpoint markers for a state backend factory other than the blob store one.
 */
@Test
public void testGetSSIReturnsEmptyMapIfNoEntryForBlobStoreBackendFactory() {
  // A version 2 checkpoint whose only markers belong to an unrelated state backend factory.
  Map<String, Map<String, String>> otherFactoryMarkers = ImmutableMap.of("com.OtherStateBackendFactory", ImmutableMap.of("storeName", "otherSCM"));
  CheckpointV2 checkpoint = mock(CheckpointV2.class);
  when(checkpoint.getVersion()).thenReturn((short) 2);
  when(checkpoint.getStateCheckpointMarkers()).thenReturn(otherFactoryMarkers);

  BlobStoreUtil util = new BlobStoreUtil(mock(BlobStoreManager.class), MoreExecutors.newDirectExecutorService(), null, null);
  Map<String, Pair<String, SnapshotIndex>> result = util.getStoreSnapshotIndexes("testJobName", "testJobId", "taskName", checkpoint, new HashSet<>());

  assertTrue(result.isEmpty());
}
Aggregations