Use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.
Class TestTaskStorageCommitManager, method testPersistToFileSystemCheckpointV1AndV2Checkpoint.
/**
 * Persists a v1 checkpoint and then a v2 checkpoint through the commit manager and verifies which
 * store directories the changelog offsets and the v2 checkpoint file are written to.
 */
@Test
public void testPersistToFileSystemCheckpointV1AndV2Checkpoint() throws IOException {
  // Task-level fixtures: task name, changelog coordinates and the base directory for durable stores.
  TaskName taskName = new TaskName("task");
  File durableStoreDir = new File("durableStorePath");
  Partition changelogPartition = new Partition(0);
  SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
  SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);

  // Durable and persisted to disk; the only store stubbed to return a checkpoint path.
  StoreProperties durablePersistedProps = mock(StoreProperties.class);
  when(durablePersistedProps.isPersistedToDisk()).thenReturn(true);
  when(durablePersistedProps.isDurableStore()).thenReturn(true);
  StorageEngine durablePersistedStore = mock(StorageEngine.class);
  when(durablePersistedStore.getStoreProperties()).thenReturn(durablePersistedProps);
  Path checkpointedPath = mock(Path.class);
  when(durablePersistedStore.checkpoint(any())).thenReturn(Optional.of(checkpointedPath));

  // Persisted to disk but not durable.
  StoreProperties persistedOnlyProps = mock(StoreProperties.class);
  when(persistedOnlyProps.isPersistedToDisk()).thenReturn(true);
  when(persistedOnlyProps.isDurableStore()).thenReturn(false);
  StorageEngine persistedOnlyStore = mock(StorageEngine.class);
  when(persistedOnlyStore.getStoreProperties()).thenReturn(persistedOnlyProps);

  // Durable but not persisted to disk.
  StoreProperties durableOnlyProps = mock(StoreProperties.class);
  when(durableOnlyProps.isPersistedToDisk()).thenReturn(false);
  when(durableOnlyProps.isDurableStore()).thenReturn(true);
  StorageEngine durableOnlyStore = mock(StorageEngine.class);
  when(durableOnlyStore.getStoreProperties()).thenReturn(durableOnlyProps);

  // Neither durable nor persisted to disk.
  StoreProperties inMemoryProps = mock(StoreProperties.class);
  when(inMemoryProps.isPersistedToDisk()).thenReturn(false);
  when(inMemoryProps.isDurableStore()).thenReturn(false);
  StorageEngine inMemoryStore = mock(StorageEngine.class);
  when(inMemoryStore.getStoreProperties()).thenReturn(inMemoryProps);

  Map<String, StorageEngine> taskStores = ImmutableMap.of(
      "loggedPersistentStore", durablePersistedStore,
      "persistentStore", persistedOnlyStore,
      "loggedInMemStore", durableOnlyStore,
      "inMemStore", inMemoryStore);
  Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of(
      "loggedPersistentStore", changelogSystemStream,
      "loggedInMemStore", new SystemStream("system", "stream"));

  ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
  when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);

  // Every task store directory lookup under the durable base dir resolves to the base dir itself.
  StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
  when(storageManagerUtil.getTaskStoreDir(eq(durableStoreDir), any(), any(), any())).thenReturn(durableStoreDir);

  TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
  Timer checkpointTimer = mock(Timer.class);
  when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);

  TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(),
      containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(),
      storageManagerUtil, durableStoreDir, metrics));
  // Offset file writes are stubbed out on the spy so that nothing touches the real file system.
  doNothing().when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
  // Store checkpoint directories are simulated as "<storeDir>-<checkpointId>".
  when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class)))
      .thenAnswer((Answer<String>) invocation -> {
        File storeDir = invocation.getArgumentAt(0, File.class);
        CheckpointId id = invocation.getArgumentAt(1, CheckpointId.class);
        return storeDir + "-" + id;
      });

  CheckpointId newCheckpointId = CheckpointId.create();
  String newestOffset = "1";
  KafkaChangelogSSPOffset changelogOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
  Map<SystemStreamPartition, String> v1Offsets = ImmutableMap.of(changelogSSP, changelogOffset.toString());
  commitManager.init();

  // Persist a v1 checkpoint to the file system.
  commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(v1Offsets));
  verify(commitManager).writeChangelogOffsetFiles(v1Offsets);
  // The offset is expected once in the task store directory and once in the store checkpoint directory.
  verify(commitManager).writeChangelogOffsetFile(
      eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(durableStoreDir));
  File offsetCheckpointDir =
      Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, changelogOffset.getCheckpointId())).toFile();
  verify(commitManager).writeChangelogOffsetFile(
      eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(offsetCheckpointDir));

  Map<String, String> storeSCM = ImmutableMap.of(
      "loggedPersistentStore", "system;loggedPersistentStoreStream;1",
      "persistentStore", "system;persistentStoreStream;1",
      "loggedInMemStore", "system;loggedInMemStoreStream;1",
      "inMemStore", "system;inMemStoreStream;1");
  CheckpointV2 checkpoint =
      new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));

  // Persist a v2 checkpoint to the file system.
  commitManager.writeCheckpointToStoreDirectories(checkpoint);

  // The task store dir of the durable, persisted store is looked up once per persisted checkpoint (v1 and v2).
  verify(storageManagerUtil, times(2))
      .getTaskStoreDir(eq(durableStoreDir), eq("loggedPersistentStore"), eq(taskName), any());
  File v2CheckpointDir = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, newCheckpointId)).toFile();
  verify(storageManagerUtil).writeCheckpointV2File(eq(v2CheckpointDir), eq(checkpoint));
}
Use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.
Class TestTaskStorageCommitManager, method testWriteChangelogOffsetFilesV2andV1.
/**
 * Captures offset-file and v2-checkpoint-file writes in in-memory maps and asserts that both the task
 * store directory and the store checkpoint directory receive the expected contents.
 */
@Test
public void testWriteChangelogOffsetFilesV2andV1() throws IOException {
  // In-memory stand-ins for the file system, keyed by directory name.
  Map<String, Map<SystemStreamPartition, String>> offsetsByDir = new HashMap<>();
  Map<String, CheckpointV2> checkpointsByDir = new HashMap<>();

  TaskName taskName = new TaskName("task");
  File tmpTestPath = new File("store-checkpoint-test");
  Partition changelogPartition = new Partition(0);
  SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
  SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);

  // A single store that is both durable and persisted to disk.
  StoreProperties storeProps = mock(StoreProperties.class);
  when(storeProps.isPersistedToDisk()).thenReturn(true);
  when(storeProps.isDurableStore()).thenReturn(true);
  StorageEngine store = mock(StorageEngine.class);
  when(store.getStoreProperties()).thenReturn(storeProps);
  Path checkpointedPath = mock(Path.class);
  when(store.checkpoint(any())).thenReturn(Optional.of(checkpointedPath));

  TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
  Timer checkpointTimer = mock(Timer.class);
  when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);

  Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", store);
  Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream);

  StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
  when(storageManagerUtil.getTaskStoreDir(eq(tmpTestPath), eq("loggedPersistentStore"), any(), any()))
      .thenReturn(tmpTestPath);

  ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
  when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);

  TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(),
      containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(),
      storageManagerUtil, tmpTestPath, metrics));

  // Record every offset-file write under the name of its target directory.
  doAnswer(invocation -> {
    String dirName = invocation.getArgumentAt(3, File.class).getName();
    SystemStreamPartition ssp = invocation.getArgumentAt(1, SystemStreamPartition.class);
    String offset = invocation.getArgumentAt(2, String.class);
    offsetsByDir.computeIfAbsent(dirName, ignored -> new HashMap<>()).put(ssp, offset);
    return null;
  }).when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());

  // Record every v2 checkpoint-file write under the name of its target directory.
  doAnswer(invocation -> {
    String dirName = invocation.getArgumentAt(0, File.class).getName();
    CheckpointV2 written = invocation.getArgumentAt(1, CheckpointV2.class);
    checkpointsByDir.put(dirName, written);
    return null;
  }).when(storageManagerUtil).writeCheckpointV2File(any(), any());

  // Store checkpoint directories are simulated as "<storeDir>-<checkpointId>".
  when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class)))
      .thenAnswer((Answer<String>) invocation -> {
        File storeDir = invocation.getArgumentAt(0, File.class);
        CheckpointId id = invocation.getArgumentAt(1, CheckpointId.class);
        return storeDir + "-" + id;
      });

  CheckpointId newCheckpointId = CheckpointId.create();
  String newestOffset = "1";
  KafkaChangelogSSPOffset changelogOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
  Map<SystemStreamPartition, String> v1Offsets = ImmutableMap.of(changelogSSP, changelogOffset.toString());
  commitManager.init();

  // Persist a v1 checkpoint to the (simulated) file system.
  commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(v1Offsets));
  assertEquals(2, offsetsByDir.size());

  // The store checkpoint directory must hold exactly the newest offset for the changelog SSP.
  String v2FilePath = storageManagerUtil.getStoreCheckpointDir(tmpTestPath, newCheckpointId);
  assertTrue(offsetsByDir.containsKey(v2FilePath));
  assertTrue(offsetsByDir.get(v2FilePath).containsKey(changelogSSP));
  assertEquals(1, offsetsByDir.get(v2FilePath).size());
  assertEquals(newestOffset, offsetsByDir.get(v2FilePath).get(changelogSSP));

  // The task store directory must hold exactly the newest offset for the changelog SSP as well.
  String v1FilePath = tmpTestPath.getPath();
  assertTrue(offsetsByDir.containsKey(v1FilePath));
  assertTrue(offsetsByDir.get(v1FilePath).containsKey(changelogSSP));
  assertEquals(1, offsetsByDir.get(v1FilePath).size());
  assertEquals(newestOffset, offsetsByDir.get(v1FilePath).get(changelogSSP));

  Map<String, String> storeSCM = ImmutableMap.of(
      "loggedPersistentStore", "system;loggedPersistentStoreStream;1",
      "persistentStore", "system;persistentStoreStream;1",
      "loggedInMemStore", "system;loggedInMemStoreStream;1",
      "inMemStore", "system;inMemStoreStream;1");
  CheckpointV2 checkpoint =
      new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));

  // Persist a v2 checkpoint; it must land in both directories.
  commitManager.writeCheckpointToStoreDirectories(checkpoint);
  assertTrue(checkpointsByDir.containsKey(v2FilePath));
  assertEquals(checkpoint, checkpointsByDir.get(v2FilePath));
  assertTrue(checkpointsByDir.containsKey(v1FilePath));
  assertEquals(checkpoint, checkpointsByDir.get(v1FilePath));
  assertEquals(2, checkpointsByDir.size());

  // Persisting another v2 checkpoint with the same id replaces the recorded one in both directories.
  CheckpointV2 updatedCheckpoint = new CheckpointV2(newCheckpointId,
      ImmutableMap.of(new SystemStreamPartition("inputSystem", "inputStream", changelogPartition), "5"),
      Collections.singletonMap("factory", storeSCM));
  commitManager.writeCheckpointToStoreDirectories(updatedCheckpoint);
  assertEquals(updatedCheckpoint, checkpointsByDir.get(v2FilePath));
  assertEquals(updatedCheckpoint, checkpointsByDir.get(v1FilePath));
  assertEquals(2, checkpointsByDir.size());
}
Use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.
Class TransactionalStateTaskRestoreManager, method getCheckpointId.
/**
 * Resolves the checkpoint id carried by the given checkpoint.
 *
 * <p>For a {@link CheckpointV1}, the id is parsed from the first non-blank offset found for a store
 * changelog SSP while iterating over the store changelogs. For a {@link CheckpointV2}, the id held by
 * the checkpoint itself is returned.
 *
 * @param checkpoint the checkpoint to inspect; may be {@code null}
 * @return the checkpoint id, or {@code null} when the checkpoint is {@code null} or is a v1 checkpoint
 *         without a non-blank offset for any store changelog SSP
 * @throws SamzaException if the checkpoint is neither a v1 nor a v2 checkpoint
 */
private CheckpointId getCheckpointId(Checkpoint checkpoint) {
  if (checkpoint == null) {
    return null;
  }

  if (checkpoint instanceof CheckpointV1) {
    // v1 checkpoints embed the checkpoint id inside each serialized changelog SSP offset.
    for (SystemStream changelogStream : storeChangelogs.values()) {
      SystemStreamPartition changelogSSP =
          new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
      String serializedOffset = checkpoint.getOffsets().get(changelogSSP);
      if (StringUtils.isNotBlank(serializedOffset)) {
        return KafkaChangelogSSPOffset.fromString(serializedOffset).getCheckpointId();
      }
    }
    return null;
  }

  if (checkpoint instanceof CheckpointV2) {
    return ((CheckpointV2) checkpoint).getCheckpointId();
  }

  throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
}
Use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.
Class TransactionalStateTaskRestoreManager, method getCheckpointedChangelogOffsets.
/**
 * Collects the Kafka state checkpoint marker of each store recorded in the given checkpoint.
 *
 * <p>For a {@link CheckpointV2}, only the markers registered under
 * {@code KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME} are deserialized; markers of any
 * other factory are skipped. For a {@link CheckpointV1}, a marker is built for every store whose
 * changelog SSP has a non-blank offset in the checkpoint.
 *
 * @param checkpoint the checkpoint to read; may be {@code null}
 * @return a mutable map from store name to marker; empty when the checkpoint is {@code null}
 * @throws SamzaException if the checkpoint is neither a v1 nor a v2 checkpoint
 */
private Map<String, KafkaStateCheckpointMarker> getCheckpointedChangelogOffsets(Checkpoint checkpoint) {
  Map<String, KafkaStateCheckpointMarker> markersByStore = new HashMap<>();
  if (checkpoint == null) {
    return markersByStore;
  }

  if (checkpoint instanceof CheckpointV2) {
    Map<String, Map<String, String>> scmsByFactory = ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
    // Markers that do not belong to the Kafka state backend factory are skipped.
    if (scmsByFactory.containsKey(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME)) {
      Map<String, String> kafkaScms =
          scmsByFactory.get(KafkaStateCheckpointMarker.KAFKA_STATE_BACKEND_FACTORY_NAME);
      for (Map.Entry<String, String> storeAndScm : kafkaScms.entrySet()) {
        markersByStore.put(storeAndScm.getKey(), KafkaStateCheckpointMarker.deserialize(storeAndScm.getValue()));
      }
    }
    return markersByStore;
  }

  if (checkpoint instanceof CheckpointV1) {
    // With checkpoint v1 the store offsets live among the checkpointed offsets, keyed by the changelog SSPs.
    Map<SystemStreamPartition, String> offsetsBySsp = checkpoint.getOffsets();
    storeChangelogs.forEach((storeName, changelogStream) -> {
      SystemStreamPartition changelogSSP =
          new SystemStreamPartition(changelogStream, taskModel.getChangelogPartition());
      String serializedOffset = offsetsBySsp.get(changelogSSP);
      if (StringUtils.isNotBlank(serializedOffset)) {
        markersByStore.put(storeName, new KafkaStateCheckpointMarker(
            changelogSSP, KafkaChangelogSSPOffset.fromString(serializedOffset).getChangelogOffset()));
      }
    });
    return markersByStore;
  }

  throw new SamzaException("Unsupported checkpoint version: " + checkpoint.getVersion());
}
Use of org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset in project samza by apache.
Class TaskStorageCommitManager, method writeChangelogOffsetFiles.
/**
 * Writes the newest changelog SSP offset of every store that is both durable and persisted to disk to the
 * OFFSET files in the store's current task store directory and in its store checkpoint directory.
 *
 * <p>A store is only considered when {@code checkpointOffsets} contains its changelog SSP and a storage
 * engine is registered for it. When the parsed changelog offset is {@code null}, nothing is written and the
 * OFFSET file of the current store directory is deleted instead.
 *
 * <p>These files are used during container startup to ensure transactional state, and to determine whether
 * there is any new information in the changelog that is not reflected in the on-disk copy of the store.
 * If there is any delta, it is replayed from the changelog. E.g. this can happen if the job was run on this
 * host, then another host, and then back to this host.
 *
 * @param checkpointOffsets serialized {@link KafkaChangelogSSPOffset}s keyed by changelog SSP
 * @throws SamzaException if the storage engines are not initialized, or if writing or deleting an OFFSET
 *         file fails with an {@link IOException}
 */
@VisibleForTesting
void writeChangelogOffsetFiles(Map<SystemStreamPartition, String> checkpointOffsets) {
  if (storageEngines == null) {
    throw new SamzaException(String.format("Storage engines are not initialized and writeChangelogOffsetFiles not be written for task %s", taskName));
  }

  storeChangelogs.forEach((storeName, systemStream) -> {
    SystemStreamPartition changelogSSP =
        new SystemStreamPartition(systemStream.getSystem(), systemStream.getStream(), taskChangelogPartition);

    // Nothing to do without a checkpointed offset or a registered storage engine for this store.
    if (!checkpointOffsets.containsKey(changelogSSP) || !storageEngines.containsKey(storeName)) {
      return;
    }
    // Only write if the store is durable and persisted to disk.
    if (!storageEngines.get(storeName).getStoreProperties().isDurableStore()
        || !storageEngines.get(storeName).getStoreProperties().isPersistedToDisk()) {
      return;
    }

    LOG.debug("Writing changelog offset for taskName {} store {} changelog {}.", taskName, storeName, systemStream);
    File currentStoreDir = storageManagerUtil.getTaskStoreDir(durableStoreBaseDir, storeName, taskName, TaskMode.Active);
    try {
      KafkaChangelogSSPOffset parsedOffset = KafkaChangelogSSPOffset.fromString(checkpointOffsets.get(changelogSSP));
      String newestOffset = parsedOffset.getChangelogOffset();
      if (newestOffset == null) {
        // A null newestOffset means the changelog ssp is (or has become) empty. This could be either because
        // the changelog topic was newly added, repartitioned, or manually deleted and recreated.
        // No need to persist the offset file.
        LOG.debug("Deleting OFFSET file for taskName {} store {} changelog ssp {} since the newestOffset is null.", taskName, storeName, changelogSSP);
        storageManagerUtil.deleteOffsetFile(currentStoreDir);
        return;
      }

      // First the OFFSET file in the task store directory ...
      writeChangelogOffsetFile(storeName, changelogSSP, newestOffset, currentStoreDir);
      // ... then the OFFSET file in the store checkpoint directory.
      File checkpointDir =
          Paths.get(storageManagerUtil.getStoreCheckpointDir(currentStoreDir, parsedOffset.getCheckpointId())).toFile();
      writeChangelogOffsetFile(storeName, changelogSSP, newestOffset, checkpointDir);
    } catch (IOException e) {
      throw new SamzaException(String.format("Error storing offset for taskName %s store %s changelog %s.", taskName, storeName, systemStream), e);
    }
  });

  LOG.debug("Done writing OFFSET files for logged persistent key value stores for task {}", taskName);
}
Aggregations