use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestKafkaCheckpointManager method testReadCheckpointV2.
/**
 * With the checkpoint read versions configured as "1,2", hands the consumer setup a single
 * V2 checkpoint envelope for TASK0 and expects {@code readLastCheckpoint(TASK0)} to return a
 * checkpoint equal to the one that was enveloped.
 */
@Test
public void testReadCheckpointV2() throws InterruptedException {
  Config readV1AndV2Config = config(ImmutableMap.of(TaskConfig.CHECKPOINT_READ_VERSIONS, "1,2"));
  setupSystemFactory(readV1AndV2Config);

  // The envelope list passed to setupConsumer holds exactly one V2 checkpoint, keyed to TASK0.
  CheckpointV2 expected = buildCheckpointV2(INPUT_SSP0, "0");
  List<IncomingMessageEnvelope> envelopes = ImmutableList.of(newCheckpointV2Envelope(TASK0, expected, "0"));
  setupConsumer(envelopes);

  KafkaCheckpointManager manager = buildKafkaCheckpointManager(true, readV1AndV2Config);
  manager.register(TASK0);

  assertEquals(expected, manager.readLastCheckpoint(TASK0));
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestCheckpointV2Serde method testCheckpointV2Serde.
/**
 * Round-trips a {@code CheckpointV2} through {@code CheckpointV2Serde} (toBytes then fromBytes)
 * and checks that the checkpoint id, the single input offset and both per-factory state
 * checkpoint marker maps come back unchanged.
 */
@Test
public void testCheckpointV2Serde() {
  // Input offsets: one SSP at offset "1"
  SystemStreamPartition ssp = new SystemStreamPartition("test-system", "test-stream", new Partition(777));
  Map<SystemStreamPartition, String> inputOffsets = new HashMap<>();
  inputOffsets.put(ssp, "1");

  // State checkpoint markers: two factories, each with a marker for store1 and store2
  Map<String, String> factory1Markers = new HashMap<>();
  factory1Markers.put("store1", "marker1");
  factory1Markers.put("store2", "marker2");
  Map<String, String> factory2Markers = new HashMap<>();
  factory2Markers.put("store1", "marker3");
  factory2Markers.put("store2", "marker4");
  Map<String, Map<String, String>> markersByFactory = new HashMap<>();
  markersByFactory.put("factory1", factory1Markers);
  markersByFactory.put("factory2", factory2Markers);

  CheckpointId id = CheckpointId.create();
  CheckpointV2 original = new CheckpointV2(id, inputOffsets, markersByFactory);

  CheckpointV2Serde serde = new CheckpointV2Serde();
  CheckpointV2 roundTripped = serde.fromBytes(serde.toBytes(original));

  // Validate input checkpoints
  assertEquals(id, roundTripped.getCheckpointId());
  assertEquals("1", roundTripped.getOffsets().get(ssp));
  assertEquals(1, roundTripped.getOffsets().size());

  // Validate state checkpoints
  Map<String, Map<String, String>> actualMarkers = roundTripped.getStateCheckpointMarkers();
  assertEquals(2, actualMarkers.size());
  assertTrue(actualMarkers.containsKey("factory1"));
  assertEquals(factory1Markers, actualMarkers.get("factory1"));
  assertTrue(actualMarkers.containsKey("factory2"));
  assertEquals(factory2Markers, actualMarkers.get("factory2"));
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestTaskStorageCommitManager method testPersistToFileSystemCheckpointV1AndV2Checkpoint.
/**
 * Writes first a CheckpointV1 and then a CheckpointV2 through
 * {@code writeCheckpointToStoreDirectories} on a spied TaskStorageCommitManager and verifies,
 * for the one store stubbed as both persisted-to-disk and durable ("loggedPersistentStore"),
 * the changelog offset file writes (V1) and the checkpoint V2 file write (V2).
 */
@Test
public void testPersistToFileSystemCheckpointV1AndV2Checkpoint() throws IOException {
ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
// Four mocked stores covering every combination of the isPersistedToDisk / isDurableStore flags.
// "LP" store: persisted to disk = true, durable = true; also returns a checkpoint path.
StorageEngine mockLPStore = mock(StorageEngine.class);
StoreProperties lpStoreProps = mock(StoreProperties.class);
when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
when(lpStoreProps.isDurableStore()).thenReturn(true);
Path mockPath = mock(Path.class);
when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
// "P" store: persisted to disk = true, durable = false.
StorageEngine mockPStore = mock(StorageEngine.class);
StoreProperties pStoreProps = mock(StoreProperties.class);
when(mockPStore.getStoreProperties()).thenReturn(pStoreProps);
when(pStoreProps.isPersistedToDisk()).thenReturn(true);
when(pStoreProps.isDurableStore()).thenReturn(false);
// "LI" store: persisted to disk = false, durable = true.
StorageEngine mockLIStore = mock(StorageEngine.class);
StoreProperties liStoreProps = mock(StoreProperties.class);
when(mockLIStore.getStoreProperties()).thenReturn(liStoreProps);
when(liStoreProps.isPersistedToDisk()).thenReturn(false);
when(liStoreProps.isDurableStore()).thenReturn(true);
// "I" store: persisted to disk = false, durable = false.
StorageEngine mockIStore = mock(StorageEngine.class);
StoreProperties iStoreProps = mock(StoreProperties.class);
when(mockIStore.getStoreProperties()).thenReturn(iStoreProps);
when(iStoreProps.isPersistedToDisk()).thenReturn(false);
when(iStoreProps.isDurableStore()).thenReturn(false);
Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", mockLPStore, "persistentStore", mockPStore, "loggedInMemStore", mockLIStore, "inMemStore", mockIStore);
Partition changelogPartition = new Partition(0);
SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);
// Only the two "logged" stores are given a changelog stream.
Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream, "loggedInMemStore", new SystemStream("system", "stream"));
StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
File durableStoreDir = new File("durableStorePath");
// Every store's task store dir resolves to the same durableStoreDir in this test.
when(storageManagerUtil.getTaskStoreDir(eq(durableStoreDir), any(), any(), any())).thenReturn(durableStoreDir);
TaskName taskName = new TaskName("task");
TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
Timer checkpointTimer = mock(Timer.class);
when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);
when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);
// Spy on the real commit manager so the offset-file write can be replaced with a no-op and verified.
TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(), containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(), storageManagerUtil, durableStoreDir, metrics));
doNothing().when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
// Stubbed checkpoint dir naming: "<store dir>-<checkpoint id>".
when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
File file = invocation.getArgumentAt(0, File.class);
CheckpointId checkpointId = invocation.getArgumentAt(1, CheckpointId.class);
return file + "-" + checkpointId;
});
CheckpointId newCheckpointId = CheckpointId.create();
String newestOffset = "1";
KafkaChangelogSSPOffset kafkaChangelogSSPOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
Map<SystemStreamPartition, String> offsetsJava = ImmutableMap.of(changelogSSP, kafkaChangelogSSPOffset.toString());
commitManager.init();
// invoke persist to file system for v1 checkpoint
commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(offsetsJava));
verify(commitManager).writeChangelogOffsetFiles(offsetsJava);
// invoked twice for the logged persistent store: once against the store directory and once
// against the checkpoint directory (original note: for OFFSET-V1 and OFFSET-V2)
verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(durableStoreDir));
File checkpointFile = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, kafkaChangelogSSPOffset.getCheckpointId())).toFile();
verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(checkpointFile));
// The V2 checkpoint carries a state checkpoint marker for all four stores under a single factory.
Map<String, String> storeSCM = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
CheckpointV2 checkpoint = new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));
// invoke persist to file system for v2 checkpoint
commitManager.writeCheckpointToStoreDirectories(checkpoint);
// Validate only durable and persisted stores are persisted
// This should be invoked twice in total: once by the checkpointV1 write above and once by the checkpointV2 write
verify(storageManagerUtil, times(2)).getTaskStoreDir(eq(durableStoreDir), eq("loggedPersistentStore"), eq(taskName), any());
File checkpointPath = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, newCheckpointId)).toFile();
verify(storageManagerUtil).writeCheckpointV2File(eq(checkpointPath), eq(checkpoint));
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class TestTaskStorageCommitManager method testWriteChangelogOffsetFilesV2andV1.
/**
 * Drives {@code writeCheckpointToStoreDirectories} with a CheckpointV1 and then with two
 * CheckpointV2 instances, capturing what would be written to disk in in-memory maps keyed by
 * directory name. Asserts that offsets / checkpoints land under both the store directory and
 * the checkpoint directory, and that a second V2 write replaces the first.
 */
@Test
public void testWriteChangelogOffsetFilesV2andV1() throws IOException {
  // Fake file systems keyed by directory name: one for changelog offset files, one for CheckpointV2 files.
  Map<String, Map<SystemStreamPartition, String>> offsetFilesByDir = new HashMap<>();
  Map<String, CheckpointV2> checkpointFilesByDir = new HashMap<>();

  // A single store stubbed as both persisted to disk and durable.
  StoreProperties storeProps = mock(StoreProperties.class);
  when(storeProps.isPersistedToDisk()).thenReturn(true);
  when(storeProps.isDurableStore()).thenReturn(true);
  Path storeCheckpointPath = mock(Path.class);
  StorageEngine store = mock(StorageEngine.class);
  when(store.getStoreProperties()).thenReturn(storeProps);
  when(store.checkpoint(any())).thenReturn(Optional.of(storeCheckpointPath));
  Map<String, StorageEngine> storesByName = ImmutableMap.of("loggedPersistentStore", store);

  TaskInstanceMetrics taskMetrics = mock(TaskInstanceMetrics.class);
  Timer storeCheckpointTimer = mock(Timer.class);
  when(taskMetrics.storeCheckpointNs()).thenReturn(storeCheckpointTimer);

  Partition partition = new Partition(0);
  SystemStream changelogStream = new SystemStream("changelogSystem", "changelogStream");
  SystemStreamPartition changelogSsp = new SystemStreamPartition(changelogStream, partition);
  Map<String, SystemStream> changelogsByStore = ImmutableMap.of("loggedPersistentStore", changelogStream);

  File storeBaseDir = new File("store-checkpoint-test");
  StorageManagerUtil util = mock(StorageManagerUtil.class);
  when(util.getTaskStoreDir(eq(storeBaseDir), eq("loggedPersistentStore"), any(), any())).thenReturn(storeBaseDir);

  TaskName task = new TaskName("task");
  ContainerStorageManager csm = mock(ContainerStorageManager.class);
  when(csm.getAllStores(task)).thenReturn(storesByName);

  TaskStorageCommitManager manager = spy(new TaskStorageCommitManager(task, Collections.emptyMap(), csm, changelogsByStore, partition, null, null, ForkJoinPool.commonPool(), util, storeBaseDir, taskMetrics));

  // Record every changelog offset write under the name of its target directory.
  doAnswer(invocation -> {
    String dirName = invocation.getArgumentAt(3, File.class).getName();
    SystemStreamPartition ssp = invocation.getArgumentAt(1, SystemStreamPartition.class);
    String offset = invocation.getArgumentAt(2, String.class);
    offsetFilesByDir.computeIfAbsent(dirName, unused -> new HashMap<>()).put(ssp, offset);
    return null;
  }).when(manager).writeChangelogOffsetFile(any(), any(), any(), any());

  // Record every CheckpointV2 file write under the name of its target directory.
  doAnswer(invocation -> {
    String dirName = invocation.getArgumentAt(0, File.class).getName();
    CheckpointV2 written = invocation.getArgumentAt(1, CheckpointV2.class);
    checkpointFilesByDir.put(dirName, written);
    return null;
  }).when(util).writeCheckpointV2File(any(), any());

  // Stubbed checkpoint dir naming: "<store dir>-<checkpoint id>".
  when(util.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
    File dir = invocation.getArgumentAt(0, File.class);
    CheckpointId id = invocation.getArgumentAt(1, CheckpointId.class);
    return dir + "-" + id;
  });

  CheckpointId checkpointId = CheckpointId.create();
  String latestOffset = "1";
  KafkaChangelogSSPOffset changelogOffset = new KafkaChangelogSSPOffset(checkpointId, latestOffset);
  Map<SystemStreamPartition, String> v1Offsets = ImmutableMap.of(changelogSsp, changelogOffset.toString());

  manager.init();

  // invoke persist to file system for v1 checkpoint
  manager.writeCheckpointToStoreDirectories(new CheckpointV1(v1Offsets));
  assertEquals(2, offsetFilesByDir.size());

  // check if v2 offsets are written correctly
  String checkpointDirKey = util.getStoreCheckpointDir(storeBaseDir, checkpointId);
  assertTrue(offsetFilesByDir.containsKey(checkpointDirKey));
  Map<SystemStreamPartition, String> checkpointDirOffsets = offsetFilesByDir.get(checkpointDirKey);
  assertTrue(checkpointDirOffsets.containsKey(changelogSsp));
  assertEquals(1, checkpointDirOffsets.size());
  assertEquals(latestOffset, checkpointDirOffsets.get(changelogSsp));

  // check if v1 offsets are written correctly
  String storeDirKey = storeBaseDir.getPath();
  assertTrue(offsetFilesByDir.containsKey(storeDirKey));
  Map<SystemStreamPartition, String> storeDirOffsets = offsetFilesByDir.get(storeDirKey);
  assertTrue(storeDirOffsets.containsKey(changelogSsp));
  assertEquals(1, storeDirOffsets.size());
  assertEquals(latestOffset, storeDirOffsets.get(changelogSsp));

  Map<String, String> markersByStore = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
  CheckpointV2 firstV2 = new CheckpointV2(checkpointId, Collections.emptyMap(), Collections.singletonMap("factory", markersByStore));

  // invoke persist to file system with checkpoint v2
  manager.writeCheckpointToStoreDirectories(firstV2);
  assertTrue(checkpointFilesByDir.containsKey(checkpointDirKey));
  assertEquals(firstV2, checkpointFilesByDir.get(checkpointDirKey));
  assertTrue(checkpointFilesByDir.containsKey(storeDirKey));
  assertEquals(firstV2, checkpointFilesByDir.get(storeDirKey));
  assertEquals(2, checkpointFilesByDir.size());

  // A second v2 checkpoint with the same id but a non-empty input offset replaces the first in both directories.
  CheckpointV2 secondV2 = new CheckpointV2(checkpointId, ImmutableMap.of(new SystemStreamPartition("inputSystem", "inputStream", partition), "5"), Collections.singletonMap("factory", markersByStore));
  manager.writeCheckpointToStoreDirectories(secondV2);
  assertEquals(secondV2, checkpointFilesByDir.get(checkpointDirKey));
  assertEquals(secondV2, checkpointFilesByDir.get(storeDirKey));
  assertEquals(2, checkpointFilesByDir.size());
}
use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.
the class ContainerStorageManager method getBackendFactoryStoreNames.
/**
 * Return a map of backend factory names to the set of stores that should be restored using it.
 *
 * <ul>
 *   <li>No checkpoint, or a version 2 checkpoint: each store is assigned to the first of its
 *       configured restore factories that has a state checkpoint marker for that store; when none
 *       matches, the first configured restore factory is used. Stores with no configured restore
 *       factories are left out of the result.</li>
 *   <li>Version 1 checkpoint: every store with a configured changelog stream is assigned to
 *       {@code StorageConfig.KAFKA_STATE_BACKEND_FACTORY}; the remaining stores are left out.</li>
 * </ul>
 *
 * @param checkpoint the last checkpoint, may be null
 * @param storeNames names of the stores to assign to a backend factory
 * @param storageConfig config used to look up changelog streams and restore factories per store
 * @return backend factory name to store names; stores whose restore is skipped do not appear
 * @throws SamzaException if the checkpoint version is neither 1 nor 2
 */
@VisibleForTesting
Map<String, Set<String>> getBackendFactoryStoreNames(Checkpoint checkpoint, Set<String> storeNames, StorageConfig storageConfig) {
  // backendFactoryName -> set(storeNames)
  Map<String, Set<String>> storesByFactory = new HashMap<>();

  if (checkpoint == null || checkpoint.getVersion() == 2) {
    // Extract the state checkpoint markers if checkpoint exists
    final Map<String, Map<String, String>> markersByFactory;
    if (checkpoint == null) {
      markersByFactory = Collections.emptyMap();
    } else {
      markersByFactory = ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
    }

    // Find the state backend factory to use for each store
    for (String storeName : storeNames) {
      List<String> restoreFactories = storageConfig.getStoreRestoreFactories(storeName);
      if (restoreFactories.isEmpty()) {
        // Nothing is configured to restore this store from, so it is skipped
        LOG.info("non-Side input store: {}, does not have a configured restore factories nor store changelogs," + "restore for the store will be skipped", storeName);
        continue;
      }

      // Walk the ordered factory list for the first one the checkpoint holds a marker for; with no
      // checkpoint or no matching marker, fall back to the first configured restore factory
      String chosenFactory = restoreFactories.stream()
          .filter(candidate -> markersByFactory.containsKey(candidate) && markersByFactory.get(candidate).containsKey(storeName))
          .findFirst()
          .orElseGet(() -> {
            String firstConfigured = restoreFactories.get(0);
            LOG.warn("No matching checkpoints found for configured factories: {}, " + "defaulting to using the first configured factory with no checkpoints", restoreFactories);
            return firstConfigured;
          });

      storesByFactory.computeIfAbsent(chosenFactory, unused -> new HashSet<>()).add(storeName);
    }
  } else if (checkpoint.getVersion() == 1) {
    // Only stores with a changelog stream configured can be restored from a V1 checkpoint
    Set<String> storesWithChangelog = storeNames.stream()
        .filter(name -> storageConfig.getChangelogStream(name).isPresent())
        .collect(Collectors.toSet());

    // Checkpoint V1 always maps to the changelog backend factory, for backwards compatibility
    if (!storesWithChangelog.isEmpty()) {
      storesByFactory.put(StorageConfig.KAFKA_STATE_BACKEND_FACTORY, storesWithChangelog);
    }

    if (storesWithChangelog.size() < storeNames.size()) {
      // Computed only so the skipped stores can be named in the log line
      Set<String> storesWithoutChangelog = storeNames.stream()
          .filter(name -> !storesWithChangelog.contains(name))
          .collect(Collectors.toSet());
      LOG.info("non-Side input stores: {}, do not have a configured store changelogs for checkpoint V1," + "restore for the store will be skipped", storesWithoutChangelog);
    }
  } else {
    throw new SamzaException(String.format("Unsupported checkpoint version %s", checkpoint.getVersion()));
  }

  return storesByFactory;
}
Aggregations