Search in sources :

Example 1 with CheckpointV2

use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.

the class TestKafkaCheckpointManager method testReadCheckpointV2.

@Test
public void testReadCheckpointV2() throws InterruptedException {
    Config config = config(ImmutableMap.of(TaskConfig.CHECKPOINT_READ_VERSIONS, "1,2"));
    setupSystemFactory(config);
    CheckpointV2 checkpointV2 = buildCheckpointV2(INPUT_SSP0, "0");
    List<IncomingMessageEnvelope> checkpointEnvelopes = ImmutableList.of(newCheckpointV2Envelope(TASK0, checkpointV2, "0"));
    setupConsumer(checkpointEnvelopes);
    KafkaCheckpointManager kafkaCheckpointManager = buildKafkaCheckpointManager(true, config);
    kafkaCheckpointManager.register(TASK0);
    Checkpoint actualCheckpoint = kafkaCheckpointManager.readLastCheckpoint(TASK0);
    assertEquals(checkpointV2, actualCheckpoint);
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) TaskConfig(org.apache.samza.config.TaskConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Test(org.junit.Test)

Example 2 with CheckpointV2

use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.

the class TestCheckpointV2Serde method testCheckpointV2Serde.

@Test
public void testCheckpointV2Serde() {
    CheckpointV2Serde serde = new CheckpointV2Serde();
    Map<SystemStreamPartition, String> offsets = new HashMap<>();
    SystemStreamPartition systemStreamPartition = new SystemStreamPartition("test-system", "test-stream", new Partition(777));
    offsets.put(systemStreamPartition, "1");
    // State Checkpoint marker
    Map<String, Map<String, String>> factoryStateCheckpointMarkersMap = new HashMap<>();
    Map<String, String> stateCheckpointMarkersMap = new HashMap<>();
    stateCheckpointMarkersMap.put("store1", "marker1");
    stateCheckpointMarkersMap.put("store2", "marker2");
    Map<String, String> stateCheckpointMarkersMap2 = new HashMap<>();
    stateCheckpointMarkersMap2.put("store1", "marker3");
    stateCheckpointMarkersMap2.put("store2", "marker4");
    factoryStateCheckpointMarkersMap.put("factory1", stateCheckpointMarkersMap);
    factoryStateCheckpointMarkersMap.put("factory2", stateCheckpointMarkersMap2);
    CheckpointId checkpointId = CheckpointId.create();
    CheckpointV2 checkpoint = new CheckpointV2(checkpointId, offsets, factoryStateCheckpointMarkersMap);
    CheckpointV2 deserializedCheckpoint = serde.fromBytes(serde.toBytes(checkpoint));
    // Validate input checkpoints
    assertEquals(checkpointId, deserializedCheckpoint.getCheckpointId());
    assertEquals("1", deserializedCheckpoint.getOffsets().get(systemStreamPartition));
    assertEquals(1, deserializedCheckpoint.getOffsets().size());
    // Validate state checkpoints
    assertEquals(2, deserializedCheckpoint.getStateCheckpointMarkers().size());
    assertTrue(deserializedCheckpoint.getStateCheckpointMarkers().containsKey("factory1"));
    assertEquals(stateCheckpointMarkersMap, deserializedCheckpoint.getStateCheckpointMarkers().get("factory1"));
    assertTrue(deserializedCheckpoint.getStateCheckpointMarkers().containsKey("factory2"));
    assertEquals(stateCheckpointMarkersMap2, deserializedCheckpoint.getStateCheckpointMarkers().get("factory2"));
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) CheckpointId(org.apache.samza.checkpoint.CheckpointId) Map(java.util.Map) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 3 with CheckpointV2

use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.

the class TestTaskStorageCommitManager method testPersistToFileSystemCheckpointV1AndV2Checkpoint.

@Test
public void testPersistToFileSystemCheckpointV1AndV2Checkpoint() throws IOException {
    ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
    StorageEngine mockLPStore = mock(StorageEngine.class);
    StoreProperties lpStoreProps = mock(StoreProperties.class);
    when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
    when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
    when(lpStoreProps.isDurableStore()).thenReturn(true);
    Path mockPath = mock(Path.class);
    when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
    StorageEngine mockPStore = mock(StorageEngine.class);
    StoreProperties pStoreProps = mock(StoreProperties.class);
    when(mockPStore.getStoreProperties()).thenReturn(pStoreProps);
    when(pStoreProps.isPersistedToDisk()).thenReturn(true);
    when(pStoreProps.isDurableStore()).thenReturn(false);
    StorageEngine mockLIStore = mock(StorageEngine.class);
    StoreProperties liStoreProps = mock(StoreProperties.class);
    when(mockLIStore.getStoreProperties()).thenReturn(liStoreProps);
    when(liStoreProps.isPersistedToDisk()).thenReturn(false);
    when(liStoreProps.isDurableStore()).thenReturn(true);
    StorageEngine mockIStore = mock(StorageEngine.class);
    StoreProperties iStoreProps = mock(StoreProperties.class);
    when(mockIStore.getStoreProperties()).thenReturn(iStoreProps);
    when(iStoreProps.isPersistedToDisk()).thenReturn(false);
    when(iStoreProps.isDurableStore()).thenReturn(false);
    Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", mockLPStore, "persistentStore", mockPStore, "loggedInMemStore", mockLIStore, "inMemStore", mockIStore);
    Partition changelogPartition = new Partition(0);
    SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
    SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);
    Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream, "loggedInMemStore", new SystemStream("system", "stream"));
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    File durableStoreDir = new File("durableStorePath");
    when(storageManagerUtil.getTaskStoreDir(eq(durableStoreDir), any(), any(), any())).thenReturn(durableStoreDir);
    TaskName taskName = new TaskName("task");
    TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
    Timer checkpointTimer = mock(Timer.class);
    when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);
    when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);
    TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(), containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(), storageManagerUtil, durableStoreDir, metrics));
    doNothing().when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
    when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
        File file = invocation.getArgumentAt(0, File.class);
        CheckpointId checkpointId = invocation.getArgumentAt(1, CheckpointId.class);
        return file + "-" + checkpointId;
    });
    CheckpointId newCheckpointId = CheckpointId.create();
    String newestOffset = "1";
    KafkaChangelogSSPOffset kafkaChangelogSSPOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
    Map<SystemStreamPartition, String> offsetsJava = ImmutableMap.of(changelogSSP, kafkaChangelogSSPOffset.toString());
    commitManager.init();
    // invoke persist to file system for v2 checkpoint
    commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(offsetsJava));
    verify(commitManager).writeChangelogOffsetFiles(offsetsJava);
    // evoked twice, for OFFSET-V1 and OFFSET-V2
    verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(durableStoreDir));
    File checkpointFile = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, kafkaChangelogSSPOffset.getCheckpointId())).toFile();
    verify(commitManager).writeChangelogOffsetFile(eq("loggedPersistentStore"), eq(changelogSSP), eq(newestOffset), eq(checkpointFile));
    Map<String, String> storeSCM = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
    CheckpointV2 checkpoint = new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));
    // invoke persist to file system for v2 checkpoint
    commitManager.writeCheckpointToStoreDirectories(checkpoint);
    // Validate only durable and persisted stores are persisted
    // This should be evoked twice, for checkpointV1 and checkpointV2
    verify(storageManagerUtil, times(2)).getTaskStoreDir(eq(durableStoreDir), eq("loggedPersistentStore"), eq(taskName), any());
    File checkpointPath = Paths.get(storageManagerUtil.getStoreCheckpointDir(durableStoreDir, newCheckpointId)).toFile();
    verify(storageManagerUtil).writeCheckpointV2File(eq(checkpointPath), eq(checkpoint));
}
Also used : Path(java.nio.file.Path) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Mockito.spy(org.mockito.Mockito.spy) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Answer(org.mockito.stubbing.Answer) Mockito.doThrow(org.mockito.Mockito.doThrow) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Assert.fail(org.junit.Assert.fail) Mockito.anyLong(org.mockito.Mockito.anyLong) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Timer(org.apache.samza.metrics.Timer) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) Mockito.doNothing(org.mockito.Mockito.doNothing) Mockito.when(org.mockito.Mockito.when) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) TaskMode(org.apache.samza.job.model.TaskMode) Mockito.never(org.mockito.Mockito.never) FileFilter(java.io.FileFilter) Paths(java.nio.file.Paths) ForkJoinPool(java.util.concurrent.ForkJoinPool) Optional(java.util.Optional) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemStream(org.apache.samza.system.SystemStream) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Timer(org.apache.samza.metrics.Timer) TaskName(org.apache.samza.container.TaskName) CheckpointId(org.apache.samza.checkpoint.CheckpointId) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 4 with CheckpointV2

use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.

the class TestTaskStorageCommitManager method testWriteChangelogOffsetFilesV2andV1.

@Test
public void testWriteChangelogOffsetFilesV2andV1() throws IOException {
    Map<String, Map<SystemStreamPartition, String>> mockFileSystem = new HashMap<>();
    ContainerStorageManager containerStorageManager = mock(ContainerStorageManager.class);
    Map<String, CheckpointV2> mockCheckpointFileSystem = new HashMap<>();
    StorageEngine mockLPStore = mock(StorageEngine.class);
    StoreProperties lpStoreProps = mock(StoreProperties.class);
    when(mockLPStore.getStoreProperties()).thenReturn(lpStoreProps);
    when(lpStoreProps.isPersistedToDisk()).thenReturn(true);
    when(lpStoreProps.isDurableStore()).thenReturn(true);
    Path mockPath = mock(Path.class);
    when(mockLPStore.checkpoint(any())).thenReturn(Optional.of(mockPath));
    TaskInstanceMetrics metrics = mock(TaskInstanceMetrics.class);
    Timer checkpointTimer = mock(Timer.class);
    when(metrics.storeCheckpointNs()).thenReturn(checkpointTimer);
    java.util.Map<String, StorageEngine> taskStores = ImmutableMap.of("loggedPersistentStore", mockLPStore);
    Partition changelogPartition = new Partition(0);
    SystemStream changelogSystemStream = new SystemStream("changelogSystem", "changelogStream");
    SystemStreamPartition changelogSSP = new SystemStreamPartition(changelogSystemStream, changelogPartition);
    java.util.Map<String, SystemStream> storeChangelogsStreams = ImmutableMap.of("loggedPersistentStore", changelogSystemStream);
    StorageManagerUtil storageManagerUtil = mock(StorageManagerUtil.class);
    File tmpTestPath = new File("store-checkpoint-test");
    when(storageManagerUtil.getTaskStoreDir(eq(tmpTestPath), eq("loggedPersistentStore"), any(), any())).thenReturn(tmpTestPath);
    TaskName taskName = new TaskName("task");
    when(containerStorageManager.getAllStores(taskName)).thenReturn(taskStores);
    TaskStorageCommitManager commitManager = spy(new TaskStorageCommitManager(taskName, Collections.emptyMap(), containerStorageManager, storeChangelogsStreams, changelogPartition, null, null, ForkJoinPool.commonPool(), storageManagerUtil, tmpTestPath, metrics));
    doAnswer(invocation -> {
        String fileDir = invocation.getArgumentAt(3, File.class).getName();
        SystemStreamPartition ssp = invocation.getArgumentAt(1, SystemStreamPartition.class);
        String offset = invocation.getArgumentAt(2, String.class);
        if (mockFileSystem.containsKey(fileDir)) {
            mockFileSystem.get(fileDir).put(ssp, offset);
        } else {
            Map<SystemStreamPartition, String> sspOffsets = new HashMap<>();
            sspOffsets.put(ssp, offset);
            mockFileSystem.put(fileDir, sspOffsets);
        }
        return null;
    }).when(commitManager).writeChangelogOffsetFile(any(), any(), any(), any());
    doAnswer(invocation -> {
        String storeDir = invocation.getArgumentAt(0, File.class).getName();
        CheckpointV2 checkpointV2 = invocation.getArgumentAt(1, CheckpointV2.class);
        mockCheckpointFileSystem.put(storeDir, checkpointV2);
        return null;
    }).when(storageManagerUtil).writeCheckpointV2File(any(), any());
    when(storageManagerUtil.getStoreCheckpointDir(any(File.class), any(CheckpointId.class))).thenAnswer((Answer<String>) invocation -> {
        File file = invocation.getArgumentAt(0, File.class);
        CheckpointId checkpointId = invocation.getArgumentAt(1, CheckpointId.class);
        return file + "-" + checkpointId;
    });
    CheckpointId newCheckpointId = CheckpointId.create();
    String newestOffset = "1";
    KafkaChangelogSSPOffset kafkaChangelogSSPOffset = new KafkaChangelogSSPOffset(newCheckpointId, newestOffset);
    java.util.Map<SystemStreamPartition, String> offsetsJava = ImmutableMap.of(changelogSSP, kafkaChangelogSSPOffset.toString());
    commitManager.init();
    // invoke persist to file system for v1 checkpoint
    commitManager.writeCheckpointToStoreDirectories(new CheckpointV1(offsetsJava));
    assertEquals(2, mockFileSystem.size());
    // check if v2 offsets are written correctly
    String v2FilePath = storageManagerUtil.getStoreCheckpointDir(tmpTestPath, newCheckpointId);
    assertTrue(mockFileSystem.containsKey(v2FilePath));
    assertTrue(mockFileSystem.get(v2FilePath).containsKey(changelogSSP));
    assertEquals(1, mockFileSystem.get(v2FilePath).size());
    assertEquals(newestOffset, mockFileSystem.get(v2FilePath).get(changelogSSP));
    // check if v1 offsets are written correctly
    String v1FilePath = tmpTestPath.getPath();
    assertTrue(mockFileSystem.containsKey(v1FilePath));
    assertTrue(mockFileSystem.get(v1FilePath).containsKey(changelogSSP));
    assertEquals(1, mockFileSystem.get(v1FilePath).size());
    assertEquals(newestOffset, mockFileSystem.get(v1FilePath).get(changelogSSP));
    java.util.Map<String, String> storeSCM = ImmutableMap.of("loggedPersistentStore", "system;loggedPersistentStoreStream;1", "persistentStore", "system;persistentStoreStream;1", "loggedInMemStore", "system;loggedInMemStoreStream;1", "inMemStore", "system;inMemStoreStream;1");
    CheckpointV2 checkpoint = new CheckpointV2(newCheckpointId, Collections.emptyMap(), Collections.singletonMap("factory", storeSCM));
    // invoke persist to file system with checkpoint v2
    commitManager.writeCheckpointToStoreDirectories(checkpoint);
    assertTrue(mockCheckpointFileSystem.containsKey(v2FilePath));
    assertEquals(checkpoint, mockCheckpointFileSystem.get(v2FilePath));
    assertTrue(mockCheckpointFileSystem.containsKey(v1FilePath));
    assertEquals(checkpoint, mockCheckpointFileSystem.get(v1FilePath));
    assertEquals(2, mockCheckpointFileSystem.size());
    CheckpointV2 updatedCheckpoint = new CheckpointV2(newCheckpointId, ImmutableMap.of(new SystemStreamPartition("inputSystem", "inputStream", changelogPartition), "5"), Collections.singletonMap("factory", storeSCM));
    commitManager.writeCheckpointToStoreDirectories(updatedCheckpoint);
    assertEquals(updatedCheckpoint, mockCheckpointFileSystem.get(v2FilePath));
    assertEquals(updatedCheckpoint, mockCheckpointFileSystem.get(v1FilePath));
    assertEquals(2, mockCheckpointFileSystem.size());
}
Also used : CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Mockito.spy(org.mockito.Mockito.spy) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) Answer(org.mockito.stubbing.Answer) Mockito.doThrow(org.mockito.Mockito.doThrow) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) Map(java.util.Map) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Assert.fail(org.junit.Assert.fail) Mockito.anyLong(org.mockito.Mockito.anyLong) Path(java.nio.file.Path) MapConfig(org.apache.samza.config.MapConfig) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) TaskName(org.apache.samza.container.TaskName) ImmutableMap(com.google.common.collect.ImmutableMap) Timer(org.apache.samza.metrics.Timer) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) IOException(java.io.IOException) Checkpoint(org.apache.samza.checkpoint.Checkpoint) Test(org.junit.Test) Mockito.times(org.mockito.Mockito.times) Mockito.doNothing(org.mockito.Mockito.doNothing) Mockito.when(org.mockito.Mockito.when) File(java.io.File) SamzaException(org.apache.samza.SamzaException) CheckpointId(org.apache.samza.checkpoint.CheckpointId) Mockito.verify(org.mockito.Mockito.verify) Matchers.any(org.mockito.Matchers.any) TaskMode(org.apache.samza.job.model.TaskMode) Mockito.never(org.mockito.Mockito.never) FileFilter(java.io.FileFilter) Paths(java.nio.file.Paths) ForkJoinPool(java.util.concurrent.ForkJoinPool) Optional(java.util.Optional) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Mockito.eq(org.mockito.Mockito.eq) Mockito.mock(org.mockito.Mockito.mock) HashMap(java.util.HashMap) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) CheckpointV1(org.apache.samza.checkpoint.CheckpointV1) KafkaChangelogSSPOffset(org.apache.samza.checkpoint.kafka.KafkaChangelogSSPOffset) Path(java.nio.file.Path) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Partition(org.apache.samza.Partition) SystemStream(org.apache.samza.system.SystemStream) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) Timer(org.apache.samza.metrics.Timer) TaskName(org.apache.samza.container.TaskName) CheckpointId(org.apache.samza.checkpoint.CheckpointId) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 5 with CheckpointV2

use of org.apache.samza.checkpoint.CheckpointV2 in project samza by apache.

the class ContainerStorageManager method getBackendFactoryStoreNames.

/**
 * Return a map of backend factory names to set of stores that should be restored using it
 */
@VisibleForTesting
Map<String, Set<String>> getBackendFactoryStoreNames(Checkpoint checkpoint, Set<String> storeNames, StorageConfig storageConfig) {
    // backendFactoryName -> set(storeNames)
    Map<String, Set<String>> backendFactoryStoreNames = new HashMap<>();
    if (checkpoint != null && checkpoint.getVersion() == 1) {
        // Only restore stores with changelog streams configured
        Set<String> changelogStores = storeNames.stream().filter(storeName -> storageConfig.getChangelogStream(storeName).isPresent()).collect(Collectors.toSet());
        // Default to changelog backend factory when using checkpoint v1 for backwards compatibility
        if (!changelogStores.isEmpty()) {
            backendFactoryStoreNames.put(StorageConfig.KAFKA_STATE_BACKEND_FACTORY, changelogStores);
        }
        if (storeNames.size() > changelogStores.size()) {
            Set<String> nonChangelogStores = storeNames.stream().filter(storeName -> !changelogStores.contains(storeName)).collect(Collectors.toSet());
            LOG.info("non-Side input stores: {}, do not have a configured store changelogs for checkpoint V1," + "restore for the store will be skipped", nonChangelogStores);
        }
    } else if (checkpoint == null || checkpoint.getVersion() == 2) {
        // Extract the state checkpoint markers if checkpoint exists
        Map<String, Map<String, String>> stateCheckpointMarkers = checkpoint == null ? Collections.emptyMap() : ((CheckpointV2) checkpoint).getStateCheckpointMarkers();
        // Find stores associated to each state backend factory
        storeNames.forEach(storeName -> {
            List<String> storeFactories = storageConfig.getStoreRestoreFactories(storeName);
            if (storeFactories.isEmpty()) {
                // If the restore factory is not configured for the store and the store does not have a changelog topic
                LOG.info("non-Side input store: {}, does not have a configured restore factories nor store changelogs," + "restore for the store will be skipped", storeName);
            } else {
                // Search the ordered list for the first matched state backend factory in the checkpoint
                // If the checkpoint does not exist or state checkpoint markers does not exist, we match the first configured
                // restore manager
                Optional<String> factoryNameOpt = storeFactories.stream().filter(factoryName -> stateCheckpointMarkers.containsKey(factoryName) && stateCheckpointMarkers.get(factoryName).containsKey(storeName)).findFirst();
                String factoryName;
                if (factoryNameOpt.isPresent()) {
                    factoryName = factoryNameOpt.get();
                } else {
                    // Restore factories configured but no checkpoints found
                    // Use first configured restore factory
                    factoryName = storeFactories.get(0);
                    LOG.warn("No matching checkpoints found for configured factories: {}, " + "defaulting to using the first configured factory with no checkpoints", storeFactories);
                }
                if (!backendFactoryStoreNames.containsKey(factoryName)) {
                    backendFactoryStoreNames.put(factoryName, new HashSet<>());
                }
                backendFactoryStoreNames.get(factoryName).add(storeName);
            }
        });
    } else {
        throw new SamzaException(String.format("Unsupported checkpoint version %s", checkpoint.getVersion()));
    }
    return backendFactoryStoreNames;
}
Also used : StreamMetadataCache(org.apache.samza.system.StreamMetadataCache) SerdeUtils(org.apache.samza.table.utils.SerdeUtils) LoggerFactory(org.slf4j.LoggerFactory) TaskModel(org.apache.samza.job.model.TaskModel) Future(java.util.concurrent.Future) SystemConsumer(org.apache.samza.system.SystemConsumer) SamzaContainerMetrics(org.apache.samza.container.SamzaContainerMetrics) Map(java.util.Map) TaskInstanceCollector(org.apache.samza.task.TaskInstanceCollector) RoundRobinChooserFactory(org.apache.samza.system.chooser.RoundRobinChooserFactory) Path(java.nio.file.Path) StorageConfig(org.apache.samza.config.StorageConfig) RunLoopTask(org.apache.samza.container.RunLoopTask) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) DefaultChooser(org.apache.samza.system.chooser.DefaultChooser) Checkpoint(org.apache.samza.checkpoint.Checkpoint) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Optional(java.util.Optional) Config(org.apache.samza.config.Config) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) SystemAdmins(org.apache.samza.system.SystemAdmins) ScalaJavaUtil(org.apache.samza.util.ScalaJavaUtil) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) MessageChooser(org.apache.samza.system.chooser.MessageChooser) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) CompletableFuture(java.util.concurrent.CompletableFuture) Serde(org.apache.samza.serializers.Serde) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Function(java.util.function.Function) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Gauge(org.apache.samza.metrics.Gauge) ImmutableList(com.google.common.collect.ImmutableList) SerdeManager(org.apache.samza.serializers.SerdeManager) MessageCollector(org.apache.samza.task.MessageCollector) CheckpointManager(org.apache.samza.checkpoint.CheckpointManager) SystemStream(org.apache.samza.system.SystemStream) RunLoop(org.apache.samza.container.RunLoop) SystemConsumersMetrics(org.apache.samza.system.SystemConsumersMetrics) ExecutorService(java.util.concurrent.ExecutorService) MapUtils(org.apache.commons.collections4.MapUtils) JavaConversions(scala.collection.JavaConversions) TaskInstanceMetrics(org.apache.samza.container.TaskInstanceMetrics) Logger(org.slf4j.Logger) TaskConfig(org.apache.samza.config.TaskConfig) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SystemFactory(org.apache.samza.system.SystemFactory) Clock(org.apache.samza.util.Clock) SystemConsumers(org.apache.samza.system.SystemConsumers) File(java.io.File) SamzaException(org.apache.samza.SamzaException) TimeUnit(java.util.concurrent.TimeUnit) TaskMode(org.apache.samza.job.model.TaskMode) Entry(org.apache.samza.storage.kv.Entry) ReflectionUtil(org.apache.samza.util.ReflectionUtil) ContainerModel(org.apache.samza.job.model.ContainerModel) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) CheckpointV2(org.apache.samza.checkpoint.CheckpointV2) Set(java.util.Set) HashSet(java.util.HashSet) Optional(java.util.Optional) HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) HashMap(java.util.HashMap) SamzaException(org.apache.samza.SamzaException) HashSet(java.util.HashSet) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

CheckpointV2 (org.apache.samza.checkpoint.CheckpointV2)24 Test (org.junit.Test)18 HashMap (java.util.HashMap)13 Checkpoint (org.apache.samza.checkpoint.Checkpoint)13 Map (java.util.Map)12 SamzaException (org.apache.samza.SamzaException)11 File (java.io.File)10 CheckpointId (org.apache.samza.checkpoint.CheckpointId)10 ImmutableMap (com.google.common.collect.ImmutableMap)9 Collections (java.util.Collections)9 CompletableFuture (java.util.concurrent.CompletableFuture)9 TaskName (org.apache.samza.container.TaskName)9 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)9 Optional (java.util.Optional)8 CheckpointV1 (org.apache.samza.checkpoint.CheckpointV1)8 Config (org.apache.samza.config.Config)8 MapConfig (org.apache.samza.config.MapConfig)8 TaskMode (org.apache.samza.job.model.TaskMode)8 SystemStream (org.apache.samza.system.SystemStream)8 IOException (java.io.IOException)7